From 10c879f65ca1751625f1b9955f5dcf4a5ca231ec Mon Sep 17 00:00:00 2001 From: WrathfulSpatula Date: Sat, 26 Oct 2024 11:01:52 -0400 Subject: [PATCH] Fix CUDA --- src/common/parallel_for.cpp | 5 ++--- src/common/qengine.cu | 20 +++++++------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/common/parallel_for.cpp b/src/common/parallel_for.cpp index fbe2368c9..a43d8ff58 100644 --- a/src/common/parallel_for.cpp +++ b/src/common/parallel_for.cpp @@ -31,9 +31,8 @@ namespace Qrack { ParallelFor::ParallelFor() #if ENABLE_ENV_VARS - : pStride(getenv("QRACK_PSTRIDEPOW") - ? pow2Ocl((bitLenInt)std::stoi(std::string(getenv("QRACK_PSTRIDEPOW")))) - : pow2Ocl((bitLenInt)PSTRIDEPOW)) + : pStride(getenv("QRACK_PSTRIDEPOW") ? pow2Ocl((bitLenInt)std::stoi(std::string(getenv("QRACK_PSTRIDEPOW")))) + : pow2Ocl((bitLenInt)PSTRIDEPOW)) #else : pStride(pow2Ocl((bitLenInt)PSTRIDEPOW)) #endif diff --git a/src/common/qengine.cu b/src/common/qengine.cu index ab37e012b..ee758416f 100644 --- a/src/common/qengine.cu +++ b/src/common/qengine.cu @@ -395,7 +395,7 @@ __global__ void phasemask(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPtr) const bitCapIntOcl maxI = bitCapIntOclPtr[0]; const bitCapIntOcl mask = bitCapIntOclPtr[1]; const bitCapIntOcl nPhases = bitCapIntOclPtr[2]; - const real1 phaseAngle = -PI_R1 / bitCapIntOclPtr[3]; + const real1 phaseAngle = (real1)(-PI_R1_CUDA / bitCapIntOclPtr[3]); for (bitCapIntOcl lcv = ID; lcv < maxI; lcv += Nthreads) { bitCapIntOcl popCount = 0; @@ -665,13 +665,10 @@ __global__ void decomposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPt for (bitCapIntOcl k = 0U; k < partPower; k++) { bitCapIntOcl l = j | (k << start); - qCudaCmplx amp = stateVec[l]; - qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp); + const qCudaCmplx amp = stateVec[l]; + const qCudaReal1_f nrm = dot(amp, amp); partProb += nrm; - - if (nrm >= REAL1_EPSILON_CUDA) { - partStateAngle[k] = qCudaArg(amp); - } + partStateAngle[k] += arg(amp) * nrm; } remainderStateProb[lcv] = partProb; @@ -687,13 +684,10 @@ __global__ void decomposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPt l |= (k ^ l) << len; l = j | l; - qCudaCmplx amp = stateVec[l]; - qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp); + const qCudaCmplx amp = stateVec[l]; + const qCudaReal1_f nrm = dot(amp, amp); partProb += nrm; - - if (nrm >= REAL1_EPSILON_CUDA) { - remainderStateAngle[k] = qCudaArg(amp); - } + remainderStateAngle[k] += arg(amp) * nrm; } partStateProb[lcv] = partProb;