Skip to content

Commit

Permalink
Debug OpenCL/CUDA
Browse files (browse the repository at this point in the history)
  • Loading branch information
WrathfulSpatula committed Nov 2, 2024
1 parent 8a7fdea commit 052424a
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 19 deletions.
12 changes: 6 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,6 @@ else (ENABLE_SSE3)
set(SSE3_MACRO "0")
endif (ENABLE_SSE3)

if (QBCAPPOW GREATER 6)
set(QRACK_CUDA_COMPILE_OPTS -O3 -use_fast_math -Xcompiler -fpermissive --ptxas-options -O3,)
else (QBCAPPOW GREATER 6)
set(QRACK_CUDA_COMPILE_OPTS -O3 -use_fast_math -Werror all-warnings --ptxas-options -O3,)
endif (QBCAPPOW GREATER 6)

if (MSVC)
if (CPP_STD GREATER_EQUAL 23)
set(QRACK_CPP_STD_OPT /std:c++23)
Expand All @@ -197,6 +191,12 @@ else (MSVC)
endif ()
endif (MSVC)

if (QBCAPPOW GREATER 6)
set(QRACK_CUDA_COMPILE_OPTS -O3 -use_fast_math -Xcompiler -fpermissive ${QRACK_CPP_STD_OPT} --ptxas-options -O3,)
else (QBCAPPOW GREATER 6)
set(QRACK_CUDA_COMPILE_OPTS -O3 -use_fast_math -Werror all-warnings ${QRACK_CPP_STD_OPT} --ptxas-options -O3,)
endif (QBCAPPOW GREATER 6)

if (MSVC)
set(QRACK_COMPILE_OPTS ${QRACK_CPP_STD_OPT} /Wall)
set(TEST_COMPILE_OPTS ${QRACK_CPP_STD_OPT} /Wall)
Expand Down
13 changes: 10 additions & 3 deletions src/common/qengine.cl
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,9 @@ void kernel decomposeprob(global cmplx* stateVec, constant bitCapIntOcl4* bitCap
const cmplx amp = stateVec[j | (k << start)];
const real1 nrm = dot(amp, amp);
partProb += nrm;
partStateAngle[k] += arg(amp) * nrm;
if (nrm > REAL1_EPSILON) {
partStateAngle[k] += arg(amp) * nrm;
}
}

remainderStateProb[lcv] = partProb;
Expand All @@ -606,7 +608,9 @@ void kernel decomposeprob(global cmplx* stateVec, constant bitCapIntOcl4* bitCap
const cmplx amp = stateVec[l];
const real1 nrm = dot(amp, amp);
partProb += nrm;
remainderStateAngle[k] += arg(amp) * nrm;
if (nrm > REAL1_EPSILON) {
remainderStateAngle[k] += arg(amp) * nrm;
}
}

if (partProb > REAL1_EPSILON) {
Expand Down Expand Up @@ -667,7 +671,10 @@ void kernel disposeprob(global cmplx* stateVec, constant bitCapIntOcl4* bitCapIn
l |= j | ((k ^ l) << len);

const cmplx amp = stateVec[l];
remainderStateAngle[k] += arg(amp) * dot(amp, amp);
const real1 nrm = dot(amp, amp);
if (nrm > REAL1_EPSILON) {
remainderStateAngle[k] += arg(amp) * nrm;
}
}
}

Expand Down
16 changes: 9 additions & 7 deletions src/common/qengine.cu
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,9 @@ __global__ void decomposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPt
const qCudaCmplx amp = stateVec[j | (k << start)];
const qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp);
partProb += nrm;
partStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
if (nrm > REAL1_EPSILON_CUDA) {
partStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
}
}

remainderStateProb[lcv] = partProb;
Expand All @@ -695,7 +697,9 @@ __global__ void decomposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPt
const qCudaCmplx amp = stateVec[l];
const qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp);
partProb += nrm;
remainderStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
if (nrm > REAL1_EPSILON_CUDA) {
remainderStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
}
}

if (partProb > REAL1_EPSILON_CUDA) {
Expand Down Expand Up @@ -734,8 +738,6 @@ __global__ void disposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPtr,
const bitLenInt start = (bitLenInt)bitCapIntOclPtr[2];
const bitCapIntOcl startMask = (1U << start) - 1U;
const bitLenInt len = bitCapIntOclPtr[3];
const qCudaReal1_f angleThresh = -8 * PI_R1_CUDA;
const qCudaReal1_f initAngle = -16 * PI_R1_CUDA;

for (bitCapIntOcl lcv = ID; lcv < remainderPower; lcv += Nthreads) {
bitCapIntOcl j = lcv & startMask;
Expand All @@ -755,15 +757,15 @@ __global__ void disposeprob(qCudaCmplx* stateVec, bitCapIntOcl* bitCapIntOclPtr,
for (bitCapIntOcl lcv = ID; lcv < partPower; lcv += Nthreads) {
const bitCapIntOcl j = lcv << start;

qCudaReal1_f firstAngle = initAngle;

for (bitCapIntOcl k = 0U; k < remainderPower; ++k) {
bitCapIntOcl l = k & startMask;
l |= j | ((k ^ l) << len);

const qCudaCmplx amp = stateVec[l];
const qCudaReal1_f nrm = (qCudaReal1_f)qCudaDot(amp, amp);
remainderStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
if (nrm > REAL1_EPSILON_CUDA) {
remainderStateAngle[k] += qCudaArg(amp) * (qCudaReal1)nrm;
}
}
}

Expand Down
13 changes: 10 additions & 3 deletions src/qengine/state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1183,7 +1183,9 @@ void QEngineCPU::DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUP
const complex amp = stateVec->read(j | (k << start));
const real1 nrm = norm(amp);
remainderStateProb[lcv] += nrm;
partStateAngle[k] += arg(amp) * nrm;
if (nrm > REAL1_EPSILON) {
partStateAngle[k] += arg(amp) * nrm;
}
}
});

Expand All @@ -1198,7 +1200,9 @@ void QEngineCPU::DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUP
const complex amp = stateVec->read(l);
const real1 nrm = norm(amp);
partStateProb[lcv] += nrm;
remainderStateAngle[k] += arg(amp) * nrm;
if (nrm > REAL1_EPSILON) {
remainderStateAngle[k] += arg(amp) * nrm;
}
}

const real1 prob = partStateProb[lcv];
Expand Down Expand Up @@ -1231,7 +1235,10 @@ void QEngineCPU::DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUP
l |= j | ((k ^ l) << length);

const complex amp = stateVec->read(l);
remainderStateAngle[k] += arg(amp) * norm(amp);
const real1 nrm = norm(amp);
if (nrm > REAL1_EPSILON) {
remainderStateAngle[k] += arg(amp) * nrm;
}
}
});
par_for(0U, remainderPower, [&](const bitCapIntOcl& lcv, const unsigned& cpu) {
Expand Down

0 comments on commit 052424a

Please sign in to comment.