Skip to content

Commit

Permalink
GPU: Fix some problems when running only selected reconstruction steps on GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Oct 25, 2023
1 parent 2bc8ac3 commit bc2e06c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
37 changes: 24 additions & 13 deletions GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
}
GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, GetProcessingSettings().nTPCClustererLanes)))
for (int lane = 0; lane < maxLane; lane++) {
if (fragment.index != 0) {
if (doGPU && fragment.index != 0) {
SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished
}

Expand Down Expand Up @@ -713,13 +713,14 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
}
DoDebugAndDump(RecoStep::TPCClusterFinding, 0, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges", doGPU);

if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) {
TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane);
SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane);
if (doGPU) {
if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) {
TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane);
SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane);
}
SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory
}

SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory

if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) {
clusterer.mPmemory->counters.nPositions = 0;
continue;
Expand Down Expand Up @@ -776,7 +777,9 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, GetProcessingSettings().nTPCClustererLanes)))
for (int lane = 0; lane < maxLane; lane++) {
unsigned int iSlice = iSliceBase + lane;
SynchronizeStream(lane);
if (doGPU) {
SynchronizeStream(lane);
}
if (mIOPtrs.tpcZS) {
CfFragment f = fragment.next();
int nextSlice = iSlice;
Expand Down Expand Up @@ -826,7 +829,9 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
unsigned int iSlice = iSliceBase + lane;
GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice];
GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer;
SynchronizeStream(lane);
if (doGPU) {
SynchronizeStream(lane);
}
if (clusterer.mPmemory->counters.nPeaks == 0) {
continue;
}
Expand All @@ -843,7 +848,9 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
unsigned int iSlice = iSliceBase + lane;
GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice];
GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer;
SynchronizeStream(lane);
if (doGPU) {
SynchronizeStream(lane);
}

if (fragment.index == 0) {
runKernel<GPUMemClean16>(GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, transferRunning[lane] == 1 ? &mEvents->stream[lane] : nullptr}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow));
Expand All @@ -860,7 +867,9 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
runKernel<GPUTPCCFClusterizer>(GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}, {}, 0);
if (doGPU && propagateMCLabels) {
TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane);
SynchronizeStream(lane);
if (doGPU) {
SynchronizeStream(lane);
}
runKernel<GPUTPCCFClusterizer>(GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}, {}, 1);
}
if (GetProcessingSettings().debugLevel >= 3) {
Expand All @@ -881,7 +890,9 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
for (int lane = 0; lane < maxLane; lane++) {
unsigned int iSlice = iSliceBase + lane;
std::fill(&tmpNative->nClusters[iSlice][0], &tmpNative->nClusters[iSlice][0] + MAXGLOBALPADROW, 0);
SynchronizeStream(lane);
if (doGPU) {
SynchronizeStream(lane);
}
GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice];
GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer;

Expand Down Expand Up @@ -1014,10 +1025,10 @@ int GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceClusterNativeAccess, 0);
}
if (synchronizeOutput) {
if (doGPU && synchronizeOutput) {
SynchronizeStream(mRec->NStreams() - 1);
}
if (synchronizeCalibUpdate) {
if (doGPU && synchronizeCalibUpdate) {
SynchronizeStream(0);
}
if (buildNativeHost && GetProcessingSettings().debugLevel >= 4) {
Expand Down
8 changes: 5 additions & 3 deletions GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,10 @@ int GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
}
if (param().rec.tpc.mergeLoopersAfterburner) {
runKernel<GPUTPCGMMergerMergeLoopers, 0>(doGPUall ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType), krnlRunRangeNone, krnlEventNone);
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
if (doGPU) {
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0);
SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel<GPUTPCGMMergerMergeLoopers, 1>
}
runKernel<GPUTPCGMMergerMergeLoopers, 1>(GetGridAuto(0, deviceType), krnlRunRangeNone, krnlEventNone);
runKernel<GPUTPCGMMergerMergeLoopers, 2>(doGPUall ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType), krnlRunRangeNone, krnlEventNone);
}
Expand Down Expand Up @@ -315,7 +317,7 @@ int GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
mRec->PopNonPersistentMemory(RecoStep::TPCMerging, qStr2Tag("TPCMERG2"));
}
#endif
if (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU) {
if (doGPU && (synchronizeOutput || GetProcessingSettings().clearO2OutputFromGPU)) {
SynchronizeStream(outputStream);
}
if (GetProcessingSettings().clearO2OutputFromGPU) {
Expand Down

0 comments on commit bc2e06c

Please sign in to comment.