Simplify cudacompat layer to use a 1-dimensional grid (#586)

Remove the possibility of changing the grid size used by the cms::cudacompat layer, and make it a constant equal to {1, 1, 1}. This avoids a thread-related problem caused by TBB using worker threads where the grid size had not been initialised. The kernel for pixel clustering need to be rewritten to support a one-dimensional grid to run on the CPU. Currently they are only used on the GPU in the Patatrack workflows, but they are exercised on the CPU by the gpuClustering_t tests; those tests have been commented out until the kernels can be updated.
cms-patatrack · Dec 29, 2020 · ddf8b5a · ddf8b5a
1 parent d1ec999
commit ddf8b5a
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 10 deletions.
diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h
@@ -10,11 +10,10 @@
 #include <vector>
 
 #ifdef __CUDACC__
-
-#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
 #include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
 #endif
 
 #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h"
@@ -33,7 +32,6 @@ int main(void) {
   auto h_x = std::make_unique<uint16_t[]>(numElements);
   auto h_y = std::make_unique<uint16_t[]>(numElements);
   auto h_adc = std::make_unique<uint16_t[]>(numElements);
-
   auto h_clus = std::make_unique<int[]>(numElements);
 
 #ifdef __CUDACC__
@@ -46,11 +44,9 @@ int main(void) {
   auto d_clusInModule = cms::cuda::make_device_unique<uint32_t[]>(MaxNumModules, nullptr);
   auto d_moduleId = cms::cuda::make_device_unique<uint32_t[]>(MaxNumModules, nullptr);
 #else
-
   auto h_moduleStart = std::make_unique<uint32_t[]>(MaxNumModules + 1);
   auto h_clusInModule = std::make_unique<uint32_t[]>(MaxNumModules);
   auto h_moduleId = std::make_unique<uint32_t[]>(MaxNumModules);
-
 #endif
 
   // later random number
@@ -301,9 +297,12 @@ int main(void) {
 
     cudaDeviceSynchronize();
 #else
+
     h_moduleStart[0] = nModules;
     countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n);
     memset(h_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t));
+#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE
+    // FIXME the findClus kernel should be rewritten to avoid relying on a predefined grid size
     gridDim.x = MaxNumModules;  //not needed in the kernel for this specific case;
     assert(blockIdx.x == 0);
     for (; blockIdx.x < gridDim.x; ++blockIdx.x)
@@ -315,7 +314,7 @@ int main(void) {
                h_moduleId.get(),
                h_clus.get(),
                n);
-    resetGrid();
+#endif  // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE
 
     nModules = h_moduleStart[0];
     auto nclus = h_clusInModule.get();
@@ -330,12 +329,14 @@ int main(void) {
     if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0))
       std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl;
 
+#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE
+    // FIXME the clusterChargeCut kernel should be rewritten to avoid relying on a predefined grid size
     gridDim.x = MaxNumModules;  // no needed in the kernel for in this specific case
     assert(blockIdx.x == 0);
     for (; blockIdx.x < gridDim.x; ++blockIdx.x)
       clusterChargeCut(
           h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n);
-    resetGrid();
+#endif  // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE
 
 #endif
 

diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc
@@ -209,8 +209,6 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
     // filled creates view
     SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()};
     assert(digiView.adc(0) != 0);
-    // not needed...
-    cms::cudacompat::resetGrid();
     // we run on blockId.x==0
     gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view());
     for (auto h = fc; h < lc; ++h)