Skip to content

Commit

Permalink
Simplify cudacompat layer to use a 1-dimensional grid (#586)
Browse files Browse the repository at this point in the history
Remove the possibility of changing the grid size used by the
cms::cudacompat layer, and make it a constant equal to {1, 1, 1}.

This avoids a thread-related problem caused by TBB using worker threads
where the grid size had not been initialised.

The kernel for pixel clustering need to be rewritten to support a
one-dimensional grid to run on the CPU.
Currently they are only used on the GPU in the Patatrack workflows, but
they are exercised on the CPU by the gpuClustering_t tests; those tests
have been commented out until the kernels can be updated.
  • Loading branch information
cmsbuild authored and fwyzard committed Dec 25, 2020
1 parent 085c41d commit 97f1491
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
17 changes: 9 additions & 8 deletions RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@
#include <vector>

#ifdef __CUDACC__

#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/launch.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
#endif

#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h"
Expand All @@ -33,7 +32,6 @@ int main(void) {
auto h_x = std::make_unique<uint16_t[]>(numElements);
auto h_y = std::make_unique<uint16_t[]>(numElements);
auto h_adc = std::make_unique<uint16_t[]>(numElements);

auto h_clus = std::make_unique<int[]>(numElements);

#ifdef __CUDACC__
Expand All @@ -46,11 +44,9 @@ int main(void) {
auto d_clusInModule = cms::cuda::make_device_unique<uint32_t[]>(MaxNumModules, nullptr);
auto d_moduleId = cms::cuda::make_device_unique<uint32_t[]>(MaxNumModules, nullptr);
#else

auto h_moduleStart = std::make_unique<uint32_t[]>(MaxNumModules + 1);
auto h_clusInModule = std::make_unique<uint32_t[]>(MaxNumModules);
auto h_moduleId = std::make_unique<uint32_t[]>(MaxNumModules);

#endif

// later random number
Expand Down Expand Up @@ -301,9 +297,12 @@ int main(void) {

cudaDeviceSynchronize();
#else

h_moduleStart[0] = nModules;
countModules(h_id.get(), h_moduleStart.get(), h_clus.get(), n);
memset(h_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t));
#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE
// FIXME the findClus kernel should be rewritten to avoid relying on a predefined grid size
gridDim.x = MaxNumModules; //not needed in the kernel for this specific case;
assert(blockIdx.x == 0);
for (; blockIdx.x < gridDim.x; ++blockIdx.x)
Expand All @@ -315,7 +314,7 @@ int main(void) {
h_moduleId.get(),
h_clus.get(),
n);
resetGrid();
#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE

nModules = h_moduleStart[0];
auto nclus = h_clusInModule.get();
Expand All @@ -330,12 +329,14 @@ int main(void) {
if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0))
std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl;

#ifdef TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE
// FIXME the clusterChargeCut kernel should be rewritten to avoid relying on a predefined grid size
gridDim.x = MaxNumModules; // no needed in the kernel for in this specific case
assert(blockIdx.x == 0);
for (; blockIdx.x < gridDim.x; ++blockIdx.x)
clusterChargeCut(
h_id.get(), h_adc.get(), h_moduleStart.get(), h_clusInModule.get(), h_moduleId.get(), h_clus.get(), n);
resetGrid();
#endif // TODO_FIX_CLUSTERIZER_FOR_ANY_GRID_SIZE

#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,6 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
// filled creates view
SiPixelDigisCUDA::DeviceConstView digiView{xx_.data(), yy_.data(), adc_.data(), moduleInd_.data(), clus_.data()};
assert(digiView.adc(0) != 0);
// not needed...
cms::cudacompat::resetGrid();
// we run on blockId.x==0
gpuPixelRecHits::getHits(&cpeView, &bsHost, &digiView, ndigi, &clusterView, output->view());
for (auto h = fc; h < lc; ++h)
Expand Down

0 comments on commit 97f1491

Please sign in to comment.