From 574271e151b38d992b6bc25620722901be10587f Mon Sep 17 00:00:00 2001 From: Matti Kortelainen Date: Fri, 17 Jan 2020 09:10:53 -0600 Subject: [PATCH] Implement changes from the CUDA framework review (#429) Rename the cudautils namespace to cms::cuda or cms::cudatest, and drop the CUDA prefix from the symbols defined there. Always record and query the CUDA event, to minimize need for error checking in CUDAScopedContextProduce destructor. Add comments to highlight the pieces in CachingDeviceAllocator that have been changed wrt. cub. Various other updates and clean up: - enable CUDA for compute capability 3.5. - clean up CUDAService, CUDA tests and plugins. - add CUDA existence protections to BuildFiles. - mark thread-safe static variables with CMS_THREAD_SAFE. --- .../interface/SiPixelClustersCUDA.h | 10 ++--- .../SiPixelCluster/src/SiPixelClustersCUDA.cc | 14 +++--- CUDADataFormats/SiPixelCluster/src/classes.h | 2 +- .../SiPixelCluster/src/classes_def.xml | 4 +- .../interface/SiPixelDigiErrorsCUDA.h | 8 ++-- .../SiPixelDigi/interface/SiPixelDigisCUDA.h | 24 +++++----- .../SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc | 16 +++---- .../SiPixelDigi/src/SiPixelDigisCUDA.cc | 44 +++++++++---------- CUDADataFormats/SiPixelDigi/src/classes.h | 2 +- .../SiPixelDigi/src/classes_def.xml | 8 ++-- .../interface/TrackingRecHit2DHeterogeneous.h | 10 ++--- .../src/TrackingRecHit2DCUDA.cc | 10 ++--- CUDADataFormats/TrackingRecHit/src/classes.h | 2 +- .../TrackingRecHit/src/classes_def.xml | 4 +- .../test/TrackingRecHit2DCUDA_t.cpp | 4 +- .../SiPixelGainCalibrationForHLTGPU.h | 4 +- .../plugins/SiPixelDigiErrorsSoAFromCUDA.cc | 13 +++--- .../plugins/SiPixelDigisSoAFromCUDA.cc | 18 ++++---- .../SiPixelFedCablingMapGPUWrapper.h | 8 ++-- .../plugins/SiPixelRawToClusterCUDA.cc | 24 +++++----- .../plugins/SiPixelRawToClusterGPUKernel.cu | 10 ++--- .../plugins/SiPixelRawToClusterGPUKernel.h | 8 ++-- .../src/SiPixelFedCablingMapGPUWrapper.cc | 6 +-- .../SiPixelClusterizer/test/gpuClustering_t.h | 33 ++++++-------- .../SiPixelRecHits/interface/PixelCPEFast.h | 4 +- .../SiPixelRecHits/plugins/PixelRecHits.cu | 4 +- .../plugins/SiPixelRecHitCUDA.cc | 29 ++++++------ .../plugins/SiPixelRecHitFromSOA.cc | 21 ++++----- .../plugins/SiPixelRecHitSoAFromLegacy.cc | 3 +- 29 files changed, 170 insertions(+), 177 deletions(-) diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index d3650e164d44e..dbfb5ff5e1761 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -58,14 +58,14 @@ class SiPixelClustersCUDA { DeviceConstView *view() const { return view_d.get(); } private: - cudautils::device::unique_ptr moduleStart_d; // index of the first pixel of each module - cudautils::device::unique_ptr clusInModule_d; // number of clusters found in each module - cudautils::device::unique_ptr moduleId_d; // module id of each module + cms::cuda::device::unique_ptr moduleStart_d; // index of the first pixel of each module + cms::cuda::device::unique_ptr clusInModule_d; // number of clusters found in each module + cms::cuda::device::unique_ptr moduleId_d; // module id of each module // originally from rechits - cudautils::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module + cms::cuda::device::unique_ptr clusModuleStart_d; // index of the first cluster of each module - cudautils::device::unique_ptr view_d; // "me" pointer + cms::cuda::device::unique_ptr view_d; // "me" pointer uint32_t nClusters_h; }; diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index c814cd4a2e131..7bef9d0d8a52f 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -5,17 +5,17 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) { - moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); - clusInModule_d = cudautils::make_device_unique(maxClusters, stream); - moduleId_d = cudautils::make_device_unique(maxClusters, stream); - clusModuleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); + moduleStart_d = cms::cuda::make_device_unique(maxClusters + 1, stream); + clusInModule_d = cms::cuda::make_device_unique(maxClusters, stream); + moduleId_d = cms::cuda::make_device_unique(maxClusters, stream); + clusModuleStart_d = cms::cuda::make_device_unique(maxClusters + 1, stream); - auto view = cudautils::make_host_unique(stream); + auto view = cms::cuda::make_host_unique(stream); view->moduleStart_ = moduleStart_d.get(); view->clusInModule_ = clusInModule_d.get(); view->moduleId_ = moduleId_d.get(); view->clusModuleStart_ = clusModuleStart_d.get(); - view_d = cudautils::make_device_unique(stream); - cudautils::copyAsync(view_d, view, stream); + view_d = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(view_d, view, stream); } diff --git a/CUDADataFormats/SiPixelCluster/src/classes.h b/CUDADataFormats/SiPixelCluster/src/classes.h index 08d46244adc7d..0698cb103dab9 100644 --- a/CUDADataFormats/SiPixelCluster/src/classes.h +++ b/CUDADataFormats/SiPixelCluster/src/classes.h @@ -1,7 +1,7 @@ #ifndef CUDADataFormats_SiPixelCluster_classes_h #define CUDADataFormats_SiPixelCluster_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/SiPixelCluster/src/classes_def.xml b/CUDADataFormats/SiPixelCluster/src/classes_def.xml index ba0706ac4b8aa..70decb9f27df7 100644 --- a/CUDADataFormats/SiPixelCluster/src/classes_def.xml +++ b/CUDADataFormats/SiPixelCluster/src/classes_def.xml @@ -1,4 +1,4 @@ - - + + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 7c18d58a3fc12..1557fd64750e7 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -26,15 +26,15 @@ class SiPixelDigiErrorsCUDA { GPU::SimpleVector const* c_error() const { return error_d.get(); } using HostDataError = - std::pair, cudautils::host::unique_ptr>; + std::pair, cms::cuda::host::unique_ptr>; HostDataError dataErrorToHostAsync(cudaStream_t stream) const; void copyErrorToHostAsync(cudaStream_t stream); private: - cudautils::device::unique_ptr data_d; - cudautils::device::unique_ptr> error_d; - cudautils::host::unique_ptr> error_h; + cms::cuda::device::unique_ptr data_d; + cms::cuda::device::unique_ptr> error_d; + cms::cuda::host::unique_ptr> error_h; PixelFormatterErrors formatterErrors_h; }; diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 47efe634ad93d..04207f3e0b385 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -50,10 +50,10 @@ class SiPixelDigisCUDA { uint32_t const *c_pdigi() const { return pdigi_d.get(); } uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } - cudautils::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; class DeviceConstView { public: @@ -79,17 +79,17 @@ class SiPixelDigisCUDA { private: // These are consumed by downstream device code - cudautils::device::unique_ptr xx_d; // local coordinates of each pixel - cudautils::device::unique_ptr yy_d; // - cudautils::device::unique_ptr adc_d; // ADC of each pixel - cudautils::device::unique_ptr moduleInd_d; // module id of each pixel - cudautils::device::unique_ptr clus_d; // cluster id of each pixel - cudautils::device::unique_ptr view_d; // "me" pointer + cms::cuda::device::unique_ptr xx_d; // local coordinates of each pixel + cms::cuda::device::unique_ptr yy_d; // + cms::cuda::device::unique_ptr adc_d; // ADC of each pixel + cms::cuda::device::unique_ptr moduleInd_d; // module id of each pixel + cms::cuda::device::unique_ptr clus_d; // cluster id of each pixel + cms::cuda::device::unique_ptr view_d; // "me" pointer // These are for CPU output; should we (eventually) place them to a // separate product? - cudautils::device::unique_ptr pdigi_d; - cudautils::device::unique_ptr rawIdArr_d; + cms::cuda::device::unique_ptr pdigi_d; + cms::cuda::device::unique_ptr rawIdArr_d; uint32_t nModules_h = 0; uint32_t nDigis_h = 0; diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 7640348c15f08..ffef71092f6c9 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -9,32 +9,32 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream) : formatterErrors_h(std::move(errors)) { - error_d = cudautils::make_device_unique>(stream); - data_d = cudautils::make_device_unique(maxFedWords, stream); + error_d = cms::cuda::make_device_unique>(stream); + data_d = cms::cuda::make_device_unique(maxFedWords, stream); - cudautils::memsetAsync(data_d, 0x00, maxFedWords, stream); + cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream); - error_h = cudautils::make_host_unique>(stream); + error_h = cms::cuda::make_host_unique>(stream); GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get()); assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); - cudautils::copyAsync(error_d, error_h, stream); + cms::cuda::copyAsync(error_d, error_h, stream); } void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { - cudautils::copyAsync(error_h, error_d, stream); + cms::cuda::copyAsync(error_h, error_d, stream); } SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { // On one hand size() could be sufficient. On the other hand, if // someone copies the SimpleVector<>, (s)he might expect the data // buffer to actually have space for capacity() elements. - auto data = cudautils::make_host_unique(error_h->capacity(), stream); + auto data = cms::cuda::make_host_unique(error_h->capacity(), stream); // but transfer only the required amount if (not error_h->empty()) { - cudautils::copyAsync(data, data_d, error_h->size(), stream); + cms::cuda::copyAsync(data, data_d, error_h->size(), stream); } auto err = *error_h; err.set_data(data.get()); diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index a8aab7ab5a4b8..664364b6ff25a 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -5,46 +5,46 @@ #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) { - xx_d = cudautils::make_device_unique(maxFedWords, stream); - yy_d = cudautils::make_device_unique(maxFedWords, stream); - adc_d = cudautils::make_device_unique(maxFedWords, stream); - moduleInd_d = cudautils::make_device_unique(maxFedWords, stream); - clus_d = cudautils::make_device_unique(maxFedWords, stream); + xx_d = cms::cuda::make_device_unique(maxFedWords, stream); + yy_d = cms::cuda::make_device_unique(maxFedWords, stream); + adc_d = cms::cuda::make_device_unique(maxFedWords, stream); + moduleInd_d = cms::cuda::make_device_unique(maxFedWords, stream); + clus_d = cms::cuda::make_device_unique(maxFedWords, stream); - pdigi_d = cudautils::make_device_unique(maxFedWords, stream); - rawIdArr_d = cudautils::make_device_unique(maxFedWords, stream); + pdigi_d = cms::cuda::make_device_unique(maxFedWords, stream); + rawIdArr_d = cms::cuda::make_device_unique(maxFedWords, stream); - auto view = cudautils::make_host_unique(stream); + auto view = cms::cuda::make_host_unique(stream); view->xx_ = xx_d.get(); view->yy_ = yy_d.get(); view->adc_ = adc_d.get(); view->moduleInd_ = moduleInd_d.get(); view->clus_ = clus_d.get(); - view_d = cudautils::make_device_unique(stream); - cudautils::copyAsync(view_d, view, stream); + view_d = cms::cuda::make_device_unique(stream); + cms::cuda::copyAsync(view_d, view, stream); } -cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, adc_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, adc_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, clus_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, clus_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, pdigi_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, pdigi_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(nDigis(), stream); - cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream); +cms::cuda::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(nDigis(), stream); + cms::cuda::copyAsync(ret, rawIdArr_d, nDigis(), stream); return ret; } diff --git a/CUDADataFormats/SiPixelDigi/src/classes.h b/CUDADataFormats/SiPixelDigi/src/classes.h index 41b135640b883..fca0811e4650f 100644 --- a/CUDADataFormats/SiPixelDigi/src/classes.h +++ b/CUDADataFormats/SiPixelDigi/src/classes.h @@ -1,7 +1,7 @@ #ifndef CUDADataFormats_SiPixelDigi_classes_h #define CUDADataFormats_SiPixelDigi_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/SiPixelDigi/src/classes_def.xml b/CUDADataFormats/SiPixelDigi/src/classes_def.xml index 9d6816ed3b14c..ff775afdc2046 100644 --- a/CUDADataFormats/SiPixelDigi/src/classes_def.xml +++ b/CUDADataFormats/SiPixelDigi/src/classes_def.xml @@ -1,7 +1,7 @@ - - + + - - + + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index aa551f21b4aad..955f97ca6bd54 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -37,9 +37,9 @@ class TrackingRecHit2DHeterogeneous { auto iphi() { return m_iphi; } // only the local coord and detector index - cudautils::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; - cudautils::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; + cms::cuda::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: static constexpr uint32_t n16 = 4; @@ -89,7 +89,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH constexpr #endif (std::is_same::value) { - cudautils::copyAsync(m_view, view, stream); + cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } @@ -136,7 +136,7 @@ TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nH constexpr #endif (std::is_same::value) { - cudautils::copyAsync(m_view, view, stream); + cms::cuda::copyAsync(m_view, view, stream); } else { m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version } diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc index e6f223bfec4e3..7b04ed2d530a0 100644 --- a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DCUDA.cc @@ -5,15 +5,15 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" template <> -cudautils::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(4 * nHits(), stream); - cudautils::copyAsync(ret, m_store32, 4 * nHits(), stream); +cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::localCoordToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(4 * nHits(), stream); + cms::cuda::copyAsync(ret, m_store32, 4 * nHits(), stream); return ret; } template <> -cudautils::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { - auto ret = cudautils::make_host_unique(2001, stream); +cms::cuda::host::unique_ptr TrackingRecHit2DCUDA::hitsModuleStartToHostAsync(cudaStream_t stream) const { + auto ret = cms::cuda::make_host_unique(2001, stream); cudaCheck(cudaMemcpyAsync(ret.get(), m_hitsModuleStart, 4 * 2001, cudaMemcpyDefault, stream)); return ret; } diff --git a/CUDADataFormats/TrackingRecHit/src/classes.h b/CUDADataFormats/TrackingRecHit/src/classes.h index 90cfd0945d76e..d80226ec7a14b 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes.h +++ b/CUDADataFormats/TrackingRecHit/src/classes.h @@ -1,7 +1,7 @@ #ifndef CUDADataFormats_SiPixelCluster_src_classes_h #define CUDADataFormats_SiPixelCluster_src_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Common/interface/Wrapper.h" diff --git a/CUDADataFormats/TrackingRecHit/src/classes_def.xml b/CUDADataFormats/TrackingRecHit/src/classes_def.xml index 4e8325ddce87e..02b0eb37d157b 100644 --- a/CUDADataFormats/TrackingRecHit/src/classes_def.xml +++ b/CUDADataFormats/TrackingRecHit/src/classes_def.xml @@ -1,10 +1,10 @@ - + - + diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index 592f0267c2f7d..32af6c181ae68 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,6 +1,6 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" namespace testTrackingRecHit2D { @@ -10,7 +10,7 @@ namespace testTrackingRecHit2D { } int main() { - requireCUDADevices(); + cms::cudatest::requireDevices(); cudaStream_t stream; cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index 8bfefee5c3387..6fb487a244e71 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -2,7 +2,7 @@ #define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h #include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" class SiPixelGainCalibrationForHLT; class SiPixelGainForHLTonGPU; @@ -26,7 +26,7 @@ class SiPixelGainCalibrationForHLTGPU { SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr; SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr; }; - CUDAESProduct gpuData_; + cms::cuda::ESProduct gpuData_; }; #endif // CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc index ad6c46082be8b..be4cc5d9a3336 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiErrorsSoAFromCUDA.cc @@ -1,4 +1,4 @@ -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" #include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h" #include "FWCore/Framework/interface/EventSetup.h" @@ -8,7 +8,7 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer { @@ -24,16 +24,17 @@ class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer> digiErrorGetToken_; + edm::EDGetTokenT> digiErrorGetToken_; edm::EDPutTokenT digiErrorPutToken_; - cudautils::host::unique_ptr data_; + cms::cuda::host::unique_ptr data_; GPU::SimpleVector error_; const PixelFormatterErrors* formatterErrors_ = nullptr; }; SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig) - : digiErrorGetToken_(consumes>(iConfig.getParameter("src"))), + : digiErrorGetToken_( + consumes>(iConfig.getParameter("src"))), digiErrorPutToken_(produces()) {} void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -46,7 +47,7 @@ void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream - CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc index 7794032154e98..dbec74585998f 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigisSoAFromCUDA.cc @@ -1,4 +1,4 @@ -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "DataFormats/SiPixelDigi/interface/SiPixelDigisSoA.h" #include "FWCore/Framework/interface/EventSetup.h" @@ -8,7 +8,7 @@ #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer { @@ -24,19 +24,19 @@ class SiPixelDigisSoAFromCUDA : public edm::stream::EDProducer> digiGetToken_; + edm::EDGetTokenT> digiGetToken_; edm::EDPutTokenT digiPutToken_; - cudautils::host::unique_ptr pdigi_; - cudautils::host::unique_ptr rawIdArr_; - cudautils::host::unique_ptr adc_; - cudautils::host::unique_ptr clus_; + cms::cuda::host::unique_ptr pdigi_; + cms::cuda::host::unique_ptr rawIdArr_; + cms::cuda::host::unique_ptr adc_; + cms::cuda::host::unique_ptr clus_; int nDigis_; }; SiPixelDigisSoAFromCUDA::SiPixelDigisSoAFromCUDA(const edm::ParameterSet& iConfig) - : digiGetToken_(consumes>(iConfig.getParameter("src"))), + : digiGetToken_(consumes>(iConfig.getParameter("src"))), digiPutToken_(produces()) {} void SiPixelDigisSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -49,7 +49,7 @@ void SiPixelDigisSoAFromCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { // Do the transfer in a CUDA stream parallel to the computation CUDA stream - CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)}; const auto& gpuDigis = ctx.get(iEvent, digiGetToken_); diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h index 14a5d25504479..2f9eb092bc648 100644 --- a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h +++ b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h @@ -1,7 +1,7 @@ #ifndef RecoLocalTracker_SiPixelClusterizer_SiPixelFedCablingMapGPUWrapper_h #define RecoLocalTracker_SiPixelClusterizer_SiPixelFedCablingMapGPUWrapper_h -#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" #include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPU.h" @@ -29,7 +29,7 @@ class SiPixelFedCablingMapGPUWrapper { // returns pointer to GPU memory const unsigned char *getModToUnpAllAsync(cudaStream_t cudaStream) const; - cudautils::device::unique_ptr getModToUnpRegionalAsync(std::set const &modules, + cms::cuda::device::unique_ptr getModToUnpRegionalAsync(std::set const &modules, cudaStream_t cudaStream) const; private: @@ -44,13 +44,13 @@ class SiPixelFedCablingMapGPUWrapper { ~GPUData(); SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // pointer to struct in GPU }; - CUDAESProduct gpuData_; + cms::cuda::ESProduct gpuData_; struct ModulesToUnpack { ~ModulesToUnpack(); unsigned char *modToUnpDefault = nullptr; // pointer to GPU }; - CUDAESProduct modToUnp_; + cms::cuda::ESProduct modToUnp_; }; #endif diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc index 62004d385577d..95aac36dbd197 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterCUDA.cc @@ -1,4 +1,4 @@ -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h" @@ -25,7 +25,7 @@ #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ServiceRegistry/interface/Service.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #include "RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h" #include "RecoTracker/Record/interface/CkfComponentsRecord.h" @@ -51,11 +51,11 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer rawGetToken_; - edm::EDPutTokenT> digiPutToken_; - edm::EDPutTokenT> digiErrorPutToken_; - edm::EDPutTokenT> clusterPutToken_; + edm::EDPutTokenT> digiPutToken_; + edm::EDPutTokenT> digiErrorPutToken_; + edm::EDPutTokenT> clusterPutToken_; - CUDAContextState ctxState_; + cms::cuda::ContextState ctxState_; edm::ESWatcher recordWatcher_; edm::ESGetToken gpuMapToken_; @@ -78,8 +78,8 @@ class SiPixelRawToClusterCUDA : public edm::stream::EDProducer(iConfig.getParameter("InputLabel"))), - digiPutToken_(produces>()), - clusterPutToken_(produces>()), + digiPutToken_(produces>()), + clusterPutToken_(produces>()), gpuMapToken_(esConsumes()), gainsToken_(esConsumes()), cablingMapToken_(esConsumes( @@ -89,7 +89,7 @@ SiPixelRawToClusterCUDA::SiPixelRawToClusterCUDA(const edm::ParameterSet& iConfi usePilotBlade_(iConfig.getParameter("UsePilotBlade")) // Control the usage of pilot-blade data, FED=40 { if (includeErrors_) { - digiErrorPutToken_ = produces>(); + digiErrorPutToken_ = produces>(); } // regions @@ -128,7 +128,7 @@ void SiPixelRawToClusterCUDA::fillDescriptions(edm::ConfigurationDescriptions& d void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, const edm::EventSetup& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; + cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder), ctxState_}; auto hgpuMap = iSetup.getHandle(gpuMapToken_); if (hgpuMap->hasQuality() != useQuality_) { @@ -143,7 +143,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, // get the GPU product already here so that the async transfer can begin const auto* gpuGains = hgains->getGPUProductAsync(ctx.stream()); - cudautils::device::unique_ptr modulesToUnpackRegional; + cms::cuda::device::unique_ptr modulesToUnpackRegional; const unsigned char* gpuModulesToUnpack; if (regions_) { @@ -247,7 +247,7 @@ void SiPixelRawToClusterCUDA::acquire(const edm::Event& iEvent, } void SiPixelRawToClusterCUDA::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { - CUDAScopedContextProduce ctx{ctxState_}; + cms::cuda::ScopedContextProduce ctx{ctxState_}; auto tmp = gpuAlgo_.getResults(); ctx.emplace(iEvent, digiPutToken_, std::move(tmp.first)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 8e0d5123e6ecc..53af26ac7527d 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -47,8 +47,8 @@ namespace pixelgpudetails { constexpr uint32_t MAX_FED_WORDS = pixelgpudetails::MAX_FED * pixelgpudetails::MAX_WORD; SiPixelRawToClusterGPUKernel::WordFedAppender::WordFedAppender() { - word_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); - fedId_ = cudautils::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + word_ = cms::cuda::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); + fedId_ = cms::cuda::make_host_noncached_unique(MAX_FED_WORDS, cudaHostAllocWriteCombined); } void SiPixelRawToClusterGPUKernel::WordFedAppender::initializeWordFed(int fedId, @@ -549,7 +549,7 @@ namespace pixelgpudetails { } clusters_d = SiPixelClustersCUDA(gpuClustering::MaxNumModules, stream); - nModules_Clusters_h = cudautils::make_host_unique(2, stream); + nModules_Clusters_h = cms::cuda::make_host_unique(2, stream); if (wordCounter) // protect in case of empty event.... { @@ -558,8 +558,8 @@ namespace pixelgpudetails { assert(0 == wordCounter % 2); // wordCounter is the total no of words in each event to be trasfered on device - auto word_d = cudautils::make_device_unique(wordCounter, stream); - auto fedId_d = cudautils::make_device_unique(wordCounter, stream); + auto word_d = cms::cuda::make_device_unique(wordCounter, stream); + auto fedId_d = cms::cuda::make_device_unique(wordCounter, stream); cudaCheck( cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream)); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 888fc07953d9d..767c5a1e92ad0 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -156,8 +156,8 @@ namespace pixelgpudetails { const unsigned char* fedId() const { return fedId_.get(); } private: - cudautils::host::noncached::unique_ptr word_; - cudautils::host::noncached::unique_ptr fedId_; + cms::cuda::host::noncached::unique_ptr word_; + cms::cuda::host::noncached::unique_ptr fedId_; }; SiPixelRawToClusterGPUKernel() = default; @@ -187,7 +187,7 @@ namespace pixelgpudetails { // stream is still alive // // technically the statement above is not true anymore now that - // the CUDA streams are cached within the CUDAStreamCache, but it is + // the CUDA streams are cached within the cms::cuda::StreamCache, but it is // still better to release as early as possible nModules_Clusters_h.reset(); return std::make_pair(std::move(digis_d), std::move(clusters_d)); @@ -199,7 +199,7 @@ namespace pixelgpudetails { uint32_t nDigis = 0; // Data to be put in the event - cudautils::host::unique_ptr nModules_Clusters_h; + cms::cuda::host::unique_ptr nModules_Clusters_h; SiPixelDigisCUDA digis_d; SiPixelClustersCUDA clusters_d; SiPixelDigiErrorsCUDA digiErrors_d; diff --git a/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc b/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc index d4b8e40dea76b..7d3a9aa8d9b07 100644 --- a/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc +++ b/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc @@ -127,10 +127,10 @@ const unsigned char* SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cudaStr return data.modToUnpDefault; } -cudautils::device::unique_ptr SiPixelFedCablingMapGPUWrapper::getModToUnpRegionalAsync( +cms::cuda::device::unique_ptr SiPixelFedCablingMapGPUWrapper::getModToUnpRegionalAsync( std::set const& modules, cudaStream_t cudaStream) const { - auto modToUnpDevice = cudautils::make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); - auto modToUnpHost = cudautils::make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); + auto modToUnpDevice = cms::cuda::make_device_unique(pixelgpudetails::MAX_SIZE, cudaStream); + auto modToUnpHost = cms::cuda::make_host_unique(pixelgpudetails::MAX_SIZE, cudaStream); std::vector const& fedIds = cablingMap_->fedIds(); std::unique_ptr const& cabling = cablingMap_->cablingTree(); diff --git a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h index b22e7a35a6ac7..8ec665f8960b6 100644 --- a/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h +++ b/RecoLocalTracker/SiPixelClusterizer/test/gpuClustering_t.h @@ -13,9 +13,8 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/launch.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaDeviceCount.h" #endif #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" @@ -23,13 +22,7 @@ int main(void) { #ifdef __CUDACC__ - requireCUDADevices(); - - if (cudautils::cudaDeviceCount() == 0) { - std::cerr << "No CUDA devices on this system" - << "\n"; - exit(EXIT_FAILURE); - } + cms::cudatest::requireDevices(); #endif using namespace gpuClustering; @@ -44,14 +37,14 @@ int main(void) { auto h_clus = std::make_unique(numElements); #ifdef __CUDACC__ - auto d_id = cudautils::make_device_unique(numElements, nullptr); - auto d_x = cudautils::make_device_unique(numElements, nullptr); - auto d_y = cudautils::make_device_unique(numElements, nullptr); - auto d_adc = cudautils::make_device_unique(numElements, nullptr); - auto d_clus = cudautils::make_device_unique(numElements, nullptr); - auto d_moduleStart = cudautils::make_device_unique(MaxNumModules + 1, nullptr); - auto d_clusInModule = cudautils::make_device_unique(MaxNumModules, nullptr); - auto d_moduleId = cudautils::make_device_unique(MaxNumModules, nullptr); + auto d_id = cms::cuda::make_device_unique(numElements, nullptr); + auto d_x = cms::cuda::make_device_unique(numElements, nullptr); + auto d_y = cms::cuda::make_device_unique(numElements, nullptr); + auto d_adc = cms::cuda::make_device_unique(numElements, nullptr); + auto d_clus = cms::cuda::make_device_unique(numElements, nullptr); + auto d_moduleStart = cms::cuda::make_device_unique(MaxNumModules + 1, nullptr); + auto d_clusInModule = cms::cuda::make_device_unique(MaxNumModules, nullptr); + auto d_moduleId = cms::cuda::make_device_unique(MaxNumModules, nullptr); #else auto h_moduleStart = std::make_unique(MaxNumModules + 1); @@ -262,7 +255,7 @@ int main(void) { std::cout << "CUDA countModules kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads\n"; - cudautils::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); + cms::cuda::launch(countModules, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_moduleStart.get(), d_clus.get(), n); blocksPerGrid = MaxNumModules; //nModules; @@ -270,7 +263,7 @@ int main(void) { << " threads\n"; cudaCheck(cudaMemset(d_clusInModule.get(), 0, MaxNumModules * sizeof(uint32_t))); - cudautils::launch(findClus, + cms::cuda::launch(findClus, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_x.get(), @@ -296,7 +289,7 @@ int main(void) { if (ncl != std::accumulate(nclus, nclus + MaxNumModules, 0)) std::cout << "ERROR!!!!! wrong number of cluster found" << std::endl; - cudautils::launch(clusterChargeCut, + cms::cuda::launch(clusterChargeCut, {blocksPerGrid, threadsPerBlock}, d_id.get(), d_adc.get(), diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index e43c45f90523f..70e30563c66c3 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -6,7 +6,7 @@ #include "CalibTracker/SiPixelESProducers/interface/SiPixelCPEGenericDBErrorParametrization.h" #include "CondFormats/SiPixelTransient/interface/SiPixelGenError.h" #include "CondFormats/SiPixelTransient/interface/SiPixelTemplate.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h" +#include "HeterogeneousCore/CUDACore/interface/ESProduct.h" #include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" @@ -95,7 +95,7 @@ class PixelCPEFast final : public PixelCPEBase { pixelCPEforGPU::ParamsOnGPU h_paramsOnGPU; pixelCPEforGPU::ParamsOnGPU *d_paramsOnGPU = nullptr; // copy of the above on the Device }; - CUDAESProduct gpuData_; + cms::cuda::ESProduct gpuData_; void fillParamsForGpu(); }; diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu index 1342ab916e472..4e4f38f329d01 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu @@ -63,8 +63,8 @@ namespace pixelgpudetails { } if (nHits) { - auto hws = cudautils::make_device_unique(TrackingRecHit2DSOAView::Hist::wsSize(), stream); - cudautils::fillManyFromVector( + auto hws = cms::cuda::make_device_unique(TrackingRecHit2DSOAView::Hist::wsSize(), stream); + cms::cuda::fillManyFromVector( hits_d.phiBinner(), hws.get(), 10, hits_d.iphi(), hits_d.hitsLayerStart(), nHits, 256, stream); cudaCheck(cudaGetLastError()); } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc index 1641719d0537d..4d85c41339020 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitCUDA.cc @@ -1,8 +1,7 @@ #include #include "CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h" -#include "CUDADataFormats/Common/interface/CUDAProduct.h" -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" @@ -18,7 +17,7 @@ #include "FWCore/Utilities/interface/InputTag.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" @@ -36,11 +35,11 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { void produce(edm::StreamID streamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; // The mess with inputs will be cleaned up when migrating to the new framework - edm::EDGetTokenT> tBeamSpot; - edm::EDGetTokenT> token_; - edm::EDGetTokenT> tokenDigi_; + edm::EDGetTokenT> tBeamSpot; + edm::EDGetTokenT> token_; + edm::EDGetTokenT> tokenDigi_; - edm::EDPutTokenT> tokenHit_; + edm::EDPutTokenT> tokenHit_; std::string cpeName_; @@ -48,10 +47,10 @@ class SiPixelRecHitCUDA : public edm::global::EDProducer<> { }; SiPixelRecHitCUDA::SiPixelRecHitCUDA(const edm::ParameterSet& iConfig) - : tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), - token_(consumes>(iConfig.getParameter("src"))), - tokenDigi_(consumes>(iConfig.getParameter("src"))), - tokenHit_(produces>()), + : tBeamSpot(consumes>(iConfig.getParameter("beamSpot"))), + token_(consumes>(iConfig.getParameter("src"))), + tokenDigi_(consumes>(iConfig.getParameter("src"))), + tokenHit_(produces>()), cpeName_(iConfig.getParameter("CPE")) {} void SiPixelRecHitCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -82,17 +81,17 @@ void SiPixelRecHitCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, cons throw cms::Exception("Configuration") << "too bad, not a fast cpe gpu processing not possible...."; } - edm::Handle> hclusters; + edm::Handle> hclusters; iEvent.getByToken(token_, hclusters); - CUDAScopedContextProduce ctx{*hclusters}; + cms::cuda::ScopedContextProduce ctx{*hclusters}; auto const& clusters = ctx.get(*hclusters); - edm::Handle> hdigis; + edm::Handle> hdigis; iEvent.getByToken(tokenDigi_, hdigis); auto const& digis = ctx.get(*hdigis); - edm::Handle> hbs; + edm::Handle> hbs; iEvent.getByToken(tBeamSpot, hbs); auto const& bs = ctx.get(*hbs); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSOA.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSOA.cc index a4f19ac276a7a..7b072abc1dd47 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSOA.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitFromSOA.cc @@ -1,6 +1,6 @@ #include -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "DataFormats/Common/interface/DetSetVectorNew.h" @@ -19,7 +19,7 @@ #include "Geometry/CommonDetUnit/interface/PixelGeomDetUnit.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" class SiPixelRecHitFromSOA : public edm::stream::EDProducer { @@ -37,17 +37,18 @@ class SiPixelRecHitFromSOA : public edm::stream::EDProducer { edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - edm::EDGetTokenT> tokenHit_; // CUDA hits - edm::EDGetTokenT clusterToken_; // Legacy Clusters + edm::EDGetTokenT> tokenHit_; // CUDA hits + edm::EDGetTokenT clusterToken_; // Legacy Clusters uint32_t m_nHits; - cudautils::host::unique_ptr m_store16; - cudautils::host::unique_ptr m_store32; - cudautils::host::unique_ptr m_hitsModuleStart; + cms::cuda::host::unique_ptr m_store16; + cms::cuda::host::unique_ptr m_store32; + cms::cuda::host::unique_ptr m_hitsModuleStart; }; SiPixelRecHitFromSOA::SiPixelRecHitFromSOA(const edm::ParameterSet& iConfig) - : tokenHit_(consumes>(iConfig.getParameter("pixelRecHitSrc"))), + : tokenHit_( + consumes>(iConfig.getParameter("pixelRecHitSrc"))), clusterToken_(consumes(iConfig.getParameter("src"))) { produces(); produces(); @@ -63,8 +64,8 @@ void SiPixelRecHitFromSOA::fillDescriptions(edm::ConfigurationDescriptions& desc void SiPixelRecHitFromSOA::acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - CUDAProduct const& inputDataWrapped = iEvent.get(tokenHit_); - CUDAScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; + cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenHit_); + cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; auto const& inputData = ctx.get(inputDataWrapped); m_nHits = inputData.nHits(); diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 7900cf8b2289a..fbe0fd13b84a4 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -22,7 +22,6 @@ #include "FWCore/Utilities/interface/InputTag.h" #include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" #include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" #include "RecoLocalTracker/Records/interface/TkPixelCPERecord.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEBase.h" #include "RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h" @@ -251,7 +250,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv for (auto i = 0; i < 11; ++i) { output->hitsLayerStart()[i] = hitsModuleStart[cpeView.layerGeometry().layerStart[i]]; } - cudautils::fillManyFromVector( + cms::cuda::fillManyFromVector( output->phiBinner(), nullptr, 10, output->iphi(), output->hitsLayerStart(), numberOfHits, 256, nullptr); // std::cout << "created HitSoa for " << numberOfClusters << " clusters in " << numberOfDetUnits << " Dets" << std::endl;