diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml index 6db6a1f62cda1..5e401d215c4eb 100644 --- a/CUDADataFormats/SiPixelCluster/BuildFile.xml +++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml @@ -1,7 +1,7 @@ - + diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h index 1db6e29459ea7..d3650e164d44e 100644 --- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h +++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h @@ -3,15 +3,14 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -#include - #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include + class SiPixelClustersCUDA { public: SiPixelClustersCUDA() = default; - explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<> &stream); + explicit SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream); ~SiPixelClustersCUDA() = default; SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete; diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc index 4a145bb1231c2..c814cd4a2e131 100644 --- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc +++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc @@ -4,7 +4,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) { +SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) { moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream); clusInModule_d = cudautils::make_device_unique(maxClusters, stream); moduleId_d = cudautils::make_device_unique(maxClusters, stream); diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml index c29c9c9b9f44d..ee357e2d4e157 100644 --- a/CUDADataFormats/SiPixelDigi/BuildFile.xml +++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml @@ -2,7 +2,7 @@ - + diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 7e016efb708de..7c18d58a3fc12 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -6,12 +6,12 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" -#include +#include class SiPixelDigiErrorsCUDA { public: SiPixelDigiErrorsCUDA() = default; - explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream); + explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream); ~SiPixelDigiErrorsCUDA() = default; SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete; @@ -27,9 +27,9 @@ class SiPixelDigiErrorsCUDA { using HostDataError = std::pair, cudautils::host::unique_ptr>; - HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const; + HostDataError dataErrorToHostAsync(cudaStream_t stream) const; - void copyErrorToHostAsync(cuda::stream_t<>& stream); + void copyErrorToHostAsync(cudaStream_t stream); private: cudautils::device::unique_ptr data_d; diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index 0d0e025ef52da..47efe634ad93d 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -3,14 +3,14 @@ #include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" - -#include #include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h" +#include + class SiPixelDigisCUDA { public: SiPixelDigisCUDA() = default; - explicit SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<> &stream); + explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream); ~SiPixelDigisCUDA() = default; SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete; @@ -50,10 +50,10 @@ class SiPixelDigisCUDA { uint32_t const *c_pdigi() const { return pdigi_d.get(); } uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); } - cudautils::host::unique_ptr adcToHostAsync(cuda::stream_t<> &stream) const; - cudautils::host::unique_ptr clusToHostAsync(cuda::stream_t<> &stream) const; - cudautils::host::unique_ptr pdigiToHostAsync(cuda::stream_t<> &stream) const; - cudautils::host::unique_ptr rawIdArrToHostAsync(cuda::stream_t<> &stream) const; + cudautils::host::unique_ptr adcToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr clusToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const; class DeviceConstView { public: diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index 2aa2b24ddf316..7640348c15f08 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -7,7 +7,7 @@ #include -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream) +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream) : formatterErrors_h(std::move(errors)) { error_d = cudautils::make_device_unique>(stream); data_d = cudautils::make_device_unique(maxFedWords, stream); @@ -22,11 +22,11 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterE cudautils::copyAsync(error_d, error_h, stream); } -void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cuda::stream_t<>& stream) { +void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { cudautils::copyAsync(error_h, error_d, stream); } -SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cuda::stream_t<>& stream) const { +SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { // On one hand size() could be sufficient. On the other hand, if // someone copies the SimpleVector<>, (s)he might expect the data // buffer to actually have space for capacity() elements. diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index fe4aedd686546..a8aab7ab5a4b8 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -4,7 +4,7 @@ #include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" -SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) { +SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) { xx_d = cudautils::make_device_unique(maxFedWords, stream); yy_d = cudautils::make_device_unique(maxFedWords, stream); adc_d = cudautils::make_device_unique(maxFedWords, stream); @@ -25,25 +25,25 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) cudautils::copyAsync(view_d, view, stream); } -cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, adc_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, clus_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, pdigi_d, nDigis(), stream); return ret; } -cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cuda::stream_t<>& stream) const { +cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const { auto ret = cudautils::make_host_unique(nDigis(), stream); cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream); return ret; diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml index 8af605862d1ac..8dc569d40b6c4 100644 --- a/CUDADataFormats/TrackingRecHit/BuildFile.xml +++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml @@ -1,4 +1,4 @@ - + diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index a3d6354732ecf..1b27558efe3ee 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -17,7 +17,7 @@ class TrackingRecHit2DHeterogeneous { explicit TrackingRecHit2DHeterogeneous(uint32_t nHits, pixelCPEforGPU::ParamsOnGPU const* cpeParams, uint32_t const* hitsModuleStart, - cuda::stream_t<>& stream); + cudaStream_t stream); ~TrackingRecHit2DHeterogeneous() = default; @@ -37,9 +37,9 @@ class TrackingRecHit2DHeterogeneous { auto iphi() { return m_iphi; } // only the local coord and detector index - cudautils::host::unique_ptr localCoordToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr detIndexToHostAsync(cuda::stream_t<>& stream) const; - cudautils::host::unique_ptr hitsModuleStartToHostAsync(cuda::stream_t<>& stream) const; + cudautils::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const; + cudautils::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const; private: static constexpr uint32_t n16 = 4; @@ -71,7 +71,7 @@ template TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits, pixelCPEforGPU::ParamsOnGPU const* cpeParams, uint32_t const* hitsModuleStart, - cuda::stream_t<>& stream) + cudaStream_t stream) : m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) { auto view = Traits::template make_host_unique(stream); diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp index cf2221dc71b95..135545ba65832 100644 --- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp +++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp @@ -1,6 +1,7 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h" #include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h" #include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" namespace testTrackingRecHit2D { @@ -11,13 +12,18 @@ namespace testTrackingRecHit2D { int main() { exitSansCUDADevices(); - auto current_device = cuda::device::current::get(); - auto stream = current_device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream); + cudaStream_t stream; + cudaCheck(cudaStreamCreate(&stream)); - auto nHits = 200; - TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); + // inner scope to deallocate memory before destroying the stream + { + auto nHits = 200; + TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream); - testTrackingRecHit2D::runKernels(tkhit.view()); + testTrackingRecHit2D::runKernels(tkhit.view()); + } + + cudaCheck(cudaStreamDestroy(stream)); return 0; } diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml index 69d258da21ed1..02a36e17ed732 100644 --- a/CalibTracker/SiPixelESProducers/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/BuildFile.xml @@ -9,7 +9,7 @@ - + diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h index d6d2e1a262dc8..00386b06c2e6c 100644 --- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h +++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h @@ -16,7 +16,7 @@ class SiPixelGainCalibrationForHLTGPU { explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT &gains, const TrackerGeometry &geom); ~SiPixelGainCalibrationForHLTGPU(); - const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; + const SiPixelGainForHLTonGPU *getGPUProductAsync(cudaStream_t cudaStream) const; const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_; } const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_; } diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml index b33657e273036..57bf68a1b7518 100644 --- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml +++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml @@ -7,7 +7,7 @@ - + diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc index d94e9f1959190..9758731f0bb8e 100644 --- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc +++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc @@ -84,23 +84,23 @@ SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() { cudaCheck(cudaFree(gainDataOnGPU)); } -const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { +const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU))); cudaCheck( cudaMalloc((void**)&data.gainDataOnGPU, this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<> // gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory cudaCheck(cudaMemcpyAsync( - data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id())); + data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync( - data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id())); + data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals), &(data.gainDataOnGPU), sizeof(SiPixelGainForHLTonGPU_DecodingStructure*), cudaMemcpyDefault, - stream.id())); + stream)); }); return data.gainForHLTonGPU; } diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 4d2b5ebf45542..212738e941533 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,7 +1,7 @@ - + diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu index 556c0dcec4fb1..8e0d5123e6ecc 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu @@ -536,7 +536,7 @@ namespace pixelgpudetails { bool useQualityInfo, bool includeErrors, bool debug, - cuda::stream_t<> &stream) { + cudaStream_t stream) { nDigis = wordCounter; #ifdef GPU_DEBUG @@ -561,13 +561,13 @@ namespace pixelgpudetails { auto word_d = cudautils::make_device_unique(wordCounter, stream); auto fedId_d = cudautils::make_device_unique(wordCounter, stream); + cudaCheck( + cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync( - word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream.id())); - cudaCheck(cudaMemcpyAsync( - fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id())); + fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream)); // Launch rawToDigi kernel - RawToDigi_kernel<<>>( + RawToDigi_kernel<<>>( cablingMap, modToUnp, wordCounter, @@ -602,15 +602,15 @@ namespace pixelgpudetails { int blocks = (std::max(int(wordCounter), int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock; - gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(), - digis_d.c_xx(), - digis_d.c_yy(), - digis_d.adc(), - gains, - wordCounter, - clusters_d.moduleStart(), - clusters_d.clusInModule(), - clusters_d.clusModuleStart()); + gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(), + digis_d.c_xx(), + digis_d.c_yy(), + digis_d.adc(), + gains, + wordCounter, + clusters_d.moduleStart(), + clusters_d.clusInModule(), + clusters_d.clusModuleStart()); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaDeviceSynchronize(); @@ -622,27 +622,27 @@ namespace pixelgpudetails { << " threads\n"; #endif - countModules<<>>( + countModules<<>>( digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter); cudaCheck(cudaGetLastError()); // read the number of modules into a data member, used by getProduct()) cudaCheck(cudaMemcpyAsync( - &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id())); + &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream)); threadsPerBlock = 256; blocks = MaxNumModules; #ifdef GPU_DEBUG std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n"; #endif - findClus<<>>(digis_d.c_moduleInd(), - digis_d.c_xx(), - digis_d.c_yy(), - clusters_d.c_moduleStart(), - clusters_d.clusInModule(), - clusters_d.moduleId(), - digis_d.clus(), - wordCounter); + findClus<<>>(digis_d.c_moduleInd(), + digis_d.c_xx(), + digis_d.c_yy(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), + clusters_d.moduleId(), + digis_d.clus(), + wordCounter); cudaCheck(cudaGetLastError()); #ifdef GPU_DEBUG cudaDeviceSynchronize(); @@ -650,13 +650,13 @@ namespace pixelgpudetails { #endif // apply charge cut - clusterChargeCut<<>>(digis_d.moduleInd(), - digis_d.c_adc(), - clusters_d.c_moduleStart(), - clusters_d.clusInModule(), - clusters_d.c_moduleId(), - digis_d.clus(), - wordCounter); + clusterChargeCut<<>>(digis_d.moduleInd(), + digis_d.c_adc(), + clusters_d.c_moduleStart(), + clusters_d.clusInModule(), + clusters_d.c_moduleId(), + digis_d.clus(), + wordCounter); cudaCheck(cudaGetLastError()); // count the module start indices already here (instead of @@ -665,14 +665,14 @@ namespace pixelgpudetails { // synchronization/ExternalWork // MUST be ONE block - fillHitsModuleStart<<<1, 1024, 0, stream.id()>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart()); + fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart()); // last element holds the number of all clusters cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]), clusters_d.clusModuleStart() + gpuClustering::MaxNumModules, sizeof(uint32_t), cudaMemcpyDefault, - stream.id())); + stream)); #ifdef GPU_DEBUG cudaDeviceSynchronize(); diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h index 6d53eaf6a71c0..f1ffb191e9959 100644 --- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h +++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h @@ -179,7 +179,7 @@ namespace pixelgpudetails { bool useQualityInfo, bool includeErrors, bool debug, - cuda::stream_t<>& stream); + cudaStream_t stream); std::pair getResults() { digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis); diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml index a3d2c6a35e642..a626c35727249 100644 --- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml @@ -13,7 +13,6 @@ - diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h index 5666fc1f8453d..575c72d33a69a 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h @@ -47,7 +47,7 @@ class PixelCPEFast final : public PixelCPEBase { // The return value can only be used safely in kernels launched on // the same cudaStream, or after cudaStreamSynchronize. - const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const; + const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cudaStream_t cudaStream) const; pixelCPEforGPU::ParamsOnGPU const &getCPUProduct() const { return cpuData_; } diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml index 9385896a5e287..0868b38c07652 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml +++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml @@ -9,6 +9,5 @@ - diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc index 27e2528ceb97c..f58d81faaeea1 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc @@ -151,8 +151,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv assert(numberOfClusters == int(hitsModuleStart[2000])); // output SoA - auto dummyStream = cuda::stream::wrap(0, 0, false); - auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, dummyStream); + auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, nullptr); if (0 == numberOfClusters) { iEvent.put(std::move(output)); diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc index 4aa3e60ae8320..356713ad4b45c 100644 --- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc +++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc @@ -70,8 +70,8 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf, }; } -const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const { - const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) { +const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t cudaStream) const { + const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) { // and now copy to device... cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams))); cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_detParams, @@ -81,27 +81,27 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream cudaCheck(cudaMalloc((void**)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU))); cudaCheck(cudaMemcpyAsync( - data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id())); + data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_commonParams, &this->m_commonParamsGPU, sizeof(pixelCPEforGPU::CommonParams), cudaMemcpyDefault, - stream.id())); + stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_averageGeometry, &this->m_averageGeometry, sizeof(pixelCPEforGPU::AverageGeometry), cudaMemcpyDefault, - stream.id())); + stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_layerGeometry, &this->m_layerGeometry, sizeof(pixelCPEforGPU::LayerGeometry), cudaMemcpyDefault, - stream.id())); + stream)); cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_detParams, this->m_detParamsGPU.data(), this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams), cudaMemcpyDefault, - stream.id())); + stream)); }); return data.d_paramsOnGPU; }