diff --git a/CUDADataFormats/SiPixelCluster/BuildFile.xml b/CUDADataFormats/SiPixelCluster/BuildFile.xml
index 6db6a1f62cda1..5e401d215c4eb 100644
--- a/CUDADataFormats/SiPixelCluster/BuildFile.xml
+++ b/CUDADataFormats/SiPixelCluster/BuildFile.xml
@@ -1,7 +1,7 @@
-
+
diff --git a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
index 1db6e29459ea7..d3650e164d44e 100644
--- a/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
+++ b/CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
@@ -3,15 +3,14 @@
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-
-#include
-
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+#include
+
class SiPixelClustersCUDA {
public:
SiPixelClustersCUDA() = default;
- explicit SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<> &stream);
+ explicit SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream);
~SiPixelClustersCUDA() = default;
SiPixelClustersCUDA(const SiPixelClustersCUDA &) = delete;
diff --git a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
index 4a145bb1231c2..c814cd4a2e131 100644
--- a/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
+++ b/CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
@@ -4,7 +4,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cuda::stream_t<>& stream) {
+SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) {
moduleStart_d = cudautils::make_device_unique(maxClusters + 1, stream);
clusInModule_d = cudautils::make_device_unique(maxClusters, stream);
moduleId_d = cudautils::make_device_unique(maxClusters, stream);
diff --git a/CUDADataFormats/SiPixelDigi/BuildFile.xml b/CUDADataFormats/SiPixelDigi/BuildFile.xml
index c29c9c9b9f44d..ee357e2d4e157 100644
--- a/CUDADataFormats/SiPixelDigi/BuildFile.xml
+++ b/CUDADataFormats/SiPixelDigi/BuildFile.xml
@@ -2,7 +2,7 @@
-
+
diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
index 7e016efb708de..7c18d58a3fc12 100644
--- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
+++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
@@ -6,12 +6,12 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h"
-#include
+#include
class SiPixelDigiErrorsCUDA {
public:
SiPixelDigiErrorsCUDA() = default;
- explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream);
+ explicit SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream);
~SiPixelDigiErrorsCUDA() = default;
SiPixelDigiErrorsCUDA(const SiPixelDigiErrorsCUDA&) = delete;
@@ -27,9 +27,9 @@ class SiPixelDigiErrorsCUDA {
using HostDataError =
std::pair, cudautils::host::unique_ptr>;
- HostDataError dataErrorToHostAsync(cuda::stream_t<>& stream) const;
+ HostDataError dataErrorToHostAsync(cudaStream_t stream) const;
- void copyErrorToHostAsync(cuda::stream_t<>& stream);
+ void copyErrorToHostAsync(cudaStream_t stream);
private:
cudautils::device::unique_ptr data_d;
diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
index 0d0e025ef52da..47efe634ad93d 100644
--- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
+++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
@@ -3,14 +3,14 @@
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
-
-#include
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCompat.h"
+#include
+
class SiPixelDigisCUDA {
public:
SiPixelDigisCUDA() = default;
- explicit SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<> &stream);
+ explicit SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream);
~SiPixelDigisCUDA() = default;
SiPixelDigisCUDA(const SiPixelDigisCUDA &) = delete;
@@ -50,10 +50,10 @@ class SiPixelDigisCUDA {
uint32_t const *c_pdigi() const { return pdigi_d.get(); }
uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); }
- cudautils::host::unique_ptr adcToHostAsync(cuda::stream_t<> &stream) const;
- cudautils::host::unique_ptr clusToHostAsync(cuda::stream_t<> &stream) const;
- cudautils::host::unique_ptr pdigiToHostAsync(cuda::stream_t<> &stream) const;
- cudautils::host::unique_ptr rawIdArrToHostAsync(cuda::stream_t<> &stream) const;
+ cudautils::host::unique_ptr adcToHostAsync(cudaStream_t stream) const;
+ cudautils::host::unique_ptr clusToHostAsync(cudaStream_t stream) const;
+ cudautils::host::unique_ptr pdigiToHostAsync(cudaStream_t stream) const;
+ cudautils::host::unique_ptr rawIdArrToHostAsync(cudaStream_t stream) const;
class DeviceConstView {
public:
diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
index 2aa2b24ddf316..7640348c15f08 100644
--- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
+++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
@@ -7,7 +7,7 @@
#include
-SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cuda::stream_t<>& stream)
+SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream)
: formatterErrors_h(std::move(errors)) {
error_d = cudautils::make_device_unique>(stream);
data_d = cudautils::make_device_unique(maxFedWords, stream);
@@ -22,11 +22,11 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterE
cudautils::copyAsync(error_d, error_h, stream);
}
-void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cuda::stream_t<>& stream) {
+void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cudautils::copyAsync(error_h, error_d, stream);
}
-SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cuda::stream_t<>& stream) const {
+SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
// On one hand size() could be sufficient. On the other hand, if
// someone copies the SimpleVector<>, (s)he might expect the data
// buffer to actually have space for capacity() elements.
diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
index fe4aedd686546..a8aab7ab5a4b8 100644
--- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
+++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
@@ -4,7 +4,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
-SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream) {
+SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) {
xx_d = cudautils::make_device_unique(maxFedWords, stream);
yy_d = cudautils::make_device_unique(maxFedWords, stream);
adc_d = cudautils::make_device_unique(maxFedWords, stream);
@@ -25,25 +25,25 @@ SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cuda::stream_t<>& stream)
cudautils::copyAsync(view_d, view, stream);
}
-cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cuda::stream_t<>& stream) const {
+cudautils::host::unique_ptr SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique(nDigis(), stream);
cudautils::copyAsync(ret, adc_d, nDigis(), stream);
return ret;
}
-cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cuda::stream_t<>& stream) const {
+cudautils::host::unique_ptr SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique(nDigis(), stream);
cudautils::copyAsync(ret, clus_d, nDigis(), stream);
return ret;
}
-cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cuda::stream_t<>& stream) const {
+cudautils::host::unique_ptr SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique(nDigis(), stream);
cudautils::copyAsync(ret, pdigi_d, nDigis(), stream);
return ret;
}
-cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cuda::stream_t<>& stream) const {
+cudautils::host::unique_ptr SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique(nDigis(), stream);
cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream);
return ret;
diff --git a/CUDADataFormats/TrackingRecHit/BuildFile.xml b/CUDADataFormats/TrackingRecHit/BuildFile.xml
index 8af605862d1ac..8dc569d40b6c4 100644
--- a/CUDADataFormats/TrackingRecHit/BuildFile.xml
+++ b/CUDADataFormats/TrackingRecHit/BuildFile.xml
@@ -1,4 +1,4 @@
-
+
diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
index a3d6354732ecf..1b27558efe3ee 100644
--- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
+++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
@@ -17,7 +17,7 @@ class TrackingRecHit2DHeterogeneous {
explicit TrackingRecHit2DHeterogeneous(uint32_t nHits,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
uint32_t const* hitsModuleStart,
- cuda::stream_t<>& stream);
+ cudaStream_t stream);
~TrackingRecHit2DHeterogeneous() = default;
@@ -37,9 +37,9 @@ class TrackingRecHit2DHeterogeneous {
auto iphi() { return m_iphi; }
// only the local coord and detector index
- cudautils::host::unique_ptr localCoordToHostAsync(cuda::stream_t<>& stream) const;
- cudautils::host::unique_ptr detIndexToHostAsync(cuda::stream_t<>& stream) const;
- cudautils::host::unique_ptr hitsModuleStartToHostAsync(cuda::stream_t<>& stream) const;
+ cudautils::host::unique_ptr localCoordToHostAsync(cudaStream_t stream) const;
+ cudautils::host::unique_ptr detIndexToHostAsync(cudaStream_t stream) const;
+ cudautils::host::unique_ptr hitsModuleStartToHostAsync(cudaStream_t stream) const;
private:
static constexpr uint32_t n16 = 4;
@@ -71,7 +71,7 @@ template
TrackingRecHit2DHeterogeneous::TrackingRecHit2DHeterogeneous(uint32_t nHits,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
uint32_t const* hitsModuleStart,
- cuda::stream_t<>& stream)
+ cudaStream_t stream)
: m_nHits(nHits), m_hitsModuleStart(hitsModuleStart) {
auto view = Traits::template make_host_unique(stream);
diff --git a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp
index cf2221dc71b95..135545ba65832 100644
--- a/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp
+++ b/CUDADataFormats/TrackingRecHit/test/TrackingRecHit2DCUDA_t.cpp
@@ -1,6 +1,7 @@
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
+#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
namespace testTrackingRecHit2D {
@@ -11,13 +12,18 @@ namespace testTrackingRecHit2D {
int main() {
exitSansCUDADevices();
- auto current_device = cuda::device::current::get();
- auto stream = current_device.create_stream(cuda::stream::implicitly_synchronizes_with_default_stream);
+ cudaStream_t stream;
+ cudaCheck(cudaStreamCreate(&stream));
- auto nHits = 200;
- TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream);
+ // inner scope to deallocate memory before destroying the stream
+ {
+ auto nHits = 200;
+ TrackingRecHit2DCUDA tkhit(nHits, nullptr, nullptr, stream);
- testTrackingRecHit2D::runKernels(tkhit.view());
+ testTrackingRecHit2D::runKernels(tkhit.view());
+ }
+
+ cudaCheck(cudaStreamDestroy(stream));
return 0;
}
diff --git a/CalibTracker/SiPixelESProducers/BuildFile.xml b/CalibTracker/SiPixelESProducers/BuildFile.xml
index 69d258da21ed1..02a36e17ed732 100644
--- a/CalibTracker/SiPixelESProducers/BuildFile.xml
+++ b/CalibTracker/SiPixelESProducers/BuildFile.xml
@@ -9,7 +9,7 @@
-
+
diff --git a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h
index d6d2e1a262dc8..00386b06c2e6c 100644
--- a/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h
+++ b/CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTGPU.h
@@ -16,7 +16,7 @@ class SiPixelGainCalibrationForHLTGPU {
explicit SiPixelGainCalibrationForHLTGPU(const SiPixelGainCalibrationForHLT &gains, const TrackerGeometry &geom);
~SiPixelGainCalibrationForHLTGPU();
- const SiPixelGainForHLTonGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const;
+ const SiPixelGainForHLTonGPU *getGPUProductAsync(cudaStream_t cudaStream) const;
const SiPixelGainForHLTonGPU *getCPUProduct() const { return gainForHLTonHost_; }
const SiPixelGainCalibrationForHLT *getOriginalProduct() { return gains_; }
diff --git a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
index b33657e273036..57bf68a1b7518 100644
--- a/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
+++ b/CalibTracker/SiPixelESProducers/plugins/BuildFile.xml
@@ -7,7 +7,7 @@
-
+
diff --git a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc
index d94e9f1959190..9758731f0bb8e 100644
--- a/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc
+++ b/CalibTracker/SiPixelESProducers/src/SiPixelGainCalibrationForHLTGPU.cc
@@ -84,23 +84,23 @@ SiPixelGainCalibrationForHLTGPU::GPUData::~GPUData() {
cudaCheck(cudaFree(gainDataOnGPU));
}
-const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
- const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
+const SiPixelGainForHLTonGPU* SiPixelGainCalibrationForHLTGPU::getGPUProductAsync(cudaStream_t cudaStream) const {
+ const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) {
cudaCheck(cudaMalloc((void**)&data.gainForHLTonGPU, sizeof(SiPixelGainForHLTonGPU)));
cudaCheck(
cudaMalloc((void**)&data.gainDataOnGPU,
this->gains_->data().size())); // TODO: this could be changed to cuda::memory::device::unique_ptr<>
// gains.data().data() is used also for non-GPU code, we cannot allocate it on aligned and write-combined memory
cudaCheck(cudaMemcpyAsync(
- data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream.id()));
+ data.gainDataOnGPU, this->gains_->data().data(), this->gains_->data().size(), cudaMemcpyDefault, stream));
cudaCheck(cudaMemcpyAsync(
- data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream.id()));
+ data.gainForHLTonGPU, this->gainForHLTonHost_, sizeof(SiPixelGainForHLTonGPU), cudaMemcpyDefault, stream));
cudaCheck(cudaMemcpyAsync(&(data.gainForHLTonGPU->v_pedestals),
&(data.gainDataOnGPU),
sizeof(SiPixelGainForHLTonGPU_DecodingStructure*),
cudaMemcpyDefault,
- stream.id()));
+ stream));
});
return data.gainForHLTonGPU;
}
diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml
index 4d2b5ebf45542..212738e941533 100644
--- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml
+++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml
@@ -1,7 +1,7 @@
-
+
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
index 556c0dcec4fb1..8e0d5123e6ecc 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
@@ -536,7 +536,7 @@ namespace pixelgpudetails {
bool useQualityInfo,
bool includeErrors,
bool debug,
- cuda::stream_t<> &stream) {
+ cudaStream_t stream) {
nDigis = wordCounter;
#ifdef GPU_DEBUG
@@ -561,13 +561,13 @@ namespace pixelgpudetails {
auto word_d = cudautils::make_device_unique(wordCounter, stream);
auto fedId_d = cudautils::make_device_unique(wordCounter, stream);
+ cudaCheck(
+ cudaMemcpyAsync(word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream));
cudaCheck(cudaMemcpyAsync(
- word_d.get(), wordFed.word(), wordCounter * sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
- cudaCheck(cudaMemcpyAsync(
- fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream.id()));
+ fedId_d.get(), wordFed.fedId(), wordCounter * sizeof(uint8_t) / 2, cudaMemcpyDefault, stream));
// Launch rawToDigi kernel
- RawToDigi_kernel<<>>(
+ RawToDigi_kernel<<>>(
cablingMap,
modToUnp,
wordCounter,
@@ -602,15 +602,15 @@ namespace pixelgpudetails {
int blocks =
(std::max(int(wordCounter), int(gpuClustering::MaxNumModules)) + threadsPerBlock - 1) / threadsPerBlock;
- gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(),
- digis_d.c_xx(),
- digis_d.c_yy(),
- digis_d.adc(),
- gains,
- wordCounter,
- clusters_d.moduleStart(),
- clusters_d.clusInModule(),
- clusters_d.clusModuleStart());
+ gpuCalibPixel::calibDigis<<>>(digis_d.moduleInd(),
+ digis_d.c_xx(),
+ digis_d.c_yy(),
+ digis_d.adc(),
+ gains,
+ wordCounter,
+ clusters_d.moduleStart(),
+ clusters_d.clusInModule(),
+ clusters_d.clusModuleStart());
cudaCheck(cudaGetLastError());
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
@@ -622,27 +622,27 @@ namespace pixelgpudetails {
<< " threads\n";
#endif
- countModules<<>>(
+ countModules<<>>(
digis_d.c_moduleInd(), clusters_d.moduleStart(), digis_d.clus(), wordCounter);
cudaCheck(cudaGetLastError());
// read the number of modules into a data member, used by getProduct())
cudaCheck(cudaMemcpyAsync(
- &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream.id()));
+ &(nModules_Clusters_h[0]), clusters_d.moduleStart(), sizeof(uint32_t), cudaMemcpyDefault, stream));
threadsPerBlock = 256;
blocks = MaxNumModules;
#ifdef GPU_DEBUG
std::cout << "CUDA findClus kernel launch with " << blocks << " blocks of " << threadsPerBlock << " threads\n";
#endif
- findClus<<>>(digis_d.c_moduleInd(),
- digis_d.c_xx(),
- digis_d.c_yy(),
- clusters_d.c_moduleStart(),
- clusters_d.clusInModule(),
- clusters_d.moduleId(),
- digis_d.clus(),
- wordCounter);
+ findClus<<>>(digis_d.c_moduleInd(),
+ digis_d.c_xx(),
+ digis_d.c_yy(),
+ clusters_d.c_moduleStart(),
+ clusters_d.clusInModule(),
+ clusters_d.moduleId(),
+ digis_d.clus(),
+ wordCounter);
cudaCheck(cudaGetLastError());
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
@@ -650,13 +650,13 @@ namespace pixelgpudetails {
#endif
// apply charge cut
- clusterChargeCut<<>>(digis_d.moduleInd(),
- digis_d.c_adc(),
- clusters_d.c_moduleStart(),
- clusters_d.clusInModule(),
- clusters_d.c_moduleId(),
- digis_d.clus(),
- wordCounter);
+ clusterChargeCut<<>>(digis_d.moduleInd(),
+ digis_d.c_adc(),
+ clusters_d.c_moduleStart(),
+ clusters_d.clusInModule(),
+ clusters_d.c_moduleId(),
+ digis_d.clus(),
+ wordCounter);
cudaCheck(cudaGetLastError());
// count the module start indices already here (instead of
@@ -665,14 +665,14 @@ namespace pixelgpudetails {
// synchronization/ExternalWork
// MUST be ONE block
- fillHitsModuleStart<<<1, 1024, 0, stream.id()>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart());
+ fillHitsModuleStart<<<1, 1024, 0, stream>>>(clusters_d.c_clusInModule(), clusters_d.clusModuleStart());
// last element holds the number of all clusters
cudaCheck(cudaMemcpyAsync(&(nModules_Clusters_h[1]),
clusters_d.clusModuleStart() + gpuClustering::MaxNumModules,
sizeof(uint32_t),
cudaMemcpyDefault,
- stream.id()));
+ stream));
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
index 6d53eaf6a71c0..f1ffb191e9959 100644
--- a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
+++ b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
@@ -179,7 +179,7 @@ namespace pixelgpudetails {
bool useQualityInfo,
bool includeErrors,
bool debug,
- cuda::stream_t<>& stream);
+ cudaStream_t stream);
std::pair getResults() {
digis_d.setNModulesDigis(nModules_Clusters_h[0], nDigis);
diff --git a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml
index a3d2c6a35e642..a626c35727249 100644
--- a/RecoLocalTracker/SiPixelRecHits/BuildFile.xml
+++ b/RecoLocalTracker/SiPixelRecHits/BuildFile.xml
@@ -13,7 +13,6 @@
-
diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h
index 5666fc1f8453d..575c72d33a69a 100644
--- a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h
+++ b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h
@@ -47,7 +47,7 @@ class PixelCPEFast final : public PixelCPEBase {
// The return value can only be used safely in kernels launched on
// the same cudaStream, or after cudaStreamSynchronize.
- const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<> &cudaStream) const;
+ const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cudaStream_t cudaStream) const;
pixelCPEforGPU::ParamsOnGPU const &getCPUProduct() const { return cpuData_; }
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml
index 9385896a5e287..0868b38c07652 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/BuildFile.xml
@@ -9,6 +9,5 @@
-
diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc
index 27e2528ceb97c..f58d81faaeea1 100644
--- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc
+++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitSoAFromLegacy.cc
@@ -151,8 +151,7 @@ void SiPixelRecHitSoAFromLegacy::produce(edm::StreamID streamID, edm::Event& iEv
assert(numberOfClusters == int(hitsModuleStart[2000]));
// output SoA
- auto dummyStream = cuda::stream::wrap(0, 0, false);
- auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, dummyStream);
+ auto output = std::make_unique(numberOfClusters, &cpeView, hitsModuleStart, nullptr);
if (0 == numberOfClusters) {
iEvent.put(std::move(output));
diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc
index 4aa3e60ae8320..356713ad4b45c 100644
--- a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc
+++ b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc
@@ -70,8 +70,8 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const& conf,
};
}
-const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
- const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cuda::stream_t<>& stream) {
+const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cudaStream_t cudaStream) const {
+ const auto& data = gpuData_.dataForCurrentDeviceAsync(cudaStream, [this](GPUData& data, cudaStream_t stream) {
// and now copy to device...
cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_commonParams, sizeof(pixelCPEforGPU::CommonParams)));
cudaCheck(cudaMalloc((void**)&data.h_paramsOnGPU.m_detParams,
@@ -81,27 +81,27 @@ const pixelCPEforGPU::ParamsOnGPU* PixelCPEFast::getGPUProductAsync(cuda::stream
cudaCheck(cudaMalloc((void**)&data.d_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU)));
cudaCheck(cudaMemcpyAsync(
- data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream.id()));
+ data.d_paramsOnGPU, &data.h_paramsOnGPU, sizeof(pixelCPEforGPU::ParamsOnGPU), cudaMemcpyDefault, stream));
cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_commonParams,
&this->m_commonParamsGPU,
sizeof(pixelCPEforGPU::CommonParams),
cudaMemcpyDefault,
- stream.id()));
+ stream));
cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_averageGeometry,
&this->m_averageGeometry,
sizeof(pixelCPEforGPU::AverageGeometry),
cudaMemcpyDefault,
- stream.id()));
+ stream));
cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_layerGeometry,
&this->m_layerGeometry,
sizeof(pixelCPEforGPU::LayerGeometry),
cudaMemcpyDefault,
- stream.id()));
+ stream));
cudaCheck(cudaMemcpyAsync((void*)data.h_paramsOnGPU.m_detParams,
this->m_detParamsGPU.data(),
this->m_detParamsGPU.size() * sizeof(pixelCPEforGPU::DetParams),
cudaMemcpyDefault,
- stream.id()));
+ stream));
});
return data.d_paramsOnGPU;
}