Skip to content

Commit

Permalink
Implement changes from the CUDA framework review (#429)
Browse files Browse the repository at this point in the history
Rename the cudautils namespace to cms::cuda or cms::cudatest, and drop the CUDA prefix from the symbols defined there.

Always record and query the CUDA event, to minimize need for error checking in CUDAScopedContextProduce destructor.

Add comments to highlight the pieces in CachingDeviceAllocator that have been changed wrt. cub.

Various other updates and clean up:
  - enable CUDA for compute capability 3.5.
  - clean up CUDAService, CUDA tests and plugins.
  - add CUDA existence protections to BuildFiles.
  - mark thread-safe static variables with CMS_THREAD_SAFE.
  • Loading branch information
makortel authored and fwyzard committed Dec 29, 2020
1 parent b3879eb commit e77a278
Show file tree
Hide file tree
Showing 25 changed files with 147 additions and 153 deletions.
2 changes: 2 additions & 0 deletions CUDADataFormats/Common/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
<iftool name="cuda-gcc-support">
<use name="HeterogeneousCore/CUDAUtilities"/>

<export>
<lib name="1"/>
</export>
</iftool>
10 changes: 5 additions & 5 deletions CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ class SiPixelClustersCUDA {
DeviceConstView *view() const { return view_d.get(); }

private:
cudautils::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module
cudautils::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module
cudautils::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module
cms::cuda::device::unique_ptr<uint32_t[]> moduleStart_d; // index of the first pixel of each module
cms::cuda::device::unique_ptr<uint32_t[]> clusInModule_d; // number of clusters found in each module
cms::cuda::device::unique_ptr<uint32_t[]> moduleId_d; // module id of each module

// originally from rechits
cudautils::device::unique_ptr<uint32_t[]> clusModuleStart_d; // index of the first cluster of each module
cms::cuda::device::unique_ptr<uint32_t[]> clusModuleStart_d; // index of the first cluster of each module

cudautils::device::unique_ptr<DeviceConstView> view_d; // "me" pointer
cms::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer

uint32_t nClusters_h;
};
Expand Down
14 changes: 7 additions & 7 deletions CUDADataFormats/SiPixelCluster/src/SiPixelClustersCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

SiPixelClustersCUDA::SiPixelClustersCUDA(size_t maxClusters, cudaStream_t stream) {
moduleStart_d = cudautils::make_device_unique<uint32_t[]>(maxClusters + 1, stream);
clusInModule_d = cudautils::make_device_unique<uint32_t[]>(maxClusters, stream);
moduleId_d = cudautils::make_device_unique<uint32_t[]>(maxClusters, stream);
clusModuleStart_d = cudautils::make_device_unique<uint32_t[]>(maxClusters + 1, stream);
moduleStart_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters + 1, stream);
clusInModule_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
moduleId_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters, stream);
clusModuleStart_d = cms::cuda::make_device_unique<uint32_t[]>(maxClusters + 1, stream);

auto view = cudautils::make_host_unique<DeviceConstView>(stream);
auto view = cms::cuda::make_host_unique<DeviceConstView>(stream);
view->moduleStart_ = moduleStart_d.get();
view->clusInModule_ = clusInModule_d.get();
view->moduleId_ = moduleId_d.get();
view->clusModuleStart_ = clusModuleStart_d.get();

view_d = cudautils::make_device_unique<DeviceConstView>(stream);
cudautils::copyAsync(view_d, view, stream);
view_d = cms::cuda::make_device_unique<DeviceConstView>(stream);
cms::cuda::copyAsync(view_d, view, stream);
}
2 changes: 1 addition & 1 deletion CUDADataFormats/SiPixelCluster/src/classes.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CUDADataFormats_SiPixelCluster_classes_h
#define CUDADataFormats_SiPixelCluster_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/SiPixelCluster/interface/SiPixelClustersCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"

Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/SiPixelCluster/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<lcgdict>
<class name="CUDAProduct<SiPixelClustersCUDA>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<SiPixelClustersCUDA>>" persistent="false"/>
<class name="cms::cuda::Product<SiPixelClustersCUDA>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<SiPixelClustersCUDA>>" persistent="false"/>
</lcgdict>
8 changes: 4 additions & 4 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ class SiPixelDigiErrorsCUDA {
GPU::SimpleVector<PixelErrorCompact> const* c_error() const { return error_d.get(); }

using HostDataError =
std::pair<GPU::SimpleVector<PixelErrorCompact>, cudautils::host::unique_ptr<PixelErrorCompact[]>>;
std::pair<GPU::SimpleVector<PixelErrorCompact>, cms::cuda::host::unique_ptr<PixelErrorCompact[]>>;
HostDataError dataErrorToHostAsync(cudaStream_t stream) const;

void copyErrorToHostAsync(cudaStream_t stream);

private:
cudautils::device::unique_ptr<PixelErrorCompact[]> data_d;
cudautils::device::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_d;
cudautils::host::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_h;
cms::cuda::device::unique_ptr<PixelErrorCompact[]> data_d;
cms::cuda::device::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_d;
cms::cuda::host::unique_ptr<GPU::SimpleVector<PixelErrorCompact>> error_h;
PixelFormatterErrors formatterErrors_h;
};

Expand Down
24 changes: 12 additions & 12 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ class SiPixelDigisCUDA {
uint32_t const *c_pdigi() const { return pdigi_d.get(); }
uint32_t const *c_rawIdArr() const { return rawIdArr_d.get(); }

cudautils::host::unique_ptr<uint16_t[]> adcToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<int32_t[]> clusToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<uint32_t[]> pdigiToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<uint32_t[]> rawIdArrToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint16_t[]> adcToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<int32_t[]> clusToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint32_t[]> pdigiToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint32_t[]> rawIdArrToHostAsync(cudaStream_t stream) const;

class DeviceConstView {
public:
Expand All @@ -79,17 +79,17 @@ class SiPixelDigisCUDA {

private:
// These are consumed by downstream device code
cudautils::device::unique_ptr<uint16_t[]> xx_d; // local coordinates of each pixel
cudautils::device::unique_ptr<uint16_t[]> yy_d; //
cudautils::device::unique_ptr<uint16_t[]> adc_d; // ADC of each pixel
cudautils::device::unique_ptr<uint16_t[]> moduleInd_d; // module id of each pixel
cudautils::device::unique_ptr<int32_t[]> clus_d; // cluster id of each pixel
cudautils::device::unique_ptr<DeviceConstView> view_d; // "me" pointer
cms::cuda::device::unique_ptr<uint16_t[]> xx_d; // local coordinates of each pixel
cms::cuda::device::unique_ptr<uint16_t[]> yy_d; //
cms::cuda::device::unique_ptr<uint16_t[]> adc_d; // ADC of each pixel
cms::cuda::device::unique_ptr<uint16_t[]> moduleInd_d; // module id of each pixel
cms::cuda::device::unique_ptr<int32_t[]> clus_d; // cluster id of each pixel
cms::cuda::device::unique_ptr<DeviceConstView> view_d; // "me" pointer

// These are for CPU output; should we (eventually) place them to a
// separate product?
cudautils::device::unique_ptr<uint32_t[]> pdigi_d;
cudautils::device::unique_ptr<uint32_t[]> rawIdArr_d;
cms::cuda::device::unique_ptr<uint32_t[]> pdigi_d;
cms::cuda::device::unique_ptr<uint32_t[]> rawIdArr_d;

uint32_t nModules_h = 0;
uint32_t nDigis_h = 0;
Expand Down
16 changes: 8 additions & 8 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@

SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, PixelFormatterErrors errors, cudaStream_t stream)
: formatterErrors_h(std::move(errors)) {
error_d = cudautils::make_device_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
data_d = cudautils::make_device_unique<PixelErrorCompact[]>(maxFedWords, stream);
error_d = cms::cuda::make_device_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
data_d = cms::cuda::make_device_unique<PixelErrorCompact[]>(maxFedWords, stream);

cudautils::memsetAsync(data_d, 0x00, maxFedWords, stream);
cms::cuda::memsetAsync(data_d, 0x00, maxFedWords, stream);

error_h = cudautils::make_host_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
error_h = cms::cuda::make_host_unique<GPU::SimpleVector<PixelErrorCompact>>(stream);
GPU::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
assert(error_h->empty());
assert(error_h->capacity() == static_cast<int>(maxFedWords));

cudautils::copyAsync(error_d, error_h, stream);
cms::cuda::copyAsync(error_d, error_h, stream);
}

void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cudautils::copyAsync(error_h, error_d, stream);
cms::cuda::copyAsync(error_h, error_d, stream);
}

SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
// On one hand size() could be sufficient. On the other hand, if
// someone copies the SimpleVector<>, (s)he might expect the data
// buffer to actually have space for capacity() elements.
auto data = cudautils::make_host_unique<PixelErrorCompact[]>(error_h->capacity(), stream);
auto data = cms::cuda::make_host_unique<PixelErrorCompact[]>(error_h->capacity(), stream);

// but transfer only the required amount
if (not error_h->empty()) {
cudautils::copyAsync(data, data_d, error_h->size(), stream);
cms::cuda::copyAsync(data, data_d, error_h->size(), stream);
}
auto err = *error_h;
err.set_data(data.get());
Expand Down
44 changes: 22 additions & 22 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,46 +5,46 @@
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"

SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) {
xx_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
yy_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
adc_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
moduleInd_d = cudautils::make_device_unique<uint16_t[]>(maxFedWords, stream);
clus_d = cudautils::make_device_unique<int32_t[]>(maxFedWords, stream);
xx_d = cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream);
yy_d = cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream);
adc_d = cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream);
moduleInd_d = cms::cuda::make_device_unique<uint16_t[]>(maxFedWords, stream);
clus_d = cms::cuda::make_device_unique<int32_t[]>(maxFedWords, stream);

pdigi_d = cudautils::make_device_unique<uint32_t[]>(maxFedWords, stream);
rawIdArr_d = cudautils::make_device_unique<uint32_t[]>(maxFedWords, stream);
pdigi_d = cms::cuda::make_device_unique<uint32_t[]>(maxFedWords, stream);
rawIdArr_d = cms::cuda::make_device_unique<uint32_t[]>(maxFedWords, stream);

auto view = cudautils::make_host_unique<DeviceConstView>(stream);
auto view = cms::cuda::make_host_unique<DeviceConstView>(stream);
view->xx_ = xx_d.get();
view->yy_ = yy_d.get();
view->adc_ = adc_d.get();
view->moduleInd_ = moduleInd_d.get();
view->clus_ = clus_d.get();

view_d = cudautils::make_device_unique<DeviceConstView>(stream);
cudautils::copyAsync(view_d, view, stream);
view_d = cms::cuda::make_device_unique<DeviceConstView>(stream);
cms::cuda::copyAsync(view_d, view, stream);
}

cudautils::host::unique_ptr<uint16_t[]> SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<uint16_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, adc_d, nDigis(), stream);
cms::cuda::host::unique_ptr<uint16_t[]> SiPixelDigisCUDA::adcToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint16_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, adc_d, nDigis(), stream);
return ret;
}

cudautils::host::unique_ptr<int32_t[]> SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<int32_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, clus_d, nDigis(), stream);
cms::cuda::host::unique_ptr<int32_t[]> SiPixelDigisCUDA::clusToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<int32_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, clus_d, nDigis(), stream);
return ret;
}

cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<uint32_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, pdigi_d, nDigis(), stream);
cms::cuda::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::pdigiToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint32_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, pdigi_d, nDigis(), stream);
return ret;
}

cudautils::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const {
auto ret = cudautils::make_host_unique<uint32_t[]>(nDigis(), stream);
cudautils::copyAsync(ret, rawIdArr_d, nDigis(), stream);
cms::cuda::host::unique_ptr<uint32_t[]> SiPixelDigisCUDA::rawIdArrToHostAsync(cudaStream_t stream) const {
auto ret = cms::cuda::make_host_unique<uint32_t[]>(nDigis(), stream);
cms::cuda::copyAsync(ret, rawIdArr_d, nDigis(), stream);
return ret;
}
2 changes: 1 addition & 1 deletion CUDADataFormats/SiPixelDigi/src/classes.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CUDADataFormats_SiPixelDigi_classes_h
#define CUDADataFormats_SiPixelDigi_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"
Expand Down
8 changes: 4 additions & 4 deletions CUDADataFormats/SiPixelDigi/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<lcgdict>
<class name="CUDAProduct<SiPixelDigisCUDA>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<SiPixelDigisCUDA>>" persistent="false"/>
<class name="cms::cuda::Product<SiPixelDigisCUDA>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<SiPixelDigisCUDA>>" persistent="false"/>

<class name="CUDAProduct<SiPixelDigiErrorsCUDA>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<SiPixelDigiErrorsCUDA>>" persistent="false"/>
<class name="cms::cuda::Product<SiPixelDigiErrorsCUDA>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<SiPixelDigiErrorsCUDA>>" persistent="false"/>
</lcgdict>
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ class TrackingRecHit2DHeterogeneous {
auto iphi() { return m_iphi; }

// only the local coord and detector index
cudautils::host::unique_ptr<float[]> localCoordToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<uint16_t[]> detIndexToHostAsync(cudaStream_t stream) const;
cudautils::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<float[]> localCoordToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint16_t[]> detIndexToHostAsync(cudaStream_t stream) const;
cms::cuda::host::unique_ptr<uint32_t[]> hitsModuleStartToHostAsync(cudaStream_t stream) const;

private:
static constexpr uint32_t n16 = 4;
Expand Down Expand Up @@ -89,7 +89,7 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nH
constexpr
#endif
(std::is_same<Traits, cudaCompat::GPUTraits>::value) {
cudautils::copyAsync(m_view, view, stream);
cms::cuda::copyAsync(m_view, view, stream);
} else {
m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
}
Expand Down Expand Up @@ -136,7 +136,7 @@ TrackingRecHit2DHeterogeneous<Traits>::TrackingRecHit2DHeterogeneous(uint32_t nH
constexpr
#endif
(std::is_same<Traits, cudaCompat::GPUTraits>::value) {
cudautils::copyAsync(m_view, view, stream);
cms::cuda::copyAsync(m_view, view, stream);
} else {
m_view.reset(view.release()); // NOLINT: std::move() breaks CUDA version
}
Expand Down
2 changes: 1 addition & 1 deletion CUDADataFormats/TrackingRecHit/src/classes.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CUDADataFormats_SiPixelCluster_src_classes_h
#define CUDADataFormats_SiPixelCluster_src_classes_h

#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/Common/interface/HostProduct.h"
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "DataFormats/Common/interface/Wrapper.h"
Expand Down
4 changes: 2 additions & 2 deletions CUDADataFormats/TrackingRecHit/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<lcgdict>
<class name="TrackingRecHit2DCPU" persistent="false"/>
<class name="TrackingRecHit2DHost" persistent="false"/>
<class name="CUDAProduct<TrackingRecHit2DGPU>" persistent="false"/>
<class name="cms::cuda::Product<TrackingRecHit2DGPU>" persistent="false"/>
<class name="edm::Wrapper<TrackingRecHit2DCPU>" persistent="false"/>
<class name="edm::Wrapper<TrackingRecHit2DHost>" persistent="false"/>
<class name="edm::Wrapper<CUDAProduct<TrackingRecHit2DGPU>>" persistent="false"/>
<class name="edm::Wrapper<cms::cuda::Product<TrackingRecHit2DGPU>>" persistent="false"/>
<class name="HostProduct<unsigned int[]>" persistent="false"/>
<class name="edm::Wrapper<HostProduct<unsigned int[]>>" persistent="false"/>
</lcgdict>
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DCUDA.h"
#include "HeterogeneousCore/CUDAUtilities/interface/copyAsync.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"

namespace testTrackingRecHit2D {
Expand All @@ -10,7 +10,7 @@ namespace testTrackingRecHit2D {
}

int main() {
requireCUDADevices();
cms::cudatest::requireDevices();

cudaStream_t stream;
cudaCheck(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h

#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAESProduct.h"
#include "HeterogeneousCore/CUDACore/interface/ESProduct.h"

class SiPixelGainCalibrationForHLT;
class SiPixelGainForHLTonGPU;
Expand All @@ -26,7 +26,7 @@ class SiPixelGainCalibrationForHLTGPU {
SiPixelGainForHLTonGPU *gainForHLTonGPU = nullptr;
SiPixelGainForHLTonGPU_DecodingStructure *gainDataOnGPU = nullptr;
};
CUDAESProduct<GPUData> gpuData_;
cms::cuda::ESProduct<GPUData> gpuData_;
};

#endif // CalibTracker_SiPixelESProducers_interface_SiPixelGainCalibrationForHLTGPU_h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "CUDADataFormats/Common/interface/CUDAProduct.h"
#include "CUDADataFormats/Common/interface/Product.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h"
#include "DataFormats/SiPixelDigi/interface/SiPixelDigiErrorsSoA.h"
#include "FWCore/Framework/interface/EventSetup.h"
Expand All @@ -8,7 +8,7 @@
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h"
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"

class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer<edm::ExternalWork> {
Expand All @@ -24,16 +24,17 @@ class SiPixelDigiErrorsSoAFromCUDA : public edm::stream::EDProducer<edm::Externa
edm::WaitingTaskWithArenaHolder waitingTaskHolder) override;
void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override;

edm::EDGetTokenT<CUDAProduct<SiPixelDigiErrorsCUDA>> digiErrorGetToken_;
edm::EDGetTokenT<cms::cuda::Product<SiPixelDigiErrorsCUDA>> digiErrorGetToken_;
edm::EDPutTokenT<SiPixelDigiErrorsSoA> digiErrorPutToken_;

cudautils::host::unique_ptr<PixelErrorCompact[]> data_;
cms::cuda::host::unique_ptr<PixelErrorCompact[]> data_;
GPU::SimpleVector<PixelErrorCompact> error_;
const PixelFormatterErrors* formatterErrors_ = nullptr;
};

SiPixelDigiErrorsSoAFromCUDA::SiPixelDigiErrorsSoAFromCUDA(const edm::ParameterSet& iConfig)
: digiErrorGetToken_(consumes<CUDAProduct<SiPixelDigiErrorsCUDA>>(iConfig.getParameter<edm::InputTag>("src"))),
: digiErrorGetToken_(
consumes<cms::cuda::Product<SiPixelDigiErrorsCUDA>>(iConfig.getParameter<edm::InputTag>("src"))),
digiErrorPutToken_(produces<SiPixelDigiErrorsSoA>()) {}

void SiPixelDigiErrorsSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
Expand All @@ -46,7 +47,7 @@ void SiPixelDigiErrorsSoAFromCUDA::acquire(const edm::Event& iEvent,
const edm::EventSetup& iSetup,
edm::WaitingTaskWithArenaHolder waitingTaskHolder) {
// Do the transfer in a CUDA stream parallel to the computation CUDA stream
CUDAScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)};
cms::cuda::ScopedContextAcquire ctx{iEvent.streamID(), std::move(waitingTaskHolder)};

const auto& gpuDigiErrors = ctx.get(iEvent, digiErrorGetToken_);

Expand Down
Loading

0 comments on commit e77a278

Please sign in to comment.