diff --git a/CUDADataFormats/Track/src/classes.h b/CUDADataFormats/Track/src/classes.h index 699e45ede05d4..8a38f939bc68b 100644 --- a/CUDADataFormats/Track/src/classes.h +++ b/CUDADataFormats/Track/src/classes.h @@ -1,7 +1,7 @@ -#ifndef CUDADataFormats__src_classes_h -#define CUDADataFormats__src_classes_h +#ifndef CUDADataFormats_Track_src_classes_h +#define CUDADataFormats_Track__src_classes_h -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" #include "CUDADataFormats/Common/interface/ArrayShadow.h" diff --git a/CUDADataFormats/Track/src/classes_def.xml b/CUDADataFormats/Track/src/classes_def.xml index a4c2e766582dd..7c73c676ad13d 100644 --- a/CUDADataFormats/Track/src/classes_def.xml +++ b/CUDADataFormats/Track/src/classes_def.xml @@ -1,6 +1,6 @@ - - + + diff --git a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h index 1fbe6a73da910..c8e92aca2628f 100644 --- a/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h +++ b/CUDADataFormats/Track/test/TrajectoryStateSOA_t.h @@ -51,13 +51,13 @@ __global__ void testTSSoA(TS* pts, int n) { } #ifdef __CUDACC__ -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #endif int main() { #ifdef __CUDACC__ - requireCUDADevices(); + cms::cudatest::requireDevices(); #endif TS ts; diff --git a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc index 3e73cfd7a4e96..c8310bc645db3 100644 --- a/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc +++ b/RecoPixelVertexing/PixelTrackFitting/plugins/PixelTrackSoAFromCUDA.cc @@ -1,6 +1,6 @@ #include -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "CUDADataFormats/Common/interface/HostProduct.h" #include "CUDADataFormats/Track/interface/PixelTrackHeterogeneous.h" #include "DataFormats/Common/interface/Handle.h" @@ -15,7 +15,7 @@ #include "FWCore/PluginManager/interface/ModuleDef.h" #include "FWCore/Utilities/interface/EDGetToken.h" #include "FWCore/Utilities/interface/InputTag.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" class PixelTrackSoAFromCUDA : public edm::stream::EDProducer { public: @@ -30,14 +30,14 @@ class PixelTrackSoAFromCUDA : public edm::stream::EDProducer edm::WaitingTaskWithArenaHolder waitingTaskHolder) override; void produce(edm::Event& iEvent, edm::EventSetup const& iSetup) override; - edm::EDGetTokenT> tokenCUDA_; + edm::EDGetTokenT> tokenCUDA_; edm::EDPutTokenT tokenSOA_; - cudautils::host::unique_ptr m_soa; + cms::cuda::host::unique_ptr m_soa; }; PixelTrackSoAFromCUDA::PixelTrackSoAFromCUDA(const edm::ParameterSet& iConfig) - : tokenCUDA_(consumes>(iConfig.getParameter("src"))), + : tokenCUDA_(consumes>(iConfig.getParameter("src"))), tokenSOA_(produces()) {} void PixelTrackSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -50,8 +50,8 @@ void PixelTrackSoAFromCUDA::fillDescriptions(edm::ConfigurationDescriptions& des void PixelTrackSoAFromCUDA::acquire(edm::Event const& iEvent, edm::EventSetup const& iSetup, edm::WaitingTaskWithArenaHolder waitingTaskHolder) { - CUDAProduct const& inputDataWrapped = iEvent.get(tokenCUDA_); - CUDAScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; + cms::cuda::Product const& inputDataWrapped = iEvent.get(tokenCUDA_); + cms::cuda::ScopedContextAcquire ctx{inputDataWrapped, std::move(waitingTaskHolder)}; auto const& inputData = ctx.get(inputDataWrapped); m_soa = inputData.toHostAsync(ctx.stream()); diff --git a/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu index e1606ab54c9c6..f0b641361aee4 100644 --- a/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu +++ b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPU.cu @@ -4,7 +4,7 @@ #include #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #ifdef USE_BL #include "RecoPixelVertexing/PixelTrackFitting/interface/BrokenLine.h" @@ -329,7 +329,7 @@ void testFit() { } int main(int argc, char* argv[]) { - requireCUDADevices(); + cms::cudatest::requireDevices(); testFit<4>(); testFit<3>(); diff --git a/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu index 7ef3f572603b0..6ac1088943305 100644 --- a/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu +++ b/RecoPixelVertexing/PixelTrackFitting/test/testEigenGPUNoFit.cu @@ -4,7 +4,7 @@ #include #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "HeterogeneousCore/CUDAUtilities/interface/requireCUDADevices.h" +#include "HeterogeneousCore/CUDAUtilities/interface/requireDevices.h" #include "test_common.h" using namespace Eigen; @@ -215,7 +215,7 @@ void testEigenvalues() { } int main(int argc, char *argv[]) { - requireCUDADevices(); + cms::cudatest::requireDevices(); testEigenvalues(); testInverse3x3(); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu index 660cf75e1f460..6fc537237286f 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu @@ -11,11 +11,11 @@ void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv, auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; // Fit internals - auto hitsGPU_ = cudautils::make_device_unique( + auto hitsGPU_ = cms::cuda::make_device_unique( maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix3xNd<4>) / sizeof(double), stream); - auto hits_geGPU_ = cudautils::make_device_unique( + auto hits_geGPU_ = cms::cuda::make_device_unique( maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix6x4f) / sizeof(float), stream); - auto fast_fit_resultsGPU_ = cudautils::make_device_unique( + auto fast_fit_resultsGPU_ = cms::cuda::make_device_unique( maxNumberOfConcurrentFits_ * sizeof(Rfit::Vector4d) / sizeof(double), stream); for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc index 11b644d466768..31e5070e55e05 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletCUDA.cc @@ -1,6 +1,6 @@ #include -#include "CUDADataFormats/Common/interface/CUDAProduct.h" +#include "CUDADataFormats/Common/interface/Product.h" #include "DataFormats/Common/interface/Handle.h" #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/Event.h" @@ -15,7 +15,7 @@ #include "FWCore/PluginManager/interface/ModuleDef.h" #include "FWCore/Utilities/interface/EDGetToken.h" #include "FWCore/Utilities/interface/RunningAverage.h" -#include "HeterogeneousCore/CUDACore/interface/CUDAScopedContext.h" +#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h" #include "RecoTracker/TkMSParametrization/interface/PixelRecoUtilities.h" #include "CAHitNtupletGeneratorOnGPU.h" @@ -34,8 +34,8 @@ class CAHitNtupletCUDA : public edm::global::EDProducer<> { bool m_OnGPU; - edm::EDGetTokenT> tokenHitGPU_; - edm::EDPutTokenT> tokenTrackGPU_; + edm::EDGetTokenT> tokenHitGPU_; + edm::EDPutTokenT> tokenTrackGPU_; edm::EDGetTokenT tokenHitCPU_; edm::EDPutTokenT tokenTrackCPU_; @@ -45,8 +45,9 @@ class CAHitNtupletCUDA : public edm::global::EDProducer<> { CAHitNtupletCUDA::CAHitNtupletCUDA(const edm::ParameterSet& iConfig) : m_OnGPU(iConfig.getParameter("onGPU")), gpuAlgo_(iConfig, consumesCollector()) { if (m_OnGPU) { - tokenHitGPU_ = consumes>(iConfig.getParameter("pixelRecHitSrc")); - tokenTrackGPU_ = produces>(); + tokenHitGPU_ = + consumes>(iConfig.getParameter("pixelRecHitSrc")); + tokenTrackGPU_ = produces>(); } else { tokenHitCPU_ = consumes(iConfig.getParameter("pixelRecHitSrc")); tokenTrackCPU_ = produces(); @@ -68,10 +69,10 @@ void CAHitNtupletCUDA::produce(edm::StreamID streamID, edm::Event& iEvent, const auto bf = 1. / PixelRecoUtilities::fieldInInvGev(es); if (m_OnGPU) { - edm::Handle> hHits; + edm::Handle> hHits; iEvent.getByToken(tokenHitGPU_, hHits); - CUDAScopedContextProduce ctx{*hHits}; + cms::cuda::ScopedContextProduce ctx{*hHits}; auto const& hits = ctx.get(*hHits); ctx.emplace(iEvent, tokenTrackGPU_, gpuAlgo_.makeTuplesAsync(hits, bf, ctx.stream())); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc index 75066458dc170..05106a1bfed41 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc @@ -67,7 +67,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * assert(tuples_d && quality_d); // zero tuples - cudautils::launchZero(tuples_d, cudaStream); + cms::cuda::launchZero(tuples_d, cudaStream); auto nhits = hh.nHits(); assert(nhits <= pixelGPUConstants::maxNumberOfHits); @@ -108,13 +108,13 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA * if (m_params.doStats_) kernel_mark_used(hh.view(), device_theCells_.get(), device_nCells_); - cudautils::finalizeBulk(device_hitTuple_apc_, tuples_d); + cms::cuda::finalizeBulk(device_hitTuple_apc_, tuples_d); // remove duplicates (tracks that share a doublet) kernel_earlyDuplicateRemover(device_theCells_.get(), device_nCells_, tuples_d, quality_d); kernel_countMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get()); - cudautils::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream); + cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream); kernel_fillMultiplicity(tuples_d, quality_d, device_tupleMultiplicity_.get()); if (nhits > 1 && m_params.lateFishbone_) { @@ -154,7 +154,7 @@ void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA // fill hit->track "map" kernel_countHitInTracks(tuples_d, quality_d, device_hitToTuple_.get()); - cudautils::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream); + cms::cuda::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream); kernel_fillHitInTracks(tuples_d, quality_d, device_hitToTuple_.get()); // remove duplicates (tracks that share a hit) diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu index aaf882633f17d..7bfee1c8d557f 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu @@ -21,7 +21,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * auto *quality_d = (Quality *)(&tracks_d->m_quality); // zero tuples - cudautils::launchZero(tuples_d, cudaStream); + cms::cuda::launchZero(tuples_d, cudaStream); auto nhits = hh.nHits(); assert(nhits <= pixelGPUConstants::maxNumberOfHits); @@ -96,7 +96,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * blockSize = 128; numberOfBlocks = (HitContainer::totbins() + blockSize - 1) / blockSize; - cudautils::finalizeBulk<<>>(device_hitTuple_apc_, tuples_d); + cms::cuda::finalizeBulk<<>>(device_hitTuple_apc_, tuples_d); // remove duplicates (tracks that share a doublet) numberOfBlocks = (3 * m_params.maxNumberOfDoublets_ / 4 + blockSize - 1) / blockSize; @@ -108,7 +108,7 @@ void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA * numberOfBlocks = (3 * CAConstants::maxTuples() / 4 + blockSize - 1) / blockSize; kernel_countMultiplicity<<>>( tuples_d, quality_d, device_tupleMultiplicity_.get()); - cudautils::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream); + cms::cuda::launchFinalize(device_tupleMultiplicity_.get(), device_tmws_, cudaStream); kernel_fillMultiplicity<<>>( tuples_d, quality_d, device_tupleMultiplicity_.get()); cudaCheck(cudaGetLastError()); @@ -160,7 +160,7 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr #endif // in principle we can use "nhits" to heuristically dimension the workspace... - device_isOuterHitOfCell_ = cudautils::make_device_unique(std::max(1U, nhits), stream); + device_isOuterHitOfCell_ = cms::cuda::make_device_unique(std::max(1U, nhits), stream); assert(device_isOuterHitOfCell_.get()); { int threadsPerBlock = 128; @@ -175,7 +175,7 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr cudaCheck(cudaGetLastError()); } - device_theCells_ = cudautils::make_device_unique(m_params.maxNumberOfDoublets_, stream); + device_theCells_ = cms::cuda::make_device_unique(m_params.maxNumberOfDoublets_, stream); #ifdef GPU_DEBUG cudaDeviceSynchronize(); @@ -252,7 +252,7 @@ void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA kernel_countHitInTracks<<>>( tuples_d, quality_d, device_hitToTuple_.get()); cudaCheck(cudaGetLastError()); - cudautils::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream); + cms::cuda::launchFinalize(device_hitToTuple_.get(), device_tmws_, cudaStream); cudaCheck(cudaGetLastError()); kernel_fillHitInTracks<<>>(tuples_d, quality_d, device_hitToTuple_.get()); cudaCheck(cudaGetLastError()); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h index b91911c66924e..592aee9770ae4 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.h @@ -46,6 +46,6 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(cudaStream_t stream) { } else { *device_nCells_ = 0; } - cudautils::launchZero(device_tupleMultiplicity_.get(), stream); - cudautils::launchZero(device_hitToTuple_.get(), stream); // we may wish to keep it in the edm... + cms::cuda::launchZero(device_tupleMultiplicity_.get(), stream); + cms::cuda::launchZero(device_hitToTuple_.get(), stream); // we may wish to keep it in the edm... } diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc index 2e875caba7130..4a8240706efc2 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorOnGPU.cc @@ -162,7 +162,7 @@ void CAHitNtupletGeneratorOnGPU::fillDescriptions(edm::ParameterSetDescription& PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecHit2DCUDA const& hits_d, float bfield, cudaStream_t stream) const { - PixelTrackHeterogeneous tracks(cudautils::make_device_unique(stream)); + PixelTrackHeterogeneous tracks(cms::cuda::make_device_unique(stream)); auto* soa = tracks.get(); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu index cb5d32b47aea3..1077bb7736667 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu @@ -11,14 +11,14 @@ void HelixFitOnGPU::launchRiemannKernels(HitsView const *hv, auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; // Fit internals - auto hitsGPU_ = cudautils::make_device_unique( + auto hitsGPU_ = cms::cuda::make_device_unique( maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix3xNd<4>) / sizeof(double), stream); - auto hits_geGPU_ = cudautils::make_device_unique( + auto hits_geGPU_ = cms::cuda::make_device_unique( maxNumberOfConcurrentFits_ * sizeof(Rfit::Matrix6x4f) / sizeof(float), stream); - auto fast_fit_resultsGPU_ = cudautils::make_device_unique( + auto fast_fit_resultsGPU_ = cms::cuda::make_device_unique( maxNumberOfConcurrentFits_ * sizeof(Rfit::Vector4d) / sizeof(double), stream); auto circle_fit_resultsGPU_holder = - cudautils::make_device_unique(maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit), stream); + cms::cuda::make_device_unique(maxNumberOfConcurrentFits_ * sizeof(Rfit::circle_fit), stream); Rfit::circle_fit *circle_fit_resultsGPU_ = (Rfit::circle_fit *)(circle_fit_resultsGPU_holder.get()); for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {