Back to GPU struct of pointers

cms-patatrack · Jan 8, 2019 · cbeb333 · cbeb333
1 parent 6d62b3d
commit cbeb333
Show file tree

Hide file tree

Showing 12 changed files with 50 additions and 37 deletions.
diff --git a/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h b/RecoLocalTracker/SiPixelClusterizer/interface/SiPixelFedCablingMapGPUWrapper.h
@@ -25,7 +25,7 @@ class SiPixelFedCablingMapGPUWrapper {
   bool hasQuality() const { return hasQuality_; }
 
   // returns pointer to GPU memory
-  SiPixelFedCablingMapGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
+  const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
 
   // returns pointer to GPU memory
   const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const;
@@ -37,13 +37,7 @@ class SiPixelFedCablingMapGPUWrapper {
   unsigned char *modToUnpDefault = nullptr;
 
   CUDAESManaged helper_;
-  unsigned int *fedMap = nullptr;
-  unsigned int *linkMap = nullptr;
-  unsigned int *rocMap = nullptr;
-  unsigned int *RawId = nullptr;
-  unsigned int *rocInDet = nullptr;
-  unsigned int *moduleId = nullptr;
-  unsigned char *badRocs = nullptr;
+  SiPixelFedCablingMapGPU *cablingGPU_ = nullptr;
   unsigned int size;
   bool hasQuality_;
 };

diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.cu
@@ -84,9 +84,9 @@ namespace pixelgpudetails {
     return (1==((rawId>>25)&0x7));
   }
 
-  __device__ pixelgpudetails::DetIdGPU getRawId(SiPixelFedCablingMapGPU cablingMap, uint32_t fed, uint32_t link, uint32_t roc) {
+  __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU *cablingMap, uint32_t fed, uint32_t link, uint32_t roc) {
     uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
-    pixelgpudetails::DetIdGPU detId = { cablingMap.RawId[index], cablingMap.rocInDet[index], cablingMap.moduleId[index] };
+    pixelgpudetails::DetIdGPU detId = { cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index] };
     return detId;
   }
 
@@ -219,7 +219,7 @@ namespace pixelgpudetails {
     return ((dcol < 26) &  (2 <= pxid) & (pxid < 162));
   }
 
-  __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, SiPixelFedCablingMapGPU cablingMap, bool debug = false)
+  __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false)
   {
     int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask;
     if (errorType < 25) return false;
@@ -229,8 +229,8 @@ namespace pixelgpudetails {
       case(25) : {
         errorFound = true;
         uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1;
-        if (index > 1 && index <= cablingMap.size) {
-          if (!(link == cablingMap.link[index] && 1 == cablingMap.roc[index])) errorFound = false;
+        if (index > 1 && index <= cablingMap->size) {
+          if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false;
         }
         if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n");
         break;
@@ -283,7 +283,7 @@ namespace pixelgpudetails {
     return errorFound? errorType : 0;
   }
 
-  __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, SiPixelFedCablingMapGPU cablingMap, bool debug = false)
+  __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false)
   {
     uint32_t rID = 0xffffffff;
 
@@ -393,7 +393,7 @@ namespace pixelgpudetails {
 
 
   // Kernel to perform Raw to Digi conversion
-  __global__ void RawToDigi_kernel(SiPixelFedCablingMapGPU cablingMap, const unsigned char *modToUnp,
+  __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
       const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds,
       uint16_t *xx, uint16_t *yy, uint16_t *adc,
       uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId,
@@ -442,7 +442,7 @@ namespace pixelgpudetails {
 
         uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
         if (useQualityInfo) {
-          skipROC = cablingMap.badRocs[index];
+          skipROC = cablingMap->badRocs[index];
           if (skipROC) continue;
         }
         skipROC = modToUnp[index];
@@ -510,7 +510,7 @@ namespace pixelgpudetails {
 
   // Interface to outside
   void SiPixelRawToClusterGPUKernel::makeClustersAsync(
-      const SiPixelFedCablingMapGPU& cablingMap,
+      const SiPixelFedCablingMapGPU *cablingMap,
       const unsigned char *modToUnp,
       const SiPixelGainForHLTonGPU *gains,
       const WordFedAppender& wordFed,

diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterGPUKernel.h
@@ -204,7 +204,7 @@ namespace pixelgpudetails {
     SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete;
     SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete;
 
-    void makeClustersAsync(const SiPixelFedCablingMapGPU& cablingMap, const unsigned char *modToUnp,
+    void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
                            const SiPixelGainForHLTonGPU *gains,
                            const WordFedAppender& wordFed,
                            const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons,

diff --git a/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc b/RecoLocalTracker/SiPixelClusterizer/plugins/SiPixelRawToClusterHeterogeneous.cc
@@ -466,7 +466,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
     throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality<< ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration.";
   }
   // get the GPU product already here so that the async transfer can begin
-  const auto gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
+  const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
 
   edm::cuda::device::unique_ptr<unsigned char[]> modulesToUnpackRegional;
   const unsigned char *gpuModulesToUnpack;

diff --git a/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc b/RecoLocalTracker/SiPixelClusterizer/src/SiPixelFedCablingMapGPUWrapper.cc
@@ -24,6 +24,14 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
                                                                SiPixelQuality const *badPixelInfo):
   hasQuality_(badPixelInfo != nullptr)
 {
+  unsigned int *fedMap = nullptr;
+  unsigned int *linkMap = nullptr;
+  unsigned int *rocMap = nullptr;
+  unsigned int *RawId = nullptr;
+  unsigned int *rocInDet = nullptr;
+  unsigned int *moduleId = nullptr;
+  unsigned char *badRocs = nullptr;
+
   helper_.allocate(&fedMap,   pixelgpudetails::MAX_SIZE);
   helper_.allocate(&linkMap,  pixelgpudetails::MAX_SIZE);
   helper_.allocate(&rocMap,   pixelgpudetails::MAX_SIZE);
@@ -95,12 +103,20 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
     LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
     LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << fedMap[i]  << std::setw(20) << linkMap[i]  << std::setw(20) << rocMap[i] << std::endl;
     LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << RawId[i]   << std::setw(20) << rocInDet[i] << std::setw(20) << moduleId[i] << std::endl;
-    LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << (bool)badRocs[i] << std::setw(20) << std::endl;
+    LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << static_cast<bool>(badRocs[i]) << std::setw(20) << std::endl;
     LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
 
   }
 
-  size = index-1;
+  helper_.allocate(&cablingGPU_, 1);
+  cablingGPU_->size = index-1;
+  cablingGPU_->fed = fedMap;
+  cablingGPU_->link = linkMap;
+  cablingGPU_->roc = rocMap;
+  cablingGPU_->RawId = RawId;
+  cablingGPU_->rocInDet = rocInDet;
+  cablingGPU_->moduleId = moduleId;
+  cablingGPU_->badRocs = badRocs;
   helper_.advise();
   helperUnp_.advise();
 }
@@ -109,12 +125,9 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
 SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {}
 
 
-SiPixelFedCablingMapGPU SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
+const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
   helper_.prefetchAsync(cudaStream);
-  return SiPixelFedCablingMapGPU{size,
-      fedMap, linkMap, rocMap,
-      RawId, rocInDet, moduleId,
-      badRocs};
+  return cablingGPU_;
 }
 
 const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const {

diff --git a/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h b/RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h
@@ -46,7 +46,7 @@ class PixelCPEFast final : public PixelCPEBase
 
     // The return value can only be used safely in kernels launched on
     // the same cudaStream, or after cudaStreamSynchronize.
-   pixelCPEforGPU::ParamsOnGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
+   const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
 
 private:
    ClusterParam * createClusterParam(const SiPixelCluster & cl) const override;
@@ -83,6 +83,7 @@ class PixelCPEFast final : public PixelCPEBase
    CUDAESManaged m_helper;
    pixelCPEforGPU::DetParams *m_detParamsGPU = nullptr;
    pixelCPEforGPU::CommonParams *m_commonParamsGPU = nullptr;
+   pixelCPEforGPU::ParamsOnGPU *m_paramsGPU = nullptr;
 
    void fillParamsForGpu();
 };

diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
@@ -131,7 +131,7 @@ namespace pixelgpudetails {
 
   void PixelRecHitGPUKernel::makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
                                            float const * bs,
-                                           pixelCPEforGPU::ParamsOnGPU const& cpeParams,
+                                           pixelCPEforGPU::ParamsOnGPU const *cpeParams,
                                            bool transferToCPU,
                                            cuda::stream_t<>& stream) {
     cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3 * sizeof(float), cudaMemcpyDefault, stream.id()));

diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h
@@ -33,7 +33,7 @@ namespace pixelgpudetails {
 
     void makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
                        float const * bs,
-                       pixelCPEforGPU::ParamsOnGPU const& cpeParams,
+                       pixelCPEforGPU::ParamsOnGPU const *cpeParams,
                        bool transferToCPU,
                        cuda::stream_t<>& stream);
 

diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
@@ -14,7 +14,7 @@ namespace gpuPixelRecHits {
 
 
 
-  __global__ void getHits(pixelCPEforGPU::ParamsOnGPU cpeParams,
+  __global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams,
                           float const * __restrict__  bs,
                           uint16_t const * __restrict__  id,
 			  uint16_t const * __restrict__  x,
@@ -125,8 +125,8 @@ namespace gpuPixelRecHits {
 
     assert(h < 2000*256);
 
-    pixelCPEforGPU::position(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
-    pixelCPEforGPU::errorFromDB(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
+    pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
+    pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
 
     chargeh[h] = clusParams.charge[ic];
 
@@ -141,7 +141,7 @@ namespace gpuPixelRecHits {
     mc[h]= clusParams.minCol[ic];
 
     // to global and compute phi... 
-    cpeParams.detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
+    cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
     // here correct for the beamspot...
     xg[h]-=bs[0];
     yg[h]-=bs[1];

diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h b/RecoLocalTracker/SiPixelRecHits/plugins/siPixelRecHitsHeterogeneousProduct.h
@@ -18,7 +18,7 @@ namespace siPixelRecHitsHeterogeneousProduct {
   using hindex_type = uint16_t; // if above is <=2^16
 
   struct HitsOnGPU{
-     pixelCPEforGPU::ParamsOnGPU cpeParams;    // forwarded from setup, NOT owned
+     const pixelCPEforGPU::ParamsOnGPU *cpeParams;    // forwarded from setup, NOT owned
      float * bs_d;
      const uint32_t * hitsModuleStart_d; // forwarded from clusters
      uint32_t * hitsLayerStart_d;

diff --git a/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc b/RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc
@@ -66,9 +66,9 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf,
    fillParamsForGpu();   
 }
 
-pixelCPEforGPU::ParamsOnGPU PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
+const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
   m_helper.prefetchAsync(cudaStream);
-  return pixelCPEforGPU::ParamsOnGPU{m_commonParamsGPU, m_detParamsGPU};
+  return m_paramsGPU;
 }
 
 void PixelCPEFast::fillParamsForGpu() {
@@ -196,6 +196,11 @@ void PixelCPEFast::fillParamsForGpu() {
    }
 
   }
+
+  m_helper.allocate(&m_paramsGPU, 1);
+  m_paramsGPU->m_commonParams = m_commonParamsGPU;
+  m_paramsGPU->m_detParams = m_detParamsGPU;
+
   m_helper.advise();
 }
 

diff --git a/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.cu
@@ -53,7 +53,7 @@ void kernelFastFitAllHits(TuplesOnGPU::Container const * __restrict__ foundNtupl
     auto hit = hitId[i];
     // printf("Hit global: %f,%f,%f\n", hhp->xg_d[hit],hhp->yg_d[hit],hhp->zg_d[hit]);
     float ge[6];
-    hhp->cpeParams.detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
+    hhp->cpeParams->detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
     // printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]);
 
     hits.col(i) << hhp->xg_d[hit], hhp->yg_d[hit], hhp->zg_d[hit];