Skip to content

Commit

Permalink
Back to GPU struct of pointers
Browse files Browse the repository at this point in the history
  • Loading branch information
makortel committed Jan 8, 2019
1 parent 6d62b3d commit cbeb333
Show file tree
Hide file tree
Showing 12 changed files with 50 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SiPixelFedCablingMapGPUWrapper {
bool hasQuality() const { return hasQuality_; }

// returns pointer to GPU memory
SiPixelFedCablingMapGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;

// returns pointer to GPU memory
const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const;
Expand All @@ -37,13 +37,7 @@ class SiPixelFedCablingMapGPUWrapper {
unsigned char *modToUnpDefault = nullptr;

CUDAESManaged helper_;
unsigned int *fedMap = nullptr;
unsigned int *linkMap = nullptr;
unsigned int *rocMap = nullptr;
unsigned int *RawId = nullptr;
unsigned int *rocInDet = nullptr;
unsigned int *moduleId = nullptr;
unsigned char *badRocs = nullptr;
SiPixelFedCablingMapGPU *cablingGPU_ = nullptr;
unsigned int size;
bool hasQuality_;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ namespace pixelgpudetails {
return (1==((rawId>>25)&0x7));
}

__device__ pixelgpudetails::DetIdGPU getRawId(SiPixelFedCablingMapGPU cablingMap, uint32_t fed, uint32_t link, uint32_t roc) {
__device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU *cablingMap, uint32_t fed, uint32_t link, uint32_t roc) {
uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
pixelgpudetails::DetIdGPU detId = { cablingMap.RawId[index], cablingMap.rocInDet[index], cablingMap.moduleId[index] };
pixelgpudetails::DetIdGPU detId = { cablingMap->RawId[index], cablingMap->rocInDet[index], cablingMap->moduleId[index] };
return detId;
}

Expand Down Expand Up @@ -219,7 +219,7 @@ namespace pixelgpudetails {
return ((dcol < 26) & (2 <= pxid) & (pxid < 162));
}

__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, SiPixelFedCablingMapGPU cablingMap, bool debug = false)
__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false)
{
int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask;
if (errorType < 25) return false;
Expand All @@ -229,8 +229,8 @@ namespace pixelgpudetails {
case(25) : {
errorFound = true;
uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1;
if (index > 1 && index <= cablingMap.size) {
if (!(link == cablingMap.link[index] && 1 == cablingMap.roc[index])) errorFound = false;
if (index > 1 && index <= cablingMap->size) {
if (!(link == cablingMap->link[index] && 1 == cablingMap->roc[index])) errorFound = false;
}
if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n");
break;
Expand Down Expand Up @@ -283,7 +283,7 @@ namespace pixelgpudetails {
return errorFound? errorType : 0;
}

__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, SiPixelFedCablingMapGPU cablingMap, bool debug = false)
__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *cablingMap, bool debug = false)
{
uint32_t rID = 0xffffffff;

Expand Down Expand Up @@ -393,7 +393,7 @@ namespace pixelgpudetails {


// Kernel to perform Raw to Digi conversion
__global__ void RawToDigi_kernel(SiPixelFedCablingMapGPU cablingMap, const unsigned char *modToUnp,
__global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
const uint32_t wordCounter, const uint32_t *word, const uint8_t *fedIds,
uint16_t *xx, uint16_t *yy, uint16_t *adc,
uint32_t *pdigi, uint32_t *rawIdArr, uint16_t *moduleId,
Expand Down Expand Up @@ -442,7 +442,7 @@ namespace pixelgpudetails {

uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc;
if (useQualityInfo) {
skipROC = cablingMap.badRocs[index];
skipROC = cablingMap->badRocs[index];
if (skipROC) continue;
}
skipROC = modToUnp[index];
Expand Down Expand Up @@ -510,7 +510,7 @@ namespace pixelgpudetails {

// Interface to outside
void SiPixelRawToClusterGPUKernel::makeClustersAsync(
const SiPixelFedCablingMapGPU& cablingMap,
const SiPixelFedCablingMapGPU *cablingMap,
const unsigned char *modToUnp,
const SiPixelGainForHLTonGPU *gains,
const WordFedAppender& wordFed,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ namespace pixelgpudetails {
SiPixelRawToClusterGPUKernel& operator=(const SiPixelRawToClusterGPUKernel&) = delete;
SiPixelRawToClusterGPUKernel& operator=(SiPixelRawToClusterGPUKernel&&) = delete;

void makeClustersAsync(const SiPixelFedCablingMapGPU& cablingMap, const unsigned char *modToUnp,
void makeClustersAsync(const SiPixelFedCablingMapGPU *cablingMap, const unsigned char *modToUnp,
const SiPixelGainForHLTonGPU *gains,
const WordFedAppender& wordFed,
const uint32_t wordCounter, const uint32_t fedCounter, bool convertADCtoElectrons,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
throw cms::Exception("LogicError") << "UseQuality of the module (" << useQuality<< ") differs the one from SiPixelFedCablingMapGPUWrapper. Please fix your configuration.";
}
// get the GPU product already here so that the async transfer can begin
const auto gpuMap = hgpuMap->getGPUProductAsync(cudaStream);
const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);

edm::cuda::device::unique_ptr<unsigned char[]> modulesToUnpackRegional;
const unsigned char *gpuModulesToUnpack;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
SiPixelQuality const *badPixelInfo):
hasQuality_(badPixelInfo != nullptr)
{
unsigned int *fedMap = nullptr;
unsigned int *linkMap = nullptr;
unsigned int *rocMap = nullptr;
unsigned int *RawId = nullptr;
unsigned int *rocInDet = nullptr;
unsigned int *moduleId = nullptr;
unsigned char *badRocs = nullptr;

helper_.allocate(&fedMap, pixelgpudetails::MAX_SIZE);
helper_.allocate(&linkMap, pixelgpudetails::MAX_SIZE);
helper_.allocate(&rocMap, pixelgpudetails::MAX_SIZE);
Expand Down Expand Up @@ -95,12 +103,20 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << fedMap[i] << std::setw(20) << linkMap[i] << std::setw(20) << rocMap[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << RawId[i] << std::setw(20) << rocInDet[i] << std::setw(20) << moduleId[i] << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << (bool)badRocs[i] << std::setw(20) << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << i << std::setw(20) << static_cast<bool>(badRocs[i]) << std::setw(20) << std::endl;
LogDebug("SiPixelFedCablingMapGPU") << "----------------------------------------------------------------------------" << std::endl;

}

size = index-1;
helper_.allocate(&cablingGPU_, 1);
cablingGPU_->size = index-1;
cablingGPU_->fed = fedMap;
cablingGPU_->link = linkMap;
cablingGPU_->roc = rocMap;
cablingGPU_->RawId = RawId;
cablingGPU_->rocInDet = rocInDet;
cablingGPU_->moduleId = moduleId;
cablingGPU_->badRocs = badRocs;
helper_.advise();
helperUnp_.advise();
}
Expand All @@ -109,12 +125,9 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
SiPixelFedCablingMapGPUWrapper::~SiPixelFedCablingMapGPUWrapper() {}


SiPixelFedCablingMapGPU SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
helper_.prefetchAsync(cudaStream);
return SiPixelFedCablingMapGPU{size,
fedMap, linkMap, rocMap,
RawId, rocInDet, moduleId,
badRocs};
return cablingGPU_;
}

const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const {
Expand Down
3 changes: 2 additions & 1 deletion RecoLocalTracker/SiPixelRecHits/interface/PixelCPEFast.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class PixelCPEFast final : public PixelCPEBase

// The return value can only be used safely in kernels launched on
// the same cudaStream, or after cudaStreamSynchronize.
pixelCPEforGPU::ParamsOnGPU getGPUProductAsync(cuda::stream_t<>& cudaStream) const;
const pixelCPEforGPU::ParamsOnGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;

private:
ClusterParam * createClusterParam(const SiPixelCluster & cl) const override;
Expand Down Expand Up @@ -83,6 +83,7 @@ class PixelCPEFast final : public PixelCPEBase
CUDAESManaged m_helper;
pixelCPEforGPU::DetParams *m_detParamsGPU = nullptr;
pixelCPEforGPU::CommonParams *m_commonParamsGPU = nullptr;
pixelCPEforGPU::ParamsOnGPU *m_paramsGPU = nullptr;

void fillParamsForGpu();
};
Expand Down
2 changes: 1 addition & 1 deletion RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.cu
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ namespace pixelgpudetails {

void PixelRecHitGPUKernel::makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
float const * bs,
pixelCPEforGPU::ParamsOnGPU const& cpeParams,
pixelCPEforGPU::ParamsOnGPU const *cpeParams,
bool transferToCPU,
cuda::stream_t<>& stream) {
cudaCheck(cudaMemcpyAsync(gpu_.bs_d, bs, 3 * sizeof(float), cudaMemcpyDefault, stream.id()));
Expand Down
2 changes: 1 addition & 1 deletion RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace pixelgpudetails {

void makeHitsAsync(const siPixelRawToClusterHeterogeneousProduct::GPUProduct& input,
float const * bs,
pixelCPEforGPU::ParamsOnGPU const& cpeParams,
pixelCPEforGPU::ParamsOnGPU const *cpeParams,
bool transferToCPU,
cuda::stream_t<>& stream);

Expand Down
8 changes: 4 additions & 4 deletions RecoLocalTracker/SiPixelRecHits/plugins/gpuPixelRecHits.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace gpuPixelRecHits {



__global__ void getHits(pixelCPEforGPU::ParamsOnGPU cpeParams,
__global__ void getHits(pixelCPEforGPU::ParamsOnGPU const * __restrict__ cpeParams,
float const * __restrict__ bs,
uint16_t const * __restrict__ id,
uint16_t const * __restrict__ x,
Expand Down Expand Up @@ -125,8 +125,8 @@ namespace gpuPixelRecHits {

assert(h < 2000*256);

pixelCPEforGPU::position(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
pixelCPEforGPU::errorFromDB(cpeParams.commonParams(), cpeParams.detParams(me), clusParams, ic);
pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);
pixelCPEforGPU::errorFromDB(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic);

chargeh[h] = clusParams.charge[ic];

Expand All @@ -141,7 +141,7 @@ namespace gpuPixelRecHits {
mc[h]= clusParams.minCol[ic];

// to global and compute phi...
cpeParams.detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
cpeParams->detParams(me).frame.toGlobal(xl[h],yl[h], xg[h],yg[h],zg[h]);
// here correct for the beamspot...
xg[h]-=bs[0];
yg[h]-=bs[1];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace siPixelRecHitsHeterogeneousProduct {
using hindex_type = uint16_t; // if above is <=2^16

struct HitsOnGPU{
pixelCPEforGPU::ParamsOnGPU cpeParams; // forwarded from setup, NOT owned
const pixelCPEforGPU::ParamsOnGPU *cpeParams; // forwarded from setup, NOT owned
float * bs_d;
const uint32_t * hitsModuleStart_d; // forwarded from clusters
uint32_t * hitsLayerStart_d;
Expand Down
9 changes: 7 additions & 2 deletions RecoLocalTracker/SiPixelRecHits/src/PixelCPEFast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ PixelCPEFast::PixelCPEFast(edm::ParameterSet const & conf,
fillParamsForGpu();
}

pixelCPEforGPU::ParamsOnGPU PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
const pixelCPEforGPU::ParamsOnGPU *PixelCPEFast::getGPUProductAsync(cuda::stream_t<>& cudaStream) const {
m_helper.prefetchAsync(cudaStream);
return pixelCPEforGPU::ParamsOnGPU{m_commonParamsGPU, m_detParamsGPU};
return m_paramsGPU;
}

void PixelCPEFast::fillParamsForGpu() {
Expand Down Expand Up @@ -196,6 +196,11 @@ void PixelCPEFast::fillParamsForGpu() {
}

}

m_helper.allocate(&m_paramsGPU, 1);
m_paramsGPU->m_commonParams = m_commonParamsGPU;
m_paramsGPU->m_detParams = m_detParamsGPU;

m_helper.advise();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void kernelFastFitAllHits(TuplesOnGPU::Container const * __restrict__ foundNtupl
auto hit = hitId[i];
// printf("Hit global: %f,%f,%f\n", hhp->xg_d[hit],hhp->yg_d[hit],hhp->zg_d[hit]);
float ge[6];
hhp->cpeParams.detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
hhp->cpeParams->detParams(hhp->detInd_d[hit]).frame.toGlobal(hhp->xerr_d[hit], 0, hhp->yerr_d[hit], ge);
// printf("Error: %d: %f,%f,%f,%f,%f,%f\n",hhp->detInd_d[hit],ge[0],ge[1],ge[2],ge[3],ge[4],ge[5]);

hits.col(i) << hhp->xg_d[hit], hhp->yg_d[hit], hhp->zg_d[hit];
Expand Down

0 comments on commit cbeb333

Please sign in to comment.