Skip to content

Commit

Permalink
Unified member access methods for class TrackSoAHeterogeneousT. (cms-…
Browse files Browse the repository at this point in the history
…sw#614)

Used accessors in all cases. This commit does not change the memory layout of the class.
Also fixed C-style casts (reinterpret casts are needed here).
cms-sw#31722 (comment)
cms-sw#31722 (comment)
  • Loading branch information
ericcano committed May 20, 2021
1 parent e1943a0 commit 8624fb5
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 39 deletions.
70 changes: 46 additions & 24 deletions CUDADataFormats/Track/interface/TrackSoAHeterogeneousT.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,39 +19,61 @@ class TrackSoAHeterogeneousT {
using hindex_type = uint32_t;
using HitContainer = cms::cuda::OneToManyAssoc<hindex_type, S, 5 * S>;


// quality accessors
constexpr Quality quality(int32_t i) const { return reinterpret_cast<Quality>(quality_(i)); }
constexpr Quality &quality(int32_t i) { return reinterpret_cast<Quality &>(quality_(i)); }
constexpr Quality const *qualityData() const { return reinterpret_cast<Quality const *>(quality_.data()); }
constexpr Quality *qualityData() { return reinterpret_cast<Quality *>(quality_.data()); }

// chi2 accessors
constexpr auto & chi2(int32_t i) { return chi2_(i); }
constexpr auto chi2(int32_t i) const { return chi2_(i); }

// stateAtBS accessors
constexpr auto & stateAtBS() { return stateAtBS_; }
constexpr auto stateAtBS() const { return stateAtBS_; }
// eta accessors
constexpr auto & eta(int32_t i) { return eta_(i); }
constexpr auto eta(int32_t i) const { return eta_(i); }
// pt accessors
constexpr auto & pt(int32_t i) { return pt_(i); }
constexpr auto pt(int32_t i) const { return pt_(i); }

constexpr float charge(int32_t i) const { return std::copysign(1.f, stateAtBS_.state(i)(2)); }
constexpr float phi(int32_t i) const { return stateAtBS_.state(i)(0); }
constexpr float tip(int32_t i) const { return stateAtBS_.state(i)(1); }
constexpr float zip(int32_t i) const { return stateAtBS_.state(i)(4); }

// hitIndices accessors
constexpr auto & hitIndices() { return hitIndices_; }
constexpr auto const & hitIndices() const { return hitIndices_; }

// detInndices accessor
constexpr int nHits(int i) const { return detIndices_.size(i); }
constexpr auto & detIndices() { return detIndices_; }
constexpr auto const & detIndices() const { return detIndices_; }

// state at the detector of the outermost hit
// representation to be decided...
// not yet filled on GPU
// TrajectoryStateSoA<S> stateAtOuterDet;
private:
// Always check quality is at least loose!
// CUDA does not support enums in __lgc ...
private:
eigenSoA::ScalarSoA<uint8_t, S> quality_;

public:
constexpr Quality quality(int32_t i) const { return (Quality)(quality_(i)); }
constexpr Quality &quality(int32_t i) { return (Quality &)(quality_(i)); }
constexpr Quality const *qualityData() const { return (Quality const *)(quality_.data()); }
constexpr Quality *qualityData() { return (Quality *)(quality_.data()); }

// this is chi2/ndof as not necessarely all hits are used in the fit
eigenSoA::ScalarSoA<float, S> chi2;

constexpr int nHits(int i) const { return detIndices.size(i); }
eigenSoA::ScalarSoA<float, S> chi2_;

// State at the Beam spot
// phi,tip,1/pt,cotan(theta),zip
TrajectoryStateSoAT<S> stateAtBS;
eigenSoA::ScalarSoA<float, S> eta;
eigenSoA::ScalarSoA<float, S> pt;
constexpr float charge(int32_t i) const { return std::copysign(1.f, stateAtBS.state(i)(2)); }
constexpr float phi(int32_t i) const { return stateAtBS.state(i)(0); }
constexpr float tip(int32_t i) const { return stateAtBS.state(i)(1); }
constexpr float zip(int32_t i) const { return stateAtBS.state(i)(4); }

// state at the detector of the outermost hit
// representation to be decided...
// not yet filled on GPU
// TrajectoryStateSoA<S> stateAtOuterDet;
TrajectoryStateSoAT<S> stateAtBS_;
eigenSoA::ScalarSoA<float, S> eta_;
eigenSoA::ScalarSoA<float, S> pt_;

HitContainer hitIndices;
HitContainer detIndices;
HitContainer hitIndices_;
HitContainer detIndices_;
};

namespace pixelTrack {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ void PixelTrackProducerFromSoA::produce(edm::StreamID streamID,
const auto &tsoa = *iEvent.get(tokenTrack_);

auto const *quality = tsoa.qualityData();
auto const &fit = tsoa.stateAtBS;
auto const &hitIndices = tsoa.hitIndices;
auto const &fit = tsoa.stateAtBS();
auto const &hitIndices = tsoa.hitIndices();
auto maxTracks = tsoa.stride();

int32_t nt = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ __global__ void kernel_BLFit(caConstants::TupleMultiplicity const *__restrict__
brokenline::lineFit(hits_ge, fast_fit, bField, data, line);
brokenline::circleFit(hits, hits_ge, fast_fit, bField, data, circle);

results->stateAtBS.copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
results->stateAtBS().copyFromCircle(circle.par, circle.cov, line.par, line.cov, 1.f / float(bField), tkid);
results->pt(tkid) = float(bField) / float(std::abs(circle.par(2)));
results->eta(tkid) = asinhf(line.par(0));
results->chi2(tkid) = (circle.chi2 + line.chi2) / (2 * N - 5);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ void CAHitNtupletGeneratorKernelsCPU::printCounters(Counters const *counters) {

template <>
void CAHitNtupletGeneratorKernelsCPU::fillHitDetIndices(HitsView const *hv, TkSoA *tracks_d, cudaStream_t) {
kernel_fillHitDetIndices(&tracks_d->hitIndices, hv, &tracks_d->detIndices);
kernel_fillHitDetIndices(&tracks_d->hitIndices(), hv, &tracks_d->detIndices());
}

template <>
Expand Down Expand Up @@ -75,7 +75,7 @@ void CAHitNtupletGeneratorKernelsCPU::buildDoublets(HitsOnCPU const &hh, cudaStr

template <>
void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
auto *tuples_d = &tracks_d->hitIndices;
auto *tuples_d = &tracks_d->hitIndices();
auto *quality_d = tracks_d->qualityData();

assert(tuples_d && quality_d);
Expand Down Expand Up @@ -155,7 +155,7 @@ void CAHitNtupletGeneratorKernelsCPU::launchKernels(HitsOnCPU const &hh, TkSoA *

template <>
void CAHitNtupletGeneratorKernelsCPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
auto const *tuples_d = &tracks_d->hitIndices;
auto const *tuples_d = &tracks_d->hitIndices();
auto *quality_d = tracks_d->qualityData();

// classify tracks based on kinematics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ void CAHitNtupletGeneratorKernelsGPU::fillHitDetIndices(HitsView const *hv, TkSo
auto numberOfBlocks = (HitContainer::capacity() + blockSize - 1) / blockSize;

kernel_fillHitDetIndices<<<numberOfBlocks, blockSize, 0, cudaStream>>>(
&tracks_d->hitIndices, hv, &tracks_d->detIndices);
&tracks_d->hitIndices(), hv, &tracks_d->detIndices());
cudaCheck(cudaGetLastError());
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
Expand All @@ -17,7 +17,7 @@ void CAHitNtupletGeneratorKernelsGPU::fillHitDetIndices(HitsView const *hv, TkSo
template <>
void CAHitNtupletGeneratorKernelsGPU::launchKernels(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
// these are pointer on GPU!
auto *tuples_d = &tracks_d->hitIndices;
auto *tuples_d = &tracks_d->hitIndices();
auto *quality_d = tracks_d->qualityData();

// zero tuples
Expand Down Expand Up @@ -222,7 +222,7 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr
template <>
void CAHitNtupletGeneratorKernelsGPU::classifyTuples(HitsOnCPU const &hh, TkSoA *tracks_d, cudaStream_t cudaStream) {
// these are pointer on GPU!
auto const *tuples_d = &tracks_d->hitIndices;
auto const *tuples_d = &tracks_d->hitIndices();
auto *quality_d = tracks_d->qualityData();

auto blockSize = 64;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ __global__ void kernel_classifyTracks(HitContainer const *__restrict__ tuples,
// if the fit has any invalid parameters, mark it as bad
bool isNaN = false;
for (int i = 0; i < 5; ++i) {
isNaN |= std::isnan(tracks->stateAtBS.state(it)(i));
isNaN |= std::isnan(tracks->stateAtBS().state(it)(i));
}
if (isNaN) {
#ifdef NTUPLE_DEBUG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuplesAsync(TrackingRecH
kernels.fillHitDetIndices(hits_d.view(), soa, stream); // in principle needed only if Hits not "available"

HelixFitOnGPU fitter(bfield, m_params.fit5as4_);
fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa);
fitter.allocateOnGPU(&(soa->hitIndices()), kernels.tupleMultiplicity(), soa);
if (m_params.useRiemannFit_) {
fitter.launchRiemannKernels(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets, stream);
} else {
Expand Down Expand Up @@ -220,7 +220,7 @@ PixelTrackHeterogeneous CAHitNtupletGeneratorOnGPU::makeTuples(TrackingRecHit2DC

// now fit
HelixFitOnGPU fitter(bfield, m_params.fit5as4_);
fitter.allocateOnGPU(&(soa->hitIndices), kernels.tupleMultiplicity(), soa);
fitter.allocateOnGPU(&(soa->hitIndices()), kernels.tupleMultiplicity(), soa);

if (m_params.useRiemannFit_) {
fitter.launchRiemannKernelsOnCPU(hits_d.view(), hits_d.nHits(), caConstants::maxNumberOfQuadruplets);
Expand Down
2 changes: 1 addition & 1 deletion RecoPixelVertexing/PixelTriplets/plugins/RiemannFitOnGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ __global__ void kernel_LineFit(caConstants::TupleMultiplicity const *__restrict_

riemannFit::fromCircleToPerigee(circle_fit[local_idx]);

results->stateAtBS.copyFromCircle(
results->stateAtBS().copyFromCircle(
circle_fit[local_idx].par, circle_fit[local_idx].cov, line_fit.par, line_fit.cov, 1.f / float(bField), tkid);
results->pt(tkid) = bField / std::abs(circle_fit[local_idx].par(2));
results->eta(tkid) = asinhf(line_fit.par(0));
Expand Down
4 changes: 2 additions & 2 deletions RecoTracker/TkSeedGenerator/plugins/SeedProducerFromSoA.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ void SeedProducerFromSoA::produce(edm::StreamID streamID, edm::Event& iEvent, co
const auto& tsoa = *(iEvent.get(tokenTrack_));

auto const* quality = tsoa.qualityData();
auto const& fit = tsoa.stateAtBS;
auto const& detIndices = tsoa.detIndices;
auto const& fit = tsoa.stateAtBS();
auto const& detIndices = tsoa.detIndices();
auto maxTracks = tsoa.stride();

int32_t nt = 0;
Expand Down

0 comments on commit 8624fb5

Please sign in to comment.