diff --git a/Configuration/ProcessModifiers/python/trackingIters01_cff.py b/Configuration/ProcessModifiers/python/trackingIters01_cff.py new file mode 100644 index 0000000000000..9f7506d27b51c --- /dev/null +++ b/Configuration/ProcessModifiers/python/trackingIters01_cff.py @@ -0,0 +1,4 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier sets the iterative tracking to use a minimal set of iterations, first two +trackingIters01 = cms.Modifier() diff --git a/Configuration/ProcessModifiers/python/trackingLST_cff.py b/Configuration/ProcessModifiers/python/trackingLST_cff.py new file mode 100644 index 0000000000000..ae1dd83e20b0b --- /dev/null +++ b/Configuration/ProcessModifiers/python/trackingLST_cff.py @@ -0,0 +1,5 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier sets the LST (Phase-2 line segment tracking) used for track building +trackingLST = cms.Modifier() + diff --git a/Configuration/PyReleaseValidation/README.md b/Configuration/PyReleaseValidation/README.md index 54af3619935dc..5951cd1c4d4ae 100644 --- a/Configuration/PyReleaseValidation/README.md +++ b/Configuration/PyReleaseValidation/README.md @@ -65,6 +65,8 @@ The offsets currently in use are: * 0.7: trackingMkFit modifier * 0.701: DisplacedRegionalStep tracking iteration for Run-3 * 0.702: trackingMkFit modifier for Phase-2 (initialStep only) +* 0.703: LST tracking, initialStep+HighPtTripletStep only, on CPU +* 0.704: LST tracking, initialStep+HighPtTripletStep only, on GPU * 0.78: Complete L1 workflow * 0.8: BPH Parking (Run-2) * 0.81: Running also HeavyFlavor DQM diff --git a/Configuration/PyReleaseValidation/python/relval_2026.py b/Configuration/PyReleaseValidation/python/relval_2026.py index 82b64a2d5e5e4..e3b7ce2031139 100644 --- a/Configuration/PyReleaseValidation/python/relval_2026.py +++ b/Configuration/PyReleaseValidation/python/relval_2026.py @@ -36,6 +36,9 @@ numWFIB.extend([31234.0]) #2026D114 numWFIB.extend([32034.0]) #2026D115 +# Temporary placement for LST workflow 
to workaround PR conflicts - to be formatted and placed in an upcoming PR +numWFIB.extend([24834.703,24834.704]) #2026D98 LST tracking (initialStep+HighPtTripletStep only): CPU, GPU + #Additional sample for short matrix and IB #Default Phase-2 Det NoPU numWFIB.extend([prefixDet+34.911]) #DD4hep XML diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index dffde2a8f52c9..d18b25c4fed20 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -478,6 +478,56 @@ def condition_(self, fragment, stepList, key, hasHarvest): '--procModifiers': 'trackingMkFitCommon,trackingMkFitInitialStep' } +# LST on CPU, initialStep+highPtTripletStep-only tracking-only +class UpgradeWorkflow_lstOnCPUIters01TrackingOnly(UpgradeWorkflowTracking): + def setup__(self, step, stepName, stepDict, k, properties): + if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]]) + elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@trackingOnlyDQM'}, stepDict[step][k]]) + elif 'ALCA' in step: stepDict[stepName][k] = None + def condition_(self, fragment, stepList, key, hasHarvest): + return ('2026' in key) +upgradeWFs['lstOnCPUIters01TrackingOnly'] = UpgradeWorkflow_lstOnCPUIters01TrackingOnly( + steps = [ + 'RecoGlobal', + 'HARVESTGlobal', + # Add ALCA steps explicitly, so that they can be properly removed + 'ALCA', + 'ALCAPhase2' + ], + PU = [], + suffix = '_lstOnCPUIters01TrackingOnly', + offset = 0.703, +) +upgradeWFs['lstOnCPUIters01TrackingOnly'].step3 = upgradeWFs['trackingOnly'].step3 | { + '--procModifiers': 'trackingIters01,trackingLST', + '--accelerators' : 'cpu' +} + +# LST on GPU, initialStep+highPtTripletStep-only tracking-only +class UpgradeWorkflow_lstOnGPUIters01TrackingOnly(UpgradeWorkflowTracking): + def 
setup__(self, step, stepName, stepDict, k, properties): + if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]]) + elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@trackingOnlyDQM'}, stepDict[step][k]]) + elif 'ALCA' in step: stepDict[stepName][k] = None + def condition_(self, fragment, stepList, key, hasHarvest): + return ('2026' in key) +upgradeWFs['lstOnGPUIters01TrackingOnly'] = UpgradeWorkflow_lstOnGPUIters01TrackingOnly( + steps = [ + 'RecoGlobal', + 'HARVESTGlobal', + # Add ALCA steps explicitly, so that they can be properly removed + 'ALCA', + 'ALCAPhase2' + ], + PU = [], + suffix = '_lstOnGPUIters01TrackingOnly', + offset = 0.704, +) +upgradeWFs['lstOnGPUIters01TrackingOnly'].step3 = upgradeWFs['trackingOnly'].step3 | { + '--procModifiers': 'trackingIters01,trackingLST', + '--accelerators' : 'gpu-nvidia' +} + #DeepCore seeding for JetCore iteration workflow class UpgradeWorkflow_seedingDeepCore(UpgradeWorkflow): def setup_(self, step, stepName, stepDict, k, properties): diff --git a/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py b/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py index 256432c1180c8..6d44990855324 100644 --- a/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py +++ b/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py @@ -33,6 +33,16 @@ oldClusterRemovalInfo = 'detachedQuadStepClusters', overrideTrkQuals = 'detachedQuadStepSelector:detachedQuadStepTrk' )) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(convClusters, + trajectories = "highPtTripletStepTracks", + oldClusterRemovalInfo = "highPtTripletStepClusters", + overrideTrkQuals = "highPtTripletStepSelector:highPtTripletStep" +) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +(trackingIters01 & trackingPhase2PU140 & trackingLST).toModify(convClusters, + 
overrideTrkQuals = "" +) _convLayerPairsStripOnlyLayers = ['TIB1+TID1_pos', 'TIB1+TID1_neg', diff --git a/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py b/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py index 907e3126a5cd7..d5256c19a1756 100644 --- a/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py +++ b/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py @@ -17,6 +17,8 @@ ttrhBuilderName = "WithAngleAndTemplate", chi2EstimatorName = "duplicateTrackCandidatesChi2Est" ) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(duplicateTrackCandidates, source = "earlyGeneralTracks") import RecoTracker.TrackProducer.TrackProducer_cfi mergedDuplicateTracks = RecoTracker.TrackProducer.TrackProducer_cfi.TrackProducer.clone( @@ -44,6 +46,10 @@ candidateSource = "duplicateTrackCandidates:candidates", candidateComponents = "duplicateTrackCandidates:candidateMap" ) +trackingIters01.toModify(generalTracks, + originalSource = "earlyGeneralTracks", + originalMVAVals = "earlyGeneralTracks:MVAValues" +) generalTracksTask = cms.Task( duplicateTrackCandidates, diff --git a/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py b/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py index c33f6cbfd71a9..f18661413e571 100644 --- a/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py +++ b/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py @@ -107,6 +107,16 @@ def _extend_displacedGeneral(x): makeReKeyedSeeds = cms.untracked.bool(False) ) ) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(earlyGeneralTracks, + TrackProducers = ['initialStepTracks', 'highPtTripletStepTracks'], + hasSelector = [1,1], + indivShareFrac = [1,0.16], + selectedTrackQuals = ['initialStepSelector:initialStep', + 'highPtTripletStepSelector:highPtTripletStep' + ], + setsToMerge = {0: 
dict(tLists = [0,1])} +) from Configuration.ProcessModifiers.vectorHits_cff import vectorHits def _extend_pixelLess(x): x.TrackProducers += ['pixelLessStepTracks'] @@ -116,3 +126,13 @@ def _extend_pixelLess(x): x.setsToMerge[0].tLists += [6] (trackingPhase2PU140 & vectorHits).toModify(earlyGeneralTracks, _extend_pixelLess) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +(trackingPhase2PU140 & trackingLST).toModify(earlyGeneralTracks, + TrackProducers = ['highPtTripletStepLSTpTracks', 'highPtTripletStepLSTT5Tracks'], + hasSelector = [1,0], + indivShareFrac = [0.1,0.1], + selectedTrackQuals = ['highPtTripletStepSelector:highPtTripletStep', + 'highPtTripletStepSelectorLSTT5:highPtTripletStepLSTT5' + ], + setsToMerge = {0: dict(tLists = [0,1])} +) diff --git a/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py b/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py index 0ac3fa5338d8b..fc98c2a6d8664 100644 --- a/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py +++ b/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py @@ -259,6 +259,10 @@ phase2clustersToSkip = 'highPtTripletStepClusters' ) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +from RecoTracker.LST.lstOutputConverter_cfi import lstOutputConverter as _lstOutputConverter +(trackingPhase2PU140 & trackingLST).toReplaceWith(highPtTripletStepTrackCandidates, _lstOutputConverter.clone()) + #For FastSim phase1 tracking import FastSimulation.Tracking.TrackCandidateProducer_cfi _fastSim_highPtTripletStepTrackCandidates = FastSimulation.Tracking.TrackCandidateProducer_cfi.trackCandidateProducer.clone( @@ -280,6 +284,25 @@ from Configuration.Eras.Modifier_phase2_timing_layer_cff import phase2_timing_layer phase2_timing_layer.toModify(highPtTripletStepTracks, TrajectoryInEvent = True) +highPtTripletStepLSTpTracks = highPtTripletStepTracks.clone( + src = 'highPtTripletStepTrackCandidates:pTCsLST' +) +highPtTripletStepLSTT5Tracks = 
highPtTripletStepTracks.clone( + src = 'highPtTripletStepTrackCandidates:t5TCsLST' +) +_highPtTripletStepTracks_LST = RecoTracker.FinalTrackSelectors.trackListMerger_cfi.trackListMerger.clone( + TrackProducers = ['highPtTripletStepLSTpTracks', + 'highPtTripletStepLSTT5Tracks'], + hasSelector = [1,0], + indivShareFrac = [0.1,0.1], + selectedTrackQuals = ['highPtTripletStepSelector:highPtTripletStep', + 'highPtTripletStepSelectorLSTT5:highPtTripletStepLSTT5'], + copyExtras = True, + copyMVA = False, + setsToMerge = [cms.PSet( tLists=cms.vint32(0,1), pQual=cms.bool(True) )] +) +(trackingPhase2PU140 & trackingLST).toReplaceWith(highPtTripletStepTracks, _highPtTripletStepTracks_LST) + # Final selection from RecoTracker.FinalTrackSelectors.TrackMVAClassifierPrompt_cfi import * highPtTripletStep = TrackMVAClassifierPrompt.clone( @@ -357,6 +380,28 @@ from Configuration.ProcessModifiers.vectorHits_cff import vectorHits vectorHits.toModify(highPtTripletStepSelector.trackSelectors[2], minNumberLayers = 3, minNumber3DLayers = 3, d0_par1 = ( 0.5, 4.0 ), dz_par1 = ( 0.6, 4.0 )) +(trackingPhase2PU140 & trackingLST).toModify(highPtTripletStepSelector, src = 'highPtTripletStepLSTpTracks') +# Passthrough selector to satisfy the TrackListMerger requirement for selector values +highPtTripletStepSelectorLSTT5 = RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.multiTrackSelector.clone( + src = 'highPtTripletStepLSTT5Tracks', + trackSelectors = [ + RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.looseMTS.clone( + name = 'highPtTripletStepLSTT5Loose', + minHitsToBypassChecks = 0 + ), #end of pset + RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.tightMTS.clone( + name = 'highPtTripletStepLSTT5Tight', + preFilterName = 'highPtTripletStepLSTT5Loose', + minHitsToBypassChecks = 0 + ), + RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.highpurityMTS.clone( + name = 'highPtTripletStepLSTT5', + preFilterName = 'highPtTripletStepLSTT5Tight', + minHitsToBypassChecks = 0 + 
), + ] #end of vpset +) #end of clone + # Final sequence HighPtTripletStepTask = cms.Task(highPtTripletStepClusters, highPtTripletStepSeedLayers, @@ -378,6 +423,17 @@ _HighPtTripletStep_Phase2PU140 = cms.Sequence(_HighPtTripletStepTask_Phase2PU140) trackingPhase2PU140.toReplaceWith(HighPtTripletStepTask, _HighPtTripletStepTask_Phase2PU140) +_HighPtTripletStepTask_LST = HighPtTripletStepTask.copy() +from RecoLocalTracker.Phase2TrackerRecHits.Phase2TrackerRecHits_cfi import siPhase2RecHits +from RecoTracker.LST.lstSeedTracks_cff import lstInitialStepSeedTracks,lstHighPtTripletStepSeedTracks +from RecoTracker.LST.lstPixelSeedInputProducer_cfi import lstPixelSeedInputProducer +from RecoTracker.LST.lstPhase2OTHitsInputProducer_cfi import lstPhase2OTHitsInputProducer +from RecoTracker.LST.lstProducer_cff import * + +_HighPtTripletStepTask_LST.add(siPhase2RecHits, lstInitialStepSeedTracks, lstHighPtTripletStepSeedTracks, lstPixelSeedInputProducer, lstPhase2OTHitsInputProducer, + lstProducer, lstModulesDevESProducer, highPtTripletStepLSTpTracks, highPtTripletStepLSTT5Tracks, highPtTripletStepSelectorLSTT5) +(trackingPhase2PU140 & trackingLST).toReplaceWith(HighPtTripletStepTask, _HighPtTripletStepTask_LST) + # fast tracking mask producer from FastSimulation.Tracking.FastTrackerRecHitMaskProducer_cfi import maskProducerFromClusterRemover highPtTripletStepMasks = maskProducerFromClusterRemover(highPtTripletStepClusters) diff --git a/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py b/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py index 0b9429e3b3a5d..d51eee0e3e985 100644 --- a/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py +++ b/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py @@ -14,6 +14,9 @@ for _eraName, _postfix, _era in _cfg.nonDefaultEras(): _era.toReplaceWith(lowPtQuadStepClusters, _cfg.clusterRemoverForIter('LowPtQuadStep', _eraName, _postfix)) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +# with LST, 
this is the first iteration with proper cluster masking +trackingLST.toModify(lowPtQuadStepClusters, oldClusterRemovalInfo = "") # SEEDING LAYERS import RecoTracker.TkSeedingLayers.PixelLayerQuadruplets_cfi diff --git a/RecoTracker/IterativeTracking/python/iterativeTkConfig.py b/RecoTracker/IterativeTracking/python/iterativeTkConfig.py index cd3713b758a4c..ce6c7c0f6571d 100644 --- a/RecoTracker/IterativeTracking/python/iterativeTkConfig.py +++ b/RecoTracker/IterativeTracking/python/iterativeTkConfig.py @@ -53,16 +53,21 @@ _iterations_trackingPhase1.append('JetCoreRegionalStep') -_iterations_trackingPhase2PU140 = [ +_iterations_trackingPhase2PU140_VS = cms.PSet(names = cms.vstring( "InitialStep", "HighPtTripletStep", "LowPtQuadStep", "LowPtTripletStep", "DetachedQuadStep", "PixelPairStep", -] +)) from Configuration.ProcessModifiers.vectorHits_cff import vectorHits -vectorHits.toModify(_iterations_trackingPhase2PU140, func=lambda x: x.append('PixelLessStep')) +vectorHits.toModify(_iterations_trackingPhase2PU140_VS.names, func=lambda x: x.append('PixelLessStep')) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(_iterations_trackingPhase2PU140_VS, names = ["InitialStep", "HighPtTripletStep"]) +# apply all procModifiers before this +_iterations_trackingPhase2PU140 = _iterations_trackingPhase2PU140_VS.names.value() + _iterations_muonSeeded = [ "MuonSeededStepInOut", "MuonSeededStepOutIn", @@ -72,10 +77,13 @@ "MuonSeededStepOutIn", ] #Phase2 -_iterations_muonSeeded_trackingPhase2PU140 = [ +_iterations_muonSeeded_trackingPhase2PU140_VS = cms.PSet(names = cms.vstring( "MuonSeededStepInOut", "MuonSeededStepOutIn", -] +)) +trackingIters01.toModify(_iterations_muonSeeded_trackingPhase2PU140_VS, names = []) +_iterations_muonSeeded_trackingPhase2PU140 = _iterations_muonSeeded_trackingPhase2PU140_VS.names.value() + _multipleSeedProducers = { "MixedTripletStep": ["A", "B"], "TobTecStep": ["Pair", "Tripl"], diff --git 
a/RecoTracker/LST/BuildFile.xml b/RecoTracker/LST/BuildFile.xml new file mode 100644 index 0000000000000..07a6ae1d26eaf --- /dev/null +++ b/RecoTracker/LST/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/RecoTracker/LST/interface/LSTOutput.h b/RecoTracker/LST/interface/LSTOutput.h new file mode 100644 index 0000000000000..5be4f645d3416 --- /dev/null +++ b/RecoTracker/LST/interface/LSTOutput.h @@ -0,0 +1,38 @@ +#ifndef RecoTracker_LST_interface_LSTOutput_h +#define RecoTracker_LST_interface_LSTOutput_h + +#include +#include + +class LSTOutput { +public: + LSTOutput() = default; + LSTOutput(std::vector> const hitIdx, + std::vector const len, + std::vector const seedIdx, + std::vector const trackCandidateType) + : hitIdx_(std::move(hitIdx)), + len_(std::move(len)), + seedIdx_(std::move(seedIdx)), + trackCandidateType_(std::move(trackCandidateType)) {} + + enum LSTTCType { T5 = 4, pT3 = 5, pT5 = 7, pLS = 8 }; + + // Hit indices of each of the LST track candidates. + std::vector> const& hitIdx() const { return hitIdx_; } + // Number of hits of each of the LST track candidates. + std::vector const& len() const { return len_; } + // Index of the pixel track associated to each of the LST track candidates. + // If not associated to a pixel track, which is the case for T5s, it defaults to -1. + std::vector const& seedIdx() const { return seedIdx_; } + // LSTTCType as per the enum above. 
+ std::vector const& trackCandidateType() const { return trackCandidateType_; } + +private: + std::vector> hitIdx_; + std::vector len_; + std::vector seedIdx_; + std::vector trackCandidateType_; +}; + +#endif diff --git a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h new file mode 100644 index 0000000000000..00fd77846c4c3 --- /dev/null +++ b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h @@ -0,0 +1,33 @@ +#ifndef RecoTracker_LST_interface_LSTPhase2OTHitsInput_h +#define RecoTracker_LST_interface_LSTPhase2OTHitsInput_h + +#include +#include + +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" + +class LSTPhase2OTHitsInput { +public: + LSTPhase2OTHitsInput() = default; + LSTPhase2OTHitsInput(std::vector const detId, + std::vector const x, + std::vector const y, + std::vector const z, + std::vector const hits) + : detId_(std::move(detId)), x_(std::move(x)), y_(std::move(y)), z_(std::move(z)), hits_(std::move(hits)) {} + + std::vector const& detId() const { return detId_; } + std::vector const& x() const { return x_; } + std::vector const& y() const { return y_; } + std::vector const& z() const { return z_; } + std::vector const& hits() const { return hits_; } + +private: + std::vector detId_; + std::vector x_; + std::vector y_; + std::vector z_; + std::vector hits_; +}; + +#endif diff --git a/RecoTracker/LST/interface/LSTPixelSeedInput.h b/RecoTracker/LST/interface/LSTPixelSeedInput.h new file mode 100644 index 0000000000000..18d3768b2e0fc --- /dev/null +++ b/RecoTracker/LST/interface/LSTPixelSeedInput.h @@ -0,0 +1,75 @@ +#ifndef RecoTracker_LST_interface_LSTPixelSeedInput_h +#define RecoTracker_LST_interface_LSTPixelSeedInput_h + +#include +#include + +class LSTPixelSeedInput { +public: + LSTPixelSeedInput() = default; + LSTPixelSeedInput(std::vector const px, + std::vector const py, + std::vector const pz, + std::vector const dxy, + std::vector const dz, + std::vector const ptErr, + 
std::vector const etaErr, + std::vector const stateTrajGlbX, + std::vector const stateTrajGlbY, + std::vector const stateTrajGlbZ, + std::vector const stateTrajGlbPx, + std::vector const stateTrajGlbPy, + std::vector const stateTrajGlbPz, + std::vector const q, + std::vector> const hitIdx) + : px_(std::move(px)), + py_(std::move(py)), + pz_(std::move(pz)), + dxy_(std::move(dxy)), + dz_(std::move(dz)), + ptErr_(std::move(ptErr)), + etaErr_(std::move(etaErr)), + stateTrajGlbX_(std::move(stateTrajGlbX)), + stateTrajGlbY_(std::move(stateTrajGlbY)), + stateTrajGlbZ_(std::move(stateTrajGlbZ)), + stateTrajGlbPx_(std::move(stateTrajGlbPx)), + stateTrajGlbPy_(std::move(stateTrajGlbPy)), + stateTrajGlbPz_(std::move(stateTrajGlbPz)), + q_(std::move(q)), + hitIdx_(std::move(hitIdx)) {} + + std::vector const& px() const { return px_; } + std::vector const& py() const { return py_; } + std::vector const& pz() const { return pz_; } + std::vector const& dxy() const { return dxy_; } + std::vector const& dz() const { return dz_; } + std::vector const& ptErr() const { return ptErr_; } + std::vector const& etaErr() const { return etaErr_; } + std::vector const& stateTrajGlbX() const { return stateTrajGlbX_; } + std::vector const& stateTrajGlbY() const { return stateTrajGlbY_; } + std::vector const& stateTrajGlbZ() const { return stateTrajGlbZ_; } + std::vector const& stateTrajGlbPx() const { return stateTrajGlbPx_; } + std::vector const& stateTrajGlbPy() const { return stateTrajGlbPy_; } + std::vector const& stateTrajGlbPz() const { return stateTrajGlbPz_; } + std::vector const& q() const { return q_; } + std::vector> const& hitIdx() const { return hitIdx_; } + +private: + std::vector px_; + std::vector py_; + std::vector pz_; + std::vector dxy_; + std::vector dz_; + std::vector ptErr_; + std::vector etaErr_; + std::vector stateTrajGlbX_; + std::vector stateTrajGlbY_; + std::vector stateTrajGlbZ_; + std::vector stateTrajGlbPx_; + std::vector stateTrajGlbPy_; + std::vector 
stateTrajGlbPz_; + std::vector q_; + std::vector> hitIdx_; +}; + +#endif diff --git a/RecoTracker/LST/plugins/BuildFile.xml b/RecoTracker/LST/plugins/BuildFile.xml new file mode 100644 index 0000000000000..1b6e79b68e8b4 --- /dev/null +++ b/RecoTracker/LST/plugins/BuildFile.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoTracker/LST/plugins/LSTOutputConverter.cc b/RecoTracker/LST/plugins/LSTOutputConverter.cc new file mode 100644 index 0000000000000..c40603c9c1e5d --- /dev/null +++ b/RecoTracker/LST/plugins/LSTOutputConverter.cc @@ -0,0 +1,273 @@ +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackCandidate/interface/TrackCandidateCollection.h" +#include "DataFormats/TrackReco/interface/SeedStopInfo.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTOutput.h" +#include "RecoTracker/TkSeedingLayers/interface/SeedingHitSet.h" + +#include "RecoTracker/TkSeedGenerator/interface/SeedCreator.h" +#include "RecoTracker/TkSeedGenerator/interface/SeedCreatorFactory.h" + +#include "RecoTracker/TkTrackingRegions/interface/GlobalTrackingRegion.h" +#include 
"TrackingTools/GeomPropagators/interface/Propagator.h" +#include "TrackingTools/Records/interface/TrackingComponentsRecord.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" + +class LSTOutputConverter : public edm::global::EDProducer<> { +public: + explicit LSTOutputConverter(edm::ParameterSet const& iConfig); + ~LSTOutputConverter() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::EDGetTokenT lstOutputToken_; + const edm::EDGetTokenT lstPhase2OTHitsInputToken_; + const edm::EDGetTokenT lstPixelSeedToken_; + const bool includeT5s_; + const bool includeNonpLSTSs_; + const edm::ESGetToken mfToken_; + const edm::ESGetToken propagatorAlongToken_; + const edm::ESGetToken propagatorOppositeToken_; + const edm::ESGetToken tGeomToken_; + std::unique_ptr seedCreator_; + const edm::EDPutTokenT trajectorySeedPutToken_; + const edm::EDPutTokenT trajectorySeedpLSPutToken_; + const edm::EDPutTokenT trackCandidatePutToken_; + const edm::EDPutTokenT trackCandidatepTCPutToken_; + const edm::EDPutTokenT trackCandidateT5TCPutToken_; + const edm::EDPutTokenT trackCandidateNopLSTCPutToken_; + const edm::EDPutTokenT trackCandidatepTTCPutToken_; + const edm::EDPutTokenT trackCandidatepLSTCPutToken_; + const edm::EDPutTokenT> seedStopInfoPutToken_; +}; + +LSTOutputConverter::LSTOutputConverter(edm::ParameterSet const& iConfig) + : lstOutputToken_(consumes(iConfig.getParameter("lstOutput"))), + lstPhase2OTHitsInputToken_{consumes(iConfig.getParameter("phase2OTHits"))}, + lstPixelSeedToken_{consumes(iConfig.getParameter("lstPixelSeeds"))}, + includeT5s_(iConfig.getParameter("includeT5s")), + includeNonpLSTSs_(iConfig.getParameter("includeNonpLSTSs")), + mfToken_(esConsumes()), + propagatorAlongToken_{esConsumes(iConfig.getParameter("propagatorAlong"))}, + 
propagatorOppositeToken_{esConsumes(iConfig.getParameter("propagatorOpposite"))}, + tGeomToken_(esConsumes()), + seedCreator_(SeedCreatorFactory::get()->create("SeedFromConsecutiveHitsCreator", + iConfig.getParameter("SeedCreatorPSet"), + consumesCollector())), + // FIXME: need to make creation configurable: + // - A toggle to not produce TSs at all could be useful to save memory; + // it won't affect speed though + // - The minimal set for TCs is t5TCsLST, pTTCsLST and pLSTCsLST. + // That would complicate the handling of collections though, + // so it is deferred to when we have a clearer picture of what's needed. + trajectorySeedPutToken_(produces("")), + trajectorySeedpLSPutToken_(produces("pLSTSsLST")), + trackCandidatePutToken_(produces("")), + trackCandidatepTCPutToken_(produces("pTCsLST")), + trackCandidateT5TCPutToken_(produces("t5TCsLST")), + trackCandidateNopLSTCPutToken_(produces("nopLSTCsLST")), + trackCandidatepTTCPutToken_(produces("pTTCsLST")), + trackCandidatepLSTCPutToken_(produces("pLSTCsLST")), + seedStopInfoPutToken_(produces()) {} + +void LSTOutputConverter::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("lstOutput", edm::InputTag("lstProducer")); + desc.add("phase2OTHits", edm::InputTag("lstPhase2OTHitsInputProducer")); + desc.add("lstPixelSeeds", edm::InputTag("lstPixelSeedInputProducer")); + desc.add("includeT5s", true); + desc.add("includeNonpLSTSs", false); + desc.add("propagatorAlong", edm::ESInputTag{"", "PropagatorWithMaterial"}); + desc.add("propagatorOpposite", edm::ESInputTag{"", "PropagatorWithMaterialOpposite"}); + + edm::ParameterSetDescription psd0; + psd0.add("ComponentName", std::string("SeedFromConsecutiveHitsCreator")); + psd0.add("propagator", std::string("PropagatorWithMaterial")); + psd0.add("SeedMomentumForBOFF", 5.0); + psd0.add("OriginTransverseErrorMultiplier", 1.0); + psd0.add("MinOneOverPtError", 1.0); + psd0.add("magneticField", std::string("")); + 
psd0.add("TTRHBuilder", std::string("WithTrackAngle")); + psd0.add("forceKinematicWithRegionDirection", false); + desc.add("SeedCreatorPSet", psd0); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTOutputConverter::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& lstOutput = iEvent.get(lstOutputToken_); + auto const& phase2OTRecHits = iEvent.get(lstPhase2OTHitsInputToken_); + auto const& pixelSeeds = iEvent.get(lstPixelSeedToken_); + auto const& mf = iSetup.getData(mfToken_); + auto const& propAlo = iSetup.getData(propagatorAlongToken_); + auto const& propOppo = iSetup.getData(propagatorOppositeToken_); + auto const& tracker = iSetup.getData(tGeomToken_); + + // Vector definitions + std::vector> const& lstTC_hitIdx = lstOutput.hitIdx(); + std::vector const& lstTC_len = lstOutput.len(); + std::vector const& lstTC_seedIdx = lstOutput.seedIdx(); + std::vector const& lstTC_trackCandidateType = lstOutput.trackCandidateType(); + + TrajectorySeedCollection outputTS, outputpLSTS; + outputTS.reserve(lstTC_len.size()); + outputpLSTS.reserve(lstTC_len.size()); + TrackCandidateCollection outputTC, outputpTC, outputT5TC, outputNopLSTC, outputpTTC, outputpLSTC; + outputTC.reserve(lstTC_len.size()); + outputpTC.reserve(lstTC_len.size()); + outputT5TC.reserve(lstTC_len.size()); + outputNopLSTC.reserve(lstTC_len.size()); + outputpTTC.reserve(lstTC_len.size()); + outputpLSTC.reserve(lstTC_len.size()); + + auto const& OTHits = phase2OTRecHits.hits(); + + LogDebug("LSTOutputConverter") << "lstTC size " << lstTC_len.size(); + for (unsigned int i = 0; i < lstTC_len.size(); i++) { + LogDebug("LSTOutputConverter") << " cand " << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i]; + TrajectorySeed seed; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::T5) + seed = pixelSeeds[lstTC_seedIdx[i]]; + + edm::OwnVector recHits; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::T5) { + for (auto const& hit : 
seed.recHits()) + recHits.push_back(hit.clone()); + } + + unsigned int const nPixelHits = lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5 ? 0 : recHits.size(); + for (unsigned int j = nPixelHits; j < lstTC_hitIdx[i].size(); j++) + recHits.push_back(OTHits[lstTC_hitIdx[i][j]]->clone()); + + recHits.sort([](const auto& a, const auto& b) { + const auto asub = a.det()->subDetector(); + const auto bsub = b.det()->subDetector(); + if (GeomDetEnumerators::isInnerTracker(asub) && GeomDetEnumerators::isOuterTracker(bsub)) { + return true; + } else if (GeomDetEnumerators::isOuterTracker(asub) && GeomDetEnumerators::isInnerTracker(bsub)) { + return false; + } else if (asub != bsub) { + return asub < bsub; + } else { + const auto& apos = a.surface(); + const auto& bpos = b.surface(); + if (GeomDetEnumerators::isBarrel(asub)) { + return apos->rSpan().first < bpos->rSpan().first; + } else { + return std::abs(apos->zSpan().first) < std::abs(bpos->zSpan().first); + } + } + }); + + TrajectorySeedCollection seeds; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::pLS) { + // Construct a full-length TrajectorySeed always for T5s, + // only when required by a flag for other pT objects. 
+ if (includeNonpLSTSs_ || lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + using Hit = SeedingHitSet::ConstRecHitPointer; + std::vector hitsForSeed; + hitsForSeed.reserve(lstTC_len[i]); + int nHits = 0; + for (auto const& hit : recHits) { + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + auto hType = tracker.getDetectorType(hit.geographicalId()); + if (hType != TrackerGeometry::ModuleType::Ph2PSP && nHits < 2) + continue; // the first two should be P + } + hitsForSeed.emplace_back(dynamic_cast(&hit)); + nHits++; + } + + seedCreator_->init(GlobalTrackingRegion(), iSetup, nullptr); + seedCreator_->makeSeed(seeds, hitsForSeed); + if (seeds.empty()) { + edm::LogInfo("LSTOutputConverter") + << "failed to convert a LST object to a seed" << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i]; + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) + continue; + } + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) + seed = seeds[0]; + + auto trajectorySeed = (seeds.empty() ? 
seed : seeds[0]); + outputTS.emplace_back(trajectorySeed); + auto const& ss = trajectorySeed.startingState(); + LogDebug("LSTOutputConverter") << "Created a seed with " << seed.nHits() << " " << ss.detId() << " " << ss.pt() + << " " << ss.parameters().vector() << " " << ss.error(0); + } + } else { + outputTS.emplace_back(seed); + outputpLSTS.emplace_back(seed); + } + + TrajectoryStateOnSurface tsos = + trajectoryStateTransform::transientState(seed.startingState(), (seed.recHits().end() - 1)->surface(), &mf); + tsos.rescaleError(100.); + auto tsosPair = propOppo.propagateWithPath(tsos, *recHits[0].surface()); + if (!tsosPair.first.isValid()) { + LogDebug("LSTOutputConverter") << "Propagating to startingState opposite to momentum failed, trying along next"; + tsosPair = propAlo.propagateWithPath(tsos, *recHits[0].surface()); + } + if (tsosPair.first.isValid()) { + PTrajectoryStateOnDet st = + trajectoryStateTransform::persistentState(tsosPair.first, recHits[0].det()->geographicalId().rawId()); + + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + if (!includeT5s_) { + continue; + } else { + auto tc = TrackCandidate(recHits, seed, st); + outputTC.emplace_back(tc); + outputT5TC.emplace_back(tc); + outputNopLSTC.emplace_back(tc); + } + } else { + auto tc = TrackCandidate(recHits, seed, st); + outputTC.emplace_back(tc); + outputpTC.emplace_back(tc); + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::pLS) { + outputNopLSTC.emplace_back(tc); + outputpTTC.emplace_back(tc); + } else { + outputpLSTC.emplace_back(tc); + } + } + } else { + edm::LogInfo("LSTOutputConverter") << "Failed to make a candidate initial state. 
Seed state is " << tsos + << " TC cand " << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i] + << " first hit " << recHits.front().globalPosition() << " last hit " + << recHits.back().globalPosition(); + } + } + + LogDebug("LSTOutputConverter") << "done with conversion: Track candidate output size = " << outputpTC.size() + << " (p* objects) + " << outputT5TC.size() << " (T5 objects)"; + iEvent.emplace(trajectorySeedPutToken_, std::move(outputTS)); + iEvent.emplace(trajectorySeedpLSPutToken_, std::move(outputpLSTS)); + iEvent.emplace(trackCandidatePutToken_, std::move(outputTC)); + iEvent.emplace(trackCandidatepTCPutToken_, std::move(outputpTC)); + iEvent.emplace(trackCandidateT5TCPutToken_, std::move(outputT5TC)); + iEvent.emplace(trackCandidateNopLSTCPutToken_, std::move(outputNopLSTC)); + iEvent.emplace(trackCandidatepTTCPutToken_, std::move(outputpTTC)); + iEvent.emplace(trackCandidatepLSTCPutToken_, std::move(outputpLSTC)); + iEvent.emplace(seedStopInfoPutToken_, 0U); //dummy stop info +} + +DEFINE_FWK_MODULE(LSTOutputConverter); diff --git a/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc b/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc new file mode 100644 index 0000000000000..a0fcc72f598b6 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc @@ -0,0 +1,67 @@ +#include "FWCore/Framework/interface/global/EDProducer.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" + +class LSTPhase2OTHitsInputProducer : public edm::global::EDProducer<> { +public: + explicit LSTPhase2OTHitsInputProducer(edm::ParameterSet const& iConfig); + ~LSTPhase2OTHitsInputProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) 
const override; + + const edm::EDGetTokenT phase2OTRecHitToken_; + const edm::EDPutTokenT lstPhase2OTHitsInputPutToken_; +}; + +LSTPhase2OTHitsInputProducer::LSTPhase2OTHitsInputProducer(edm::ParameterSet const& iConfig) + : phase2OTRecHitToken_(consumes(iConfig.getParameter("phase2OTRecHits"))), + lstPhase2OTHitsInputPutToken_(produces()) {} + +void LSTPhase2OTHitsInputProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("phase2OTRecHits", edm::InputTag("siPhase2RecHits")); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTPhase2OTHitsInputProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& phase2OTHits = iEvent.get(phase2OTRecHitToken_); + + // Vector definitions + std::vector ph2_detId; + ph2_detId.reserve(phase2OTHits.dataSize()); + std::vector ph2_x; + ph2_x.reserve(phase2OTHits.dataSize()); + std::vector ph2_y; + ph2_y.reserve(phase2OTHits.dataSize()); + std::vector ph2_z; + ph2_z.reserve(phase2OTHits.dataSize()); + std::vector ph2_hits; + ph2_hits.reserve(phase2OTHits.dataSize()); + + for (auto const& it : phase2OTHits) { + const DetId hitId = it.detId(); + for (auto const& hit : it) { + ph2_detId.push_back(hitId.rawId()); + ph2_x.push_back(hit.globalPosition().x()); + ph2_y.push_back(hit.globalPosition().y()); + ph2_z.push_back(hit.globalPosition().z()); + ph2_hits.push_back(&hit); + } + } + + LSTPhase2OTHitsInput phase2OTHitsInput( + std::move(ph2_detId), std::move(ph2_x), std::move(ph2_y), std::move(ph2_z), std::move(ph2_hits)); + iEvent.emplace(lstPhase2OTHitsInputPutToken_, std::move(phase2OTHitsInput)); +} + +DEFINE_FWK_MODULE(LSTPhase2OTHitsInputProducer); diff --git a/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc b/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc new file mode 100644 index 0000000000000..819baf78c6aa4 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc @@ 
-0,0 +1,171 @@ +#include "FWCore/Framework/interface/global/EDProducer.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "FWCore/Utilities/interface/transform.h" + +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" + +#include "DataFormats/TrackerRecHit2D/interface/SiStripMatchedRecHit2DCollection.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" + +#include "Validation/RecoTrack/interface/trackFromSeedFitFailed.h" + +#include "TrackingTools/Records/interface/TransientRecHitRecord.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" +#include "TrackingTools/TransientTrackingRecHit/interface/TransientTrackingRecHitBuilder.h" + +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" + +class LSTPixelSeedInputProducer : public edm::global::EDProducer<> { +public: + explicit LSTPixelSeedInputProducer(edm::ParameterSet const& iConfig); + ~LSTPixelSeedInputProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken mfToken_; + const edm::EDGetTokenT beamSpotToken_; + std::vector>> seedTokens_; + const edm::EDPutTokenT lstPixelSeedInputPutToken_; + const edm::EDPutTokenT lstPixelSeedsPutToken_; +}; + +LSTPixelSeedInputProducer::LSTPixelSeedInputProducer(edm::ParameterSet const& iConfig) + : mfToken_(esConsumes()), + beamSpotToken_(consumes(iConfig.getParameter("beamSpot"))), + lstPixelSeedInputPutToken_(produces()), + lstPixelSeedsPutToken_(produces()) { + seedTokens_ = edm::vector_transform(iConfig.getParameter>("seedTracks"), + [&](const edm::InputTag& tag) { return consumes>(tag); }); +} + +void 
LSTPixelSeedInputProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + + desc.add>("seedTracks", + std::vector{edm::InputTag("lstInitialStepSeedTracks"), + edm::InputTag("lstHighPtTripletStepSeedTracks")}); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTPixelSeedInputProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& mf = iSetup.getData(mfToken_); + auto const& bs = iEvent.get(beamSpotToken_); + + // Vector definitions + std::vector see_px; + std::vector see_py; + std::vector see_pz; + std::vector see_dxy; + std::vector see_dz; + std::vector see_ptErr; + std::vector see_etaErr; + std::vector see_stateTrajGlbX; + std::vector see_stateTrajGlbY; + std::vector see_stateTrajGlbZ; + std::vector see_stateTrajGlbPx; + std::vector see_stateTrajGlbPy; + std::vector see_stateTrajGlbPz; + std::vector see_q; + std::vector> see_hitIdx; + TrajectorySeedCollection see_seeds; + + for (size_t iColl = 0; iColl < seedTokens_.size(); ++iColl) { + // Get seed tokens + auto const& seedToken = seedTokens_[iColl]; + auto const& seedTracks = iEvent.get(seedToken); + + if (seedTracks.empty()) + continue; + + // Get seed track refs + edm::RefToBaseVector seedTrackRefs; + for (edm::View::size_type i = 0; i < seedTracks.size(); ++i) { + seedTrackRefs.push_back(seedTracks.refAt(i)); + } + + edm::ProductID id = seedTracks[0].seedRef().id(); + + for (size_t iSeed = 0; iSeed < seedTrackRefs.size(); ++iSeed) { + auto const& seedTrackRef = seedTrackRefs[iSeed]; + auto const& seedTrack = *seedTrackRef; + auto const& seedRef = seedTrack.seedRef(); + auto const& seed = *seedRef; + + if (seedRef.id() != id) + throw cms::Exception("LogicError") + << "All tracks in 'TracksFromSeeds' collection should point to seeds in the same collection. 
Now the " + "element 0 had ProductID " + << id << " while the element " << seedTrackRef.key() << " had " << seedTrackRef.id() << "."; + + const bool seedFitOk = !trackFromSeedFitFailed(seedTrack); + + const TrackingRecHit* lastRecHit = &*(seed.recHits().end() - 1); + TrajectoryStateOnSurface tsos = + trajectoryStateTransform::transientState(seed.startingState(), lastRecHit->surface(), &mf); + auto const& stateGlobal = tsos.globalParameters(); + + std::vector hitIdx; + for (auto const& hit : seed.recHits()) { + int subid = hit.geographicalId().subdetId(); + if (subid == (int)PixelSubdetector::PixelBarrel || subid == (int)PixelSubdetector::PixelEndcap) { + const BaseTrackerRecHit* bhit = dynamic_cast(&hit); + const auto& clusterRef = bhit->firstClusterRef(); + const auto clusterKey = clusterRef.cluster_pixel().key(); + hitIdx.push_back(clusterKey); + } else { + throw cms::Exception("LSTPixelSeedInputProducer") << "Not pixel hits found!"; + } + } + + // Fill output + see_px.push_back(seedFitOk ? seedTrack.px() : 0); + see_py.push_back(seedFitOk ? seedTrack.py() : 0); + see_pz.push_back(seedFitOk ? seedTrack.pz() : 0); + see_dxy.push_back(seedFitOk ? seedTrack.dxy(bs.position()) : 0); + see_dz.push_back(seedFitOk ? seedTrack.dz(bs.position()) : 0); + see_ptErr.push_back(seedFitOk ? seedTrack.ptError() : 0); + see_etaErr.push_back(seedFitOk ? 
seedTrack.etaError() : 0); + see_stateTrajGlbX.push_back(stateGlobal.position().x()); + see_stateTrajGlbY.push_back(stateGlobal.position().y()); + see_stateTrajGlbZ.push_back(stateGlobal.position().z()); + see_stateTrajGlbPx.push_back(stateGlobal.momentum().x()); + see_stateTrajGlbPy.push_back(stateGlobal.momentum().y()); + see_stateTrajGlbPz.push_back(stateGlobal.momentum().z()); + see_q.push_back(seedTrack.charge()); + see_hitIdx.push_back(hitIdx); + see_seeds.push_back(seed); + } + } + + LSTPixelSeedInput pixelSeedInput(std::move(see_px), + std::move(see_py), + std::move(see_pz), + std::move(see_dxy), + std::move(see_dz), + std::move(see_ptErr), + std::move(see_etaErr), + std::move(see_stateTrajGlbX), + std::move(see_stateTrajGlbY), + std::move(see_stateTrajGlbZ), + std::move(see_stateTrajGlbPx), + std::move(see_stateTrajGlbPy), + std::move(see_stateTrajGlbPz), + std::move(see_q), + std::move(see_hitIdx)); + iEvent.emplace(lstPixelSeedInputPutToken_, std::move(pixelSeedInput)); + iEvent.emplace(lstPixelSeedsPutToken_, std::move(see_seeds)); +} + +DEFINE_FWK_MODULE(LSTPixelSeedInputProducer); diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc new file mode 100644 index 0000000000000..0f0c53344de18 --- /dev/null +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -0,0 +1,32 @@ +// LST includes +#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class 
LSTModulesDevESProducer : public ESProducer { + public: + LSTModulesDevESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { setWhatProduced(this); } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr> produce(TrackerRecoGeometryRecord const& iRecord) { + return lst::loadAndFillESHost(); + } + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(LSTModulesDevESProducer); diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc new file mode 100644 index 0000000000000..7eb6c57ade05c --- /dev/null +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -0,0 +1,99 @@ +#include + +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "RecoTracker/LST/interface/LSTOutput.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class LSTProducer : public stream::SynchronizingEDProducer<> { + public: + 
LSTProducer(edm::ParameterSet const& config) + : lstPixelSeedInputToken_{consumes(config.getParameter("pixelSeedInput"))}, + lstPhase2OTHitsInputToken_{consumes(config.getParameter("phase2OTHitsInput"))}, + lstESToken_{esConsumes()}, + verbose_(config.getParameter("verbose")), + nopLSDupClean_(config.getParameter("nopLSDupClean")), + tcpLSTriplets_(config.getParameter("tcpLSTriplets")), + lstOutputToken_{produces()} {} + + void acquire(device::Event const& event, device::EventSetup const& setup) override { + // Inputs + auto const& pixelSeeds = event.get(lstPixelSeedInputToken_); + auto const& phase2OTHits = event.get(lstPhase2OTHitsInputToken_); + + auto const& lstESDeviceData = setup.getData(lstESToken_); + + lst_.run(event.queue(), + verbose_, + &lstESDeviceData, + pixelSeeds.px(), + pixelSeeds.py(), + pixelSeeds.pz(), + pixelSeeds.dxy(), + pixelSeeds.dz(), + pixelSeeds.ptErr(), + pixelSeeds.etaErr(), + pixelSeeds.stateTrajGlbX(), + pixelSeeds.stateTrajGlbY(), + pixelSeeds.stateTrajGlbZ(), + pixelSeeds.stateTrajGlbPx(), + pixelSeeds.stateTrajGlbPy(), + pixelSeeds.stateTrajGlbPz(), + pixelSeeds.q(), + pixelSeeds.hitIdx(), + phase2OTHits.detId(), + phase2OTHits.x(), + phase2OTHits.y(), + phase2OTHits.z(), + nopLSDupClean_, + tcpLSTriplets_); + } + + void produce(device::Event& event, device::EventSetup const&) override { + // Output + LSTOutput lstOutput(lst_.hits(), lst_.len(), lst_.seedIdx(), lst_.trackCandidateType()); + event.emplace(lstOutputToken_, std::move(lstOutput)); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelSeedInput", edm::InputTag{"lstPixelSeedInputProducer"}); + desc.add("phase2OTHitsInput", edm::InputTag{"lstPhase2OTHitsInputProducer"}); + desc.add("verbose", false); + desc.add("nopLSDupClean", false); + desc.add("tcpLSTriplets", false); + descriptions.addWithDefaultLabel(desc); + } + + private: + edm::EDGetTokenT lstPixelSeedInputToken_; + 
edm::EDGetTokenT lstPhase2OTHitsInputToken_; + device::ESGetToken, TrackerRecoGeometryRecord> lstESToken_; + const bool verbose_, nopLSDupClean_, tcpLSTriplets_; + edm::EDPutTokenT lstOutputToken_; + + lst::LST lst_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(LSTProducer); diff --git a/RecoTracker/LST/python/lstProducer_cff.py b/RecoTracker/LST/python/lstProducer_cff.py new file mode 100644 index 0000000000000..9cf355bb3a3fa --- /dev/null +++ b/RecoTracker/LST/python/lstProducer_cff.py @@ -0,0 +1,8 @@ +import FWCore.ParameterSet.Config as cms + +from RecoTracker.LST.lstProducer_cfi import lstProducer + +from RecoTracker.LST.lstModulesDevESProducer_cfi import lstModulesDevESProducer + +# not scheduled task to get the framework to hide the producer +dummyLSTModulesDevESProducerTask = cms.Task(lstModulesDevESProducer) diff --git a/RecoTracker/LST/python/lstSeedTracks_cff.py b/RecoTracker/LST/python/lstSeedTracks_cff.py new file mode 100644 index 0000000000000..7046c616b0054 --- /dev/null +++ b/RecoTracker/LST/python/lstSeedTracks_cff.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +lstInitialStepSeedTracks = cms.EDProducer( + "TrackFromSeedProducer", + src = cms.InputTag("initialStepSeeds"), + beamSpot = cms.InputTag("offlineBeamSpot"), + TTRHBuilder = cms.string("WithoutRefit") +) + +lstHighPtTripletStepSeedTracks = cms.EDProducer( + "TrackFromSeedProducer", + src = cms.InputTag("highPtTripletStepSeeds"), + beamSpot = cms.InputTag("offlineBeamSpot"), + TTRHBuilder = cms.string("WithoutRefit") +) diff --git a/RecoTracker/LST/python/lst_cff.py b/RecoTracker/LST/python/lst_cff.py new file mode 100644 index 0000000000000..af3a80ae77e18 --- /dev/null +++ b/RecoTracker/LST/python/lst_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +from RecoTracker.LST.lstSeedTracks_cff import * +from RecoTracker.LST.lstPixelSeedInputProducer_cfi 
import * +from RecoTracker.LST.lstPhase2OTHitsInputProducer_cfi import * +from RecoTracker.LST.lstOutputConverter_cfi import * diff --git a/RecoTracker/LST/src/ES_ModulesDev.cc b/RecoTracker/LST/src/ES_ModulesDev.cc new file mode 100644 index 0000000000000..06a357860a7d5 --- /dev/null +++ b/RecoTracker/LST/src/ES_ModulesDev.cc @@ -0,0 +1,5 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(lst::LSTESData); diff --git a/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc b/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc new file mode 100644 index 0000000000000..54ded5e7a7c98 --- /dev/null +++ b/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc @@ -0,0 +1,4 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_TEMPLATED_DATA_REG(lst::LSTESData); diff --git a/RecoTracker/LST/src/classes.h b/RecoTracker/LST/src/classes.h new file mode 100644 index 0000000000000..6a6817d9b538e --- /dev/null +++ b/RecoTracker/LST/src/classes.h @@ -0,0 +1,9 @@ +#ifndef RecoTracker_LST_classes_h +#define RecoTracker_LST_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTOutput.h" + +#endif diff --git a/RecoTracker/LST/src/classes_def.xml b/RecoTracker/LST/src/classes_def.xml new file mode 100644 index 0000000000000..d386e7b92a215 --- /dev/null +++ b/RecoTracker/LST/src/classes_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/RecoTracker/LSTCore/BuildFile.xml b/RecoTracker/LSTCore/BuildFile.xml new file mode 100644 index 0000000000000..1208407185001 --- /dev/null +++ b/RecoTracker/LSTCore/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/RecoTracker/LSTCore/interface/Constants.h 
b/RecoTracker/LSTCore/interface/Constants.h new file mode 100644 index 0000000000000..350857ac0b2e5 --- /dev/null +++ b/RecoTracker/LSTCore/interface/Constants.h @@ -0,0 +1,81 @@ +#ifndef RecoTracker_LSTCore_interface_Constants_h +#define RecoTracker_LSTCore_interface_Constants_h + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#ifdef CACHE_ALLOC +#include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h" +#endif + +namespace lst { + + // Buffer type for allocations where auto type can't be used. + template + using Buf = alpaka::Buf; + + // Allocation wrapper function to make integration of the caching allocator easier and reduce code boilerplate. + template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TDev const& dev, TSize nElements, TQueue queue) { +#ifdef CACHE_ALLOC + return cms::alpakatools::allocCachedBuf( + dev, queue, alpaka_common::Vec1D(static_cast(nElements))); +#else + return alpaka::allocBuf(dev, + alpaka_common::Vec1D(static_cast(nElements))); +#endif + } + + // Second allocation wrapper function when queue is not given. Reduces code boilerplate. 
+ template + ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf allocBufWrapper(TDev const& dev, TSize nElements) { + return alpaka::allocBuf(dev, + alpaka_common::Vec1D(static_cast(nElements))); + } + + // Named constants for pixelTypes + enum PixelType : int8_t { kInvalid = -1, kHighPt = 0, kLowPtPosCurv = 1, kLowPtNegCurv = 2 }; + +// If a compile time flag does not define PT_CUT, default to 0.8 (GeV) +#ifndef PT_CUT + constexpr float PT_CUT = 0.8f; +#endif + + constexpr unsigned int max_blocks = 80; + constexpr unsigned int max_connected_modules = 40; + + constexpr unsigned int n_max_pixel_segments_per_module = 50000; + + constexpr unsigned int n_max_pixel_md_per_modules = 2 * n_max_pixel_segments_per_module; + + constexpr unsigned int n_max_pixel_triplets = 5000; + constexpr unsigned int n_max_pixel_quintuplets = 15000; + + constexpr unsigned int n_max_pixel_track_candidates = 30000; + constexpr unsigned int n_max_nonpixel_track_candidates = 1000; + + constexpr unsigned int size_superbins = 45000; + + // Defining the constant host device variables right up here + // Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future. 
+ struct Params_pLS { + static constexpr int kLayers = 2, kHits = 4; + }; + struct Params_LS { + static constexpr int kLayers = 2, kHits = 4; + }; + struct Params_T3 { + static constexpr int kLayers = 3, kHits = 6; + }; + struct Params_pT3 { + static constexpr int kLayers = 5, kHits = 10; + }; + struct Params_T5 { + static constexpr int kLayers = 5, kHits = 10; + }; + struct Params_pT5 { + static constexpr int kLayers = 7, kHits = 14; + }; + +} //namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometry.h b/RecoTracker/LSTCore/interface/EndcapGeometry.h new file mode 100644 index 0000000000000..b8c44c14fb143 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometry.h @@ -0,0 +1,29 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometry_h +#define RecoTracker_LSTCore_interface_EndcapGeometry_h + +#include +#include +#include + +namespace lst { + class EndcapGeometry { + private: + std::map dxdy_slope_; // dx/dy slope + std::map centroid_phis_; // centroid phi + + public: + std::vector geoMapDetId_buf; + std::vector geoMapPhi_buf; + + unsigned int nEndCapMap; + + EndcapGeometry() = default; + EndcapGeometry(std::string const& filename); + + void load(std::string const&); + void fillGeoMapArraysExplicit(); + float getdxdy_slope(unsigned int detid) const; + }; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h new file mode 100644 index 0000000000000..ce037b026fc22 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h @@ -0,0 +1,58 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryBuffers_h +#define RecoTracker_LSTCore_interface_EndcapGeometryBuffers_h + +#include +#include +#include +#include +#include +#include +#include + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + + struct EndcapGeometryDev { + const unsigned int* geoMapDetId; + const float* geoMapPhi; + + template + void 
setData(TBuff const& buf) { + geoMapDetId = buf.geoMapDetId_buf.data(); + geoMapPhi = buf.geoMapPhi_buf.data(); + } + }; + + template + struct EndcapGeometryBuffer { + Buf geoMapDetId_buf; + Buf geoMapPhi_buf; + + EndcapGeometryBuffer(TDev const& dev, unsigned int nEndCapMap) + : geoMapDetId_buf(allocBufWrapper(dev, nEndCapMap)), + geoMapPhi_buf(allocBufWrapper(dev, nEndCapMap)) { + data_.setData(*this); + } + + template + inline void copyFromSrc(TQueue queue, EndcapGeometryBuffer const& src) { + alpaka::memcpy(queue, geoMapDetId_buf, src.geoMapDetId_buf); + alpaka::memcpy(queue, geoMapPhi_buf, src.geoMapPhi_buf); + } + + template + EndcapGeometryBuffer(TQueue queue, EndcapGeometryBuffer const& src, unsigned int nEndCapMap) + : EndcapGeometryBuffer(alpaka::getDev(queue), nEndCapMap) { + copyFromSrc(queue, src); + } + + inline EndcapGeometryDev const* data() const { return &data_; } + + private: + EndcapGeometryDev data_; + }; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h new file mode 100644 index 0000000000000..9f51be48f28b6 --- /dev/null +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -0,0 +1,70 @@ +#ifndef RecoTracker_LSTCore_interface_LSTESData_h +#define RecoTracker_LSTCore_interface_LSTESData_h + +#include "RecoTracker/LSTCore/interface/Constants.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometryBuffer.h" +#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" + +#include +#include + +namespace lst { + + template + struct LSTESData { + uint16_t nModules; + uint16_t nLowerModules; + unsigned int nPixels; + unsigned int nEndCapMap; + ModulesBuffer modulesBuffers; + EndcapGeometryBuffer endcapGeometryBuffers; + std::shared_ptr pixelMapping; + + LSTESData(uint16_t const& nModulesIn, + uint16_t const& nLowerModulesIn, + unsigned int const& nPixelsIn, 
+ unsigned int const& nEndCapMapIn, + ModulesBuffer const& modulesBuffersIn, + EndcapGeometryBuffer const& endcapGeometryBuffersIn, + std::shared_ptr const& pixelMappingIn) + : nModules(nModulesIn), + nLowerModules(nLowerModulesIn), + nPixels(nPixelsIn), + nEndCapMap(nEndCapMapIn), + modulesBuffers(modulesBuffersIn), + endcapGeometryBuffers(endcapGeometryBuffersIn), + pixelMapping(pixelMappingIn) {} + }; + + std::unique_ptr> loadAndFillESHost(); + +} // namespace lst + +namespace cms::alpakatools { + template <> + struct CopyToDevice> { + template + static lst::LSTESData> copyAsync(TQueue& queue, + lst::LSTESData const& srcData) { + auto deviceModulesBuffers = + lst::ModulesBuffer>(alpaka::getDev(queue), srcData.nModules, srcData.nPixels); + deviceModulesBuffers.copyFromSrc(queue, srcData.modulesBuffers); + auto deviceEndcapGeometryBuffers = + lst::EndcapGeometryBuffer>(alpaka::getDev(queue), srcData.nEndCapMap); + deviceEndcapGeometryBuffers.copyFromSrc(queue, srcData.endcapGeometryBuffers); + + return lst::LSTESData>(srcData.nModules, + srcData.nLowerModules, + srcData.nPixels, + srcData.nEndCapMap, + std::move(deviceModulesBuffers), + std::move(deviceEndcapGeometryBuffers), + srcData.pixelMapping); + } + }; +} // namespace cms::alpakatools + +#endif diff --git a/RecoTracker/LSTCore/interface/Module.h b/RecoTracker/LSTCore/interface/Module.h new file mode 100644 index 0000000000000..7266ebd7bc49b --- /dev/null +++ b/RecoTracker/LSTCore/interface/Module.h @@ -0,0 +1,227 @@ +#ifndef RecoTracker_LSTCore_interface_Module_h +#define RecoTracker_LSTCore_interface_Module_h + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + enum SubDet { InnerPixel = 0, Barrel = 5, Endcap = 4 }; + + enum Side { NegZ = 1, PosZ = 2, Center = 3 }; + + enum ModuleType { PS, TwoS, PixelModule }; + + enum ModuleLayerType { Pixel, Strip, InnerPixelLayer }; + + struct Modules { + const unsigned int* detIds; + const uint16_t* moduleMap; + const unsigned int* mapdetId; + 
const uint16_t* mapIdx; + const uint16_t* nConnectedModules; + const float* drdzs; + const float* dxdys; + const uint16_t* nModules; + const uint16_t* nLowerModules; + const uint16_t* partnerModuleIndices; + + const short* layers; + const short* rings; + const short* modules; + const short* rods; + const short* subdets; + const short* sides; + const float* eta; + const float* r; + const bool* isInverted; + const bool* isLower; + const bool* isAnchor; + const ModuleType* moduleType; + const ModuleLayerType* moduleLayerType; + const int* lstLayers; + const unsigned int* connectedPixels; + + static bool parseIsInverted(short subdet, short side, short module, short layer) { + if (subdet == Endcap) { + if (side == NegZ) { + return module % 2 == 1; + } else if (side == PosZ) { + return module % 2 == 0; + } else { + return false; + } + } else if (subdet == Barrel) { + if (side == Center) { + if (layer <= 3) { + return module % 2 == 1; + } else if (layer >= 4) { + return module % 2 == 0; + } else { + return false; + } + } else if (side == NegZ or side == PosZ) { + if (layer <= 2) { + return module % 2 == 1; + } else if (layer == 3) { + return module % 2 == 0; + } else { + return false; + } + } else { + return false; + } + } else { + return false; + } + } + + static bool parseIsLower(bool isInvertedx, unsigned int detId) { + return (isInvertedx) ? !(detId & 1) : (detId & 1); + } + + static unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { + return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? 
detId + 1 : detId - 1); + } + + template + void setData(TBuff const& buf) { + detIds = buf.detIds_buf.data(); + moduleMap = buf.moduleMap_buf.data(); + mapdetId = buf.mapdetId_buf.data(); + mapIdx = buf.mapIdx_buf.data(); + nConnectedModules = buf.nConnectedModules_buf.data(); + drdzs = buf.drdzs_buf.data(); + dxdys = buf.dxdys_buf.data(); + nModules = buf.nModules_buf.data(); + nLowerModules = buf.nLowerModules_buf.data(); + partnerModuleIndices = buf.partnerModuleIndices_buf.data(); + + layers = buf.layers_buf.data(); + rings = buf.rings_buf.data(); + modules = buf.modules_buf.data(); + rods = buf.rods_buf.data(); + subdets = buf.subdets_buf.data(); + sides = buf.sides_buf.data(); + eta = buf.eta_buf.data(); + r = buf.r_buf.data(); + isInverted = buf.isInverted_buf.data(); + isLower = buf.isLower_buf.data(); + isAnchor = buf.isAnchor_buf.data(); + moduleType = buf.moduleType_buf.data(); + moduleLayerType = buf.moduleLayerType_buf.data(); + lstLayers = buf.lstLayers_buf.data(); + connectedPixels = buf.connectedPixels_buf.data(); + } + }; + + template + struct ModulesBuffer { + Buf detIds_buf; + Buf moduleMap_buf; + Buf mapdetId_buf; + Buf mapIdx_buf; + Buf nConnectedModules_buf; + Buf drdzs_buf; + Buf dxdys_buf; + Buf nModules_buf; + Buf nLowerModules_buf; + Buf partnerModuleIndices_buf; + + Buf layers_buf; + Buf rings_buf; + Buf modules_buf; + Buf rods_buf; + Buf subdets_buf; + Buf sides_buf; + Buf eta_buf; + Buf r_buf; + Buf isInverted_buf; + Buf isLower_buf; + Buf isAnchor_buf; + Buf moduleType_buf; + Buf moduleLayerType_buf; + Buf lstLayers_buf; + Buf connectedPixels_buf; + + Modules data_; + + ModulesBuffer(TDev const& dev, unsigned int nMod, unsigned int nPixs) + : detIds_buf(allocBufWrapper(dev, nMod)), + moduleMap_buf(allocBufWrapper(dev, nMod * max_connected_modules)), + mapdetId_buf(allocBufWrapper(dev, nMod)), + mapIdx_buf(allocBufWrapper(dev, nMod)), + nConnectedModules_buf(allocBufWrapper(dev, nMod)), + drdzs_buf(allocBufWrapper(dev, nMod)), + 
dxdys_buf(allocBufWrapper(dev, nMod)), + nModules_buf(allocBufWrapper(dev, 1)), + nLowerModules_buf(allocBufWrapper(dev, 1)), + partnerModuleIndices_buf(allocBufWrapper(dev, nMod)), + + layers_buf(allocBufWrapper(dev, nMod)), + rings_buf(allocBufWrapper(dev, nMod)), + modules_buf(allocBufWrapper(dev, nMod)), + rods_buf(allocBufWrapper(dev, nMod)), + subdets_buf(allocBufWrapper(dev, nMod)), + sides_buf(allocBufWrapper(dev, nMod)), + eta_buf(allocBufWrapper(dev, nMod)), + r_buf(allocBufWrapper(dev, nMod)), + isInverted_buf(allocBufWrapper(dev, nMod)), + isLower_buf(allocBufWrapper(dev, nMod)), + isAnchor_buf(allocBufWrapper(dev, nMod)), + moduleType_buf(allocBufWrapper(dev, nMod)), + moduleLayerType_buf(allocBufWrapper(dev, nMod)), + lstLayers_buf(allocBufWrapper(dev, nMod)), + connectedPixels_buf(allocBufWrapper(dev, nPixs)) { + data_.setData(*this); + } + + template + inline void copyFromSrc(TQueue queue, ModulesBuffer const& src, bool isFull = true) { + alpaka::memcpy(queue, detIds_buf, src.detIds_buf); + if (isFull) { + alpaka::memcpy(queue, moduleMap_buf, src.moduleMap_buf); + alpaka::memcpy(queue, mapdetId_buf, src.mapdetId_buf); + alpaka::memcpy(queue, mapIdx_buf, src.mapIdx_buf); + alpaka::memcpy(queue, nConnectedModules_buf, src.nConnectedModules_buf); + alpaka::memcpy(queue, drdzs_buf, src.drdzs_buf); + alpaka::memcpy(queue, dxdys_buf, src.dxdys_buf); + } + alpaka::memcpy(queue, nModules_buf, src.nModules_buf); + alpaka::memcpy(queue, nLowerModules_buf, src.nLowerModules_buf); + if (isFull) { + alpaka::memcpy(queue, partnerModuleIndices_buf, src.partnerModuleIndices_buf); + } + + alpaka::memcpy(queue, layers_buf, src.layers_buf); + alpaka::memcpy(queue, rings_buf, src.rings_buf); + alpaka::memcpy(queue, modules_buf, src.modules_buf); + alpaka::memcpy(queue, rods_buf, src.rods_buf); + alpaka::memcpy(queue, subdets_buf, src.subdets_buf); + alpaka::memcpy(queue, sides_buf, src.sides_buf); + alpaka::memcpy(queue, eta_buf, src.eta_buf); + alpaka::memcpy(queue, 
r_buf, src.r_buf); + if (isFull) { + alpaka::memcpy(queue, isInverted_buf, src.isInverted_buf); + } + alpaka::memcpy(queue, isLower_buf, src.isLower_buf); + if (isFull) { + alpaka::memcpy(queue, isAnchor_buf, src.isAnchor_buf); + } + alpaka::memcpy(queue, moduleType_buf, src.moduleType_buf); + if (isFull) { + alpaka::memcpy(queue, moduleLayerType_buf, src.moduleLayerType_buf); + alpaka::memcpy(queue, lstLayers_buf, src.lstLayers_buf); + alpaka::memcpy(queue, connectedPixels_buf, src.connectedPixels_buf); + } + } + + template + ModulesBuffer(TQueue queue, ModulesBuffer const& src, unsigned int nMod, unsigned int nPixs) + : ModulesBuffer(alpaka::getDev(queue), nMod, nPixs) { + copyFromSrc(queue, src); + } + + inline Modules const* data() const { return &data_; } + }; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ModuleConnectionMap.h b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h new file mode 100644 index 0000000000000..63c3496523c0d --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h @@ -0,0 +1,29 @@ +#ifndef RecoTracker_LSTCore_interface_ModuleConnectionMap_h +#define RecoTracker_LSTCore_interface_ModuleConnectionMap_h + +#include +#include +#include +#include + +namespace lst { + class ModuleConnectionMap { + private: + std::map> moduleConnections_; + + public: + ModuleConnectionMap(); + ModuleConnectionMap(std::string const& filename); + + void load(std::string const&); + void add(std::string const&); + void print(); + + const std::vector& getConnectedModuleDetIds(unsigned int detid) const; + int size() const; + }; + + using MapPLStoLayer = std::array, 3>; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/PixelMap.h b/RecoTracker/LSTCore/interface/PixelMap.h new file mode 100644 index 0000000000000..a0fd89387e7e4 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelMap.h @@ -0,0 +1,33 @@ +#ifndef RecoTracker_LSTCore_interface_PixelMap_h +#define 
RecoTracker_LSTCore_interface_PixelMap_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace lst { + struct PixelMap { + uint16_t pixelModuleIndex; + + std::vector connectedPixelsIndex; + std::vector connectedPixelsSizes; + std::vector connectedPixelsIndexPos; + std::vector connectedPixelsSizesPos; + std::vector connectedPixelsIndexNeg; + std::vector connectedPixelsSizesNeg; + + const int* pixelType; + + PixelMap(unsigned int sizef = size_superbins) + : pixelModuleIndex(0), + connectedPixelsIndex(sizef), + connectedPixelsSizes(sizef), + connectedPixelsIndexPos(sizef), + connectedPixelsSizesPos(sizef), + connectedPixelsIndexNeg(sizef), + connectedPixelsSizesNeg(sizef) {} + }; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/TiltedGeometry.h b/RecoTracker/LSTCore/interface/TiltedGeometry.h new file mode 100644 index 0000000000000..7a17106195522 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TiltedGeometry.h @@ -0,0 +1,26 @@ +#ifndef RecoTracker_LSTCore_interface_TiltedGeometry_h +#define RecoTracker_LSTCore_interface_TiltedGeometry_h + +#include +#include +#include + +namespace lst { + class TiltedGeometry { + private: + std::map drdzs_; // dr/dz slope + std::map dxdys_; // dx/dy slope + + public: + TiltedGeometry() = default; + TiltedGeometry(std::string const& filename); + + void load(std::string const&); + + float getDrDz(unsigned int detid) const; + float getDxDy(unsigned int detid) const; + }; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h new file mode 100644 index 0000000000000..1a16dad68420e --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/Constants.h @@ -0,0 +1,107 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_Constants_h +#define RecoTracker_LSTCore_interface_alpaka_Constants_h + +#include "RecoTracker/LSTCore/interface/Constants.h" + +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED +#include 
+#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include +#endif + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + using namespace ::lst; + +// Half precision wrapper functions. +#if defined(FP16_Base) +#define __F2H __float2half +#define __H2F __half2float + typedef __half float FPX; +#else +#define __F2H +#define __H2F + typedef float FPX; +#endif + + Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); + +// Needed for files that are compiled by g++ to not throw an error. +// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. +#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; +#endif + + // Adjust grid and block sizes based on backend configuration + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { + Vec adjustedBlocks = blocksPerGrid; + Vec adjustedThreads = threadsPerBlock; + + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { + adjustedBlocks = Vec::all(static_cast(1)); + + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } + } + + return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); + } + + // The constants below are usually used in functions like alpaka::math::min(), + // expecting a reference (T const&) in the arguments. Hence, + // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. 
+ + // 15 MeV constant from the approximate Bethe-Bloch formula + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { + 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { + 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { + 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. / (2.99792458e-3 * 3.8); + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; + // Since C++ can't represent infinity, lst_INF = 123456789 was used to represent infinity in the data table + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float lst_INF = 123456789.0; + + namespace t5dnn { + + // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR + // Other working points + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr 
float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR + + } // namespace t5dnn + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h new file mode 100644 index 0000000000000..1f3c08804540f --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -0,0 +1,106 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_LST_h +#define RecoTracker_LSTCore_interface_alpaka_LST_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/LSTESData.h" + +#include +#include +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + class Event; + + class LST { + public: + LST() = default; + + void run(Queue& queue, + bool verbose, + LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + 
std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets); + std::vector> const& hits() const { return out_tc_hitIdxs_; } + std::vector const& len() const { return out_tc_len_; } + std::vector const& seedIdx() const { return out_tc_seedIdx_; } + std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } + + private: + void prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z); + + void getOutput(Event& event); + std::vector getHitIdxs(short trackCandidateType, + unsigned int TCIdx, + unsigned int const* TCHitIndices, + unsigned int const* hitIndices); + + // Input and output vectors + std::vector in_trkX_; + std::vector in_trkY_; + std::vector in_trkZ_; + std::vector in_hitId_; + std::vector in_hitIdxs_; + std::vector in_hitIndices_vec0_; + std::vector in_hitIndices_vec1_; + std::vector in_hitIndices_vec2_; + std::vector in_hitIndices_vec3_; + std::vector in_deltaPhi_vec_; + std::vector in_ptIn_vec_; + std::vector in_ptErr_vec_; + std::vector in_px_vec_; + std::vector in_py_vec_; + std::vector in_pz_vec_; + std::vector in_eta_vec_; + std::vector in_etaErr_vec_; + std::vector in_phi_vec_; + std::vector in_charge_vec_; + std::vector in_seedIdx_vec_; + std::vector in_superbin_vec_; + std::vector 
in_pixelType_vec_; + std::vector in_isQuad_vec_; + std::vector> out_tc_hitIdxs_; + std::vector out_tc_len_; + std::vector out_tc_seedIdx_; + std::vector out_tc_trackCandidateType_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/EndcapGeometry.cc b/RecoTracker/LSTCore/src/EndcapGeometry.cc new file mode 100644 index 0000000000000..17e72379bb2ec --- /dev/null +++ b/RecoTracker/LSTCore/src/EndcapGeometry.cc @@ -0,0 +1,59 @@ +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" + +#include +#include +#include +#include + +lst::EndcapGeometry::EndcapGeometry(std::string const& filename) { load(filename); } + +void lst::EndcapGeometry::load(std::string const& filename) { + dxdy_slope_.clear(); + centroid_phis_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid; + float dxdy_slope, centroid_phi; + + // Read the detid, dxdy_slope, and centroid_phi from binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&dxdy_slope), sizeof(dxdy_slope)); + ifile.read(reinterpret_cast(¢roid_phi), sizeof(centroid_phi)); + + if (ifile) { + dxdy_slope_[detid] = dxdy_slope; + centroid_phis_[detid] = centroid_phi; + } else { + // End of file or read failed + if (!ifile.eof()) { + throw std::runtime_error("Failed to read Endcap Geometry binary data."); + } + } + } + + fillGeoMapArraysExplicit(); +} + +void lst::EndcapGeometry::fillGeoMapArraysExplicit() { + nEndCapMap = centroid_phis_.size(); + + geoMapDetId_buf.reserve(nEndCapMap); + geoMapPhi_buf.reserve(nEndCapMap); + + for (auto it = centroid_phis_.begin(); it != centroid_phis_.end(); ++it) { + unsigned int detId = it->first; + float Phi = it->second; + geoMapPhi_buf.push_back(Phi); + geoMapDetId_buf.push_back(detId); + } +} + +float lst::EndcapGeometry::getdxdy_slope(unsigned int detid) 
const { + auto res = dxdy_slope_.find(detid); + return res == dxdy_slope_.end() ? 0.f : res->second; +} diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc new file mode 100644 index 0000000000000..1acf085a0f491 --- /dev/null +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -0,0 +1,118 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "ModuleMethods.h" + +namespace { + std::string trackLooperDir() { + const char* path_lst_base = std::getenv("LST_BASE"); + const char* path_tracklooperdir = std::getenv("TRACKLOOPERDIR"); + std::string path_str; + if (path_lst_base != nullptr) { + path_str = path_lst_base; + } else if (path_tracklooperdir != nullptr) { + path_str = path_tracklooperdir; + path_str += "/../"; + } else { + std::stringstream search_path(std::getenv("CMSSW_SEARCH_PATH")); + std::string path; + while (std::getline(search_path, path, ':')) { + if (std::filesystem::exists(path + "/RecoTracker/LSTCore/data")) { + path_str = path; + break; + } + } + path_str += "/RecoTracker/LSTCore"; + } + return path_str; + } + + std::string get_absolute_path_after_check_file_exists(std::string const& name) { + std::filesystem::path fullpath = std::filesystem::absolute(name.c_str()); + if (not std::filesystem::exists(fullpath)) { + throw std::runtime_error("Could not find the file = " + fullpath.string()); + } + return fullpath.string(); + } + + void loadMapsHost(lst::MapPLStoLayer& pLStoLayer, + lst::EndcapGeometry& endcapGeometry, + lst::TiltedGeometry& tiltedGeometry, + lst::ModuleConnectionMap& moduleConnectionMap) { + // Module orientation information (DrDz or phi angles) + auto endcap_geom = + get_absolute_path_after_check_file_exists(trackLooperDir() + 
"/data/OT800_IT615_pt0.8/endcap_orientation.bin"); + auto tilted_geom = get_absolute_path_after_check_file_exists( + trackLooperDir() + "/data/OT800_IT615_pt0.8/tilted_barrel_orientation.bin"); + // Module connection map (for line segment building) + auto mappath = get_absolute_path_after_check_file_exists( + trackLooperDir() + "/data/OT800_IT615_pt0.8/module_connection_tracing_merged.bin"); + + endcapGeometry.load(endcap_geom); + tiltedGeometry.load(tilted_geom); + moduleConnectionMap.load(mappath); + + auto pLSMapDir = trackLooperDir() + "/data/OT800_IT615_pt0.8/pixelmap/pLS_map"; + const std::array connects{ + {"_layer1_subdet5", "_layer2_subdet5", "_layer1_subdet4", "_layer2_subdet4"}}; + std::string path; + + static_assert(connects.size() == std::tuple_size>{}); + for (unsigned int i = 0; i < connects.size(); i++) { + auto connectData = connects[i].data(); + + path = pLSMapDir + connectData + ".bin"; + pLStoLayer[0][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + + path = pLSMapDir + "_pos" + connectData + ".bin"; + pLStoLayer[1][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + + path = pLSMapDir + "_neg" + connectData + ".bin"; + pLStoLayer[2][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + } + } +} // namespace + +std::unique_ptr> lst::loadAndFillESHost() { + uint16_t nModules; + uint16_t nLowerModules; + unsigned int nPixels; + MapPLStoLayer pLStoLayer; + EndcapGeometry endcapGeometry; + TiltedGeometry tiltedGeometry; + PixelMap pixelMapping; + ModuleConnectionMap moduleConnectionMap; + ::loadMapsHost(pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); + + auto endcapGeometryBuffers = + EndcapGeometryBuffer(cms::alpakatools::host(), endcapGeometry.nEndCapMap); + std::memcpy(endcapGeometryBuffers.geoMapDetId_buf.data(), + endcapGeometry.geoMapDetId_buf.data(), + endcapGeometry.nEndCapMap * sizeof(unsigned int)); + 
std::memcpy(endcapGeometryBuffers.geoMapPhi_buf.data(), + endcapGeometry.geoMapPhi_buf.data(), + endcapGeometry.nEndCapMap * sizeof(float)); + + auto path = + get_absolute_path_after_check_file_exists(trackLooperDir() + "/data/OT800_IT615_pt0.8/sensor_centroids.bin"); + auto modulesBuffers = lst::loadModulesFromFile(pLStoLayer, + path.c_str(), + nModules, + nLowerModules, + nPixels, + pixelMapping, + endcapGeometry, + tiltedGeometry, + moduleConnectionMap); + auto pixelMappingPtr = std::make_shared(std::move(pixelMapping)); + return std::make_unique>(nModules, + nLowerModules, + nPixels, + endcapGeometry.nEndCapMap, + std::move(modulesBuffers), + std::move(endcapGeometryBuffers), + pixelMappingPtr); +} diff --git a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc new file mode 100644 index 0000000000000..881b2a66f6216 --- /dev/null +++ b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc @@ -0,0 +1,106 @@ +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" + +#include +#include +#include +#include + +lst::ModuleConnectionMap::ModuleConnectionMap() {} + +lst::ModuleConnectionMap::ModuleConnectionMap(std::string const& filename) { load(filename); } + +void lst::ModuleConnectionMap::load(std::string const& filename) { + moduleConnections_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid, number_of_connections; + + // Read the detid and the number of connections from the binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&number_of_connections), sizeof(number_of_connections)); + + if (ifile) { + std::vector connected_detids; + + // Read the connections for the given detid + for (unsigned int i = 0; i < number_of_connections; ++i) { + unsigned int connected_detid; + ifile.read(reinterpret_cast(&connected_detid), 
sizeof(connected_detid)); + if (ifile) { + connected_detids.push_back(connected_detid); + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read connection data."); + } + break; // Exit loop on read failure that's not EOF + } + } + + if (ifile) { + moduleConnections_[detid] = connected_detids; + } + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read module connection binary data."); + } + } + } +} + +void lst::ModuleConnectionMap::add(std::string const& filename) { + std::ifstream ifile; + ifile.open(filename.c_str()); + std::string line; + + while (std::getline(ifile, line)) { + unsigned int detid; + int number_of_connections; + std::vector connected_detids; + unsigned int connected_detid; + + std::stringstream ss(line); + + ss >> detid >> number_of_connections; + + for (int ii = 0; ii < number_of_connections; ++ii) { + ss >> connected_detid; + connected_detids.push_back(connected_detid); + } + + auto& thisModuleConnections = moduleConnections_.at(detid); + + // Concatenate + thisModuleConnections.insert(thisModuleConnections.end(), connected_detids.begin(), connected_detids.end()); + + // Sort + std::sort(thisModuleConnections.begin(), thisModuleConnections.end()); + + // Unique + thisModuleConnections.erase(std::unique(thisModuleConnections.begin(), thisModuleConnections.end()), + thisModuleConnections.end()); + } +} + +void lst::ModuleConnectionMap::print() { + std::cout << "Printing ModuleConnectionMap" << std::endl; + for (auto& pair : moduleConnections_) { + unsigned int detid = pair.first; + std::vector connected_detids = pair.second; + std::cout << " detid: " << detid << std::endl; + for (auto& connected_detid : connected_detids) { + std::cout << " connected_detid: " << connected_detid << std::endl; + } + } +} + +const std::vector& lst::ModuleConnectionMap::getConnectedModuleDetIds(unsigned int detid) const { + static const std::vector dummy; + auto const mList = moduleConnections_.find(detid); + return mList != 
moduleConnections_.end() ? mList->second : dummy; +} +int lst::ModuleConnectionMap::size() const { return moduleConnections_.size(); } diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h new file mode 100644 index 0000000000000..bf51e262f69e5 --- /dev/null +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -0,0 +1,341 @@ +#ifndef RecoTracker_LSTCore_src_ModuleMethods_h +#define RecoTracker_LSTCore_src_ModuleMethods_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace lst { + struct ModuleMetaData { + std::map detIdToIndex; + std::map module_x; + std::map module_y; + std::map module_z; + std::map module_type; // 23 : Ph2PSP, 24 : Ph2PSS, 25 : Ph2SS + // https://github.com/cms-sw/cmssw/blob/5e809e8e0a625578aa265dc4b128a93830cb5429/Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h#L29 + }; + + inline void fillPixelMap(ModulesBuffer& modulesBuf, + uint16_t nModules, + unsigned int& nPixels, + PixelMap& pixelMapping, + MapPLStoLayer const& pLStoLayer, + ModuleMetaData const& mmd) { + pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); + + std::vector connectedModuleDetIds; + std::vector connectedModuleDetIds_pos; + std::vector connectedModuleDetIds_neg; + + unsigned int totalSizes = 0; + unsigned int totalSizes_pos = 0; + unsigned int totalSizes_neg = 0; + for (unsigned int isuperbin = 0; isuperbin < size_superbins; isuperbin++) { + int sizes = 0; + for (auto const& mCM_pLS : pLStoLayer[0]) { + std::vector connectedModuleDetIds_pLS = + 
mCM_pLS.getConnectedModuleDetIds(isuperbin + size_superbins); + connectedModuleDetIds.insert( + connectedModuleDetIds.end(), connectedModuleDetIds_pLS.begin(), connectedModuleDetIds_pLS.end()); + sizes += connectedModuleDetIds_pLS.size(); + } + pixelMapping.connectedPixelsIndex[isuperbin] = totalSizes; + pixelMapping.connectedPixelsSizes[isuperbin] = sizes; + totalSizes += sizes; + + int sizes_pos = 0; + for (auto const& mCM_pLS : pLStoLayer[1]) { + std::vector connectedModuleDetIds_pLS_pos = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_pos.insert(connectedModuleDetIds_pos.end(), + connectedModuleDetIds_pLS_pos.begin(), + connectedModuleDetIds_pLS_pos.end()); + sizes_pos += connectedModuleDetIds_pLS_pos.size(); + } + pixelMapping.connectedPixelsIndexPos[isuperbin] = totalSizes_pos; + pixelMapping.connectedPixelsSizesPos[isuperbin] = sizes_pos; + totalSizes_pos += sizes_pos; + + int sizes_neg = 0; + for (auto const& mCM_pLS : pLStoLayer[2]) { + std::vector connectedModuleDetIds_pLS_neg = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_neg.insert(connectedModuleDetIds_neg.end(), + connectedModuleDetIds_pLS_neg.begin(), + connectedModuleDetIds_pLS_neg.end()); + sizes_neg += connectedModuleDetIds_pLS_neg.size(); + } + pixelMapping.connectedPixelsIndexNeg[isuperbin] = totalSizes_neg; + pixelMapping.connectedPixelsSizesNeg[isuperbin] = sizes_neg; + totalSizes_neg += sizes_neg; + } + + unsigned int connectedPix_size = totalSizes + totalSizes_pos + totalSizes_neg; + nPixels = connectedPix_size; + + // Now we re-initialize connectedPixels_buf since nPixels is now known + modulesBuf.connectedPixels_buf = cms::alpakatools::make_host_buffer(nPixels); + modulesBuf.data_.setData(modulesBuf); + + unsigned int* connectedPixels = modulesBuf.connectedPixels_buf.data(); + + for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { + connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); + } + for (unsigned 
int icondet = 0; icondet < totalSizes_pos; icondet++) { + connectedPixels[icondet + totalSizes] = mmd.detIdToIndex.at(connectedModuleDetIds_pos[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { + connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); + } + } + + inline void fillConnectedModuleArrayExplicit(ModulesBuffer& modulesBuf, + ModuleMetaData const& mmd, + ModuleConnectionMap const& moduleConnectionMap) { + uint16_t* moduleMap = modulesBuf.moduleMap_buf.data(); + uint16_t* nConnectedModules = modulesBuf.nConnectedModules_buf.data(); + + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { + unsigned int detId = it->first; + uint16_t index = it->second; + auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); + nConnectedModules[index] = connectedModules.size(); + for (uint16_t i = 0; i < nConnectedModules[index]; i++) { + moduleMap[index * max_connected_modules + i] = mmd.detIdToIndex.at(connectedModules[i]); + } + } + } + + inline void fillMapArraysExplicit(ModulesBuffer& modulesBuf, ModuleMetaData const& mmd) { + uint16_t* mapIdx = modulesBuf.mapIdx_buf.data(); + unsigned int* mapdetId = modulesBuf.mapdetId_buf.data(); + + unsigned int counter = 0; + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { + unsigned int detId = it->first; + unsigned int index = it->second; + mapIdx[counter] = index; + mapdetId[counter] = detId; + counter++; + } + } + + inline void setDerivedQuantities(unsigned int detId, + unsigned short& layer, + unsigned short& ring, + unsigned short& rod, + unsigned short& module, + unsigned short& subdet, + unsigned short& side, + float m_x, + float m_y, + float m_z, + float& eta, + float& r) { + subdet = (detId & (7 << 25)) >> 25; + side = (subdet == Endcap) ? (detId & (3 << 23)) >> 23 : (detId & (3 << 18)) >> 18; + layer = (subdet == Endcap) ? 
(detId & (7 << 18)) >> 18 : (detId & (7 << 20)) >> 20; + ring = (subdet == Endcap) ? (detId & (15 << 12)) >> 12 : 0; + module = (detId & (127 << 2)) >> 2; + rod = (subdet == Endcap) ? 0 : (detId & (127 << 10)) >> 10; + + r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); + eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); + } + + inline void loadCentroidsFromFile(const char* filePath, ModuleMetaData& mmd, uint16_t& nModules) { + std::ifstream ifile(filePath, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + std::string(filePath)); + } + + uint16_t counter = 0; + while (!ifile.eof()) { + unsigned int temp_detId; + float module_x, module_y, module_z; + int module_type; + + ifile.read(reinterpret_cast(&temp_detId), sizeof(temp_detId)); + ifile.read(reinterpret_cast(&module_x), sizeof(module_x)); + ifile.read(reinterpret_cast(&module_y), sizeof(module_y)); + ifile.read(reinterpret_cast(&module_z), sizeof(module_z)); + ifile.read(reinterpret_cast(&module_type), sizeof(module_type)); + + if (ifile) { + mmd.detIdToIndex[temp_detId] = counter; + mmd.module_x[temp_detId] = module_x; + mmd.module_y[temp_detId] = module_y; + mmd.module_z[temp_detId] = module_z; + mmd.module_type[temp_detId] = module_type; + counter++; + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read data for detId: " + std::to_string(temp_detId)); + } + } + } + + mmd.detIdToIndex[1] = counter; //pixel module is the last module in the module list + counter++; + nModules = counter; + } + + inline ModulesBuffer loadModulesFromFile(MapPLStoLayer const& pLStoLayer, + const char* moduleMetaDataFilePath, + uint16_t& nModules, + uint16_t& nLowerModules, + unsigned int& nPixels, + PixelMap& pixelMapping, + const EndcapGeometry& endcapGeometry, + const TiltedGeometry& tiltedGeometry, + const ModuleConnectionMap& moduleConnectionMap) { + ModuleMetaData mmd; + + loadCentroidsFromFile(moduleMetaDataFilePath, mmd, 
nModules); + + // Initialize modulesBuf, but with nPixels = 0 + // The fields that require nPixels are re-initialized in fillPixelMap + ModulesBuffer modulesBuf(cms::alpakatools::host(), nModules, 0); + + // Getting the underlying data pointers + unsigned int* host_detIds = modulesBuf.detIds_buf.data(); + short* host_layers = modulesBuf.layers_buf.data(); + short* host_rings = modulesBuf.rings_buf.data(); + short* host_rods = modulesBuf.rods_buf.data(); + short* host_modules = modulesBuf.modules_buf.data(); + short* host_subdets = modulesBuf.subdets_buf.data(); + short* host_sides = modulesBuf.sides_buf.data(); + float* host_eta = modulesBuf.eta_buf.data(); + float* host_r = modulesBuf.r_buf.data(); + bool* host_isInverted = modulesBuf.isInverted_buf.data(); + bool* host_isLower = modulesBuf.isLower_buf.data(); + bool* host_isAnchor = modulesBuf.isAnchor_buf.data(); + ModuleType* host_moduleType = modulesBuf.moduleType_buf.data(); + ModuleLayerType* host_moduleLayerType = modulesBuf.moduleLayerType_buf.data(); + float* host_dxdys = modulesBuf.dxdys_buf.data(); + float* host_drdzs = modulesBuf.drdzs_buf.data(); + uint16_t* host_nModules = modulesBuf.nModules_buf.data(); + uint16_t* host_nLowerModules = modulesBuf.nLowerModules_buf.data(); + uint16_t* host_partnerModuleIndices = modulesBuf.partnerModuleIndices_buf.data(); + int* host_lstLayers = modulesBuf.lstLayers_buf.data(); + + //reassign detIdToIndex indices here + nLowerModules = (nModules - 1) / 2; + uint16_t lowerModuleCounter = 0; + uint16_t upperModuleCounter = nLowerModules + 1; + //0 to nLowerModules - 1 => only lower modules, nLowerModules - pixel module, nLowerModules + 1 to nModules => upper modules + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); it++) { + unsigned int detId = it->first; + float m_x = mmd.module_x[detId]; + float m_y = mmd.module_y[detId]; + float m_z = mmd.module_z[detId]; + unsigned int m_t = mmd.module_type[detId]; + + float eta, r; + + uint16_t index; + 
unsigned short layer, ring, rod, module, subdet, side; + bool isInverted, isLower; + if (detId == 1) { + layer = 0; + ring = 0; + rod = 0; + module = 0; + subdet = 0; + side = 0; + isInverted = false; + isLower = false; + eta = 0; + r = 0; + } else { + setDerivedQuantities(detId, layer, ring, rod, module, subdet, side, m_x, m_y, m_z, eta, r); + isInverted = lst::Modules::parseIsInverted(subdet, side, module, layer); + isLower = lst::Modules::parseIsLower(isInverted, detId); + } + if (isLower) { + index = lowerModuleCounter; + lowerModuleCounter++; + } else if (detId != 1) { + index = upperModuleCounter; + upperModuleCounter++; + } else { + index = nLowerModules; //pixel + } + //reassigning indices! + mmd.detIdToIndex[detId] = index; + host_detIds[index] = detId; + host_layers[index] = layer; + host_rings[index] = ring; + host_rods[index] = rod; + host_modules[index] = module; + host_subdets[index] = subdet; + host_sides[index] = side; + host_eta[index] = eta; + host_r[index] = r; + host_isInverted[index] = isInverted; + host_isLower[index] = isLower; + + //assigning other variables! + if (detId == 1) { + host_moduleType[index] = PixelModule; + host_moduleLayerType[index] = lst::InnerPixelLayer; + host_dxdys[index] = 0; + host_drdzs[index] = 0; + host_isAnchor[index] = false; + } else { + host_moduleType[index] = (m_t == 25 ? lst::TwoS : lst::PS); + host_moduleLayerType[index] = (m_t == 23 ? lst::Pixel : lst::Strip); + + if (host_moduleType[index] == lst::PS and host_moduleLayerType[index] == lst::Pixel) { + host_isAnchor[index] = true; + } else if (host_moduleType[index] == lst::TwoS and host_isLower[index]) { + host_isAnchor[index] = true; + } else { + host_isAnchor[index] = false; + } + + host_dxdys[index] = (subdet == Endcap) ? endcapGeometry.getdxdy_slope(detId) : tiltedGeometry.getDxDy(detId); + host_drdzs[index] = (subdet == Barrel) ? 
tiltedGeometry.getDrDz(detId) : 0; + } + + host_lstLayers[index] = + layer + 6 * (subdet == lst::Endcap) + 5 * (subdet == lst::Endcap and host_moduleType[index] == lst::TwoS); + } + + //partner module stuff, and slopes and drdz move around + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); it++) { + auto& detId = it->first; + auto& index = it->second; + if (detId != 1) { + host_partnerModuleIndices[index] = + mmd.detIdToIndex[lst::Modules::parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; + //add drdz and slope importing stuff here! + if (host_drdzs[index] == 0) { + host_drdzs[index] = host_drdzs[host_partnerModuleIndices[index]]; + } + if (host_dxdys[index] == 0) { + host_dxdys[index] = host_dxdys[host_partnerModuleIndices[index]]; + } + } + } + + fillPixelMap(modulesBuf, nModules, nPixels, pixelMapping, pLStoLayer, mmd); + + *host_nModules = nModules; + *host_nLowerModules = nLowerModules; + + fillConnectedModuleArrayExplicit(modulesBuf, mmd, moduleConnectionMap); + fillMapArraysExplicit(modulesBuf, mmd); + + return modulesBuf; + } +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/src/TiltedGeometry.cc b/RecoTracker/LSTCore/src/TiltedGeometry.cc new file mode 100644 index 0000000000000..d65a9a4a5f7b9 --- /dev/null +++ b/RecoTracker/LSTCore/src/TiltedGeometry.cc @@ -0,0 +1,48 @@ +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" + +#include +#include +#include +#include + +lst::TiltedGeometry::TiltedGeometry(std::string const& filename) { load(filename); } + +void lst::TiltedGeometry::load(std::string const& filename) { + drdzs_.clear(); + dxdys_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid; + float drdz, dxdy; + + // Read the detid, drdz, and dxdy from binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + 
ifile.read(reinterpret_cast(&drdz), sizeof(drdz)); + ifile.read(reinterpret_cast(&dxdy), sizeof(dxdy)); + + if (ifile) { + drdzs_[detid] = drdz; + dxdys_[detid] = dxdy; + } else { + // End of file or read failed + if (!ifile.eof()) { + throw std::runtime_error("Failed to read Tilted Geometry binary data."); + } + } + } +} + +float lst::TiltedGeometry::getDrDz(unsigned int detid) const { + auto res = drdzs_.find(detid); + return res == drdzs_.end() ? 0.f : res->second; +} + +float lst::TiltedGeometry::getDxDy(unsigned int detid) const { + auto res = dxdys_.find(detid); + return res == dxdys_.end() ? 0.f : res->second; +} diff --git a/RecoTracker/LSTCore/src/alpaka/Event.dev.cc b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc new file mode 100644 index 0000000000000..659591b836ec9 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Event.dev.cc @@ -0,0 +1,1623 @@ +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "Event.h" + +using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; +using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; +using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; +using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; + +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + +void Event::initSync(bool verbose) { + alpaka::wait(queue_); // other calls can be asynchronous + addObjects_ = verbose; + + //reset the arrays + for (int i = 0; i < 6; i++) { + n_hits_by_layer_barrel_[i] = 0; + n_minidoublets_by_layer_barrel_[i] = 0; + n_segments_by_layer_barrel_[i] = 0; + n_triplets_by_layer_barrel_[i] = 0; + n_trackCandidates_by_layer_barrel_[i] = 0; + n_quintuplets_by_layer_barrel_[i] = 0; + if (i < 5) { + n_hits_by_layer_endcap_[i] = 0; + n_minidoublets_by_layer_endcap_[i] = 0; + n_segments_by_layer_endcap_[i] = 0; + n_triplets_by_layer_endcap_[i] = 0; + n_trackCandidates_by_layer_endcap_[i] = 0; + n_quintuplets_by_layer_endcap_[i] = 0; + } + } +} + +void Event::resetEventSync() { + alpaka::wait(queue_); // synchronize to reset consistently 
+ //reset the arrays + for (int i = 0; i < 6; i++) { + n_hits_by_layer_barrel_[i] = 0; + n_minidoublets_by_layer_barrel_[i] = 0; + n_segments_by_layer_barrel_[i] = 0; + n_triplets_by_layer_barrel_[i] = 0; + n_trackCandidates_by_layer_barrel_[i] = 0; + n_quintuplets_by_layer_barrel_[i] = 0; + if (i < 5) { + n_hits_by_layer_endcap_[i] = 0; + n_minidoublets_by_layer_endcap_[i] = 0; + n_segments_by_layer_endcap_[i] = 0; + n_triplets_by_layer_endcap_[i] = 0; + n_trackCandidates_by_layer_endcap_[i] = 0; + n_quintuplets_by_layer_endcap_[i] = 0; + } + } + hitsInGPU_.reset(); + hitsBuffers_.reset(); + mdsInGPU_.reset(); + miniDoubletsBuffers_.reset(); + rangesInGPU_.reset(); + rangesBuffers_.reset(); + segmentsInGPU_.reset(); + segmentsBuffers_.reset(); + tripletsInGPU_.reset(); + tripletsBuffers_.reset(); + quintupletsInGPU_.reset(); + quintupletsBuffers_.reset(); + trackCandidatesInGPU_.reset(); + trackCandidatesBuffers_.reset(); + pixelTripletsInGPU_.reset(); + pixelTripletsBuffers_.reset(); + pixelQuintupletsInGPU_.reset(); + pixelQuintupletsBuffers_.reset(); + + hitsInCPU_.reset(); + rangesInCPU_.reset(); + mdsInCPU_.reset(); + segmentsInCPU_.reset(); + tripletsInCPU_.reset(); + quintupletsInCPU_.reset(); + pixelTripletsInCPU_.reset(); + pixelQuintupletsInCPU_.reset(); + trackCandidatesInCPU_.reset(); + modulesInCPU_.reset(); +} + +void Event::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { + // Use the actual number of hits instead of a max. + unsigned int nHits = x.size(); + + // Initialize space on device/host for next event. 
+ if (!hitsInGPU_) { + hitsInGPU_.emplace(); + hitsBuffers_.emplace(nModules_, nHits, devAcc_, queue_); + hitsInGPU_->setData(*hitsBuffers_); + } + + if (!rangesInGPU_) { + rangesInGPU_.emplace(); + rangesBuffers_.emplace(nModules_, nLowerModules_, devAcc_, queue_); + rangesInGPU_->setData(*rangesBuffers_); + } + + // Need a view here before transferring to the device. + auto nHits_view = alpaka::createView(cms::alpakatools::host(), &nHits, (Idx)1u); + + // Copy the host arrays to the GPU. + alpaka::memcpy(queue_, hitsBuffers_->xs_buf, x, nHits); + alpaka::memcpy(queue_, hitsBuffers_->ys_buf, y, nHits); + alpaka::memcpy(queue_, hitsBuffers_->zs_buf, z, nHits); + alpaka::memcpy(queue_, hitsBuffers_->detid_buf, detId, nHits); + alpaka::memcpy(queue_, hitsBuffers_->idxs_buf, idxInNtuple, nHits); + alpaka::memcpy(queue_, hitsBuffers_->nHits_buf, nHits_view); + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory + + Vec3D const threadsPerBlock1{1, 1, 256}; + Vec3D const blocksPerGrid1{1, 1, max_blocks}; + WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); + + alpaka::exec(queue_, + hit_loop_workdiv, + HitLoopKernel{}, + Endcap, + TwoS, + nModules_, + nEndCapMap_, + endcapGeometryBuffers_.geoMapDetId_buf.data(), + endcapGeometryBuffers_.geoMapPhi_buf.data(), + *modulesBuffers_.data(), + *hitsInGPU_, + nHits); + + Vec3D const threadsPerBlock2{1, 1, 256}; + Vec3D const blocksPerGrid2{1, 1, max_blocks}; + WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); + + alpaka::exec( + queue_, module_ranges_workdiv, ModuleRangesKernel{}, *modulesBuffers_.data(), *hitsInGPU_, nLowerModules_); +} + +void Event::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector 
const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad) { + unsigned int size = ptIn.size(); + + if (size > n_max_pixel_segments_per_module) { + printf( + "*********************************************************\n" + "* Warning: Pixel line segments will be truncated. *\n" + "* You need to increase n_max_pixel_segments_per_module. *\n" + "*********************************************************\n"); + size = n_max_pixel_segments_per_module; + } + + unsigned int mdSize = 2 * size; + uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex; + + if (!mdsInGPU_) { + // Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy + auto dst_view_miniDoubletModuleOccupancy = + alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); + + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec( + queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU_); + + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + alpaka::memcpy(queue_, nTotalMDs_buf_h, rangesBuffers_->device_nTotalMDs_buf); + alpaka::wait(queue_); // wait to get the data before manipulation + + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); + + mdsInGPU_.emplace(); + miniDoubletsBuffers_.emplace(nTotalMDs, 
nLowerModules_, devAcc_, queue_); + mdsInGPU_->setData(*miniDoubletsBuffers_); + + alpaka::memcpy(queue_, miniDoubletsBuffers_->nMemoryLocations_buf, nTotalMDs_buf_h); + } + if (!segmentsInGPU_) { + // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. + // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them + + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createSegmentArrayRanges_workDiv, + CreateSegmentArrayRanges{}, + *modulesBuffers_.data(), + *rangesInGPU_, + *mdsInGPU_); + + auto nTotalSegments_view = alpaka::createView(cms::alpakatools::host(), &nTotalSegments_, (Idx)1u); + + alpaka::memcpy(queue_, nTotalSegments_view, rangesBuffers_->device_nTotalSegs_buf); + alpaka::wait(queue_); // wait to get the value before manipulation + + nTotalSegments_ += n_max_pixel_segments_per_module; + + segmentsInGPU_.emplace(); + segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); + segmentsInGPU_->setData(*segmentsBuffers_); + + alpaka::memcpy(queue_, segmentsBuffers_->nMemoryLocations_buf, nTotalSegments_view); + } + + auto hitIndices0_dev = allocBufWrapper(devAcc_, size, queue_); + auto hitIndices1_dev = allocBufWrapper(devAcc_, size, queue_); + auto hitIndices2_dev = allocBufWrapper(devAcc_, size, queue_); + auto hitIndices3_dev = allocBufWrapper(devAcc_, size, queue_); + auto dPhiChange_dev = allocBufWrapper(devAcc_, size, queue_); + + alpaka::memcpy(queue_, hitIndices0_dev, hitIndices0, size); + alpaka::memcpy(queue_, hitIndices1_dev, hitIndices1, size); + alpaka::memcpy(queue_, hitIndices2_dev, hitIndices2, size); + alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size); + alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size); + + alpaka::memcpy(queue_, 
segmentsBuffers_->ptIn_buf, ptIn, size); + alpaka::memcpy(queue_, segmentsBuffers_->ptErr_buf, ptErr, size); + alpaka::memcpy(queue_, segmentsBuffers_->px_buf, px, size); + alpaka::memcpy(queue_, segmentsBuffers_->py_buf, py, size); + alpaka::memcpy(queue_, segmentsBuffers_->pz_buf, pz, size); + alpaka::memcpy(queue_, segmentsBuffers_->etaErr_buf, etaErr, size); + alpaka::memcpy(queue_, segmentsBuffers_->isQuad_buf, isQuad, size); + alpaka::memcpy(queue_, segmentsBuffers_->eta_buf, eta, size); + alpaka::memcpy(queue_, segmentsBuffers_->phi_buf, phi, size); + alpaka::memcpy(queue_, segmentsBuffers_->charge_buf, charge, size); + alpaka::memcpy(queue_, segmentsBuffers_->seedIdx_buf, seedIdx, size); + alpaka::memcpy(queue_, segmentsBuffers_->superbin_buf, superbin, size); + alpaka::memcpy(queue_, segmentsBuffers_->pixelType_buf, pixelType, size); + + // Create source views for size and mdSize + auto src_view_size = alpaka::createView(cms::alpakatools::host(), &size, (Idx)1u); + auto src_view_mdSize = alpaka::createView(cms::alpakatools::host(), &mdSize, (Idx)1u); + + auto dst_view_segments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_segments, src_view_size); + + auto dst_view_totOccupancySegments = + alpaka::createSubView(segmentsBuffers_->totOccupancySegments_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size); + + auto dst_view_nMDs = alpaka::createSubView(miniDoubletsBuffers_->nMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize); + + auto dst_view_totOccupancyMDs = + alpaka::createSubView(miniDoubletsBuffers_->totOccupancyMDs_buf, (Idx)1u, (Idx)pixelModuleIndex); + alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize); + + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory + + Vec3D const threadsPerBlock{1, 1, 256}; + Vec3D const 
blocksPerGrid{1, 1, max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + + alpaka::exec(queue_, + addPixelSegmentToEvent_workdiv, + AddPixelSegmentToEventKernel{}, + *modulesBuffers_.data(), + *rangesInGPU_, + *hitsInGPU_, + *mdsInGPU_, + *segmentsInGPU_, + hitIndices0_dev.data(), + hitIndices1_dev.data(), + hitIndices2_dev.data(), + hitIndices3_dev.data(), + dPhiChange_dev.data(), + pixelModuleIndex, + size); +} + +void Event::createMiniDoublets() { + // Create a view for the element nLowerModules_ inside rangesBuffers_->miniDoubletModuleOccupancy + auto dst_view_miniDoubletModuleOccupancy = + alpaka::createSubView(rangesBuffers_->miniDoubletModuleOccupancy_buf, (Idx)1u, (Idx)nLowerModules_); + + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec( + queue_, createMDArrayRangesGPU_workDiv, CreateMDArrayRangesGPU{}, *modulesBuffers_.data(), *rangesInGPU_); + + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + alpaka::memcpy(queue_, nTotalMDs_buf_h, rangesBuffers_->device_nTotalMDs_buf); + alpaka::wait(queue_); // wait to get the data before manipulation + + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); + + if (!mdsInGPU_) { + mdsInGPU_.emplace(); + miniDoubletsBuffers_.emplace(nTotalMDs, nLowerModules_, devAcc_, queue_); + mdsInGPU_->setData(*miniDoubletsBuffers_); + } + + Vec3D const threadsPerBlockCreateMDInGPU{1, 16, 32}; + Vec3D const blocksPerGridCreateMDInGPU{1, nLowerModules_ / threadsPerBlockCreateMDInGPU[1], 1}; + WorkDiv3D const 
createMiniDoubletsInGPUv2_workDiv = + createWorkDiv(blocksPerGridCreateMDInGPU, threadsPerBlockCreateMDInGPU, elementsPerThread); + + alpaka::exec(queue_, + createMiniDoubletsInGPUv2_workDiv, + CreateMiniDoubletsInGPUv2{}, + *modulesBuffers_.data(), + *hitsInGPU_, + *mdsInGPU_, + *rangesInGPU_); + + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addMiniDoubletRangesToEventExplicit_workDiv, + AddMiniDoubletRangesToEventExplicit{}, + *modulesBuffers_.data(), + *mdsInGPU_, + *rangesInGPU_, + *hitsInGPU_); + + if (addObjects_) { + addMiniDoubletsToEventExplicit(); + } +} + +void Event::createSegmentsWithModuleMap() { + if (!segmentsInGPU_) { + segmentsInGPU_.emplace(); + segmentsBuffers_.emplace(nTotalSegments_, nLowerModules_, n_max_pixel_segments_per_module, devAcc_, queue_); + segmentsInGPU_->setData(*segmentsBuffers_); + } + + Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; + Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; + WorkDiv3D const createSegmentsInGPUv2_workDiv = + createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + + alpaka::exec(queue_, + createSegmentsInGPUv2_workDiv, + CreateSegmentsInGPUv2{}, + *modulesBuffers_.data(), + *mdsInGPU_, + *segmentsInGPU_, + *rangesInGPU_); + + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addSegmentRangesToEventExplicit_workDiv, + AddSegmentRangesToEventExplicit{}, + *modulesBuffers_.data(), + *segmentsInGPU_, + *rangesInGPU_); + + if (addObjects_) { + addSegmentsToEventExplicit(); + } +} + +void Event::createTriplets() { + if (!tripletsInGPU_) { + WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createTripletArrayRanges_workDiv, + CreateTripletArrayRanges{}, + *modulesBuffers_.data(), + *rangesInGPU_, + *segmentsInGPU_); + + // TODO: Why are we pulling this back down only 
to put it back on the device in a new struct? + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, (Idx)1u); + + alpaka::memcpy(queue_, maxTriplets_buf_h, rangesBuffers_->device_nTotalTrips_buf); + alpaka::wait(queue_); // wait to get the value before using it + + tripletsInGPU_.emplace(); + tripletsBuffers_.emplace(*maxTriplets_buf_h.data(), nLowerModules_, devAcc_, queue_); + tripletsInGPU_->setData(*tripletsBuffers_); + + alpaka::memcpy(queue_, tripletsBuffers_->nMemoryLocations_buf, maxTriplets_buf_h); + } + + uint16_t nonZeroModules = 0; + unsigned int max_InnerSeg = 0; + + // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, nSegments_buf_h, segmentsBuffers_->nSegments_buf, nLowerModules_); + + // ... same for module_nConnectedModules + // FIXME: replace by ES host data + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, module_nConnectedModules_buf_h, modulesBuffers_.nConnectedModules_buf, nLowerModules_); + + alpaka::wait(queue_); // wait for nSegments and module_nConnectedModules before using + + auto const* nSegments = nSegments_buf_h.data(); + auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); + + // Allocate host index and fill it directly + auto index_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto* index = index_buf_h.data(); + + for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { + uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex]; + unsigned int nInnerSegments = nSegments[innerLowerModuleIndex]; + if (nConnectedModules != 0 and nInnerSegments != 0) { + index[nonZeroModules] = innerLowerModuleIndex; + nonZeroModules++; + } + max_InnerSeg = std::max(max_InnerSeg, 
nInnerSegments); + } + + // Allocate and copy to device index + auto index_gpu_buf = allocBufWrapper(devAcc_, nLowerModules_, queue_); + alpaka::memcpy(queue_, index_gpu_buf, index_buf_h, nonZeroModules); + + Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; + Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; + WorkDiv3D const createTripletsInGPUv2_workDiv = + createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + + alpaka::exec(queue_, + createTripletsInGPUv2_workDiv, + CreateTripletsInGPUv2{}, + *modulesBuffers_.data(), + *mdsInGPU_, + *segmentsInGPU_, + *tripletsInGPU_, + *rangesInGPU_, + index_gpu_buf.data(), + nonZeroModules); + + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addTripletRangesToEventExplicit_workDiv, + AddTripletRangesToEventExplicit{}, + *modulesBuffers_.data(), + *tripletsInGPU_, + *rangesInGPU_); + + if (addObjects_) { + addTripletsToEventExplicit(); + } +} + +void Event::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { + if (!trackCandidatesInGPU_) { + trackCandidatesInGPU_.emplace(); + trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_); + trackCandidatesInGPU_->setData(*trackCandidatesBuffers_); + } + + Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; + Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; + WorkDiv3D const crossCleanpT3_workDiv = + createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); + + alpaka::exec(queue_, + crossCleanpT3_workDiv, + CrossCleanpT3{}, + *modulesBuffers_.data(), + *rangesInGPU_, + *pixelTripletsInGPU_, + *segmentsInGPU_, + *pixelQuintupletsInGPU_); + + WorkDiv1D const addpT3asTrackCandidatesInGPU_workDiv = createWorkDiv({1}, {512}, {1}); + + alpaka::exec(queue_, + addpT3asTrackCandidatesInGPU_workDiv, + AddpT3asTrackCandidatesInGPU{}, + nLowerModules_, + *pixelTripletsInGPU_, 
+ *trackCandidatesInGPU_, + *segmentsInGPU_, + *rangesInGPU_); + + // Pull nEligibleT5Modules from the device. + auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nEligibleModules_buf_h, rangesBuffers_->nEligibleT5Modules_buf); + alpaka::wait(queue_); // wait to get the value before using + auto const nEligibleModules = *nEligibleModules_buf_h.data(); + + Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; + Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; + WorkDiv3D const removeDupQuintupletsInGPUBeforeTC_workDiv = + createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); + + alpaka::exec(queue_, + removeDupQuintupletsInGPUBeforeTC_workDiv, + RemoveDupQuintupletsInGPUBeforeTC{}, + *quintupletsInGPU_, + *rangesInGPU_); + + Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; + WorkDiv3D const crossCleanT5_workDiv = + createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); + + alpaka::exec(queue_, + crossCleanT5_workDiv, + CrossCleanT5{}, + *modulesBuffers_.data(), + *quintupletsInGPU_, + *pixelQuintupletsInGPU_, + *pixelTripletsInGPU_, + *rangesInGPU_); + + Vec3D const threadsPerBlock_addT5asTrackCandidateInGPU{1, 8, 128}; + Vec3D const blocksPerGrid_addT5asTrackCandidateInGPU{1, 8, 10}; + WorkDiv3D const addT5asTrackCandidateInGPU_workDiv = createWorkDiv( + blocksPerGrid_addT5asTrackCandidateInGPU, threadsPerBlock_addT5asTrackCandidateInGPU, elementsPerThread); + + alpaka::exec(queue_, + addT5asTrackCandidateInGPU_workDiv, + AddT5asTrackCandidateInGPU{}, + nLowerModules_, + *quintupletsInGPU_, + *trackCandidatesInGPU_, + *rangesInGPU_); + + if (!no_pls_dupclean) { + Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + 
WorkDiv3D const checkHitspLS_workDiv = + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + + alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, true); + } + + Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; + Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; + WorkDiv3D const crossCleanpLS_workDiv = + createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); + + alpaka::exec(queue_, + crossCleanpLS_workDiv, + CrossCleanpLS{}, + *modulesBuffers_.data(), + *rangesInGPU_, + *pixelTripletsInGPU_, + *trackCandidatesInGPU_, + *segmentsInGPU_, + *mdsInGPU_, + *hitsInGPU_, + *quintupletsInGPU_); + + Vec3D const threadsPerBlock_addpLSasTrackCandidateInGPU{1, 1, 384}; + Vec3D const blocksPerGrid_addpLSasTrackCandidateInGPU{1, 1, max_blocks}; + WorkDiv3D const addpLSasTrackCandidateInGPU_workDiv = createWorkDiv( + blocksPerGrid_addpLSasTrackCandidateInGPU, threadsPerBlock_addpLSasTrackCandidateInGPU, elementsPerThread); + + alpaka::exec(queue_, + addpLSasTrackCandidateInGPU_workDiv, + AddpLSasTrackCandidateInGPU{}, + nLowerModules_, + *trackCandidatesInGPU_, + *segmentsInGPU_, + tc_pls_triplets); + + // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached + auto nTrackCanpT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTrackCanpT3Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTrackCanpLSHost_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + auto nTrackCanT5Host_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + alpaka::memcpy(queue_, nTrackCanpT5Host_buf, trackCandidatesBuffers_->nTrackCandidatespT5_buf); + alpaka::memcpy(queue_, nTrackCanpT3Host_buf, trackCandidatesBuffers_->nTrackCandidatespT3_buf); + alpaka::memcpy(queue_, nTrackCanpLSHost_buf, trackCandidatesBuffers_->nTrackCandidatespLS_buf); + alpaka::memcpy(queue_, 
nTrackCanT5Host_buf, trackCandidatesBuffers_->nTrackCandidatesT5_buf); + alpaka::wait(queue_); // wait to get the values before using them + + auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); + auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); + auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); + auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { + printf( + "****************************************************************************************************\n" + "* Warning: Track candidates were possibly truncated. *\n" + "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" + "* Run the code with the WARNINGS flag activated for more details. *\n" + "****************************************************************************************************\n"); + } +} + +void Event::createPixelTriplets() { + if (!pixelTripletsInGPU_) { + pixelTripletsInGPU_.emplace(); + pixelTripletsBuffers_.emplace(n_max_pixel_triplets, devAcc_, queue_); + pixelTripletsInGPU_->setData(*pixelTripletsBuffers_); + } + + auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); + auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_); + + alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf); + alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + + unsigned int nInnerSegments; + auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u); + + // Create a sub-view for the device buffer + auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, 
(Idx)nLowerModules_); + + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using + + auto connectedPixelSize_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_); + auto connectedPixelIndex_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_); + auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_); + auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_); + + unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); + + int pixelIndexOffsetPos = + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + + // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel + // the current selection still leaves a significant fraction of unmatchable pLSs + for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS + PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS + int superbin = superbins[i]; // Get superbin for this pixel + if ((superbin < 0) or (superbin >= (int)size_superbins) or + ((pixelType != PixelType::kHighPt) and (pixelType != PixelType::kLowPtPosCurv) and + (pixelType != PixelType::kLowPtNegCurv))) { + connectedPixelSize_host[i] = 0; + connectedPixelIndex_host[i] = 0; + continue; + } + + // Used pixel type to select correct size-index arrays + switch (pixelType) { + case PixelType::kInvalid: + break; + case PixelType::kHighPt: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin]; + // index to get start 
of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin]; + break; + case PixelType::kLowPtPosCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + break; + case PixelType::kLowPtNegCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + break; + } + } + + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + + Vec3D const threadsPerBlock{1, 4, 32}; + Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; + WorkDiv3D const createPixelTripletsInGPUFromMapv2_workDiv = + createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + + alpaka::exec(queue_, + createPixelTripletsInGPUFromMapv2_workDiv, + CreatePixelTripletsInGPUFromMapv2{}, + *modulesBuffers_.data(), + *rangesInGPU_, + *mdsInGPU_, + *segmentsInGPU_, + *tripletsInGPU_, + *pixelTripletsInGPU_, + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), + nInnerSegments); + +#ifdef WARNINGS + auto nPixelTriplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_); + + alpaka::memcpy(queue_, nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf); + alpaka::wait(queue_); // wait to get the value before using it + + std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; +#endif + + //pT3s can be cleaned here because 
// (tail of Event::createPixelTriplets)
// ... because they're not used in making pT5s!
  Vec3D const threadsPerBlockDupPixTrip{1, 16, 16};
  //seems like more blocks lead to conflicting writes
  Vec3D const blocksPerGridDupPixTrip{1, 40, 1};
  WorkDiv3D const removeDupPixelTripletsInGPUFromMap_workDiv =
      createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread);

  alpaka::exec(
      queue_, removeDupPixelTripletsInGPUFromMap_workDiv, RemoveDupPixelTripletsInGPUFromMap{}, *pixelTripletsInGPU_);
}

// Build T5 quintuplets for this event:
//  1) run a kernel that fills the list of modules eligible to host quintuplets,
//  2) copy the resulting module count and total-quintuplet estimate back to the
//     host (blocking wait: both values are needed to size the buffers),
//  3) lazily allocate the quintuplet buffers the first time this is called,
//  4) launch the T5-building kernel, then duplicate removal, then a kernel that
//     records per-module quintuplet ranges.
// Optionally (addObjects_) mirrors per-layer counts to the host bookkeeping.
void Event::createQuintuplets() {
  WorkDiv1D const createEligibleModulesListForQuintupletsGPU_workDiv = createWorkDiv({1}, {1024}, {1});

  alpaka::exec(queue_,
               createEligibleModulesListForQuintupletsGPU_workDiv,
               CreateEligibleModulesListForQuintupletsGPU{},
               *modulesBuffers_.data(),
               *tripletsInGPU_,
               *rangesInGPU_);

  auto nEligibleT5Modules_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_);
  auto nTotalQuintuplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_);

  alpaka::memcpy(queue_, nEligibleT5Modules_buf, rangesBuffers_->nEligibleT5Modules_buf);
  alpaka::memcpy(queue_, nTotalQuintuplets_buf, rangesBuffers_->device_nTotalQuints_buf);
  alpaka::wait(queue_);  // wait for the values before using them

  auto nEligibleT5Modules = *nEligibleT5Modules_buf.data();
  auto nTotalQuintuplets = *nTotalQuintuplets_buf.data();

  // Lazy one-time allocation; buffer size comes from the device-computed total.
  if (!quintupletsInGPU_) {
    quintupletsInGPU_.emplace();
    quintupletsBuffers_.emplace(nTotalQuintuplets, nLowerModules_, devAcc_, queue_);
    quintupletsInGPU_->setData(*quintupletsBuffers_);

    alpaka::memcpy(queue_, quintupletsBuffers_->nMemoryLocations_buf, nTotalQuintuplets_buf);
  }

  Vec3D const threadsPerBlockQuints{1, 8, 32};
  // One block per eligible module; clamp to 1 so the work division is valid
  // even when no module is eligible.
  Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1};
  WorkDiv3D const createQuintupletsInGPUv2_workDiv =
      createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread);

  alpaka::exec(queue_,
               createQuintupletsInGPUv2_workDiv,
               CreateQuintupletsInGPUv2{},
               *modulesBuffers_.data(),
               *mdsInGPU_,
               *segmentsInGPU_,
               *tripletsInGPU_,
               *quintupletsInGPU_,
               *rangesInGPU_,
               nEligibleT5Modules);

  Vec3D const threadsPerBlockDupQuint{1, 16, 16};
  Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1};
  WorkDiv3D const removeDupQuintupletsInGPUAfterBuild_workDiv =
      createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread);

  alpaka::exec(queue_,
               removeDupQuintupletsInGPUAfterBuild_workDiv,
               RemoveDupQuintupletsInGPUAfterBuild{},
               *modulesBuffers_.data(),
               *quintupletsInGPU_,
               *rangesInGPU_);

  WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1});

  alpaka::exec(queue_,
               addQuintupletRangesToEventExplicit_workDiv,
               AddQuintupletRangesToEventExplicit{},
               *modulesBuffers_.data(),
               *quintupletsInGPU_,
               *rangesInGPU_);

  if (addObjects_) {
    addQuintupletsToEventExplicit();
  }
}

// Optional duplicate cleaning of pixel line segments (pLS).
// When no_pls_dupclean is true this is a no-op; otherwise a single kernel
// (CheckHitspLS, with its flag argument set to false here) marks duplicates.
void Event::pixelLineSegmentCleaning(bool no_pls_dupclean) {
  if (!no_pls_dupclean) {
    Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16};
    Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4};
    WorkDiv3D const checkHitspLS_workDiv =
        createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread);

    alpaka::exec(queue_, checkHitspLS_workDiv, CheckHitspLS{}, *modulesBuffers_.data(), *segmentsInGPU_, false);
  }
}

// Build pT5 (pixel quintuplet) candidates: lazily allocate the pixel-quintuplet
// and track-candidate buffers, then (continued below) fetch per-pLS superbin /
// pixel-type data to the host to look up the connected-module map.
void Event::createPixelQuintuplets() {
  if (!pixelQuintupletsInGPU_) {
    pixelQuintupletsInGPU_.emplace();
    pixelQuintupletsBuffers_.emplace(n_max_pixel_quintuplets, devAcc_, queue_);
    pixelQuintupletsInGPU_->setData(*pixelQuintupletsBuffers_);
  }
  if (!trackCandidatesInGPU_) {
    trackCandidatesInGPU_.emplace();
    trackCandidatesBuffers_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, devAcc_, queue_);
    trackCandidatesInGPU_->setData(*trackCandidatesBuffers_);
  }

  auto superbins_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_);
  auto pixelTypes_buf = allocBufWrapper(cms::alpakatools::host(), n_max_pixel_segments_per_module, queue_);

  alpaka::memcpy(queue_, superbins_buf, segmentsBuffers_->superbin_buf);
  alpaka::memcpy(queue_, pixelTypes_buf, segmentsBuffers_->pixelType_buf);
  auto const* superbins = superbins_buf.data();
  auto const* pixelTypes = pixelTypes_buf.data();

  // The pLS count lives at slot nLowerModules_ of the nSegments array.
  unsigned int nInnerSegments;
  auto nInnerSegments_src_view = alpaka::createView(cms::alpakatools::host(), &nInnerSegments, (size_t)1u);

  // Create a sub-view for the device buffer
  auto dev_view_nSegments = alpaka::createSubView(segmentsBuffers_->nSegments_buf, (Idx)1u, (Idx)nLowerModules_);

  alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments);
  alpaka::wait(queue_);  // wait to get nInnerSegments (also superbins and pixelTypes) before using

  auto connectedPixelSize_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_);
  auto connectedPixelIndex_host_buf = allocBufWrapper(cms::alpakatools::host(), nInnerSegments, queue_);
  auto connectedPixelSize_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_);
  auto connectedPixelIndex_dev_buf = allocBufWrapper(devAcc_, nInnerSegments, queue_);

  auto* connectedPixelSize_host = connectedPixelSize_host_buf.data();
  auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data();

  // The connected-pixel map is laid out as three consecutive sections
  // (high-pT, low-pT positive curvature, low-pT negative curvature); the
  // offsets below locate the start of the Pos and Neg sections.
  int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::size_superbins - 1] +
                            pixelMapping_.connectedPixelsSizes[::size_superbins - 1];
  int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::size_superbins - 1] +
                            pixelMapping_.connectedPixelsSizesPos[::size_superbins - 1] + pixelIndexOffsetPos;

  // Loop over # pLS
  for (unsigned int i = 0; i < nInnerSegments; i++) {
    PixelType pixelType = pixelTypes[i];  // Get pixel type for this pLS
    int superbin = superbins[i];          // Get superbin for this pixel
    // Out-of-range superbin or unrecognized pixel type -> no connected modules.
    if ((superbin < 0) or (superbin >= (int)size_superbins) or
        ((pixelType != PixelType::kHighPt) and (pixelType != PixelType::kLowPtPosCurv) and
         (pixelType != PixelType::kLowPtNegCurv))) {
      connectedPixelSize_host[i] = 0;
      connectedPixelIndex_host[i] = 0;
      continue;
    }

    // Use pixel type to select the correct size/index arrays.
    // (kInvalid is unreachable here: it was filtered out above.)
    switch (pixelType) {
      case PixelType::kInvalid:
        break;
      case PixelType::kHighPt:
        // number of connected modules to this pixel
        connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin];
        // index to get start of connected modules for this superbin in map
        connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin];
        break;
      case PixelType::kLowPtPosCurv:
        // number of connected modules to this pixel
        connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin];
        // index to get start of connected modules for this superbin in map
        connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos;
        break;
      case PixelType::kLowPtNegCurv:
        // number of connected modules to this pixel
        connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin];
        // index to get start of connected modules for this superbin in map
        connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg;
        break;
    }
  }

  alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments);
  alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments);

  Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16};
  Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1};
  WorkDiv3D const createPixelQuintupletsInGPUFromMapv2_workDiv =
      createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread);

  // Build pT5 candidates by matching pLS against T5s via the connected-module map.
  alpaka::exec(queue_,
               createPixelQuintupletsInGPUFromMapv2_workDiv,
               CreatePixelQuintupletsInGPUFromMapv2{},
               *modulesBuffers_.data(),
               *mdsInGPU_,
               *segmentsInGPU_,
               *tripletsInGPU_,
               *quintupletsInGPU_,
               *pixelQuintupletsInGPU_,
               connectedPixelSize_dev_buf.data(),
               connectedPixelIndex_dev_buf.data(),
               nInnerSegments,
               *rangesInGPU_);

  Vec3D const threadsPerBlockDupPix{1, 16, 16};
  Vec3D const blocksPerGridDupPix{1, max_blocks, 1};
  WorkDiv3D const removeDupPixelQuintupletsInGPUFromMap_workDiv =
      createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread);

  alpaka::exec(queue_,
               removeDupPixelQuintupletsInGPUFromMap_workDiv,
               RemoveDupPixelQuintupletsInGPUFromMap{},
               *pixelQuintupletsInGPU_);

  WorkDiv1D const addpT5asTrackCandidateInGPU_workDiv = createWorkDiv({1}, {256}, {1});

  // Promote surviving pT5s to track candidates.
  alpaka::exec(queue_,
               addpT5asTrackCandidateInGPU_workDiv,
               AddpT5asTrackCandidateInGPU{},
               nLowerModules_,
               *pixelQuintupletsInGPU_,
               *trackCandidatesInGPU_,
               *segmentsInGPU_,
               *rangesInGPU_);

#ifdef WARNINGS
  auto nPixelQuintuplets_buf = allocBufWrapper(cms::alpakatools::host(), 1, queue_);

  alpaka::memcpy(queue_, nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf);
  alpaka::wait(queue_);  // wait to get the value before using it

  std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl;
#endif
}

// Accumulate per-layer mini-doublet counts on the host, split into barrel and
// endcap, from the device-side nMDs array. Modules with no MDs or an empty hit
// range are skipped.
void Event::addMiniDoubletsToEventExplicit() {
  auto nMDsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, nMDsCPU_buf, miniDoubletsBuffers_->nMDs_buf, nLowerModules_);

  // FIXME: replace by ES host data
  auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_);

  auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_);

  auto module_hitRanges_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_ * 2, queue_);
  alpaka::memcpy(queue_, module_hitRanges_buf,
hitsBuffers_->hitRanges_buf, nLowerModules_ * 2u);

  alpaka::wait(queue_);  // wait for inputs before using them

  auto const* nMDsCPU = nMDsCPU_buf.data();
  auto const* module_subdets = module_subdets_buf.data();
  auto const* module_layers = module_layers_buf.data();
  auto const* module_hitRanges = module_hitRanges_buf.data();

  for (unsigned int i = 0; i < nLowerModules_; i++) {
    // Skip modules with no MDs or an unset hit range (start index == -1).
    if (!(nMDsCPU[i] == 0 or module_hitRanges[i * 2] == -1)) {
      if (module_subdets[i] == Barrel) {
        n_minidoublets_by_layer_barrel_[module_layers[i] - 1] += nMDsCPU[i];
      } else {
        n_minidoublets_by_layer_endcap_[module_layers[i] - 1] += nMDsCPU[i];
      }
    }
  }
}

// Accumulate per-layer segment counts on the host, split into barrel and
// endcap, from the device-side nSegments array.
void Event::addSegmentsToEventExplicit() {
  auto nSegmentsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, nSegmentsCPU_buf, segmentsBuffers_->nSegments_buf, nLowerModules_);

  // FIXME: replace by ES host data
  auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_);

  auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_);

  alpaka::wait(queue_);  // wait for inputs before using them

  auto const* nSegmentsCPU = nSegmentsCPU_buf.data();
  auto const* module_subdets = module_subdets_buf.data();
  auto const* module_layers = module_layers_buf.data();

  for (unsigned int i = 0; i < nLowerModules_; i++) {
    if (!(nSegmentsCPU[i] == 0)) {
      if (module_subdets[i] == Barrel) {
        n_segments_by_layer_barrel_[module_layers[i] - 1] += nSegmentsCPU[i];
      } else {
        n_segments_by_layer_endcap_[module_layers[i] - 1] += nSegmentsCPU[i];
      }
    }
  }
}

// Accumulate per-layer quintuplet counts on the host, split into barrel and
// endcap. Modules that host no quintuplets or have no quintuplet index range
// (index == -1) are skipped.
void Event::addQuintupletsToEventExplicit() {
  auto nQuintupletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, nQuintupletsCPU_buf, quintupletsBuffers_->nQuintuplets_buf);

  // FIXME: replace by ES host data
  // NOTE(review): this copy is sized with nModules_, unlike the sibling
  // functions which use nLowerModules_; only the first nLowerModules_ entries
  // are read below — confirm the asymmetry is intentional.
  auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nModules_, queue_);
  alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nModules_);

  auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_);

  auto module_quintupletModuleIndices_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, rangesBuffers_->quintupletModuleIndices_buf);

  alpaka::wait(queue_);  // wait for inputs before using them

  auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data();
  auto const* module_subdets = module_subdets_buf.data();
  auto const* module_layers = module_layers_buf.data();
  auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data();

  for (uint16_t i = 0; i < nLowerModules_; i++) {
    if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) {
      if (module_subdets[i] == Barrel) {
        n_quintuplets_by_layer_barrel_[module_layers[i] - 1] += nQuintupletsCPU[i];
      } else {
        n_quintuplets_by_layer_endcap_[module_layers[i] - 1] += nQuintupletsCPU[i];
      }
    }
  }
}

// Accumulate per-layer triplet counts on the host, split into barrel and
// endcap, from the device-side nTriplets array.
void Event::addTripletsToEventExplicit() {
  auto nTripletsCPU_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, nTripletsCPU_buf, tripletsBuffers_->nTriplets_buf);

  // FIXME: replace by ES host data
  auto module_subdets_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_subdets_buf, modulesBuffers_.subdets_buf, nLowerModules_);

  auto module_layers_buf = allocBufWrapper(cms::alpakatools::host(), nLowerModules_, queue_);
  alpaka::memcpy(queue_, module_layers_buf, modulesBuffers_.layers_buf, nLowerModules_);

  alpaka::wait(queue_);  // wait for inputs before using them

  auto const* nTripletsCPU = nTripletsCPU_buf.data();
  auto const* module_subdets = module_subdets_buf.data();
  auto const* module_layers = module_layers_buf.data();

  for (uint16_t i = 0; i < nLowerModules_; i++) {
    if (nTripletsCPU[i] != 0) {
      if (module_subdets[i] == Barrel) {
        n_triplets_by_layer_barrel_[module_layers[i] - 1] += nTripletsCPU[i];
      } else {
        n_triplets_by_layer_endcap_[module_layers[i] - 1] += nTripletsCPU[i];
      }
    }
  }
}

// Total hit count: sum of the per-layer barrel and endcap counters.
unsigned int Event::getNumberOfHits() {
  unsigned int hits = 0;
  for (auto& it : n_hits_by_layer_barrel_) {
    hits += it;
  }
  for (auto& it : n_hits_by_layer_endcap_) {
    hits += it;
  }

  return hits;
}

// Hits in a given layer index (barrel + endcap combined).
// Index 6 is barrel-only — presumably there is no matching endcap entry at
// that index; confirm against the counter array sizes.
unsigned int Event::getNumberOfHitsByLayer(unsigned int layer) {
  if (layer == 6)
    return n_hits_by_layer_barrel_[layer];
  else
    return n_hits_by_layer_barrel_[layer] + n_hits_by_layer_endcap_[layer];
}

unsigned int Event::getNumberOfHitsByLayerBarrel(unsigned int layer) { return n_hits_by_layer_barrel_[layer]; }

unsigned int Event::getNumberOfHitsByLayerEndcap(unsigned int layer) { return n_hits_by_layer_endcap_[layer]; }

// Total mini-doublet count: sum of the per-layer barrel and endcap counters.
unsigned int Event::getNumberOfMiniDoublets() {
  unsigned int miniDoublets = 0;
  for (auto& it : n_minidoublets_by_layer_barrel_) {
    miniDoublets += it;
  }
  for (auto& it : n_minidoublets_by_layer_endcap_) {
    miniDoublets += it;
  }

  return miniDoublets;
}

// Mini-doublets in a given layer index (barrel + endcap; index 6 barrel-only,
// same caveat as getNumberOfHitsByLayer).
unsigned int Event::getNumberOfMiniDoubletsByLayer(unsigned int layer) {
  if (layer == 6)
    return n_minidoublets_by_layer_barrel_[layer];
  else
    return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer];
}

unsigned int Event::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) {
  return n_minidoublets_by_layer_barrel_[layer];
}

unsigned int Event::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) {
  return n_minidoublets_by_layer_endcap_[layer];
}

// Total segment count: sum of the per-layer barrel and endcap counters.
unsigned int Event::getNumberOfSegments()
{
  unsigned int segments = 0;
  for (auto& it : n_segments_by_layer_barrel_) {
    segments += it;
  }
  for (auto& it : n_segments_by_layer_endcap_) {
    segments += it;
  }

  return segments;
}

// Segments in a given layer index (barrel + endcap; index 6 barrel-only).
unsigned int Event::getNumberOfSegmentsByLayer(unsigned int layer) {
  if (layer == 6)
    return n_segments_by_layer_barrel_[layer];
  else
    return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer];
}

unsigned int Event::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { return n_segments_by_layer_barrel_[layer]; }

unsigned int Event::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { return n_segments_by_layer_endcap_[layer]; }

// Total triplet count: sum of the per-layer barrel and endcap counters.
unsigned int Event::getNumberOfTriplets() {
  unsigned int triplets = 0;
  for (auto& it : n_triplets_by_layer_barrel_) {
    triplets += it;
  }
  for (auto& it : n_triplets_by_layer_endcap_) {
    triplets += it;
  }

  return triplets;
}

// Triplets in a given layer index (barrel + endcap; index 6 barrel-only).
unsigned int Event::getNumberOfTripletsByLayer(unsigned int layer) {
  if (layer == 6)
    return n_triplets_by_layer_barrel_[layer];
  else
    return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer];
}

unsigned int Event::getNumberOfTripletsByLayerBarrel(unsigned int layer) { return n_triplets_by_layer_barrel_[layer]; }

unsigned int Event::getNumberOfTripletsByLayerEndcap(unsigned int layer) { return n_triplets_by_layer_endcap_[layer]; }

// Blocking read of the device-side pT3 counter.
int Event::getNumberOfPixelTriplets() {
  auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf);
  alpaka::wait(queue_);

  return *nPixelTriplets_buf_h.data();
}

// Blocking read of the device-side pT5 counter.
int Event::getNumberOfPixelQuintuplets() {
  auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf);
  alpaka::wait(queue_);

  return *nPixelQuintuplets_buf_h.data();
}

// Total quintuplet count: sum of the per-layer barrel and endcap counters.
unsigned int Event::getNumberOfQuintuplets() {
  unsigned int quintuplets = 0;
  for (auto& it : n_quintuplets_by_layer_barrel_) {
    quintuplets += it;
  }
  for (auto& it : n_quintuplets_by_layer_endcap_) {
    quintuplets += it;
  }

  return quintuplets;
}

// Quintuplets in a given layer index (barrel + endcap; index 6 barrel-only).
unsigned int Event::getNumberOfQuintupletsByLayer(unsigned int layer) {
  if (layer == 6)
    return n_quintuplets_by_layer_barrel_[layer];
  else
    return n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer];
}

unsigned int Event::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) {
  return n_quintuplets_by_layer_barrel_[layer];
}

unsigned int Event::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) {
  return n_quintuplets_by_layer_endcap_[layer];
}

// Blocking read of the device-side total track-candidate counter.
int Event::getNumberOfTrackCandidates() {
  auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf);
  alpaka::wait(queue_);

  return *nTrackCandidates_buf_h.data();
}

// Blocking read of the device-side pT5 track-candidate counter.
int Event::getNumberOfPT5TrackCandidates() {
  auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nTrackCandidatesPT5_buf_h, trackCandidatesBuffers_->nTrackCandidatespT5_buf);
  alpaka::wait(queue_);

  return *nTrackCandidatesPT5_buf_h.data();
}

// Blocking read of the device-side pT3 track-candidate counter.
int Event::getNumberOfPT3TrackCandidates() {
  auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nTrackCandidatesPT3_buf_h, trackCandidatesBuffers_->nTrackCandidatespT3_buf);
  alpaka::wait(queue_);

  return *nTrackCandidatesPT3_buf_h.data();
}

// Blocking read of the device-side pLS track-candidate counter.
int Event::getNumberOfPLSTrackCandidates() {
  auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nTrackCandidatesPLS_buf_h, trackCandidatesBuffers_->nTrackCandidatespLS_buf);
  alpaka::wait(queue_);

  return *nTrackCandidatesPLS_buf_h.data();
}

// Pixel-seeded track candidates = total candidates minus pure-T5 candidates.
int Event::getNumberOfPixelTrackCandidates() {
  auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
  auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nTrackCandidates_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf);
  alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf);
  alpaka::wait(queue_);

  return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data());
}

// Blocking read of the device-side T5 track-candidate counter.
int Event::getNumberOfT5TrackCandidates() {
  auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);

  alpaka::memcpy(queue_, nTrackCandidatesT5_buf_h, trackCandidatesBuffers_->nTrackCandidatesT5_buf);
  alpaka::wait(queue_);

  return *nTrackCandidatesT5_buf_h.data();
}

// Lazily copy the full hits collection from device to host; cached in
// hitsInCPU_ so subsequent calls are free. With sync=true the copies are
// completed before returning (host consumers expect filled data).
HitsBuffer& Event::getHits(bool sync)  //std::shared_ptr should take care of garbage collection
{
  if (!hitsInCPU_) {
    auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nHits_buf_h, hitsBuffers_->nHits_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nHits = *nHits_buf_h.data();
    hitsInCPU_.emplace(nModules_, nHits, cms::alpakatools::host(), queue_);
    hitsInCPU_->setData(*hitsInCPU_);

    alpaka::memcpy(queue_, hitsInCPU_->nHits_buf, hitsBuffers_->nHits_buf);
    alpaka::memcpy(queue_, hitsInCPU_->idxs_buf, hitsBuffers_->idxs_buf, nHits);
    alpaka::memcpy(queue_, hitsInCPU_->detid_buf, hitsBuffers_->detid_buf, nHits);
    alpaka::memcpy(queue_, hitsInCPU_->xs_buf, hitsBuffers_->xs_buf, nHits);
    alpaka::memcpy(queue_, hitsInCPU_->ys_buf, hitsBuffers_->ys_buf, nHits);
    alpaka::memcpy(queue_, hitsInCPU_->zs_buf, hitsBuffers_->zs_buf, nHits);
    alpaka::memcpy(queue_, hitsInCPU_->moduleIndices_buf, hitsBuffers_->moduleIndices_buf, nHits);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return hitsInCPU_.value();
}

// Reduced host copy of the hits (counts and original hit indices only), as
// needed by the CMSSW integration path. Shares the hitsInCPU_ cache with
// getHits.
HitsBuffer& Event::getHitsInCMSSW(bool
sync) {
  if (!hitsInCPU_) {
    auto nHits_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nHits_buf_h, hitsBuffers_->nHits_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nHits = *nHits_buf_h.data();
    hitsInCPU_.emplace(nModules_, nHits, cms::alpakatools::host(), queue_);
    hitsInCPU_->setData(*hitsInCPU_);

    // Only counts and hit indices are needed on the CMSSW side.
    alpaka::memcpy(queue_, hitsInCPU_->nHits_buf, hitsBuffers_->nHits_buf);
    alpaka::memcpy(queue_, hitsInCPU_->idxs_buf, hitsBuffers_->idxs_buf, nHits);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return hitsInCPU_.value();
}

// Lazily copy the per-module object ranges (hit, MD, segment, triplet and
// quintuplet index ranges) from device to host; cached in rangesInCPU_.
ObjectRangesBuffer& Event::getRanges(bool sync) {
  if (!rangesInCPU_) {
    rangesInCPU_.emplace(nModules_, nLowerModules_, cms::alpakatools::host(), queue_);
    rangesInCPU_->setData(*rangesInCPU_);

    alpaka::memcpy(queue_, rangesInCPU_->hitRanges_buf, rangesBuffers_->hitRanges_buf);
    alpaka::memcpy(queue_, rangesInCPU_->quintupletModuleIndices_buf, rangesBuffers_->quintupletModuleIndices_buf);
    alpaka::memcpy(queue_, rangesInCPU_->miniDoubletModuleIndices_buf, rangesBuffers_->miniDoubletModuleIndices_buf);
    alpaka::memcpy(queue_, rangesInCPU_->segmentModuleIndices_buf, rangesBuffers_->segmentModuleIndices_buf);
    alpaka::memcpy(queue_, rangesInCPU_->tripletModuleIndices_buf, rangesBuffers_->tripletModuleIndices_buf);
    if (sync)
      alpaka::wait(queue_);  // wait to get completed host data
  }
  return rangesInCPU_.value();
}

// Lazily copy the mini-doublet collection from device to host; the buffer is
// sized from the device-side nMemoryLocations value. Cached in mdsInCPU_.
MiniDoubletsBuffer& Event::getMiniDoublets(bool sync) {
  if (!mdsInCPU_) {
    // Get nMemoryLocations parameter to initialize host based mdsInCPU_
    auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nMemHost_buf_h, miniDoubletsBuffers_->nMemoryLocations_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nMemHost = *nMemHost_buf_h.data();
    mdsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_);
    mdsInCPU_->setData(*mdsInCPU_);

    alpaka::memcpy(queue_, mdsInCPU_->nMemoryLocations_buf, miniDoubletsBuffers_->nMemoryLocations_buf);
    alpaka::memcpy(queue_, mdsInCPU_->anchorHitIndices_buf, miniDoubletsBuffers_->anchorHitIndices_buf, nMemHost);
    alpaka::memcpy(queue_, mdsInCPU_->outerHitIndices_buf, miniDoubletsBuffers_->outerHitIndices_buf, nMemHost);
    alpaka::memcpy(queue_, mdsInCPU_->dphichanges_buf, miniDoubletsBuffers_->dphichanges_buf, nMemHost);
    alpaka::memcpy(queue_, mdsInCPU_->nMDs_buf, miniDoubletsBuffers_->nMDs_buf);
    alpaka::memcpy(queue_, mdsInCPU_->totOccupancyMDs_buf, miniDoubletsBuffers_->totOccupancyMDs_buf);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return mdsInCPU_.value();
}

// Lazily copy the segment collection (including pixel-segment quantities such
// as ptIn/eta/phi/seedIdx) from device to host; cached in segmentsInCPU_.
SegmentsBuffer& Event::getSegments(bool sync) {
  if (!segmentsInCPU_) {
    // Get nMemoryLocations parameter to initialize host based segmentsInCPU_
    auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nMemHost_buf_h, segmentsBuffers_->nMemoryLocations_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nMemHost = *nMemHost_buf_h.data();
    segmentsInCPU_.emplace(nMemHost, nLowerModules_, n_max_pixel_segments_per_module, cms::alpakatools::host(), queue_);
    segmentsInCPU_->setData(*segmentsInCPU_);

    alpaka::memcpy(queue_, segmentsInCPU_->nMemoryLocations_buf, segmentsBuffers_->nMemoryLocations_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->nSegments_buf, segmentsBuffers_->nSegments_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->mdIndices_buf, segmentsBuffers_->mdIndices_buf, 2u * nMemHost);
    alpaka::memcpy(queue_,
                   segmentsInCPU_->innerMiniDoubletAnchorHitIndices_buf,
                   segmentsBuffers_->innerMiniDoubletAnchorHitIndices_buf,
                   nMemHost);
    alpaka::memcpy(queue_,
                   segmentsInCPU_->outerMiniDoubletAnchorHitIndices_buf,
                   segmentsBuffers_->outerMiniDoubletAnchorHitIndices_buf,
                   nMemHost);
    alpaka::memcpy(queue_, segmentsInCPU_->totOccupancySegments_buf, segmentsBuffers_->totOccupancySegments_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->ptIn_buf, segmentsBuffers_->ptIn_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->eta_buf, segmentsBuffers_->eta_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->phi_buf, segmentsBuffers_->phi_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->seedIdx_buf, segmentsBuffers_->seedIdx_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->isDup_buf, segmentsBuffers_->isDup_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->isQuad_buf, segmentsBuffers_->isQuad_buf);
    alpaka::memcpy(queue_, segmentsInCPU_->score_buf, segmentsBuffers_->score_buf);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return segmentsInCPU_.value();
}

// Lazily copy the triplet collection from device to host; extra diagnostic
// quantities are copied only when built with CUT_VALUE_DEBUG. Cached in
// tripletsInCPU_.
TripletsBuffer& Event::getTriplets(bool sync) {
  if (!tripletsInCPU_) {
    // Get nMemoryLocations parameter to initialize host based tripletsInCPU_
    auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nMemHost_buf_h, tripletsBuffers_->nMemoryLocations_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nMemHost = *nMemHost_buf_h.data();
    tripletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_);
    tripletsInCPU_->setData(*tripletsInCPU_);

    alpaka::memcpy(queue_, tripletsInCPU_->nMemoryLocations_buf, tripletsBuffers_->nMemoryLocations_buf);
#ifdef CUT_VALUE_DEBUG
    alpaka::memcpy(queue_, tripletsInCPU_->zOut_buf, tripletsBuffers_->zOut_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->zLo_buf, tripletsBuffers_->zLo_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->zHi_buf, tripletsBuffers_->zHi_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->zLoPointed_buf, tripletsBuffers_->zLoPointed_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->zHiPointed_buf, tripletsBuffers_->zHiPointed_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->dPhiCut_buf, tripletsBuffers_->dPhiCut_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->betaInCut_buf, tripletsBuffers_->betaInCut_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->rtLo_buf, tripletsBuffers_->rtLo_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->rtHi_buf, tripletsBuffers_->rtHi_buf, nMemHost);
#endif
    alpaka::memcpy(
        queue_, tripletsInCPU_->hitIndices_buf, tripletsBuffers_->hitIndices_buf, Params_T3::kHits * nMemHost);
    alpaka::memcpy(
        queue_, tripletsInCPU_->logicalLayers_buf, tripletsBuffers_->logicalLayers_buf, Params_T3::kLayers * nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->segmentIndices_buf, tripletsBuffers_->segmentIndices_buf, 2 * nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->betaIn_buf, tripletsBuffers_->betaIn_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->circleRadius_buf, tripletsBuffers_->circleRadius_buf, nMemHost);
    alpaka::memcpy(queue_, tripletsInCPU_->nTriplets_buf, tripletsBuffers_->nTriplets_buf);
    alpaka::memcpy(queue_, tripletsInCPU_->totOccupancyTriplets_buf, tripletsBuffers_->totOccupancyTriplets_buf);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return tripletsInCPU_.value();
}

// Lazily copy the quintuplet collection from device to host; cached in
// quintupletsInCPU_.
QuintupletsBuffer& Event::getQuintuplets(bool sync) {
  if (!quintupletsInCPU_) {
    // Get nMemoryLocations parameter to initialize host based quintupletsInCPU_
    auto nMemHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nMemHost_buf_h, quintupletsBuffers_->nMemoryLocations_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nMemHost = *nMemHost_buf_h.data();
    quintupletsInCPU_.emplace(nMemHost, nLowerModules_, cms::alpakatools::host(), queue_);
    quintupletsInCPU_->setData(*quintupletsInCPU_);

    alpaka::memcpy(queue_, quintupletsInCPU_->nMemoryLocations_buf, quintupletsBuffers_->nMemoryLocations_buf);
    alpaka::memcpy(queue_, quintupletsInCPU_->nQuintuplets_buf, quintupletsBuffers_->nQuintuplets_buf);
    alpaka::memcpy(
        queue_,
quintupletsInCPU_->totOccupancyQuintuplets_buf, quintupletsBuffers_->totOccupancyQuintuplets_buf);
    alpaka::memcpy(
        queue_, quintupletsInCPU_->tripletIndices_buf, quintupletsBuffers_->tripletIndices_buf, 2 * nMemHost);
    alpaka::memcpy(queue_,
                   quintupletsInCPU_->lowerModuleIndices_buf,
                   quintupletsBuffers_->lowerModuleIndices_buf,
                   Params_T5::kLayers * nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->innerRadius_buf, quintupletsBuffers_->innerRadius_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->bridgeRadius_buf, quintupletsBuffers_->bridgeRadius_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->outerRadius_buf, quintupletsBuffers_->outerRadius_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->isDup_buf, quintupletsBuffers_->isDup_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->score_rphisum_buf, quintupletsBuffers_->score_rphisum_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->eta_buf, quintupletsBuffers_->eta_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->phi_buf, quintupletsBuffers_->phi_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->chiSquared_buf, quintupletsBuffers_->chiSquared_buf, nMemHost);
    alpaka::memcpy(queue_, quintupletsInCPU_->rzChiSquared_buf, quintupletsBuffers_->rzChiSquared_buf, nMemHost);
    alpaka::memcpy(
        queue_, quintupletsInCPU_->nonAnchorChiSquared_buf, quintupletsBuffers_->nonAnchorChiSquared_buf, nMemHost);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return quintupletsInCPU_.value();
}

// Lazily copy the pixel-triplet (pT3) collection from device to host; the
// buffer is sized from the device-side pT3 count. Cached in pixelTripletsInCPU_.
PixelTripletsBuffer& Event::getPixelTriplets(bool sync) {
  if (!pixelTripletsInCPU_) {
    // Get nPixelTriplets parameter to initialize host based pixelTripletsInCPU_
    auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nPixelTriplets_buf_h, pixelTripletsBuffers_->nPixelTriplets_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nPixelTriplets = *nPixelTriplets_buf_h.data();
    pixelTripletsInCPU_.emplace(nPixelTriplets, cms::alpakatools::host(), queue_);
    pixelTripletsInCPU_->setData(*pixelTripletsInCPU_);

    alpaka::memcpy(queue_, pixelTripletsInCPU_->nPixelTriplets_buf, pixelTripletsBuffers_->nPixelTriplets_buf);
    alpaka::memcpy(queue_,
                   pixelTripletsInCPU_->totOccupancyPixelTriplets_buf,
                   pixelTripletsBuffers_->totOccupancyPixelTriplets_buf);
    alpaka::memcpy(
        queue_, pixelTripletsInCPU_->rzChiSquared_buf, pixelTripletsBuffers_->rzChiSquared_buf, nPixelTriplets);
    alpaka::memcpy(
        queue_, pixelTripletsInCPU_->rPhiChiSquared_buf, pixelTripletsBuffers_->rPhiChiSquared_buf, nPixelTriplets);
    alpaka::memcpy(queue_,
                   pixelTripletsInCPU_->rPhiChiSquaredInwards_buf,
                   pixelTripletsBuffers_->rPhiChiSquaredInwards_buf,
                   nPixelTriplets);
    alpaka::memcpy(
        queue_, pixelTripletsInCPU_->tripletIndices_buf, pixelTripletsBuffers_->tripletIndices_buf, nPixelTriplets);
    alpaka::memcpy(queue_,
                   pixelTripletsInCPU_->pixelSegmentIndices_buf,
                   pixelTripletsBuffers_->pixelSegmentIndices_buf,
                   nPixelTriplets);
    alpaka::memcpy(
        queue_, pixelTripletsInCPU_->pixelRadius_buf, pixelTripletsBuffers_->pixelRadius_buf, nPixelTriplets);
    alpaka::memcpy(
        queue_, pixelTripletsInCPU_->tripletRadius_buf, pixelTripletsBuffers_->tripletRadius_buf, nPixelTriplets);
    alpaka::memcpy(queue_, pixelTripletsInCPU_->isDup_buf, pixelTripletsBuffers_->isDup_buf, nPixelTriplets);
    alpaka::memcpy(queue_, pixelTripletsInCPU_->eta_buf, pixelTripletsBuffers_->eta_buf, nPixelTriplets);
    alpaka::memcpy(queue_, pixelTripletsInCPU_->phi_buf, pixelTripletsBuffers_->phi_buf, nPixelTriplets);
    alpaka::memcpy(queue_, pixelTripletsInCPU_->score_buf, pixelTripletsBuffers_->score_buf, nPixelTriplets);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return pixelTripletsInCPU_.value();
}

// Lazily copy the pixel-quintuplet (pT5) collection from device to host; the
// buffer is sized from the device-side pT5 count. Cached in
// pixelQuintupletsInCPU_.
PixelQuintupletsBuffer& Event::getPixelQuintuplets(bool sync) {
  if (!pixelQuintupletsInCPU_) {
    // Get nPixelQuintuplets parameter to initialize host based pixelQuintupletsInCPU_
    auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u);
    alpaka::memcpy(queue_, nPixelQuintuplets_buf_h, pixelQuintupletsBuffers_->nPixelQuintuplets_buf);
    alpaka::wait(queue_);  // wait for the value before using

    auto const nPixelQuintuplets = *nPixelQuintuplets_buf_h.data();
    pixelQuintupletsInCPU_.emplace(nPixelQuintuplets, cms::alpakatools::host(), queue_);
    pixelQuintupletsInCPU_->setData(*pixelQuintupletsInCPU_);

    alpaka::memcpy(
        queue_, pixelQuintupletsInCPU_->nPixelQuintuplets_buf, pixelQuintupletsBuffers_->nPixelQuintuplets_buf);
    alpaka::memcpy(queue_,
                   pixelQuintupletsInCPU_->totOccupancyPixelQuintuplets_buf,
                   pixelQuintupletsBuffers_->totOccupancyPixelQuintuplets_buf);
    alpaka::memcpy(queue_,
                   pixelQuintupletsInCPU_->rzChiSquared_buf,
                   pixelQuintupletsBuffers_->rzChiSquared_buf,
                   nPixelQuintuplets);
    alpaka::memcpy(queue_,
                   pixelQuintupletsInCPU_->rPhiChiSquared_buf,
                   pixelQuintupletsBuffers_->rPhiChiSquared_buf,
                   nPixelQuintuplets);
    alpaka::memcpy(queue_,
                   pixelQuintupletsInCPU_->rPhiChiSquaredInwards_buf,
                   pixelQuintupletsBuffers_->rPhiChiSquaredInwards_buf,
                   nPixelQuintuplets);
    alpaka::memcpy(queue_,
                   pixelQuintupletsInCPU_->pixelIndices_buf,
                   pixelQuintupletsBuffers_->pixelIndices_buf,
                   nPixelQuintuplets);
    alpaka::memcpy(
        queue_, pixelQuintupletsInCPU_->T5Indices_buf, pixelQuintupletsBuffers_->T5Indices_buf, nPixelQuintuplets);
    alpaka::memcpy(queue_, pixelQuintupletsInCPU_->isDup_buf, pixelQuintupletsBuffers_->isDup_buf, nPixelQuintuplets);
    alpaka::memcpy(queue_, pixelQuintupletsInCPU_->score_buf, pixelQuintupletsBuffers_->score_buf, nPixelQuintuplets);
    if (sync)
      alpaka::wait(queue_);  // host consumers expect filled data
  }
  return pixelQuintupletsInCPU_.value();
}

// Lazily copy the track-candidate collection from device to host; allocated at
// the maximum candidate capacity, then only the first nTrackCanHost entries
// are copied. Cached in trackCandidatesInCPU_.
TrackCandidatesBuffer& Event::getTrackCandidates(bool sync) {
  if (!trackCandidatesInCPU_) {
    // Get nTrackCanHost parameter to initialize
host based trackCandidatesInCPU_ + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + trackCandidatesInCPU_.emplace( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); + trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU_ becomes usable + + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); + + *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; + alpaka::memcpy(queue_, + trackCandidatesInCPU_->hitIndices_buf, + trackCandidatesBuffers_->hitIndices_buf, + Params_pT5::kHits * nTrackCanHost); + alpaka::memcpy( + queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->logicalLayers_buf, + trackCandidatesBuffers_->logicalLayers_buf, + Params_pT5::kLayers * nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->directObjectIndices_buf, + trackCandidatesBuffers_->directObjectIndices_buf, + nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->objectIndices_buf, + trackCandidatesBuffers_->objectIndices_buf, + 2 * nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->trackCandidateType_buf, + trackCandidatesBuffers_->trackCandidateType_buf, + nTrackCanHost); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return trackCandidatesInCPU_.value(); +} + +TrackCandidatesBuffer& Event::getTrackCandidatesInCMSSW(bool sync) { + if (!trackCandidatesInCPU_) { + // Get nTrackCanHost parameter to initialize host based trackCandidatesInCPU_ + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_, 1u); + alpaka::memcpy(queue_, nTrackCanHost_buf_h, trackCandidatesBuffers_->nTrackCandidates_buf); + 
trackCandidatesInCPU_.emplace( + n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, cms::alpakatools::host(), queue_); + trackCandidatesInCPU_->setData(*trackCandidatesInCPU_); + alpaka::wait(queue_); // wait for the value before using and trackCandidatesInCPU_ becomes usable + + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); + + *trackCandidatesInCPU_->nTrackCandidates_buf.data() = nTrackCanHost; + alpaka::memcpy(queue_, + trackCandidatesInCPU_->hitIndices_buf, + trackCandidatesBuffers_->hitIndices_buf, + Params_pT5::kHits * nTrackCanHost); + alpaka::memcpy( + queue_, trackCandidatesInCPU_->pixelSeedIndex_buf, trackCandidatesBuffers_->pixelSeedIndex_buf, nTrackCanHost); + alpaka::memcpy(queue_, + trackCandidatesInCPU_->trackCandidateType_buf, + trackCandidatesBuffers_->trackCandidateType_buf, + nTrackCanHost); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return trackCandidatesInCPU_.value(); +} + +ModulesBuffer& Event::getModules(bool isFull, bool sync) { + if (!modulesInCPU_) { + // The last input here is just a small placeholder for the allocation. 
+ modulesInCPU_.emplace(cms::alpakatools::host(), nModules_, nPixels_); + + modulesInCPU_->copyFromSrc(queue_, modulesBuffers_, isFull); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return modulesInCPU_.value(); +} diff --git a/RecoTracker/LSTCore/src/alpaka/Event.h b/RecoTracker/LSTCore/src/alpaka/Event.h new file mode 100644 index 0000000000000..2b09565cf4176 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Event.h @@ -0,0 +1,198 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Event_h +#define RecoTracker_LSTCore_src_alpaka_Event_h + +#include + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" +#include "RecoTracker/LSTCore/interface/Module.h" + +#include "Hit.h" +#include "Segment.h" +#include "Triplet.h" +#include "Kernels.h" +#include "Quintuplet.h" +#include "MiniDoublet.h" +#include "PixelQuintuplet.h" +#include "PixelTriplet.h" +#include "TrackCandidate.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + class Event { + private: + Queue& queue_; + Device devAcc_; + bool addObjects_; + + std::array n_hits_by_layer_barrel_; + std::array n_hits_by_layer_endcap_; + std::array n_minidoublets_by_layer_barrel_; + std::array n_minidoublets_by_layer_endcap_; + std::array n_segments_by_layer_barrel_; + std::array n_segments_by_layer_endcap_; + std::array n_triplets_by_layer_barrel_; + std::array n_triplets_by_layer_endcap_; + std::array n_trackCandidates_by_layer_barrel_; + std::array n_trackCandidates_by_layer_endcap_; + std::array n_quintuplets_by_layer_barrel_; + std::array n_quintuplets_by_layer_endcap_; + unsigned int nTotalSegments_; + + //Device stuff + std::optional rangesInGPU_; + std::optional> rangesBuffers_; + std::optional hitsInGPU_; + std::optional> hitsBuffers_; + std::optional mdsInGPU_; + std::optional> miniDoubletsBuffers_; + std::optional segmentsInGPU_; + std::optional> 
segmentsBuffers_; + std::optional tripletsInGPU_; + std::optional> tripletsBuffers_; + std::optional quintupletsInGPU_; + std::optional> quintupletsBuffers_; + std::optional trackCandidatesInGPU_; + std::optional> trackCandidatesBuffers_; + std::optional pixelTripletsInGPU_; + std::optional> pixelTripletsBuffers_; + std::optional pixelQuintupletsInGPU_; + std::optional> pixelQuintupletsBuffers_; + + //CPU interface stuff + std::optional> rangesInCPU_; + std::optional> hitsInCPU_; + std::optional> mdsInCPU_; + std::optional> segmentsInCPU_; + std::optional> tripletsInCPU_; + std::optional> trackCandidatesInCPU_; + std::optional> modulesInCPU_; + std::optional> quintupletsInCPU_; + std::optional> pixelTripletsInCPU_; + std::optional> pixelQuintupletsInCPU_; + + void initSync(bool verbose); + + const uint16_t nModules_; + const uint16_t nLowerModules_; + const unsigned int nPixels_; + const unsigned int nEndCapMap_; + ModulesBuffer const& modulesBuffers_; + PixelMap const& pixelMapping_; + EndcapGeometryBuffer const& endcapGeometryBuffers_; + + public: + // Constructor used for CMSSW integration. Uses an external queue. 
+ Event(bool verbose, Queue& q, const LSTESData* deviceESData) + : queue_(q), + devAcc_(alpaka::getDev(q)), + nModules_(deviceESData->nModules), + nLowerModules_(deviceESData->nLowerModules), + nPixels_(deviceESData->nPixels), + nEndCapMap_(deviceESData->nEndCapMap), + modulesBuffers_(deviceESData->modulesBuffers), + pixelMapping_(*deviceESData->pixelMapping), + endcapGeometryBuffers_(deviceESData->endcapGeometryBuffers) { + initSync(verbose); + } + void resetEventSync(); // synchronizes + void wait() const { alpaka::wait(queue_); } + + // Calls the appropriate hit function, then increments the counter + void addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple); + void addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad); + + void createMiniDoublets(); + void createSegmentsWithModuleMap(); + void createTriplets(); + void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); + void createPixelTriplets(); + void createQuintuplets(); + void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); + + unsigned int getNumberOfHits(); + unsigned int getNumberOfHitsByLayer(unsigned int layer); + unsigned int 
getNumberOfHitsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfHitsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfMiniDoublets(); + unsigned int getNumberOfMiniDoubletsByLayer(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfSegments(); + unsigned int getNumberOfSegmentsByLayer(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfTriplets(); + unsigned int getNumberOfTripletsByLayer(unsigned int layer); + unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); + + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); + + unsigned int getNumberOfQuintuplets(); + unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); + + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); + + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + HitsBuffer& getHits(bool sync = true); + HitsBuffer& getHitsInCMSSW(bool sync = true); + ObjectRangesBuffer& getRanges(bool sync = true); + MiniDoubletsBuffer& getMiniDoublets(bool sync = true); + SegmentsBuffer& getSegments(bool sync = true); + TripletsBuffer& getTriplets(bool sync = true); + QuintupletsBuffer& getQuintuplets(bool sync = true); + 
PixelTripletsBuffer& getPixelTriplets(bool sync = true); + PixelQuintupletsBuffer& getPixelQuintuplets(bool sync = true); + TrackCandidatesBuffer& getTrackCandidates(bool sync = true); + TrackCandidatesBuffer& getTrackCandidatesInCMSSW(bool sync = true); + ModulesBuffer& getModules(bool isFull = false, bool sync = true); + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h new file mode 100644 index 0000000000000..3f559f4492df7 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -0,0 +1,256 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h +#define RecoTracker_LSTCore_src_alpaka_Hit_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + struct Hits { + unsigned int* nHits; + float* xs; + float* ys; + float* zs; + uint16_t* moduleIndices; + unsigned int* idxs; + unsigned int* detid; + float* rts; + float* phis; + float* etas; + float* highEdgeXs; + float* highEdgeYs; + float* lowEdgeXs; + float* lowEdgeYs; + int* hitRanges; + int* hitRangesLower; + int* hitRangesUpper; + int8_t* hitRangesnLower; + int8_t* hitRangesnUpper; + + template + void setData(TBuff& buf) { + nHits = buf.nHits_buf.data(); + xs = buf.xs_buf.data(); + ys = buf.ys_buf.data(); + zs = buf.zs_buf.data(); + moduleIndices = buf.moduleIndices_buf.data(); + idxs = buf.idxs_buf.data(); + detid = buf.detid_buf.data(); + rts = buf.rts_buf.data(); + phis = buf.phis_buf.data(); + etas = buf.etas_buf.data(); + highEdgeXs = buf.highEdgeXs_buf.data(); + highEdgeYs = buf.highEdgeYs_buf.data(); + lowEdgeXs = buf.lowEdgeXs_buf.data(); + lowEdgeYs = buf.lowEdgeYs_buf.data(); + hitRanges = buf.hitRanges_buf.data(); + hitRangesLower = buf.hitRangesLower_buf.data(); + hitRangesUpper = buf.hitRangesUpper_buf.data(); + hitRangesnLower = buf.hitRangesnLower_buf.data(); + hitRangesnUpper = 
buf.hitRangesnUpper_buf.data(); + } + }; + + template + struct HitsBuffer { + Buf nHits_buf; + Buf xs_buf; + Buf ys_buf; + Buf zs_buf; + Buf moduleIndices_buf; + Buf idxs_buf; + Buf detid_buf; + Buf rts_buf; + Buf phis_buf; + Buf etas_buf; + Buf highEdgeXs_buf; + Buf highEdgeYs_buf; + Buf lowEdgeXs_buf; + Buf lowEdgeYs_buf; + Buf hitRanges_buf; + Buf hitRangesLower_buf; + Buf hitRangesUpper_buf; + Buf hitRangesnLower_buf; + Buf hitRangesnUpper_buf; + + Hits data_; + + template + HitsBuffer(unsigned int nModules, unsigned int nMaxHits, TDevAcc const& devAccIn, TQueue& queue) + : nHits_buf(allocBufWrapper(devAccIn, 1u, queue)), + xs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + ys_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + zs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + moduleIndices_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + idxs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + detid_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + rts_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + phis_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + etas_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + highEdgeXs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + highEdgeYs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + lowEdgeXs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + lowEdgeYs_buf(allocBufWrapper(devAccIn, nMaxHits, queue)), + hitRanges_buf(allocBufWrapper(devAccIn, nModules * 2, queue)), + hitRangesLower_buf(allocBufWrapper(devAccIn, nModules, queue)), + hitRangesUpper_buf(allocBufWrapper(devAccIn, nModules, queue)), + hitRangesnLower_buf(allocBufWrapper(devAccIn, nModules, queue)), + hitRangesnUpper_buf(allocBufWrapper(devAccIn, nModules, queue)) { + alpaka::memset(queue, hitRanges_buf, 0xff); + alpaka::memset(queue, hitRangesLower_buf, 0xff); + alpaka::memset(queue, hitRangesUpper_buf, 0xff); + alpaka::memset(queue, hitRangesnLower_buf, 0xff); + alpaka::memset(queue, hitRangesnUpper_buf, 0xff); + } + + inline Hits const* 
data() const { return &data_; } + inline void setData(HitsBuffer& buf) { data_.setData(buf); } + }; + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const& acc, float x, float y, float z) { + float r3 = alpaka::math::sqrt(acc, x * x + y * y + z * z); + float rt = alpaka::math::sqrt(acc, x * x + y * y); + float eta = ((z > 0) - (z < 0)) * alpaka::math::acosh(acc, r3 / rt); + return eta; + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi_mpi_pi(TAcc const& acc, float x) { + if (alpaka::math::abs(acc, x) <= float(M_PI)) + return x; + + constexpr float o2pi = 1.f / (2.f * float(M_PI)); + float n = alpaka::math::round(acc, x * o2pi); + return x - n * float(2.f * float(M_PI)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi(TAcc const& acc, float x, float y) { + return phi_mpi_pi(acc, float(M_PI) + alpaka::math::atan2(acc, -y, -x)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhi(TAcc const& acc, float x1, float y1, float x2, float y2) { + float phi1 = phi(acc, x1, y1); + float phi2 = phi(acc, x2, y2); + return phi_mpi_pi(acc, (phi2 - phi1)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhiChange(TAcc const& acc, float x1, float y1, float x2, float y2) { + return deltaPhi(acc, x1, y1, x2 - x1, y2 - y1); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float calculate_dPhi(float phi1, float phi2) { + // Calculate dPhi + float dPhi = phi1 - phi2; + + // Normalize dPhi to be between -pi and pi + if (dPhi > float(M_PI)) { + dPhi -= 2 * float(M_PI); + } else if (dPhi < -float(M_PI)) { + dPhi += 2 * float(M_PI); + } + + return dPhi; + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(const unsigned int* data, // Array that we are searching over + unsigned int search_val, // Value we want to find in data array + unsigned int ndata) // Number of elements in data array + { + unsigned int low = 0; + unsigned int high = ndata - 1; + + while (low <= high) { + unsigned int mid = (low + high) / 
2; + unsigned int test_val = data[mid]; + if (test_val == search_val) + return mid; + else if (test_val > search_val) + high = mid - 1; + else + low = mid + 1; + } + // Couldn't find search value in array. + return -1; + } + + struct ModuleRangesKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, int nLowerModules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) { + uint16_t upperIndex = modulesInGPU.partnerModuleIndices[lowerIndex]; + if (hitsInGPU.hitRanges[lowerIndex * 2] != -1 && hitsInGPU.hitRanges[upperIndex * 2] != -1) { + hitsInGPU.hitRangesLower[lowerIndex] = hitsInGPU.hitRanges[lowerIndex * 2]; + hitsInGPU.hitRangesUpper[lowerIndex] = hitsInGPU.hitRanges[upperIndex * 2]; + hitsInGPU.hitRangesnLower[lowerIndex] = + hitsInGPU.hitRanges[lowerIndex * 2 + 1] - hitsInGPU.hitRanges[lowerIndex * 2] + 1; + hitsInGPU.hitRangesnUpper[lowerIndex] = + hitsInGPU.hitRanges[upperIndex * 2 + 1] - hitsInGPU.hitRanges[upperIndex * 2] + 1; + } + } + } + }; + + struct HitLoopKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t Endcap, // Integer corresponding to endcap in module subdets + uint16_t TwoS, // Integer corresponding to TwoS in moduleType + unsigned int nModules, // Number of modules + unsigned int nEndCapMap, // Number of elements in endcap map + const unsigned int* geoMapDetId, // DetId's from endcap map + const float* geoMapPhi, // Phi values from endcap map + Modules modulesInGPU, + Hits hitsInGPU, + unsigned int nHits) const // Total number of hits in event + { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) { + float ihit_x = hitsInGPU.xs[ihit]; + float 
ihit_y = hitsInGPU.ys[ihit]; + float ihit_z = hitsInGPU.zs[ihit]; + int iDetId = hitsInGPU.detid[ihit]; + + hitsInGPU.rts[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); + hitsInGPU.phis[ihit] = phi(acc, ihit_x, ihit_y); + hitsInGPU.etas[ihit] = + ((ihit_z > 0) - (ihit_z < 0)) * + alpaka::math::acosh( + acc, + alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hitsInGPU.rts[ihit]); + int found_index = binary_search(modulesInGPU.mapdetId, iDetId, nModules); + uint16_t lastModuleIndex = modulesInGPU.mapIdx[found_index]; + + hitsInGPU.moduleIndices[ihit] = lastModuleIndex; + + if (modulesInGPU.subdets[lastModuleIndex] == Endcap && modulesInGPU.moduleType[lastModuleIndex] == TwoS) { + found_index = binary_search(geoMapDetId, iDetId, nEndCapMap); + float phi = geoMapPhi[found_index]; + float cos_phi = alpaka::math::cos(acc, phi); + hitsInGPU.highEdgeXs[ihit] = ihit_x + 2.5f * cos_phi; + hitsInGPU.lowEdgeXs[ihit] = ihit_x - 2.5f * cos_phi; + float sin_phi = alpaka::math::sin(acc, phi); + hitsInGPU.highEdgeYs[ihit] = ihit_y + 2.5f * sin_phi; + hitsInGPU.lowEdgeYs[ihit] = ihit_y - 2.5f * sin_phi; + } + // Need to set initial value if index hasn't been seen before. + int old = alpaka::atomicCas( + acc, &(hitsInGPU.hitRanges[lastModuleIndex * 2]), -1, static_cast(ihit), alpaka::hierarchy::Threads{}); + // For subsequent visits, stores the min value. 
+ if (old != -1) + alpaka::atomicMin( + acc, &hitsInGPU.hitRanges[lastModuleIndex * 2], static_cast(ihit), alpaka::hierarchy::Threads{}); + + alpaka::atomicMax( + acc, &hitsInGPU.hitRanges[lastModuleIndex * 2 + 1], static_cast(ihit), alpaka::hierarchy::Threads{}); + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h new file mode 100644 index 0000000000000..bc284d052cc05 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -0,0 +1,421 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h +#define RecoTracker_LSTCore_src_alpaka_Kernels_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" + +#include "Hit.h" +#include "MiniDoublet.h" +#include "ObjectRanges.h" +#include "Segment.h" +#include "Triplet.h" +#include "Quintuplet.h" +#include "PixelQuintuplet.h" +#include "PixelTriplet.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets& quintupletsInGPU, + unsigned int quintupletIndex, + bool secondpass = false) { + quintupletsInGPU.isDup[quintupletIndex] |= 1 + secondpass; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets& pixelTripletsInGPU, + unsigned int pixelTripletIndex) { + pixelTripletsInGPU.isDup[pixelTripletIndex] = true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets& pixelQuintupletsInGPU, + unsigned int pixelQuintupletIndex) { + pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(Segments& segmentsInGPU, + unsigned int pixelSegmentArrayIndex, + bool secondpass = false) { + segmentsInGPU.isDup[pixelSegmentArrayIndex] |= 1 + secondpass; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, + unsigned int jx, + Quintuplets const& 
quintupletsInGPU) { + unsigned int hits1[Params_T5::kHits]; + unsigned int hits2[Params_T5::kHits]; + + for (int i = 0; i < Params_T5::kHits; i++) { + hits1[i] = quintupletsInGPU.hitIndices[Params_T5::kHits * ix + i]; + hits2[i] = quintupletsInGPU.hitIndices[Params_T5::kHits * jx + i]; + } + + int nMatched = 0; + for (int i = 0; i < Params_T5::kHits; i++) { + bool matched = false; + for (int j = 0; j < Params_T5::kHits; j++) { + if (hits1[i] == hits2[j]) { + matched = true; + break; + } + } + if (matched) { + nMatched++; + } + } + return nMatched; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, + unsigned int jx, + PixelQuintuplets const& pixelQuintupletsInGPU) { + unsigned int hits1[Params_pT5::kHits]; + unsigned int hits2[Params_pT5::kHits]; + + for (int i = 0; i < Params_pT5::kHits; i++) { + hits1[i] = pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * ix + i]; + hits2[i] = pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * jx + i]; + } + + int nMatched = 0; + for (int i = 0; i < Params_pT5::kHits; i++) { + bool matched = false; + for (int j = 0; j < Params_pT5::kHits; j++) { + if (hits1[i] == hits2[j]) { + matched = true; + break; + } + } + if (matched) { + nMatched++; + } + } + return nMatched; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, + unsigned int jx, + PixelTriplets const& pixelTripletsInGPU, + int* matched) { + int phits1[Params_pLS::kHits]; + int phits2[Params_pLS::kHits]; + + for (int i = 0; i < Params_pLS::kHits; i++) { + phits1[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * ix + i]; + phits2[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * jx + i]; + } + + int npMatched = 0; + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) { + npMatched++; + } + } + + int hits1[Params_T3::kHits]; + int hits2[Params_T3::kHits]; + + for (int i 
= 0; i < Params_T3::kHits; i++) { + hits1[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * ix + i + 4]; // Omitting the pLS hits + hits2[i] = pixelTripletsInGPU.hitIndices[Params_pT3::kHits * jx + i + 4]; // Omitting the pLS hits + } + + int nMatched = 0; + for (int i = 0; i < Params_T3::kHits; i++) { + bool tmatched = false; + for (int j = 0; j < Params_T3::kHits; j++) { + if (hits1[i] == hits2[j]) { + tmatched = true; + break; + } + } + if (tmatched) { + nMatched++; + } + } + + matched[0] = npMatched; + matched[1] = nMatched; + } + + struct RemoveDupQuintupletsInGPUAfterBuild { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int lowmod = globalThreadIdx[0]; lowmod < *modulesInGPU.nLowerModules; + lowmod += gridThreadExtent[0]) { + unsigned int nQuintuplets_lowmod = quintupletsInGPU.nQuintuplets[lowmod]; + int quintupletModuleIndices_lowmod = rangesInGPU.quintupletModuleIndices[lowmod]; + + for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) { + unsigned int ix = quintupletModuleIndices_lowmod + ix1; + float eta1 = __H2F(quintupletsInGPU.eta[ix]); + float phi1 = __H2F(quintupletsInGPU.phi[ix]); + float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]); + + for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) { + unsigned int jx = quintupletModuleIndices_lowmod + jx1; + + float eta2 = __H2F(quintupletsInGPU.eta[jx]); + float phi2 = __H2F(quintupletsInGPU.phi[jx]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); + + if (dEta > 0.1f) + continue; + + if (alpaka::math::abs(acc, dPhi) > 0.1f) + continue; + + int 
nMatched = checkHitsT5(ix, jx, quintupletsInGPU); + const int minNHitsForDup_T5 = 7; + if (nMatched >= minNHitsForDup_T5) { + if (score_rphisum1 >= score_rphisum2) { + rmQuintupletFromMemory(quintupletsInGPU, ix); + } else { + rmQuintupletFromMemory(quintupletsInGPU, jx); + } + } + } + } + } + } + }; + + struct RemoveDupQuintupletsInGPUBeforeTC { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, Quintuplets quintupletsInGPU, ObjectRanges rangesInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < *(rangesInGPU.nEligibleT5Modules); + lowmodIdx1 += gridThreadExtent[1]) { + uint16_t lowmod1 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx1]; + unsigned int nQuintuplets_lowmod1 = quintupletsInGPU.nQuintuplets[lowmod1]; + if (nQuintuplets_lowmod1 == 0) + continue; + + unsigned int quintupletModuleIndices_lowmod1 = rangesInGPU.quintupletModuleIndices[lowmod1]; + + for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < *(rangesInGPU.nEligibleT5Modules); + lowmodIdx2 += gridThreadExtent[2]) { + uint16_t lowmod2 = rangesInGPU.indicesOfEligibleT5Modules[lowmodIdx2]; + unsigned int nQuintuplets_lowmod2 = quintupletsInGPU.nQuintuplets[lowmod2]; + if (nQuintuplets_lowmod2 == 0) + continue; + + unsigned int quintupletModuleIndices_lowmod2 = rangesInGPU.quintupletModuleIndices[lowmod2]; + + for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) { + unsigned int ix = quintupletModuleIndices_lowmod1 + ix1; + if (quintupletsInGPU.partOfPT5[ix] || (quintupletsInGPU.isDup[ix] & 1)) + continue; + + for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { + unsigned int jx = quintupletModuleIndices_lowmod2 + jx1; + if (ix == jx) + continue; + + if (quintupletsInGPU.partOfPT5[jx] || (quintupletsInGPU.isDup[jx] & 1)) + continue; + + float eta1 = __H2F(quintupletsInGPU.eta[ix]); + float phi1 = 
__H2F(quintupletsInGPU.phi[ix]); + float score_rphisum1 = __H2F(quintupletsInGPU.score_rphisum[ix]); + + float eta2 = __H2F(quintupletsInGPU.eta[jx]); + float phi2 = __H2F(quintupletsInGPU.phi[jx]); + float score_rphisum2 = __H2F(quintupletsInGPU.score_rphisum[jx]); + + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + if (dEta > 0.1f) + continue; + + if (alpaka::math::abs(acc, dPhi) > 0.1f) + continue; + + float dR2 = dEta * dEta + dPhi * dPhi; + int nMatched = checkHitsT5(ix, jx, quintupletsInGPU); + const int minNHitsForDup_T5 = 5; + if (dR2 < 0.001f || nMatched >= minNHitsForDup_T5) { + if (score_rphisum1 > score_rphisum2) { + rmQuintupletFromMemory(quintupletsInGPU, ix, true); + } else if (score_rphisum1 < score_rphisum2) { + rmQuintupletFromMemory(quintupletsInGPU, jx, true); + } else { + rmQuintupletFromMemory(quintupletsInGPU, (ix < jx ? ix : jx), true); + } + } + } + } + } + } + } + }; + + struct RemoveDupPixelTripletsInGPUFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTripletsInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int ix = globalThreadIdx[1]; ix < *pixelTripletsInGPU.nPixelTriplets; ix += gridThreadExtent[1]) { + for (unsigned int jx = globalThreadIdx[2]; jx < *pixelTripletsInGPU.nPixelTriplets; jx += gridThreadExtent[2]) { + if (ix == jx) + continue; + + int nMatched[2]; + checkHitspT3(ix, jx, pixelTripletsInGPU, nMatched); + const int minNHitsForDup_pT3 = 5; + if ((nMatched[0] + nMatched[1]) >= minNHitsForDup_pT3) { + // Check the layers + if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] < + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2]) { + rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + break; + } else if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2] == + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers 
* jx + 2] && + __H2F(pixelTripletsInGPU.score[ix]) > __H2F(pixelTripletsInGPU.score[jx])) { + rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + break; + } else if (pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * ix + 2] == + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * jx + 2] && + (__H2F(pixelTripletsInGPU.score[ix]) == __H2F(pixelTripletsInGPU.score[jx])) && (ix < jx)) { + rmPixelTripletFromMemory(pixelTripletsInGPU, ix); + break; + } + } + } + } + } + }; + + struct RemoveDupPixelQuintupletsInGPUFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintupletsInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) { + float score1 = __H2F(pixelQuintupletsInGPU.score[ix]); + for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) { + if (ix == jx) + continue; + + int nMatched = checkHitspT5(ix, jx, pixelQuintupletsInGPU); + float score2 = __H2F(pixelQuintupletsInGPU.score[jx]); + const int minNHitsForDup_pT5 = 7; + if (nMatched >= minNHitsForDup_pT5) { + if (score1 > score2 or ((score1 == score2) and (ix > jx))) { + rmPixelQuintupletFromMemory(pixelQuintupletsInGPU, ix); + break; + } + } + } + } + } + }; + + struct CheckHitspLS { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, Segments segmentsInGPU, bool secondpass) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int pixelModuleIndex = *modulesInGPU.nLowerModules; + unsigned int nPixelSegments = segmentsInGPU.nSegments[pixelModuleIndex]; + + if (nPixelSegments > n_max_pixel_segments_per_module) + nPixelSegments = n_max_pixel_segments_per_module; + + for (unsigned int ix = 
globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) { + if (secondpass && (!segmentsInGPU.isQuad[ix] || (segmentsInGPU.isDup[ix] & 1))) + continue; + + unsigned int phits1[Params_pLS::kHits]; + phits1[0] = segmentsInGPU.pLSHitsIdxs[ix].x; + phits1[1] = segmentsInGPU.pLSHitsIdxs[ix].y; + phits1[2] = segmentsInGPU.pLSHitsIdxs[ix].z; + phits1[3] = segmentsInGPU.pLSHitsIdxs[ix].w; + float eta_pix1 = segmentsInGPU.eta[ix]; + float phi_pix1 = segmentsInGPU.phi[ix]; + + for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) { + float eta_pix2 = segmentsInGPU.eta[jx]; + float phi_pix2 = segmentsInGPU.phi[jx]; + + if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f) + continue; + + if (secondpass && (!segmentsInGPU.isQuad[jx] || (segmentsInGPU.isDup[jx] & 1))) + continue; + + int8_t quad_diff = segmentsInGPU.isQuad[ix] - segmentsInGPU.isQuad[jx]; + float score_diff = segmentsInGPU.score[ix] - segmentsInGPU.score[jx]; + // Always keep quads over trips. 
If they are the same, we want the object with better score + int idxToRemove; + if (quad_diff > 0) + idxToRemove = jx; + else if (quad_diff < 0) + idxToRemove = ix; + else if (score_diff < 0) + idxToRemove = jx; + else if (score_diff > 0) + idxToRemove = ix; + else + idxToRemove = ix; + + unsigned int phits2[Params_pLS::kHits]; + phits2[0] = segmentsInGPU.pLSHitsIdxs[jx].x; + phits2[1] = segmentsInGPU.pLSHitsIdxs[jx].y; + phits2[2] = segmentsInGPU.pLSHitsIdxs[jx].z; + phits2[3] = segmentsInGPU.pLSHitsIdxs[jx].w; + + int npMatched = 0; + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) { + npMatched++; + // Only one hit is enough + if (secondpass) + break; + } + } + const int minNHitsForDup_pLS = 3; + if (npMatched >= minNHitsForDup_pLS) { + rmPixelSegmentFromMemory(segmentsInGPU, idxToRemove, secondpass); + } + if (secondpass) { + float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); + float dPhi = calculate_dPhi(phi_pix1, phi_pix2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if ((npMatched >= 1) || (dR2 < 1e-5f)) { + rmPixelSegmentFromMemory(segmentsInGPU, idxToRemove, secondpass); + } + } + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.dev.cc b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc new file mode 100644 index 0000000000000..65543720a1d34 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LST.dev.cc @@ -0,0 +1,438 @@ +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "Event.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + +#include "Math/Vector3D.h" +#include "Math/VectorUtil.h" +using XYZVector = ROOT::Math::XYZVector; + +namespace { + XYZVector calculateR3FromPCA(const XYZVector& p3, float dxy, float dz) { + const float pt = p3.rho(); + const float p = p3.r(); + const float vz = dz * pt * pt / p / p; + 
+ const float vx = -dxy * p3.y() / pt - p3.x() / p * p3.z() / p * dz; + const float vy = dxy * p3.x() / pt - p3.y() / p * p3.z() / p * dz; + return {vx, vy, vz}; + } +} // namespace + +void LST::prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z) { + unsigned int count = 0; + auto n_see = see_stateTrajGlbPx.size(); + std::vector px_vec; + px_vec.reserve(n_see); + std::vector py_vec; + py_vec.reserve(n_see); + std::vector pz_vec; + pz_vec.reserve(n_see); + std::vector hitIndices_vec0; + hitIndices_vec0.reserve(n_see); + std::vector hitIndices_vec1; + hitIndices_vec1.reserve(n_see); + std::vector hitIndices_vec2; + hitIndices_vec2.reserve(n_see); + std::vector hitIndices_vec3; + hitIndices_vec3.reserve(n_see); + std::vector ptIn_vec; + ptIn_vec.reserve(n_see); + std::vector ptErr_vec; + ptErr_vec.reserve(n_see); + std::vector etaErr_vec; + etaErr_vec.reserve(n_see); + std::vector eta_vec; + eta_vec.reserve(n_see); + std::vector phi_vec; + phi_vec.reserve(n_see); + std::vector charge_vec; + charge_vec.reserve(n_see); + std::vector seedIdx_vec; + seedIdx_vec.reserve(n_see); + std::vector deltaPhi_vec; + deltaPhi_vec.reserve(n_see); + std::vector trkX = ph2_x; + std::vector trkY = ph2_y; + std::vector trkZ = ph2_z; + std::vector hitId = ph2_detId; + std::vector hitIdxs(ph2_detId.size()); + + std::vector superbin_vec; + std::vector pixelType_vec; + std::vector isQuad_vec; + 
std::iota(hitIdxs.begin(), hitIdxs.end(), 0); + const int hit_size = trkX.size(); + + for (size_t iSeed = 0; iSeed < n_see; iSeed++) { + XYZVector p3LH(see_stateTrajGlbPx[iSeed], see_stateTrajGlbPy[iSeed], see_stateTrajGlbPz[iSeed]); + float ptIn = p3LH.rho(); + float eta = p3LH.eta(); + float ptErr = see_ptErr[iSeed]; + + if ((ptIn > 0.8 - 2 * ptErr)) { + XYZVector r3LH(see_stateTrajGlbX[iSeed], see_stateTrajGlbY[iSeed], see_stateTrajGlbZ[iSeed]); + XYZVector p3PCA(see_px[iSeed], see_py[iSeed], see_pz[iSeed]); + XYZVector r3PCA(calculateR3FromPCA(p3PCA, see_dxy[iSeed], see_dz[iSeed])); + + // The charge could be used directly in the line below + float pixelSegmentDeltaPhiChange = ROOT::Math::VectorUtil::DeltaPhi(p3LH, r3LH); + float etaErr = see_etaErr[iSeed]; + float px = p3LH.x(); + float py = p3LH.y(); + float pz = p3LH.z(); + + int charge = see_q[iSeed]; + PixelType pixtype = PixelType::kInvalid; + + if (ptIn >= 2.0) + pixtype = PixelType::kHighPt; + else if (ptIn >= (0.8 - 2 * ptErr) and ptIn < 2.0) { + if (pixelSegmentDeltaPhiChange >= 0) + pixtype = PixelType::kLowPtPosCurv; + else + pixtype = PixelType::kLowPtNegCurv; + } else + continue; + + unsigned int hitIdx0 = hit_size + count; + count++; + unsigned int hitIdx1 = hit_size + count; + count++; + unsigned int hitIdx2 = hit_size + count; + count++; + unsigned int hitIdx3; + if (see_hitIdx[iSeed].size() <= 3) + hitIdx3 = hitIdx2; + else { + hitIdx3 = hit_size + count; + count++; + } + + trkX.push_back(r3PCA.x()); + trkY.push_back(r3PCA.y()); + trkZ.push_back(r3PCA.z()); + trkX.push_back(p3PCA.rho()); + float p3PCA_Eta = p3PCA.eta(); + trkY.push_back(p3PCA_Eta); + float p3PCA_Phi = p3PCA.phi(); + trkZ.push_back(p3PCA_Phi); + trkX.push_back(r3LH.x()); + trkY.push_back(r3LH.y()); + trkZ.push_back(r3LH.z()); + hitId.push_back(1); + hitId.push_back(1); + hitId.push_back(1); + if (see_hitIdx[iSeed].size() > 3) { + trkX.push_back(r3LH.x()); + trkY.push_back(see_dxy[iSeed]); + trkZ.push_back(see_dz[iSeed]); + 
hitId.push_back(1); + } + px_vec.push_back(px); + py_vec.push_back(py); + pz_vec.push_back(pz); + + hitIndices_vec0.push_back(hitIdx0); + hitIndices_vec1.push_back(hitIdx1); + hitIndices_vec2.push_back(hitIdx2); + hitIndices_vec3.push_back(hitIdx3); + ptIn_vec.push_back(ptIn); + ptErr_vec.push_back(ptErr); + etaErr_vec.push_back(etaErr); + eta_vec.push_back(eta); + float phi = p3LH.phi(); + phi_vec.push_back(phi); + charge_vec.push_back(charge); + seedIdx_vec.push_back(iSeed); + deltaPhi_vec.push_back(pixelSegmentDeltaPhiChange); + + hitIdxs.push_back(see_hitIdx[iSeed][0]); + hitIdxs.push_back(see_hitIdx[iSeed][1]); + hitIdxs.push_back(see_hitIdx[iSeed][2]); + char isQuad = false; + if (see_hitIdx[iSeed].size() > 3) { + isQuad = true; + hitIdxs.push_back(see_hitIdx[iSeed][3]); + } + float neta = 25.; + float nphi = 72.; + float nz = 25.; + int etabin = (p3PCA_Eta + 2.6) / ((2 * 2.6) / neta); + int phibin = (p3PCA_Phi + 3.14159265358979323846) / ((2. * 3.14159265358979323846) / nphi); + int dzbin = (see_dz[iSeed] + 30) / (2 * 30 / nz); + int isuperbin = (nz * nphi) * etabin + (nz)*phibin + dzbin; + superbin_vec.push_back(isuperbin); + pixelType_vec.push_back(pixtype); + isQuad_vec.push_back(isQuad); + } + } + + in_trkX_ = trkX; + in_trkY_ = trkY; + in_trkZ_ = trkZ; + in_hitId_ = hitId; + in_hitIdxs_ = hitIdxs; + in_hitIndices_vec0_ = hitIndices_vec0; + in_hitIndices_vec1_ = hitIndices_vec1; + in_hitIndices_vec2_ = hitIndices_vec2; + in_hitIndices_vec3_ = hitIndices_vec3; + in_deltaPhi_vec_ = deltaPhi_vec; + in_ptIn_vec_ = ptIn_vec; + in_ptErr_vec_ = ptErr_vec; + in_px_vec_ = px_vec; + in_py_vec_ = py_vec; + in_pz_vec_ = pz_vec; + in_eta_vec_ = eta_vec; + in_etaErr_vec_ = etaErr_vec; + in_phi_vec_ = phi_vec; + in_charge_vec_ = charge_vec; + in_seedIdx_vec_ = seedIdx_vec; + in_superbin_vec_ = superbin_vec; + in_pixelType_vec_ = pixelType_vec; + in_isQuad_vec_ = isQuad_vec; +} + +std::vector LST::getHitIdxs(short trackCandidateType, + unsigned int TCIdx, + unsigned int 
const* TCHitIndices, + unsigned int const* hitIndices) { + std::vector hits; + + unsigned int maxNHits = 0; + if (trackCandidateType == 7) + maxNHits = Params_pT5::kHits; // pT5 + else if (trackCandidateType == 5) + maxNHits = Params_pT3::kHits; // pT3 + else if (trackCandidateType == 4) + maxNHits = Params_T5::kHits; // T5 + else if (trackCandidateType == 8) + maxNHits = Params_pLS::kHits; // pLS + + for (unsigned int i = 0; i < maxNHits; i++) { + unsigned int hitIdxInGPU = TCHitIndices[Params_pT5::kHits * TCIdx + i]; + unsigned int hitIdx = + (trackCandidateType == 8) + ? hitIdxInGPU + : hitIndices[hitIdxInGPU]; // Hit indices are stored differently in the standalone for pLS. + + // For p objects, the 3rd and 4th hit maybe the same, + // due to the way pLS hits are stored in the standalone. + // This is because pixel seeds can be either triplets or quadruplets. + if (trackCandidateType != 4 && hits.size() == 3 && hits.back() == hitIdx) // Remove duplicate 4th hits. + continue; + + hits.push_back(hitIdx); + } + + return hits; +} + +void LST::getOutput(Event& event) { + std::vector> tc_hitIdxs; + std::vector tc_len; + std::vector tc_seedIdx; + std::vector tc_trackCandidateType; + + HitsBuffer& hitsInGPU = event.getHitsInCMSSW(false); // sync on next line + TrackCandidates const* trackCandidates = event.getTrackCandidatesInCMSSW().data(); + + unsigned int nTrackCandidates = *trackCandidates->nTrackCandidates; + + for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { + short trackCandidateType = trackCandidates->trackCandidateType[idx]; + std::vector hit_idx = + getHitIdxs(trackCandidateType, idx, trackCandidates->hitIndices, hitsInGPU.data()->idxs); + + tc_hitIdxs.push_back(hit_idx); + tc_len.push_back(hit_idx.size()); + tc_seedIdx.push_back(trackCandidates->pixelSeedIndex[idx]); + tc_trackCandidateType.push_back(trackCandidateType); + } + + out_tc_hitIdxs_ = tc_hitIdxs; + out_tc_len_ = tc_len; + out_tc_seedIdx_ = tc_seedIdx; + out_tc_trackCandidateType_ = 
tc_trackCandidateType; +} + +void LST::run(Queue& queue, + bool verbose, + LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets) { + auto event = Event(verbose, queue, deviceESData); + prepareInput(see_px, + see_py, + see_pz, + see_dxy, + see_dz, + see_ptErr, + see_etaErr, + see_stateTrajGlbX, + see_stateTrajGlbY, + see_stateTrajGlbZ, + see_stateTrajGlbPx, + see_stateTrajGlbPy, + see_stateTrajGlbPz, + see_q, + see_hitIdx, + ph2_detId, + ph2_x, + ph2_y, + ph2_z); + + event.addHitToEvent(in_trkX_, in_trkY_, in_trkZ_, in_hitId_, in_hitIdxs_); + event.addPixelSegmentToEvent(in_hitIndices_vec0_, + in_hitIndices_vec1_, + in_hitIndices_vec2_, + in_hitIndices_vec3_, + in_deltaPhi_vec_, + in_ptIn_vec_, + in_ptErr_vec_, + in_px_vec_, + in_py_vec_, + in_pz_vec_, + in_eta_vec_, + in_etaErr_vec_, + in_phi_vec_, + in_charge_vec_, + in_seedIdx_vec_, + in_superbin_vec_, + in_pixelType_vec_, + in_isQuad_vec_); + event.createMiniDoublets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets()); + printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0)); + printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1)); + printf("# 
of Mini-doublets produced barrel layer 3: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(2)); + printf("# of Mini-doublets produced barrel layer 4: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(3)); + printf("# of Mini-doublets produced barrel layer 5: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(4)); + printf("# of Mini-doublets produced barrel layer 6: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(5)); + printf("# of Mini-doublets produced endcap layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(0)); + printf("# of Mini-doublets produced endcap layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(1)); + printf("# of Mini-doublets produced endcap layer 3: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(2)); + printf("# of Mini-doublets produced endcap layer 4: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(3)); + printf("# of Mini-doublets produced endcap layer 5: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(4)); + } + + event.createSegmentsWithModuleMap(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Segments produced: %d\n", event.getNumberOfSegments()); + printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0)); + printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1)); + printf("# of Segments produced layer 3-4: %d\n", event.getNumberOfSegmentsByLayerBarrel(2)); + printf("# of Segments produced layer 4-5: %d\n", event.getNumberOfSegmentsByLayerBarrel(3)); + printf("# of Segments produced layer 5-6: %d\n", event.getNumberOfSegmentsByLayerBarrel(4)); + printf("# of Segments produced endcap layer 1: %d\n", event.getNumberOfSegmentsByLayerEndcap(0)); + printf("# of Segments produced endcap layer 2: %d\n", event.getNumberOfSegmentsByLayerEndcap(1)); + printf("# of Segments produced endcap layer 3: %d\n", event.getNumberOfSegmentsByLayerEndcap(2)); + printf("# of Segments produced endcap layer 
4: %d\n", event.getNumberOfSegmentsByLayerEndcap(3)); + printf("# of Segments produced endcap layer 5: %d\n", event.getNumberOfSegmentsByLayerEndcap(4)); + } + + event.createTriplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of T3s produced: %d\n", event.getNumberOfTriplets()); + printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0)); + printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1)); + printf("# of T3s produced layer 3-4-5: %d\n", event.getNumberOfTripletsByLayerBarrel(2)); + printf("# of T3s produced layer 4-5-6: %d\n", event.getNumberOfTripletsByLayerBarrel(3)); + printf("# of T3s produced endcap layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerEndcap(0)); + printf("# of T3s produced endcap layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerEndcap(1)); + printf("# of T3s produced endcap layer 3-4-5: %d\n", event.getNumberOfTripletsByLayerEndcap(2)); + printf("# of T3s produced endcap layer 1: %d\n", event.getNumberOfTripletsByLayerEndcap(0)); + printf("# of T3s produced endcap layer 2: %d\n", event.getNumberOfTripletsByLayerEndcap(1)); + printf("# of T3s produced endcap layer 3: %d\n", event.getNumberOfTripletsByLayerEndcap(2)); + printf("# of T3s produced endcap layer 4: %d\n", event.getNumberOfTripletsByLayerEndcap(3)); + printf("# of T3s produced endcap layer 5: %d\n", event.getNumberOfTripletsByLayerEndcap(4)); + } + + event.createQuintuplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets()); + printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0)); + printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1)); + printf("# of Quintuplets produced layer 3: %d\n", event.getNumberOfQuintupletsByLayerBarrel(2)); 
+ printf("# of Quintuplets produced layer 4: %d\n", event.getNumberOfQuintupletsByLayerBarrel(3)); + printf("# of Quintuplets produced layer 5: %d\n", event.getNumberOfQuintupletsByLayerBarrel(4)); + printf("# of Quintuplets produced layer 6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(5)); + printf("# of Quintuplets produced endcap layer 1: %d\n", event.getNumberOfQuintupletsByLayerEndcap(0)); + printf("# of Quintuplets produced endcap layer 2: %d\n", event.getNumberOfQuintupletsByLayerEndcap(1)); + printf("# of Quintuplets produced endcap layer 3: %d\n", event.getNumberOfQuintupletsByLayerEndcap(2)); + printf("# of Quintuplets produced endcap layer 4: %d\n", event.getNumberOfQuintupletsByLayerEndcap(3)); + printf("# of Quintuplets produced endcap layer 5: %d\n", event.getNumberOfQuintupletsByLayerEndcap(4)); + } + + event.pixelLineSegmentCleaning(no_pls_dupclean); + + event.createPixelQuintuplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets()); + } + + event.createPixelTriplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets()); + } + + event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates()); + printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates()); + printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates()); + printf(" # of pT3 TrackCandidates produced: %d\n", event.getNumberOfPT3TrackCandidates()); + printf(" # of pLS TrackCandidates produced: %d\n", event.getNumberOfPLSTrackCandidates()); + printf(" # of T5 TrackCandidates produced: %d\n", 
event.getNumberOfT5TrackCandidates()); + } + + getOutput(event); + + event.resetEventSync(); +} diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h new file mode 100644 index 0000000000000..27ce7b97bffdd --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -0,0 +1,1081 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_MiniDoublet_h +#define RecoTracker_LSTCore_src_alpaka_MiniDoublet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" + +#include "Hit.h" +#include "ObjectRanges.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + struct MiniDoublets { + unsigned int* nMemoryLocations; + + unsigned int* anchorHitIndices; + unsigned int* outerHitIndices; + uint16_t* moduleIndices; + unsigned int* nMDs; //counter per module + unsigned int* totOccupancyMDs; //counter per module + float* dphichanges; + + float* dzs; //will store drt if the module is endcap + float* dphis; + + float* shiftedXs; + float* shiftedYs; + float* shiftedZs; + float* noShiftedDphis; //if shifted module + float* noShiftedDphiChanges; //if shifted module + + float* anchorX; + float* anchorY; + float* anchorZ; + float* anchorRt; + float* anchorPhi; + float* anchorEta; + float* anchorHighEdgeX; + float* anchorHighEdgeY; + float* anchorLowEdgeX; + float* anchorLowEdgeY; + float* anchorLowEdgePhi; + float* anchorHighEdgePhi; + + float* outerX; + float* outerY; + float* outerZ; + float* outerRt; + float* outerPhi; + float* outerEta; + float* outerHighEdgeX; + float* outerHighEdgeY; + float* outerLowEdgeX; + float* outerLowEdgeY; + + template + void setData(TBuf& buf) { + nMemoryLocations = buf.nMemoryLocations_buf.data(); + anchorHitIndices = buf.anchorHitIndices_buf.data(); + outerHitIndices = buf.outerHitIndices_buf.data(); + moduleIndices = 
buf.moduleIndices_buf.data(); + nMDs = buf.nMDs_buf.data(); + totOccupancyMDs = buf.totOccupancyMDs_buf.data(); + dphichanges = buf.dphichanges_buf.data(); + dzs = buf.dzs_buf.data(); + dphis = buf.dphis_buf.data(); + shiftedXs = buf.shiftedXs_buf.data(); + shiftedYs = buf.shiftedYs_buf.data(); + shiftedZs = buf.shiftedZs_buf.data(); + noShiftedDphis = buf.noShiftedDphis_buf.data(); + noShiftedDphiChanges = buf.noShiftedDphiChanges_buf.data(); + anchorX = buf.anchorX_buf.data(); + anchorY = buf.anchorY_buf.data(); + anchorZ = buf.anchorZ_buf.data(); + anchorRt = buf.anchorRt_buf.data(); + anchorPhi = buf.anchorPhi_buf.data(); + anchorEta = buf.anchorEta_buf.data(); + anchorHighEdgeX = buf.anchorHighEdgeX_buf.data(); + anchorHighEdgeY = buf.anchorHighEdgeY_buf.data(); + anchorLowEdgeX = buf.anchorLowEdgeX_buf.data(); + anchorLowEdgeY = buf.anchorLowEdgeY_buf.data(); + outerX = buf.outerX_buf.data(); + outerY = buf.outerY_buf.data(); + outerZ = buf.outerZ_buf.data(); + outerRt = buf.outerRt_buf.data(); + outerPhi = buf.outerPhi_buf.data(); + outerEta = buf.outerEta_buf.data(); + outerHighEdgeX = buf.outerHighEdgeX_buf.data(); + outerHighEdgeY = buf.outerHighEdgeY_buf.data(); + outerLowEdgeX = buf.outerLowEdgeX_buf.data(); + outerLowEdgeY = buf.outerLowEdgeY_buf.data(); + anchorLowEdgePhi = buf.anchorLowEdgePhi_buf.data(); + anchorHighEdgePhi = buf.anchorHighEdgePhi_buf.data(); + } + }; + + template + struct MiniDoubletsBuffer { + Buf nMemoryLocations_buf; + + Buf anchorHitIndices_buf; + Buf outerHitIndices_buf; + Buf moduleIndices_buf; + Buf nMDs_buf; + Buf totOccupancyMDs_buf; + Buf dphichanges_buf; + + Buf dzs_buf; + Buf dphis_buf; + + Buf shiftedXs_buf; + Buf shiftedYs_buf; + Buf shiftedZs_buf; + Buf noShiftedDphis_buf; + Buf noShiftedDphiChanges_buf; + + Buf anchorX_buf; + Buf anchorY_buf; + Buf anchorZ_buf; + Buf anchorRt_buf; + Buf anchorPhi_buf; + Buf anchorEta_buf; + Buf anchorHighEdgeX_buf; + Buf anchorHighEdgeY_buf; + Buf anchorLowEdgeX_buf; + Buf 
anchorLowEdgeY_buf; + Buf anchorLowEdgePhi_buf; + Buf anchorHighEdgePhi_buf; + + Buf outerX_buf; + Buf outerY_buf; + Buf outerZ_buf; + Buf outerRt_buf; + Buf outerPhi_buf; + Buf outerEta_buf; + Buf outerHighEdgeX_buf; + Buf outerHighEdgeY_buf; + Buf outerLowEdgeX_buf; + Buf outerLowEdgeY_buf; + + MiniDoublets data_; + + template + MiniDoubletsBuffer(unsigned int nMemoryLoc, uint16_t nLowerModules, TDevAcc const& devAccIn, TQueue& queue) + : nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), + anchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + moduleIndices_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + nMDs_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), + totOccupancyMDs_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), + dphichanges_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + dzs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + dphis_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + shiftedXs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + shiftedYs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + shiftedZs_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + noShiftedDphis_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + noShiftedDphiChanges_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorZ_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorRt_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorPhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorEta_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorHighEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorHighEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorLowEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorLowEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, 
queue)), + anchorLowEdgePhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + anchorHighEdgePhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerZ_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerRt_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerPhi_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerEta_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerHighEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerHighEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerLowEdgeX_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)), + outerLowEdgeY_buf(allocBufWrapper(devAccIn, nMemoryLoc, queue)) { + alpaka::memset(queue, nMDs_buf, 0u); + alpaka::memset(queue, totOccupancyMDs_buf, 0u); + } + + inline MiniDoublets const* data() const { return &data_; } + inline void setData(MiniDoubletsBuffer& buf) { data_.setData(buf); } + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, + MiniDoublets& mdsInGPU, + Hits const& hitsInGPU, + Modules const& modulesInGPU, + unsigned int lowerHitIdx, + unsigned int upperHitIdx, + uint16_t lowerModuleIdx, + float dz, + float dPhi, + float dPhiChange, + float shiftedX, + float shiftedY, + float shiftedZ, + float noShiftedDphi, + float noShiftedDPhiChange, + unsigned int idx) { + //the index into which this MD needs to be written will be computed in the kernel + //nMDs variable will be incremented in the kernel, no need to worry about that here + + mdsInGPU.moduleIndices[idx] = lowerModuleIdx; + unsigned int anchorHitIndex, outerHitIndex; + if (modulesInGPU.moduleType[lowerModuleIdx] == PS and modulesInGPU.moduleLayerType[lowerModuleIdx] == Strip) { + mdsInGPU.anchorHitIndices[idx] = upperHitIdx; + mdsInGPU.outerHitIndices[idx] = lowerHitIdx; + + anchorHitIndex = upperHitIdx; + outerHitIndex = lowerHitIdx; + } else { + 
mdsInGPU.anchorHitIndices[idx] = lowerHitIdx; + mdsInGPU.outerHitIndices[idx] = upperHitIdx; + + anchorHitIndex = lowerHitIdx; + outerHitIndex = upperHitIdx; + } + + mdsInGPU.dphichanges[idx] = dPhiChange; + + mdsInGPU.dphis[idx] = dPhi; + mdsInGPU.dzs[idx] = dz; + mdsInGPU.shiftedXs[idx] = shiftedX; + mdsInGPU.shiftedYs[idx] = shiftedY; + mdsInGPU.shiftedZs[idx] = shiftedZ; + + mdsInGPU.noShiftedDphis[idx] = noShiftedDphi; + mdsInGPU.noShiftedDphiChanges[idx] = noShiftedDPhiChange; + + mdsInGPU.anchorX[idx] = hitsInGPU.xs[anchorHitIndex]; + mdsInGPU.anchorY[idx] = hitsInGPU.ys[anchorHitIndex]; + mdsInGPU.anchorZ[idx] = hitsInGPU.zs[anchorHitIndex]; + mdsInGPU.anchorRt[idx] = hitsInGPU.rts[anchorHitIndex]; + mdsInGPU.anchorPhi[idx] = hitsInGPU.phis[anchorHitIndex]; + mdsInGPU.anchorEta[idx] = hitsInGPU.etas[anchorHitIndex]; + mdsInGPU.anchorHighEdgeX[idx] = hitsInGPU.highEdgeXs[anchorHitIndex]; + mdsInGPU.anchorHighEdgeY[idx] = hitsInGPU.highEdgeYs[anchorHitIndex]; + mdsInGPU.anchorLowEdgeX[idx] = hitsInGPU.lowEdgeXs[anchorHitIndex]; + mdsInGPU.anchorLowEdgeY[idx] = hitsInGPU.lowEdgeYs[anchorHitIndex]; + mdsInGPU.anchorHighEdgePhi[idx] = + alpaka::math::atan2(acc, mdsInGPU.anchorHighEdgeY[idx], mdsInGPU.anchorHighEdgeX[idx]); + mdsInGPU.anchorLowEdgePhi[idx] = + alpaka::math::atan2(acc, mdsInGPU.anchorLowEdgeY[idx], mdsInGPU.anchorLowEdgeX[idx]); + + mdsInGPU.outerX[idx] = hitsInGPU.xs[outerHitIndex]; + mdsInGPU.outerY[idx] = hitsInGPU.ys[outerHitIndex]; + mdsInGPU.outerZ[idx] = hitsInGPU.zs[outerHitIndex]; + mdsInGPU.outerRt[idx] = hitsInGPU.rts[outerHitIndex]; + mdsInGPU.outerPhi[idx] = hitsInGPU.phis[outerHitIndex]; + mdsInGPU.outerEta[idx] = hitsInGPU.etas[outerHitIndex]; + mdsInGPU.outerHighEdgeX[idx] = hitsInGPU.highEdgeXs[outerHitIndex]; + mdsInGPU.outerHighEdgeY[idx] = hitsInGPU.highEdgeYs[outerHitIndex]; + mdsInGPU.outerLowEdgeX[idx] = hitsInGPU.lowEdgeXs[outerHitIndex]; + mdsInGPU.outerLowEdgeY[idx] = hitsInGPU.lowEdgeYs[outerHitIndex]; + } + + 
ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules(Modules const& modulesInGPU, uint16_t moduleIndex) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + short subdet = modulesInGPU.subdets[moduleIndex]; + short layer = modulesInGPU.layers[moduleIndex]; + short side = modulesInGPU.sides[moduleIndex]; + short rod = modulesInGPU.rods[moduleIndex]; + + if (subdet == Barrel) { + if ((side != Center and layer == 3) or (side == NegZ and layer == 2 and rod > 5) or + (side == PosZ and layer == 2 and rod < 8) or (side == NegZ and layer == 1 and rod > 9) or + (side == PosZ and layer == 1 and rod < 4)) + return true; + else + return false; + } else + return false; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(Modules const& modulesInGPU, uint16_t moduleIndex) { + float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + float miniDeltaEndcap[5][15]; + + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 15; j++) { + if (i == 0 || i == 1) { + if (j < 10) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } else if (i == 2 || i == 3) { + if (j < 8) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } else { + if (j < 9) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } + } + } + + unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; + unsigned int iR = modulesInGPU.rings[moduleIndex] - 1; + short subdet = modulesInGPU.subdets[moduleIndex]; + short side = modulesInGPU.sides[moduleIndex]; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if 
(isTighterTiltedModules(modulesInGPU, moduleIndex)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( + TAcc const& acc, float rt, Modules const& modulesInGPU, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { + // ================================================================= + // Various constants + // ================================================================= + //mean of the horizontal layer position in y; treat this as R below + + // ================================================================= + // Computing some components that make up the cut threshold + // ================================================================= + + unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; + const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float rLayNominal = + ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); + const float miniPVoff = 0.1f / rLayNominal; + const float miniMuls = ((modulesInGPU.subdets[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); + const bool isTilted = modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] != Center; + //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly + + float drdz; + if (isTilted) { + if (modulesInGPU.moduleType[moduleIndex] == PS and modulesInGPU.moduleLayerType[moduleIndex] == Strip) { + drdz = modulesInGPU.drdzs[moduleIndex]; + } else { + drdz = modulesInGPU.drdzs[modulesInGPU.partnerModuleIndices[moduleIndex]]; + } + } else { + drdz = 0; + } + const float miniTilt2 = ((isTilted) ? 
(0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) / + (1.f + drdz * drdz) / moduleGapSize(modulesInGPU, moduleIndex) + : 0); + + // Compute luminous region requirement for endcap + const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); // Balaji's new error + + // ================================================================= + // Return the threshold value + // ================================================================= + // Following condition is met if the module is central and flatly lying + if (modulesInGPU.subdets[moduleIndex] == Barrel and modulesInGPU.sides[moduleIndex] == Center) { + return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); + } + // Following condition is met if the module is central and tilted + else if (modulesInGPU.subdets[moduleIndex] == Barrel and + modulesInGPU.sides[moduleIndex] != Center) //all types of tilted modules + { + return miniSlope + + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); + } + // If not barrel, it is Endcap + else { + return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); + } + } + + template + ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float* shiftedCoords, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + // This is the strip shift scheme that is explained in http://uaf-10.t2.ucsd.edu/~phchang/talks/PhilipChang20190607_SDL_Update.pdf (see backup slides) + // The main feature of this shifting is that the strip hits are shifted to be "aligned" in the line of sight from interaction point to the the pixel hit. 
+ // (since pixel hit is well defined in 3-d) + // The strip hit is shifted along the strip detector to be placed in a guessed position where we think they would have actually crossed + // The size of the radial direction shift due to module separation gap is computed in "radial" size, while the shift is done along the actual strip orientation + // This means that there may be very very subtle edge effects coming from whether the strip hit is center of the module or the at the edge of the module + // But this should be relatively minor effect + + // dependent variables for this if statement + // lowerModule + // lowerHit + // upperHit + // endcapGeometry + // tiltedGeometry + + // Some variables relevant to the function + float xp; // pixel x (pixel hit x) + float yp; // pixel y (pixel hit y) + float zp; // pixel y (pixel hit y) + float rtp; // pixel y (pixel hit y) + float xa; // "anchor" x (the anchor position on the strip module plane from pixel hit) + float ya; // "anchor" y (the anchor position on the strip module plane from pixel hit) + float xo; // old x (before the strip hit is moved up or down) + float yo; // old y (before the strip hit is moved up or down) + float xn; // new x (after the strip hit is moved up or down) + float yn; // new y (after the strip hit is moved up or down) + float abszn; // new z in absolute value + float zn; // new z with the sign (+/-) accounted + float angleA; // in r-z plane the theta of the pixel hit in polar coordinate is the angleA + float angleB; // this is the angle of tilted module in r-z plane ("drdz"), for endcap this is 90 degrees + bool isEndcap; // If endcap, drdz = infinity + float moduleSeparation; + float drprime; // The radial shift size in x-y plane projection + float drprime_x; // x-component of drprime + float drprime_y; // y-component of drprime + const float& slope = + modulesInGPU.dxdys[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane + float absArctanSlope; + float 
angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees + float absdzprime; // The distance between the two points after shifting + const float& drdz_ = modulesInGPU.drdzs[lowerModuleIndex]; + // Assign hit pointers based on their hit type + if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + // TODO: This is somewhat of an mystery.... somewhat confused why this is the case + if (modulesInGPU.subdets[lowerModuleIndex] == Barrel ? modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel + : modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + xo = xUpper; + yo = yUpper; + xp = xLower; + yp = yLower; + zp = zLower; + rtp = rtLower; + } else { + xo = xLower; + yo = yLower; + xp = xUpper; + yp = yUpper; + zp = zUpper; + rtp = rtUpper; + } + } else { + xo = xUpper; + yo = yUpper; + xp = xLower; + yp = yLower; + zp = zLower; + rtp = rtLower; + } + + // If it is endcap some of the math gets simplified (and also computers don't like infinities) + isEndcap = modulesInGPU.subdets[lowerModuleIndex] == Endcap; + + // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos + // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time + angleA = alpaka::math::abs(acc, alpaka::math::atan(acc, rtp / zp)); + angleB = + ((isEndcap) + ? 
float(M_PI) / 2.f + : alpaka::math::atan( + acc, + drdz_)); // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa + + moduleSeparation = moduleGapSize(modulesInGPU, lowerModuleIndex); + + // Sign flips if the pixel is later layer + if (modulesInGPU.moduleType[lowerModuleIndex] == PS and modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel) { + moduleSeparation *= -1; + } + + drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); + + // Compute arctan of the slope and take care of the slope = infinity case + absArctanSlope = ((slope != lst_INF) ? fabs(alpaka::math::atan(acc, slope)) : float(M_PI) / 2.f); + + // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently + if (xp > 0 and yp > 0) { + angleM = absArctanSlope; + } else if (xp > 0 and yp < 0) { + angleM = float(M_PI) - absArctanSlope; + } else if (xp < 0 and yp < 0) { + angleM = float(M_PI) + absArctanSlope; + } else // if (xp < 0 and yp > 0) + { + angleM = 2.f * float(M_PI) - absArctanSlope; + } + + // Then since the angleM sign is taken care of properly + drprime_x = drprime * alpaka::math::sin(acc, angleM); + drprime_y = drprime * alpaka::math::cos(acc, angleM); + + // The new anchor position is + xa = xp + drprime_x; + ya = yp + drprime_y; + + // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) + if (slope == + lst_INF) // Designated for tilted module when the slope is exactly infinity (module lying along y-axis) + { + xn = xa; // New x point is simply where the anchor is + yn = yo; // No shift in y + } else if (slope == 0) { + xn = xo; // New x point is simply where the anchor is + yn = ya; // No shift in y + } else { + xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); // new xn + yn = (xn - xa) * slope + ya; // new yn + } + + // Computing new Z position + absdzprime = 
alpaka::math::abs( + acc, + moduleSeparation / alpaka::math::sin(acc, angleA + angleB) * + alpaka::math::cos( + acc, + angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later + + // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + abszn = alpaka::math::abs(acc, zp) + absdzprime; + } else { + abszn = alpaka::math::abs(acc, zp) - absdzprime; + } + + zn = abszn * ((zp > 0) ? 1 : -1); // Apply the sign of the zn + + shiftedCoords[0] = xn; + shiftedCoords[1] = yn; + shiftedCoords[2] = zn; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + dz = zLower - zUpper; + const float dzCut = modulesInGPU.moduleType[lowerModuleIndex] == PS ? 2.f : 10.f; + const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); + const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; + + if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) + return false; + + float miniCut = 0; + + miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex) + : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex); + + // Cut #2: dphi difference + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 + float xn = 0.f, yn = 0.f; // , zn = 0; + float shiftedRt2; + if (modulesInGPU.sides[lowerModuleIndex] != Center) // If barrel and not center it is tilted + { + // Shift the hits and calculate new xn, yn position + float shiftedCoords[3]; + shiftStripHits(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + shiftedCoords, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + xn = shiftedCoords[0]; + yn = shiftedCoords[1]; + + // Lower or the upper hit needs to be modified depending on which one was actually shifted + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + shiftedRt2 = xn * xn + yn * yn; + + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zLower; + shiftedRt2 = xn * xn + yn * yn; + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + } else { + shiftedX = 0; + shiftedY = 0; + shiftedZ = 0; + dPhi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphi = dPhi; + } + + if (alpaka::math::abs(acc, dPhi) >= miniCut) + return false; + + // Cut #3: The dphi change going from lower Hit to upper Hit + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 + if (modulesInGPU.sides[lowerModuleIndex] != Center) { + // When it is tilted, use the new shifted positions + // TODO: This is somewhat of an mystery.... 
somewhat confused why this is the case + if (modulesInGPU.moduleLayerType[lowerModuleIndex] != Pixel) { + // dPhi Change should be calculated so that the upper hit has higher rt. + // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. + // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) + // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) + // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); + + dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) + : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + } else { + // dPhi Change should be calculated so that the upper hit has higher rt. + // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. + // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) + // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) + + dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); + noShiftedDphiChange = rtLower < rtUpper ? 
deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + } + } else { + // When it is flat lying module, whichever is the lowerSide will always have rt lower + dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphiChange = dPhiChange; + } + + return alpaka::math::abs(acc, dPhiChange) < miniCut; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& drt, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphichange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + // There are series of cuts that applies to mini-doublet in a "endcap" region + // Cut #1 : dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3093 + // For PS module in case when it is tilted a different dz (after the strip hit shift) is calculated later. + + float dz = zLower - zUpper; // Not const since later it might change depending on the type of module + + const float dzCut = 1.f; + + if (alpaka::math::abs(acc, dz) >= dzCut) + return false; + // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 + const float drtCut = modulesInGPU.moduleType[lowerModuleIndex] == PS ? 
2.f : 10.f; + drt = rtLower - rtUpper; + if (alpaka::math::abs(acc, drt) >= drtCut) + return false; + // The new scheme shifts strip hits to be "aligned" along the line of sight from interaction point to the pixel hit (if it is PS modules) + float xn = 0, yn = 0, zn = 0; + + float shiftedCoords[3]; + shiftStripHits(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + shiftedCoords, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + + xn = shiftedCoords[0]; + yn = shiftedCoords[1]; + zn = shiftedCoords[2]; + + if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + // Appropriate lower or upper hit is modified after checking which one was actually shifted + if (modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel) { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zLower; + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + dPhi = deltaPhi(acc, xLower, yLower, xn, yn); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + + // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" + // if it was an endcap it will have zero effect + if (modulesInGPU.moduleType[lowerModuleIndex] == PS) { + dz = modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; + } + + float miniCut = 0; + miniCut = modulesInGPU.moduleLayerType[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modulesInGPU, lowerModuleIndex, dPhi, dz) + : dPhiThreshold(acc, rtUpper, modulesInGPU, lowerModuleIndex, dPhi, dz); + + if (alpaka::math::abs(acc, dPhi) >= miniCut) + return false; + + // Cut #4: Another cut on the dphi after some modification + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3119-L3124 + + float dzFrac = alpaka::math::abs(acc, dz) / alpaka::math::abs(acc, zLower); + dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); + + return alpaka::math::abs(acc, dPhiChange) < miniCut; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + if (modulesInGPU.subdets[lowerModuleIndex] == Barrel) { + return runMiniDoubletDefaultAlgoBarrel(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } else { + return runMiniDoubletDefaultAlgoEndcap(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } + } + + struct CreateMiniDoubletsInGPUv2 { + template + ALPAKA_FN_ACC void operator()( + 
TAcc const& acc, Modules modulesInGPU, Hits hitsInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t lowerModuleIndex = globalThreadIdx[1]; lowerModuleIndex < (*modulesInGPU.nLowerModules); + lowerModuleIndex += gridThreadExtent[1]) { + uint16_t upperModuleIndex = modulesInGPU.partnerModuleIndices[lowerModuleIndex]; + int nLowerHits = hitsInGPU.hitRangesnLower[lowerModuleIndex]; + int nUpperHits = hitsInGPU.hitRangesnUpper[lowerModuleIndex]; + if (hitsInGPU.hitRangesLower[lowerModuleIndex] == -1) + continue; + unsigned int upHitArrayIndex = hitsInGPU.hitRangesUpper[lowerModuleIndex]; + unsigned int loHitArrayIndex = hitsInGPU.hitRangesLower[lowerModuleIndex]; + int limit = nUpperHits * nLowerHits; + + for (int hitIndex = globalThreadIdx[2]; hitIndex < limit; hitIndex += gridThreadExtent[2]) { + int lowerHitIndex = hitIndex / nUpperHits; + int upperHitIndex = hitIndex % nUpperHits; + if (upperHitIndex >= nUpperHits) + continue; + if (lowerHitIndex >= nLowerHits) + continue; + unsigned int lowerHitArrayIndex = loHitArrayIndex + lowerHitIndex; + float xLower = hitsInGPU.xs[lowerHitArrayIndex]; + float yLower = hitsInGPU.ys[lowerHitArrayIndex]; + float zLower = hitsInGPU.zs[lowerHitArrayIndex]; + float rtLower = hitsInGPU.rts[lowerHitArrayIndex]; + unsigned int upperHitArrayIndex = upHitArrayIndex + upperHitIndex; + float xUpper = hitsInGPU.xs[upperHitArrayIndex]; + float yUpper = hitsInGPU.ys[upperHitArrayIndex]; + float zUpper = hitsInGPU.zs[upperHitArrayIndex]; + float rtUpper = hitsInGPU.rts[upperHitArrayIndex]; + + float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, noShiftedDphi, noShiftedDphiChange; + bool success = runMiniDoubletDefaultAlgo(acc, + modulesInGPU, + lowerModuleIndex, + upperModuleIndex, + lowerHitArrayIndex, + upperHitArrayIndex, + dz, + dphi, + dphichange, + shiftedX, + shiftedY, + shiftedZ, + 
noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + if (success) { + int totOccupancyMDs = + alpaka::atomicAdd(acc, &mdsInGPU.totOccupancyMDs[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyMDs >= (rangesInGPU.miniDoubletModuleOccupancy[lowerModuleIndex])) { +#ifdef WARNINGS + printf("Mini-doublet excess alert! Module index = %d\n", lowerModuleIndex); +#endif + } else { + int mdModuleIndex = + alpaka::atomicAdd(acc, &mdsInGPU.nMDs[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int mdIndex = rangesInGPU.miniDoubletModuleIndices[lowerModuleIndex] + mdModuleIndex; + + addMDToMemory(acc, + mdsInGPU, + hitsInGPU, + modulesInGPU, + lowerHitArrayIndex, + upperHitArrayIndex, + lowerModuleIndex, + dz, + dphi, + dphichange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + mdIndex); + } + } + } + } + } + }; + + struct CreateMDArrayRangesGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, Modules modulesInGPU, ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Declare variables in shared memory and set to 0 + int& nTotalMDs = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalMDs = 0; + } + alpaka::syncBlockThreads(acc); + + // Create variables outside of the for loop. 
+ int occupancy, category_number, eta_number; + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + short module_rings = modulesInGPU.rings[i]; + short module_layers = modulesInGPU.layers[i]; + short module_subdets = modulesInGPU.subdets[i]; + float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + if (category_number == 0 && eta_number == 0) + occupancy = 49; + else if (category_number == 0 && eta_number == 1) + occupancy = 42; + else if (category_number == 0 && eta_number == 2) + occupancy = 37; + else if (category_number == 0 && eta_number == 3) + occupancy = 41; + else if (category_number == 1) + occupancy = 100; + else if (category_number == 2 && eta_number == 1) + occupancy = 16; + else if (category_number == 2 && eta_number == 2) + occupancy = 19; + else if (category_number == 3 && eta_number == 1) + occupancy = 14; + else if (category_number == 3 && eta_number == 2) + occupancy = 20; + else if (category_number == 3 && eta_number == 3) + occupancy = 25; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createMDArrayRangesGPU! 
Module index = %i\n", i); +#endif + } + + unsigned int nTotMDs = alpaka::atomicAdd(acc, &nTotalMDs, occupancy, alpaka::hierarchy::Threads{}); + + rangesInGPU.miniDoubletModuleIndices[i] = nTotMDs; + rangesInGPU.miniDoubletModuleOccupancy[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + rangesInGPU.miniDoubletModuleIndices[*modulesInGPU.nLowerModules] = nTotalMDs; + *rangesInGPU.device_nTotalMDs = nTotalMDs; + } + } + }; + + struct AddMiniDoubletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()( + TAcc const& acc, Modules modulesInGPU, MiniDoublets mdsInGPU, ObjectRanges rangesInGPU, Hits hitsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + if (mdsInGPU.nMDs[i] == 0 or hitsInGPU.hitRanges[i * 2] == -1) { + rangesInGPU.mdRanges[i * 2] = -1; + rangesInGPU.mdRanges[i * 2 + 1] = -1; + } else { + rangesInGPU.mdRanges[i * 2] = rangesInGPU.miniDoubletModuleIndices[i]; + rangesInGPU.mdRanges[i * 2 + 1] = rangesInGPU.miniDoubletModuleIndices[i] + mdsInGPU.nMDs[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h new file mode 100644 index 0000000000000..85b7b08dc075b --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -0,0 +1,165 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h +#define RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" + 
+#include "NeuralNetworkWeights.h" +#include "Segment.h" +#include "MiniDoublet.h" +#include "Hit.h" +#include "Triplet.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + namespace t5dnn { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + const float* xVec, + const float* yVec, + const unsigned int* mdIndices, + const uint16_t* lowerModuleIndices, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float innerRadius, + float outerRadius, + float bridgeRadius) { + // Unpack x-coordinates of hits + float x1 = xVec[0]; + float x2 = xVec[1]; + float x3 = xVec[2]; + float x4 = xVec[3]; + float x5 = xVec[4]; + // Unpack y-coordinates of hits + float y1 = yVec[0]; + float y2 = yVec[1]; + float y3 = yVec[2]; + float y4 = yVec[3]; + float y5 = yVec[4]; + // Unpack module indices + unsigned int mdIndex1 = mdIndices[0]; + unsigned int mdIndex2 = mdIndices[1]; + unsigned int mdIndex3 = mdIndices[2]; + unsigned int mdIndex4 = mdIndices[3]; + unsigned int mdIndex5 = mdIndices[4]; + // Unpack module indices + uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; + uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; + uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; + uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; + uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; + // Compute some convenience variables + short layer2_adjustment = 0; + if (modulesInGPU.layers[lowerModuleIndex1] == 1) { + layer2_adjustment = 1; // get upper segment to be in second layer + } + unsigned int md_idx_for_t5_eta_phi = + segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + layer2_adjustment]]; + bool is_endcap1 = (modulesInGPU.subdets[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap + bool is_endcap2 = (modulesInGPU.subdets[lowerModuleIndex2] == 4); // true if anchor 
hit 2 is in the endcap + bool is_endcap3 = (modulesInGPU.subdets[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap + bool is_endcap4 = (modulesInGPU.subdets[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap + bool is_endcap5 = (modulesInGPU.subdets[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap + + // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) + float x[38] = { + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) + mdsInGPU.anchorEta[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) + mdsInGPU.anchorPhi[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) + mdsInGPU.anchorZ[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) + alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) + float(modulesInGPU.layers[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) + mdsInGPU.anchorEta[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) + mdsInGPU.anchorPhi[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) + mdsInGPU.anchorZ[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) + alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) + float(modulesInGPU.layers[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) + mdsInGPU.anchorEta[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) + mdsInGPU.anchorPhi[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) + mdsInGPU.anchorZ[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) + float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) + mdsInGPU.anchorEta[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) + mdsInGPU.anchorPhi[mdIndex3], // outer T3 anchor hit 4 phi (t3_0_phi) + 
mdsInGPU.anchorZ[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) + float(modulesInGPU.layers[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) + mdsInGPU.anchorEta[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) + mdsInGPU.anchorPhi[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) + mdsInGPU.anchorZ[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) + alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) + float(modulesInGPU.layers[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) + mdsInGPU.anchorEta[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) + mdsInGPU.anchorPhi[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) + mdsInGPU.anchorZ[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) + alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) + float(modulesInGPU.layers[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) + alpaka::math::log10(acc, (innerRadius + outerRadius) * k2Rinv1GeVf), // T5 pT (t5_pt) + mdsInGPU.anchorEta[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) + mdsInGPU.anchorPhi[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) + alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) + alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) + alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) + }; + + // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b + float x_0[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_0[col] = 0; + for (unsigned int inner = 0; inner < 38; ++inner) { + x_0[col] += x[inner] * wgtT_0[inner][col]; + } + x_0[col] += bias_0[col]; + } + + // (1): ReLU() + float x_1[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_1[col] = (x_0[col] > 0.f) ? 
x_0[col] : 0.f; + } + + // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b + float x_2[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_2[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_2[col] += x_1[inner] * wgtT_2[inner][col]; + } + x_2[col] += bias_2[col]; + } + + // (3): ReLU() + float x_3[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_3[col] = (x_2[col] > 0.f) ? x_2[col] : 0.f; + } + + // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b + float x_4[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_4[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_4[col] += x_3[inner] * wgtT_4[inner][col]; + } + x_4[col] += bias_4[col]; + } + + // (5): Sigmoid() + float x_5[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_5[col] = alpaka::math::exp(acc, x_4[col]) / (alpaka::math::exp(acc, x_4[col]) + 1); + } + + return x_5[0]; + } + + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h new file mode 100644 index 0000000000000..d5321fea07a6e --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h @@ -0,0 +1,315 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_NeuralNetworkWeights_h +#define RecoTracker_LSTCore_src_alpaka_NeuralNetworkWeights_h + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + namespace t5dnn { + + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { + -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, + 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, + -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, -0.5780622f, + 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; + 
ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { + {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, + 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, + -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, + -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, + {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, + -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, + -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, + 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, + {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, + 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, + -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, + 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 0.8718957f, -1.1980486f}, + {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, + -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, + -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, + -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, + {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, + 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, -0.1554846f, -0.0831100f, 2.8754771f, + -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, 
-1.2626950f, 0.9145837f, + -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, + {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, + -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, + -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, + -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, + {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, + 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, + 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, + -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, + {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, + 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, + 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, + -1.3269740f, -3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, + {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, + 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, + 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, + -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, + {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, + 0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, + 1.2458711f, 
-0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, + 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, + {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, + -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, + -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, + -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, + {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, + 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, + 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, + 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, + {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, + -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, + -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, -0.0709381f, -0.1839961f, -0.0529834f, + 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, + {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, + -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, + 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, + 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, + {0.3995822f, -0.2139665f, 0.3982936f, -0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, + -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 
0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, + 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, + 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, + {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, + -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, + -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, + -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, + {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, + 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, + -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, + 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, + {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, + 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, + 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, + 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, + {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, + -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, + -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, + 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 0.6662078f, 2.0991604f}, + {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 
0.2300792f, + -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, + 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, + 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, + {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, + -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, + 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, + 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, + {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, + -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, + -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, + -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, + {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, + 0.8555872f, 2.4727983f, -0.2072828f, -1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, + -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, + -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, + {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, + 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, + -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, + 0.6274695f, -1.8314874f, 0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, + {0.0534900f, 0.1787969f, -0.0571320f, 
-0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, + 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, + -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, + -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, + {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, + 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, + -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, + 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, + {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, + -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, + -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, + -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, + {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 1.2631345f, + 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, + 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, + -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, + {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, + -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, + -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, -0.0525629f, -0.2861001f, + -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, 
-0.1602017f, -0.0070103f}, + {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, + 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, + 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, + -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, + {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, + -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, + 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, + -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, + {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, + 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, + 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, + 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, + {-4.8917460f, 6.7944999f, -0.2255474f, 0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, + 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, + 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, + -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, + {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, + 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, + 0.6546553f, -2.7179255f, -1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, + 1.3750844f, 0.0596805f, 
-0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, + {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, + 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, + -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, + 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, + {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, + 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, + -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, + 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, + {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, + 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, + 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, + -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, -2.7904994f}, + {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, + 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, + -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, + 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { + 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, + 14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, + 
-2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, + -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { + {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, + -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, + -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, + 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, + {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, + -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, + 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, + 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, + {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, + -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, + 0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, + 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, + {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, + 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, + 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, + -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, -0.1271127f, 0.1164243f}, + {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 
0.0236462f, -0.0704844f, + -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, + -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, + -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, + {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, + 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, + 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, + -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, + {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, + -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, + -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, + 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, + {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, + -0.2628384f, -0.1402464f, -0.6302179f, -0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, + -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, + 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, + {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, + 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, + -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, + -1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, + 
{0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, + -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, + -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, + -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, + {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, + -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, + 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, + -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, + {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, + 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, + 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, + 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, + {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 0.5166737f, -1.0814301f, + -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, + -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, + 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, + {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, + -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, + -2.0935996f, -0.1845744f, -0.0716080f, -1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, + -0.9677961f, 1.8319578f, -3.1916575f, 
3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, + {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, + -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, + 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, + 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, + {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, + -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, + 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, + -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, + {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, + -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, + -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, + -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, + {-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, + 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, + 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, + -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, + {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, + -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, -0.7453306f, -1.9909632f, + -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, 
-11.0296011f, -1.1292133f, -0.0710276f, + 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, + {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, + -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, + 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, + -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, + {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, + -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, + 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, + 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, + {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, + 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, + -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, + -0.1452804f, -0.0195101f, 0.1234572f, 0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, + {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, + -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, + 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, + -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, + {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, + -0.0119933f, -0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, + 
-0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, + -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, + {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, + -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, + -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, + -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, + {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, + -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, + -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, + -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, + {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, + -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, + -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, -5.3748040f, -7.4730940f, 0.1019564f, + -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, + {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, + 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, + -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, + 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, + {0.4443443f, 0.6384261f, 1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, + 1.1106120f, 
-0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, + -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, + -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, + {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, + 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, + 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, + -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, + {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, + 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, + -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, + -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, + {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, + 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, -0.0518054f, + 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, + 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { + {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, + {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, + {-0.0601061f}, {0.2086218f}, {-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, + {-0.3354485f}, 
{0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, + {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, + }; + + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h new file mode 100644 index 0000000000000..81e4358ab30d6 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/ObjectRanges.h @@ -0,0 +1,154 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRanges_h +#define RecoTracker_LSTCore_interface_ObjectRanges_h + +#include "RecoTracker/LSTCore/interface/Constants.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + struct ObjectRanges { + int* hitRanges; + int* hitRangesLower; + int* hitRangesUpper; + int8_t* hitRangesnLower; + int8_t* hitRangesnUpper; + int* mdRanges; + int* segmentRanges; + int* trackletRanges; + int* tripletRanges; + int* trackCandidateRanges; + // Others will be added later + int* quintupletRanges; + + // This number is just nEligibleModules - 1, but still we want this to be independent of the TC kernel + uint16_t* nEligibleT5Modules; + // Will be allocated in createQuintuplets kernel! 
+ uint16_t* indicesOfEligibleT5Modules; + // To store different starting points for variable occupancy stuff + int* quintupletModuleIndices; + int* quintupletModuleOccupancy; + int* miniDoubletModuleIndices; + int* miniDoubletModuleOccupancy; + int* segmentModuleIndices; + int* segmentModuleOccupancy; + int* tripletModuleIndices; + int* tripletModuleOccupancy; + + unsigned int* device_nTotalMDs; + unsigned int* device_nTotalSegs; + unsigned int* device_nTotalTrips; + unsigned int* device_nTotalQuints; + + template + void setData(TBuff& buf) { + hitRanges = buf.hitRanges_buf.data(); + hitRangesLower = buf.hitRangesLower_buf.data(); + hitRangesUpper = buf.hitRangesUpper_buf.data(); + hitRangesnLower = buf.hitRangesnLower_buf.data(); + hitRangesnUpper = buf.hitRangesnUpper_buf.data(); + mdRanges = buf.mdRanges_buf.data(); + segmentRanges = buf.segmentRanges_buf.data(); + trackletRanges = buf.trackletRanges_buf.data(); + tripletRanges = buf.tripletRanges_buf.data(); + trackCandidateRanges = buf.trackCandidateRanges_buf.data(); + quintupletRanges = buf.quintupletRanges_buf.data(); + + nEligibleT5Modules = buf.nEligibleT5Modules_buf.data(); + indicesOfEligibleT5Modules = buf.indicesOfEligibleT5Modules_buf.data(); + + quintupletModuleIndices = buf.quintupletModuleIndices_buf.data(); + quintupletModuleOccupancy = buf.quintupletModuleOccupancy_buf.data(); + miniDoubletModuleIndices = buf.miniDoubletModuleIndices_buf.data(); + miniDoubletModuleOccupancy = buf.miniDoubletModuleOccupancy_buf.data(); + segmentModuleIndices = buf.segmentModuleIndices_buf.data(); + segmentModuleOccupancy = buf.segmentModuleOccupancy_buf.data(); + tripletModuleIndices = buf.tripletModuleIndices_buf.data(); + tripletModuleOccupancy = buf.tripletModuleOccupancy_buf.data(); + + device_nTotalMDs = buf.device_nTotalMDs_buf.data(); + device_nTotalSegs = buf.device_nTotalSegs_buf.data(); + device_nTotalTrips = buf.device_nTotalTrips_buf.data(); + device_nTotalQuints = buf.device_nTotalQuints_buf.data(); 
+ } + }; + + template + struct ObjectRangesBuffer { + Buf hitRanges_buf; + Buf hitRangesLower_buf; + Buf hitRangesUpper_buf; + Buf hitRangesnLower_buf; + Buf hitRangesnUpper_buf; + Buf mdRanges_buf; + Buf segmentRanges_buf; + Buf trackletRanges_buf; + Buf tripletRanges_buf; + Buf trackCandidateRanges_buf; + Buf quintupletRanges_buf; + + Buf nEligibleT5Modules_buf; + Buf indicesOfEligibleT5Modules_buf; + + Buf quintupletModuleIndices_buf; + Buf quintupletModuleOccupancy_buf; + Buf miniDoubletModuleIndices_buf; + Buf miniDoubletModuleOccupancy_buf; + Buf segmentModuleIndices_buf; + Buf segmentModuleOccupancy_buf; + Buf tripletModuleIndices_buf; + Buf tripletModuleOccupancy_buf; + + Buf device_nTotalMDs_buf; + Buf device_nTotalSegs_buf; + Buf device_nTotalTrips_buf; + Buf device_nTotalQuints_buf; + + ObjectRanges data_; + + template + ObjectRangesBuffer(unsigned int nMod, unsigned int nLowerMod, TDevAcc const& devAccIn, TQueue& queue) + : hitRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + hitRangesLower_buf(allocBufWrapper(devAccIn, nMod, queue)), + hitRangesUpper_buf(allocBufWrapper(devAccIn, nMod, queue)), + hitRangesnLower_buf(allocBufWrapper(devAccIn, nMod, queue)), + hitRangesnUpper_buf(allocBufWrapper(devAccIn, nMod, queue)), + mdRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + segmentRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + trackletRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + tripletRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + trackCandidateRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + quintupletRanges_buf(allocBufWrapper(devAccIn, nMod * 2, queue)), + nEligibleT5Modules_buf(allocBufWrapper(devAccIn, 1, queue)), + indicesOfEligibleT5Modules_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), + quintupletModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), + quintupletModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), + 
miniDoubletModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), + miniDoubletModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), + segmentModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), + segmentModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod + 1, queue)), + tripletModuleIndices_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), + tripletModuleOccupancy_buf(allocBufWrapper(devAccIn, nLowerMod, queue)), + device_nTotalMDs_buf(allocBufWrapper(devAccIn, 1, queue)), + device_nTotalSegs_buf(allocBufWrapper(devAccIn, 1, queue)), + device_nTotalTrips_buf(allocBufWrapper(devAccIn, 1, queue)), + device_nTotalQuints_buf(allocBufWrapper(devAccIn, 1, queue)) { + alpaka::memset(queue, hitRanges_buf, 0xff); + alpaka::memset(queue, hitRangesLower_buf, 0xff); + alpaka::memset(queue, hitRangesUpper_buf, 0xff); + alpaka::memset(queue, hitRangesnLower_buf, 0xff); + alpaka::memset(queue, hitRangesnUpper_buf, 0xff); + alpaka::memset(queue, mdRanges_buf, 0xff); + alpaka::memset(queue, segmentRanges_buf, 0xff); + alpaka::memset(queue, trackletRanges_buf, 0xff); + alpaka::memset(queue, tripletRanges_buf, 0xff); + alpaka::memset(queue, trackCandidateRanges_buf, 0xff); + alpaka::memset(queue, quintupletRanges_buf, 0xff); + alpaka::memset(queue, quintupletModuleIndices_buf, 0xff); + data_.setData(*this); + } + + inline ObjectRanges const* data() const { return &data_; } + void setData(ObjectRangesBuffer& buf) { data_.setData(buf); } + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h new file mode 100644 index 0000000000000..e773bdf9ce5b0 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -0,0 +1,931 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h +#define RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include 
"RecoTracker/LSTCore/interface/Module.h" + +#include "Segment.h" +#include "MiniDoublet.h" +#include "Hit.h" +#include "Triplet.h" +#include "Quintuplet.h" +#include "PixelTriplet.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + struct PixelQuintuplets { + unsigned int* pixelIndices; + unsigned int* T5Indices; + unsigned int* nPixelQuintuplets; + unsigned int* totOccupancyPixelQuintuplets; + bool* isDup; + FPX* score; + FPX* eta; + FPX* phi; + uint8_t* logicalLayers; + unsigned int* hitIndices; + uint16_t* lowerModuleIndices; + FPX* pixelRadius; + FPX* quintupletRadius; + FPX* centerX; + FPX* centerY; + float* rzChiSquared; + float* rPhiChiSquared; + float* rPhiChiSquaredInwards; + + template + void setData(TBuff& buf) { + pixelIndices = buf.pixelIndices_buf.data(); + T5Indices = buf.T5Indices_buf.data(); + nPixelQuintuplets = buf.nPixelQuintuplets_buf.data(); + totOccupancyPixelQuintuplets = buf.totOccupancyPixelQuintuplets_buf.data(); + isDup = buf.isDup_buf.data(); + score = buf.score_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + pixelRadius = buf.pixelRadius_buf.data(); + quintupletRadius = buf.quintupletRadius_buf.data(); + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); + rPhiChiSquared = buf.rPhiChiSquared_buf.data(); + rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); + } + }; + + template + struct PixelQuintupletsBuffer { + Buf pixelIndices_buf; + Buf T5Indices_buf; + Buf nPixelQuintuplets_buf; + Buf totOccupancyPixelQuintuplets_buf; + Buf isDup_buf; + Buf score_buf; + Buf eta_buf; + Buf phi_buf; + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf lowerModuleIndices_buf; + Buf pixelRadius_buf; + Buf quintupletRadius_buf; + Buf centerX_buf; + Buf centerY_buf; + Buf rzChiSquared_buf; + Buf 
rPhiChiSquared_buf; + Buf rPhiChiSquaredInwards_buf; + + PixelQuintuplets data_; + + template + PixelQuintupletsBuffer(unsigned int maxPixelQuintuplets, TDevAcc const& devAccIn, TQueue& queue) + : pixelIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + T5Indices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + nPixelQuintuplets_buf(allocBufWrapper(devAccIn, 1, queue)), + totOccupancyPixelQuintuplets_buf(allocBufWrapper(devAccIn, 1, queue)), + isDup_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + score_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + eta_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + phi_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + logicalLayers_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kLayers, queue)), + hitIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kHits, queue)), + lowerModuleIndices_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets * Params_pT5::kLayers, queue)), + pixelRadius_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + quintupletRadius_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + centerX_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + centerY_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + rzChiSquared_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + rPhiChiSquared_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)), + rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelQuintuplets, queue)) { + alpaka::memset(queue, nPixelQuintuplets_buf, 0u); + alpaka::memset(queue, totOccupancyPixelQuintuplets_buf, 0u); + } + + inline PixelQuintuplets const* data() const { return &data_; } + inline void setData(PixelQuintupletsBuffer& buf) { data_.setData(buf); } + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + 
Quintuplets const& quintupletsInGPU, + PixelQuintuplets& pixelQuintupletsInGPU, + unsigned int pixelIndex, + unsigned int T5Index, + unsigned int pixelQuintupletIndex, + float rzChiSquared, + float rPhiChiSquared, + float rPhiChiSquaredInwards, + float score, + float eta, + float phi, + float pixelRadius, + float quintupletRadius, + float centerX, + float centerY) { + pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex] = pixelIndex; + pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex] = T5Index; + pixelQuintupletsInGPU.isDup[pixelQuintupletIndex] = false; + pixelQuintupletsInGPU.score[pixelQuintupletIndex] = __F2H(score); + pixelQuintupletsInGPU.eta[pixelQuintupletIndex] = __F2H(eta); + pixelQuintupletsInGPU.phi[pixelQuintupletIndex] = __F2H(phi); + + pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex] = __F2H(pixelRadius); + pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex] = __F2H(quintupletRadius); + pixelQuintupletsInGPU.centerX[pixelQuintupletIndex] = __F2H(centerX); + pixelQuintupletsInGPU.centerY[pixelQuintupletIndex] = __F2H(centerY); + + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex] = 0; + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 1] = 0; + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 2] = + quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers]; + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 3] = + quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 1]; + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 4] = + quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 2]; + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 5] = + quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 3]; + pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex + 6] = + 
quintupletsInGPU.logicalLayers[T5Index * Params_T5::kLayers + 4]; + + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex] = + segmentsInGPU.innerLowerModuleIndices[pixelIndex]; + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 1] = + segmentsInGPU.outerLowerModuleIndices[pixelIndex]; + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 2] = + quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers]; + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 3] = + quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 1]; + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 4] = + quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 2]; + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 5] = + quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 3]; + pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex + 6] = + quintupletsInGPU.lowerModuleIndices[T5Index * Params_T5::kLayers + 4]; + + unsigned int pixelInnerMD = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelIndex]; + unsigned int pixelOuterMD = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelIndex + 1]; + + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex] = + mdsInGPU.anchorHitIndices[pixelInnerMD]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 1] = + mdsInGPU.outerHitIndices[pixelInnerMD]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 2] = + mdsInGPU.anchorHitIndices[pixelOuterMD]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 3] = + mdsInGPU.outerHitIndices[pixelOuterMD]; + + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 4] = + quintupletsInGPU.hitIndices[Params_T5::kHits * 
T5Index]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 5] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 1]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 6] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 2]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 7] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 3]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 8] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 4]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 9] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 5]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 10] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 6]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 11] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 7]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 12] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 8]; + pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex + 13] = + quintupletsInGPU.hitIndices[Params_T5::kHits * T5Index + 9]; + + pixelQuintupletsInGPU.rzChiSquared[pixelQuintupletIndex] = rzChiSquared; + pixelQuintupletsInGPU.rPhiChiSquared[pixelQuintupletIndex] = rPhiChiSquared; + pixelQuintupletsInGPU.rPhiChiSquaredInwards[pixelQuintupletIndex] = rPhiChiSquaredInwards; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(Modules const& modulesInGPU, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rzChiSquared) { + const int layer1 = + modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + + 5 * 
(modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + const int layer2 = + modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + const int layer3 = + modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + const int layer4 = + modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS); + const int layer5 = + modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rzChiSquared < 451.141f; + } else if (layer4 == 4 and layer5 == 12) { + return rzChiSquared < 392.654f; + } else if (layer4 == 4 and layer5 == 5) { + return rzChiSquared < 225.322f; + } else if (layer4 == 7 and layer5 == 13) { + return rzChiSquared < 595.546f; + } else if (layer4 == 7 and layer5 == 8) { + return rzChiSquared < 196.111f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rzChiSquared < 297.446f; + } else if (layer4 == 8 and layer5 == 14) { + return rzChiSquared < 451.141f; + } else if (layer4 == 8 and layer5 == 9) { + return rzChiSquared < 518.339f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rzChiSquared < 341.75f; + } else if (layer4 == 9 and layer5 == 15) { + return rzChiSquared < 
341.75f;
+      }
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) {
+      if (layer4 == 12 and layer5 == 13) {
+        return rzChiSquared < 392.655f;
+      } else if (layer4 == 5 and layer5 == 12) {
+        return rzChiSquared < 341.75f;
+      } else if (layer4 == 5 and layer5 == 6) {
+        return rzChiSquared < 112.537f;
+      }
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) {  // fixed: was 'layer4 == 7', a dead branch (inner checks need layer4 == 13 or 8); rPhi twin uses layer3 == 7
+      if (layer4 == 13 and layer5 == 14) {
+        return rzChiSquared < 595.545f;
+      } else if (layer4 == 8 and layer5 == 14) {
+        return rzChiSquared < 74.198f;
+      }
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) {
+      if (layer4 == 14 and layer5 == 15) {
+        return rzChiSquared < 518.339f;
+      } else if (layer4 == 9 and layer5 == 10) {
+        return rzChiSquared < 8.046f;
+      } else if (layer4 == 9 and layer5 == 15) {
+        return rzChiSquared < 451.141f;
+      }
+    } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) {
+      return rzChiSquared < 56.207f;
+    } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) {
+      if (layer4 == 10 and layer5 == 11) {
+        return rzChiSquared < 64.578f;
+      } else if (layer4 == 10 and layer5 == 16) {
+        return rzChiSquared < 85.250f;
+      } else if (layer4 == 15 and layer5 == 16) {
+        return rzChiSquared < 85.250f;
+      }
+    }
+    return true;
+  }
+
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(Modules const& modulesInGPU,
+                                                                uint16_t lowerModuleIndex1,
+                                                                uint16_t lowerModuleIndex2,
+                                                                uint16_t lowerModuleIndex3,
+                                                                uint16_t lowerModuleIndex4,
+                                                                uint16_t lowerModuleIndex5,
+                                                                float rPhiChiSquared) {
+    const int layer1 =
+        modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) +
+        5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS);
+    const int layer2 =
+        modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) +
+        5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == 
TwoS); + const int layer3 = + modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + const int layer4 = + modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS); + const int layer5 = + modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 48.921f; + } else if (layer4 == 4 and layer5 == 12) { + return rPhiChiSquared < 97.948f; + } else if (layer4 == 4 and layer5 == 5) { + return rPhiChiSquared < 129.3f; + } else if (layer4 == 7 and layer5 == 13) { + return rPhiChiSquared < 56.21f; + } else if (layer4 == 7 and layer5 == 8) { + return rPhiChiSquared < 74.198f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 21.265f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 37.058f; + } else if (layer4 == 8 and layer5 == 9) { + return rPhiChiSquared < 42.578f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 32.253f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 37.058f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 97.947f; + } else if (layer4 == 5 and layer5 == 12) { + return rPhiChiSquared < 129.3f; + } else if (layer4 == 5 and layer5 == 6) { + return rPhiChiSquared < 170.68f; + } + } else if (layer1 == 2 
and layer2 == 3 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 48.92f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 74.2f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 42.58f; + } else if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 37.06f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 48.92f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 85.25f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return rPhiChiSquared < 42.58f; + } else if (layer4 == 10 and layer5 == 16) { + return rPhiChiSquared < 37.06f; + } else if (layer4 == 15 and layer5 == 16) { + return rPhiChiSquared < 37.06f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT5(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + /* + Given values of (g, f, radius) and a set of points (and its uncertainties) compute chi squared + */ + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = + ((slopes[i] != lst_INF) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); + if (xs[i] > 0 and ys[i] > 0) { + angleM = 0.5f * float(M_PI) - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + 0.5f * float(M_PI); + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + 0.5f * float(M_PI)); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(0.5f * float(M_PI) - absArctanSlope); + } else { + angleM = 0; + } + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma2); + } + return chiSquared; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, + Modules const& modulesInGPU, + const uint16_t* lowerModuleIndices, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + unsigned int nPoints = 5, + bool anchorHits = true) { + /* + bool anchorHits required to deal with a weird edge case wherein + the hits ultimately used in the regression are anchor hits, but the + lower modules need not all be Pixel Modules (in case of PS). Similarly, + when we compute the chi squared for the non-anchor hits, the "partner module" + need not always be a PS strip module, but all non-anchor hits sit on strip + modules. 
+ */ + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + float inv3 = kStripPSZpitch / kWidth2S; + for (size_t i = 0; i < nPoints; i++) { + moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; + moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; + moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; + slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + + if (anchorHits) { + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } else { + delta2[i] = (inv3 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side! + */ + if (anchorHits) { + delta2[i] = inv2; + } else { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 500.f * inv1; + isFlat[i] = false; + } +#ifdef WARNINGS + else { + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! 
subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); + } +#endif + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t* lowerModuleIndices, + float g, + float f, + float radius, + float* xs, + float* ys) { + /* + Compute circle parameters from 3 pixel hits, and then use them to compute the chi squared for the outer hits + */ + + float delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + float chiSquared = 0; + + computeSigmasForRegression_pT5(acc, modulesInGPU, lowerModuleIndices, delta1, delta2, slopes, isFlat); + chiSquared = computeChiSquaredpT5(acc, 5, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); + + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquaredInwards( + float g, float f, float r, float* xPix, float* yPix) { + /* + Using the computed regression center and radius, compute the chi squared for the pixels + */ + + float chiSquared = 0; + for (size_t i = 0; i < 2; i++) { + float residual = (xPix[i] - g) * (xPix[i] - g) + (yPix[i] - f) * (yPix[i] - f) - r * r; + chiSquared += residual * residual; + } + chiSquared *= 0.5f; + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rPhiChiSquared) { + const int layer1 = + modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + const int layer2 = + modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + const int 
layer3 = + modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + const int layer4 = + modulesInGPU.layers[lowerModuleIndex4] + 6 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS); + const int layer5 = + modulesInGPU.layers[lowerModuleIndex5] + 6 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 451.141f; + } else if (layer4 == 4 and layer5 == 12) { + return rPhiChiSquared < 786.173f; + } else if (layer4 == 4 and layer5 == 5) { + return rPhiChiSquared < 595.545f; + } else if (layer4 == 7 and layer5 == 13) { + return rPhiChiSquared < 581.339f; + } else if (layer4 == 7 and layer5 == 8) { + return rPhiChiSquared < 112.537f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 225.322f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 1192.402f; + } else if (layer4 == 8 and layer5 == 9) { + return rPhiChiSquared < 786.173f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 1037.817f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 1808.536f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 684.253f; + } else if (layer4 == 5 and layer5 == 12) { + return rPhiChiSquared < 684.253f; + } else if (layer4 == 5 and layer5 == 6) { + return rPhiChiSquared < 684.253f; + } + } else if (layer1 == 2 
and layer2 == 3 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 451.141f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 518.34f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 2077.92f; + } else if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 74.20f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 1808.536f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 786.173f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return rPhiChiSquared < 1574.076f; + } else if (layer4 == 10 and layer5 == 16) { + return rPhiChiSquared < 5492.11f; + } else if (layer4 == 15 and layer5 == 16) { + return rPhiChiSquared < 2743.037f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t* lowerModuleIndices, + float* rtPix, + float* zPix, + float* rts, + float* zs) { + //use the two anchor hits of the pixel segment to compute the slope + //then compute the pseudo chi squared of the five outer hits + + float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]); + float residual = 0; + float error2 = 0; + //hardcoded array indices!!! + float RMSE = 0; + for (size_t i = 0; i < Params_T5::kLayers; i++) { + uint16_t& lowerModuleIndex = lowerModuleIndices[i]; + const int moduleType = modulesInGPU.moduleType[lowerModuleIndex]; + const int moduleSide = modulesInGPU.sides[lowerModuleIndex]; + const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex]; + + residual = (moduleSubdet == Barrel) ? 
(zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) + : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndex]; + //PS Modules + if (moduleType == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //special dispensation to tilted PS modules! + if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) { + error2 /= (1.f + drdz * drdz); + } + RMSE += (residual * residual) / error2; + } + + RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. + return RMSE; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + Quintuplets const& quintupletsInGPU, + unsigned int pixelSegmentIndex, + unsigned int quintupletIndex, + float& rzChiSquared, + float& rPhiChiSquared, + float& rPhiChiSquaredInwards, + float& pixelRadius, + float& quintupletRadius, + float& centerX, + float& centerY, + unsigned int pixelSegmentArrayIndex) { + unsigned int T5InnerT3Index = quintupletsInGPU.tripletIndices[2 * quintupletIndex]; + unsigned int T5OuterT3Index = quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]; + + float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp, + centerYTemp; + + if (not runPixelTripletDefaultAlgo(acc, + modulesInGPU, + rangesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + pixelSegmentIndex, + T5InnerT3Index, + pixelRadiusTemp, + tripletRadius, + centerXTemp, + centerYTemp, + rzChiSquaredTemp, + rPhiChiSquaredTemp, + rPhiChiSquaredInwardsTemp, + false)) + return false; + + unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * T5InnerT3Index]; + unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * 
T5InnerT3Index + 1]; + unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index]; + unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * T5OuterT3Index + 1]; + + unsigned int pixelInnerMDIndex = segmentsInGPU.mdIndices[2 * pixelSegmentIndex]; + unsigned int pixelOuterMDIndex = segmentsInGPU.mdIndices[2 * pixelSegmentIndex + 1]; + unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; + unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; + unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; + unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; + unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; + + uint16_t lowerModuleIndex1 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex]; + uint16_t lowerModuleIndex2 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1]; + uint16_t lowerModuleIndex3 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 2]; + uint16_t lowerModuleIndex4 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 3]; + uint16_t lowerModuleIndex5 = quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 4]; + + uint16_t lowerModuleIndices[Params_T5::kLayers] = { + lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + float zPix[Params_pLS::kLayers] = {mdsInGPU.anchorZ[pixelInnerMDIndex], mdsInGPU.anchorZ[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {mdsInGPU.anchorRt[pixelInnerMDIndex], mdsInGPU.anchorRt[pixelOuterMDIndex]}; + float zs[Params_T5::kLayers] = {mdsInGPU.anchorZ[firstMDIndex], + mdsInGPU.anchorZ[secondMDIndex], + mdsInGPU.anchorZ[thirdMDIndex], + mdsInGPU.anchorZ[fourthMDIndex], + mdsInGPU.anchorZ[fifthMDIndex]}; + float rts[Params_T5::kLayers] = {mdsInGPU.anchorRt[firstMDIndex], + 
mdsInGPU.anchorRt[secondMDIndex], + mdsInGPU.anchorRt[thirdMDIndex], + mdsInGPU.anchorRt[fourthMDIndex], + mdsInGPU.anchorRt[fifthMDIndex]}; + + rzChiSquared = computePT5RZChiSquared(acc, modulesInGPU, lowerModuleIndices, rtPix, zPix, rts, zs); + + if (/*pixelRadius*/ 0 < 5.0f * kR1GeVf) { // FIXME: pixelRadius is not defined yet + if (not passPT5RZChiSquaredCuts(modulesInGPU, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rzChiSquared)) + return false; + } + + //outer T5 + float xs[Params_T5::kLayers] = {mdsInGPU.anchorX[firstMDIndex], + mdsInGPU.anchorX[secondMDIndex], + mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorX[fourthMDIndex], + mdsInGPU.anchorX[fifthMDIndex]}; + float ys[Params_T5::kLayers] = {mdsInGPU.anchorY[firstMDIndex], + mdsInGPU.anchorY[secondMDIndex], + mdsInGPU.anchorY[thirdMDIndex], + mdsInGPU.anchorY[fourthMDIndex], + mdsInGPU.anchorY[fifthMDIndex]}; + + //get the appropriate radii and centers + centerX = segmentsInGPU.circleCenterX[pixelSegmentArrayIndex]; + centerY = segmentsInGPU.circleCenterY[pixelSegmentArrayIndex]; + pixelRadius = segmentsInGPU.circleRadius[pixelSegmentArrayIndex]; + + float T5CenterX = quintupletsInGPU.regressionG[quintupletIndex]; + float T5CenterY = quintupletsInGPU.regressionF[quintupletIndex]; + quintupletRadius = quintupletsInGPU.regressionRadius[quintupletIndex]; + + rPhiChiSquared = + computePT5RPhiChiSquared(acc, modulesInGPU, lowerModuleIndices, centerX, centerY, pixelRadius, xs, ys); + + if (pixelRadius < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredCuts(modulesInGPU, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rPhiChiSquared)) + return false; + } + + float xPix[] = {mdsInGPU.anchorX[pixelInnerMDIndex], mdsInGPU.anchorX[pixelOuterMDIndex]}; + float yPix[] = {mdsInGPU.anchorY[pixelInnerMDIndex], mdsInGPU.anchorY[pixelOuterMDIndex]}; + rPhiChiSquaredInwards = 
computePT5RPhiChiSquaredInwards(T5CenterX, T5CenterY, quintupletRadius, xPix, yPix); + + if (quintupletsInGPU.regressionRadius[quintupletIndex] < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredInwardsCuts(modulesInGPU, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rPhiChiSquaredInwards)) + return false; + } + //trusting the T5 regression center to also be a good estimate.. + centerX = (centerX + T5CenterX) / 2; + centerY = (centerY + T5CenterY) / 2; + + return true; + } + + struct CreatePixelQuintupletsInGPUFromMapv2 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + Quintuplets quintupletsInGPU, + PixelQuintuplets pixelQuintupletsInGPU, + unsigned int* connectedPixelSize, + unsigned int* connectedPixelIndex, + unsigned int nPixelSegments, + ObjectRanges rangesInGPU) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int i_pLS = globalThreadIdx[1]; i_pLS < nPixelSegments; i_pLS += gridThreadExtent[1]) { + auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS]; + for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max; + iLSModule += gridBlockExtent[0]) { + //these are actual module indices + uint16_t quintupletLowerModuleIndex = modulesInGPU.connectedPixels[iLSModule]; + if (quintupletLowerModuleIndex >= *modulesInGPU.nLowerModules) + continue; + if (modulesInGPU.moduleType[quintupletLowerModuleIndex] == TwoS) + continue; + uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules; + if (segmentsInGPU.isDup[i_pLS]) + continue; + unsigned int nOuterQuintuplets = quintupletsInGPU.nQuintuplets[quintupletLowerModuleIndex]; + + if 
(nOuterQuintuplets == 0) + continue; + + unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + i_pLS; + + //fetch the quintuplet + for (unsigned int outerQuintupletArrayIndex = globalThreadIdx[2]; + outerQuintupletArrayIndex < nOuterQuintuplets; + outerQuintupletArrayIndex += gridThreadExtent[2]) { + unsigned int quintupletIndex = + rangesInGPU.quintupletModuleIndices[quintupletLowerModuleIndex] + outerQuintupletArrayIndex; + + if (quintupletsInGPU.isDup[quintupletIndex]) + continue; + + float rzChiSquared, rPhiChiSquared, rPhiChiSquaredInwards, pixelRadius, quintupletRadius, centerX, centerY; + + bool success = runPixelQuintupletDefaultAlgo(acc, + modulesInGPU, + rangesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + quintupletsInGPU, + pixelSegmentIndex, + quintupletIndex, + rzChiSquared, + rPhiChiSquared, + rPhiChiSquaredInwards, + pixelRadius, + quintupletRadius, + centerX, + centerY, + static_cast(i_pLS)); + if (success) { + unsigned int totOccupancyPixelQuintuplets = alpaka::atomicAdd( + acc, pixelQuintupletsInGPU.totOccupancyPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyPixelQuintuplets >= n_max_pixel_quintuplets) { +#ifdef WARNINGS + printf("Pixel Quintuplet excess alert!\n"); +#endif + } else { + unsigned int pixelQuintupletIndex = + alpaka::atomicAdd(acc, pixelQuintupletsInGPU.nPixelQuintuplets, 1u, alpaka::hierarchy::Threads{}); + float eta = __H2F(quintupletsInGPU.eta[quintupletIndex]); + float phi = __H2F(quintupletsInGPU.phi[quintupletIndex]); + + addPixelQuintupletToMemory(modulesInGPU, + mdsInGPU, + segmentsInGPU, + quintupletsInGPU, + pixelQuintupletsInGPU, + pixelSegmentIndex, + quintupletIndex, + pixelQuintupletIndex, + rzChiSquared, + rPhiChiSquared, + rPhiChiSquaredInwards, + rPhiChiSquared, + eta, + phi, + pixelRadius, + quintupletRadius, + centerX, + centerY); + + tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; + 
tripletsInGPU.partOfPT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; + segmentsInGPU.partOfPT5[i_pLS] = true; + quintupletsInGPU.partOfPT5[quintupletIndex] = true; + } // tot occupancy + } // end success + } // end T5 + } // end iLS + } // end i_pLS + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h new file mode 100644 index 0000000000000..70c269dce6c10 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -0,0 +1,1678 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_PixelTriplet_h +#define RecoTracker_LSTCore_src_alpaka_PixelTriplet_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" + +#include "Triplet.h" +#include "Segment.h" +#include "MiniDoublet.h" +#include "Hit.h" +#include "ObjectRanges.h" +#include "Quintuplet.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + // One pixel segment, one outer tracker triplet! 
+ struct PixelTriplets { + unsigned int* pixelSegmentIndices; + unsigned int* tripletIndices; + unsigned int* nPixelTriplets; + unsigned int* totOccupancyPixelTriplets; + + float* rPhiChiSquared; + float* rPhiChiSquaredInwards; + float* rzChiSquared; + + FPX* pixelRadius; + FPX* tripletRadius; + FPX* pt; + FPX* eta; + FPX* phi; + FPX* eta_pix; + FPX* phi_pix; + FPX* score; + bool* isDup; + bool* partOfPT5; + + uint8_t* logicalLayers; + unsigned int* hitIndices; + uint16_t* lowerModuleIndices; + FPX* centerX; + FPX* centerY; + + template + void setData(TBuff& buf) { + pixelSegmentIndices = buf.pixelSegmentIndices_buf.data(); + tripletIndices = buf.tripletIndices_buf.data(); + nPixelTriplets = buf.nPixelTriplets_buf.data(); + totOccupancyPixelTriplets = buf.totOccupancyPixelTriplets_buf.data(); + pixelRadius = buf.pixelRadius_buf.data(); + tripletRadius = buf.tripletRadius_buf.data(); + pt = buf.pt_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + eta_pix = buf.eta_pix_buf.data(); + phi_pix = buf.phi_pix_buf.data(); + score = buf.score_buf.data(); + isDup = buf.isDup_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + rPhiChiSquared = buf.rPhiChiSquared_buf.data(); + rPhiChiSquaredInwards = buf.rPhiChiSquaredInwards_buf.data(); + rzChiSquared = buf.rzChiSquared_buf.data(); + } + }; + + template + struct PixelTripletsBuffer { + Buf pixelSegmentIndices_buf; + Buf tripletIndices_buf; + Buf nPixelTriplets_buf; + Buf totOccupancyPixelTriplets_buf; + Buf pixelRadius_buf; + Buf tripletRadius_buf; + Buf pt_buf; + Buf eta_buf; + Buf phi_buf; + Buf eta_pix_buf; + Buf phi_pix_buf; + Buf score_buf; + Buf isDup_buf; + Buf partOfPT5_buf; + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf lowerModuleIndices_buf; + Buf centerX_buf; + Buf 
centerY_buf; + Buf pixelRadiusError_buf; + Buf rPhiChiSquared_buf; + Buf rPhiChiSquaredInwards_buf; + Buf rzChiSquared_buf; + + PixelTriplets data_; + + template + PixelTripletsBuffer(unsigned int maxPixelTriplets, TDevAcc const& devAccIn, TQueue& queue) + : pixelSegmentIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + tripletIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + nPixelTriplets_buf(allocBufWrapper(devAccIn, 1, queue)), + totOccupancyPixelTriplets_buf(allocBufWrapper(devAccIn, 1, queue)), + pixelRadius_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + tripletRadius_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + pt_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + eta_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + phi_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + eta_pix_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + phi_pix_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + score_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + isDup_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + partOfPT5_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + logicalLayers_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kLayers, queue)), + hitIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kHits, queue)), + lowerModuleIndices_buf(allocBufWrapper(devAccIn, maxPixelTriplets * Params_pT3::kLayers, queue)), + centerX_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + centerY_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + pixelRadiusError_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + rPhiChiSquared_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + rPhiChiSquaredInwards_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)), + rzChiSquared_buf(allocBufWrapper(devAccIn, maxPixelTriplets, queue)) { + alpaka::memset(queue, nPixelTriplets_buf, 0u); + alpaka::memset(queue, 
totOccupancyPixelTriplets_buf, 0u); + alpaka::memset(queue, partOfPT5_buf, false); + } + + inline PixelTriplets const* data() const { return &data_; } + inline void setData(PixelTripletsBuffer& buf) { data_.setData(buf); } + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets const& tripletsInGPU, + PixelTriplets& pixelTripletsInGPU, + unsigned int pixelSegmentIndex, + unsigned int tripletIndex, + float pixelRadius, + float tripletRadius, + float centerX, + float centerY, + float rPhiChiSquared, + float rPhiChiSquaredInwards, + float rzChiSquared, + unsigned int pixelTripletIndex, + float pt, + float eta, + float phi, + float eta_pix, + float phi_pix, + float score) { + pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex] = pixelSegmentIndex; + pixelTripletsInGPU.tripletIndices[pixelTripletIndex] = tripletIndex; + pixelTripletsInGPU.pixelRadius[pixelTripletIndex] = __F2H(pixelRadius); + pixelTripletsInGPU.tripletRadius[pixelTripletIndex] = __F2H(tripletRadius); + pixelTripletsInGPU.pt[pixelTripletIndex] = __F2H(pt); + pixelTripletsInGPU.eta[pixelTripletIndex] = __F2H(eta); + pixelTripletsInGPU.phi[pixelTripletIndex] = __F2H(phi); + pixelTripletsInGPU.eta_pix[pixelTripletIndex] = __F2H(eta_pix); + pixelTripletsInGPU.phi_pix[pixelTripletIndex] = __F2H(phi_pix); + pixelTripletsInGPU.isDup[pixelTripletIndex] = false; + pixelTripletsInGPU.score[pixelTripletIndex] = __F2H(score); + + pixelTripletsInGPU.centerX[pixelTripletIndex] = __F2H(centerX); + pixelTripletsInGPU.centerY[pixelTripletIndex] = __F2H(centerY); + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex] = 0; + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 1] = 0; + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 2] = + tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers]; + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * 
pixelTripletIndex + 3] = + tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 1]; + pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex + 4] = + tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2]; + + pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex] = + segmentsInGPU.innerLowerModuleIndices[pixelSegmentIndex]; + pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 1] = + segmentsInGPU.outerLowerModuleIndices[pixelSegmentIndex]; + pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 2] = + tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex]; + pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 3] = + tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1]; + pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex + 4] = + tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2]; + + unsigned int pixelInnerMD = segmentsInGPU.mdIndices[2 * pixelSegmentIndex]; + unsigned int pixelOuterMD = segmentsInGPU.mdIndices[2 * pixelSegmentIndex + 1]; + + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex] = mdsInGPU.anchorHitIndices[pixelInnerMD]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 1] = mdsInGPU.outerHitIndices[pixelInnerMD]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 2] = mdsInGPU.anchorHitIndices[pixelOuterMD]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 3] = mdsInGPU.outerHitIndices[pixelOuterMD]; + + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 4] = + tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 5] = + tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 1]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * 
pixelTripletIndex + 6] = + tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 2]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 7] = + tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 3]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 8] = + tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 4]; + pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex + 9] = + tripletsInGPU.hitIndices[Params_T3::kHits * tripletIndex + 5]; + pixelTripletsInGPU.rPhiChiSquared[pixelTripletIndex] = rPhiChiSquared; + pixelTripletsInGPU.rPhiChiSquaredInwards[pixelTripletIndex] = rPhiChiSquaredInwards; + pixelTripletsInGPU.rzChiSquared[pixelTripletIndex] = rzChiSquared; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, + Modules const& modulesInGPU, + ObjectRanges const& rangesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t pixelLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex) { + short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + + unsigned int firstMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * innerSegmentIndex]; + unsigned int secondMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * innerSegmentIndex + 1]; + + unsigned int thirdMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex]; + unsigned int fourthMDIndex = segmentsInGPU.mdIndices[Params_LS::kLayers * outerSegmentIndex + 1]; + + if (outerInnerLowerModuleSubdet == Barrel and + (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { + return runTripletDefaultAlgoPPBB(acc, + modulesInGPU, + rangesInGPU, + mdsInGPU, + segmentsInGPU, + pixelLowerModuleIndex, + 
outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runTripletDefaultAlgoPPEE(acc, + modulesInGPU, + rangesInGPU, + mdsInGPU, + segmentsInGPU, + pixelLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } + return false; + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(Modules const& modulesInGPU, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float rzChiSquared) { + const int layer1 = + modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + const int layer2 = + modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + const int layer3 = + modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + + if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return rzChiSquared < 13.6067f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) { + return rzChiSquared < 5.5953f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return rzChiSquared < 3.9263f; + } + /* + else if(layer1 == 7 and layer2 == 8 and layer3 == 14) + { + // PS+PS+2S in endcap layers 1+2+3, which is not really feasible in the current geometry, + // without skipping barrel layers 1 and 2 (not allowed by 
algorithm logic). + } + */ + else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return rzChiSquared < 9.4377f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return rzChiSquared < 9.9975f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return rzChiSquared < 8.6369f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return rzChiSquared < 37.945f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) { + return rzChiSquared < 43.0167f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return rzChiSquared < 8.6923f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return rzChiSquared < 11.9672f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) { + return rzChiSquared < 16.2133f; + } + + //default - category not found! + return true; + }; + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT3(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + //given values of (g, f, radius) and a set of points (and its uncertainties) + //compute chi squared + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = + ((slopes[i] != lst_INF) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); + if (xs[i] > 0 and ys[i] > 0) { + angleM = 0.5f * float(M_PI) - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + 0.5f * float(M_PI); + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + 0.5f * float(M_PI)); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(0.5f * float(M_PI) - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; + } + return chiSquared; + }; + + //TODO: merge this one and the pT5 function later into a single function + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquared(TAcc const& acc, + Modules const& modulesInGPU, + uint16_t* lowerModuleIndices, + float g, + float f, + float radius, + float* xs, + float* ys) { + float delta1[3]{}, delta2[3]{}, slopes[3]; + bool isFlat[3]{}; + float chiSquared = 0; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + for (size_t i = 0; i < 3; i++) { + ModuleType moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; + short moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; + short moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; + float drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; + slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + 
slopes[i] = -999; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1; + delta2[i] = 1; + slopes[i] = -999; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + + /* + despite the type of the module layer of the lower module index, all anchor + hits are on the pixel side and all non-anchor hits are on the strip side! + */ + delta2[i] = inv2; + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1; + delta2[i] = 500 * inv1; + isFlat[i] = false; + } +#ifdef WARNINGS + else { + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); + } +#endif + } + chiSquared = computeChiSquaredpT3(acc, 3, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); + + return chiSquared; + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquaredInwards( + float g, float f, float r, float* xPix, float* yPix) { + float residual = (xPix[0] - g) * (xPix[0] - g) + (yPix[0] - f) * (yPix[0] - f) - r * r; + float chiSquared = residual * residual; + residual = (xPix[1] - g) * (xPix[1] - g) + (yPix[1] - f) * (yPix[1] - f) - r * r; + chiSquared += residual * residual; + + chiSquared *= 0.5f; + return chiSquared; + }; + + //90pc threshold + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(Modules const& modulesInGPU, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float chiSquared) { + const int layer1 = + modulesInGPU.layers[lowerModuleIndex1] + 6 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + + 5 * 
(modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + const int layer2 = + modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + const int layer3 = + modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + + if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return chiSquared < 7.003f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) { + return chiSquared < 0.5f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return chiSquared < 8.046f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 14) { + return chiSquared < 0.575f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return chiSquared < 5.304f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return chiSquared < 10.6211f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return chiSquared < 4.617f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return chiSquared < 8.046f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) { + return chiSquared < 0.435f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return chiSquared < 9.244f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) { + return chiSquared < 0.287f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return chiSquared < 18.509f; + } + + return true; + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(Modules const& modulesInGPU, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float chiSquared) { + const int layer1 = + modulesInGPU.layers[lowerModuleIndex1] + 6 * 
(modulesInGPU.subdets[lowerModuleIndex1] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS); + const int layer2 = + modulesInGPU.layers[lowerModuleIndex2] + 6 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS); + const int layer3 = + modulesInGPU.layers[lowerModuleIndex3] + 6 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap) + + 5 * (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS); + + if (layer1 == 7 and layer2 == 8 and layer3 == 9) // endcap layer 1,2,3, ps + { + return chiSquared < 22016.8055f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 14) // endcap layer 1,2,3 layer3->2s + { + return chiSquared < 935179.56807f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 10) // endcap layer 2,3,4 + { + return chiSquared < 29064.12959f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) // endcap layer 2,3,4, layer3->2s + { + return chiSquared < 935179.5681f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) // barrel 1,2,3 + { + return chiSquared < 1370.0113195101474f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) // barrel 1,2 endcap 1 + { + return chiSquared < 5492.110048314815f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) // barrel 2,3,4 + { + return chiSquared < 4160.410806470067f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) // barrel 1, endcap 1,2 + { + return chiSquared < 29064.129591225726f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) // barrel 2,3 endcap 1 + { + return chiSquared < 12634.215376250893f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) // barrel 2,3, endcap 1->2s + { + return chiSquared < 353821.69361145404f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) // barrel2, endcap 1,2 + { + return 
chiSquared < 33393.26076341235f;
    } else if (layer1 == 2 and layer2 == 7 and layer3 == 13)  //barrel 2, endcap 1, endcap2->2s
    {
      return chiSquared < 935179.5680742573f;
    }

    return true;
  };

  // True when the two 1/R intervals [firstMin, firstMax) and [secondMin, secondMax) overlap.
  // Used below to compare the allowed inverse-radius ranges of the pixel and triplet circles.
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlappT3(float firstMin,
                                                              float firstMax,
                                                              float secondMin,
                                                              float secondMax) {
    return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax));
  };

  /*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */
  // Radius compatibility between the pixel-segment circle and the outer-tracker triplet circle
  // when all three triplet modules are in the barrel (BBB). Builds a fractional-error window
  // around each inverse radius (looser bounds when pixelRadius > 2*kR1GeVf, i.e. pT > 2 GeV)
  // and accepts when the two 1/R windows overlap.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBBB(TAcc const& acc,
                                                             float pixelRadius,
                                                             float pixelRadiusError,
                                                             float tripletRadius) {
    float tripletInvRadiusErrorBound = 0.15624f;
    float pixelInvRadiusErrorBound = 0.17235f;

    if (pixelRadius > 2.0f * kR1GeVf) {
      pixelInvRadiusErrorBound = 0.6375f;
      tripletInvRadiusErrorBound = 0.6588f;
    }

    float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius;
    float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f);

    // Pixel window: the wider of the fractional bound and the propagated radius error.
    float pixelRadiusInvMax =
        alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError));
    float pixelRadiusInvMin =
        alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError));

    return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax);
  };

  // Same as passRadiusCriterionBBB but for barrel-barrel-endcap (BBE) triplets;
  // only the tuned error bounds differ.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBBE(TAcc const& acc,
                                                             float pixelRadius,
                                                             float pixelRadiusError,
                                                             float tripletRadius) {
    float tripletInvRadiusErrorBound = 0.45972f;
    float pixelInvRadiusErrorBound = 0.19644f;

    if (pixelRadius > 2.0f * kR1GeVf) {
      pixelInvRadiusErrorBound = 0.6805f;
      tripletInvRadiusErrorBound = 0.8557f;
    }

    float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius;
    float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f);

    float pixelRadiusInvMax =
        alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError));
    float pixelRadiusInvMin =
        alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError));

    return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax);
  };

  // Barrel-endcap-endcap (BEE) variant. Bounds are much looser here; additionally the pixel
  // 1/R lower edge is clamped at 0 (the error bound can exceed 1).
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBEE(TAcc const& acc,
                                                             float pixelRadius,
                                                             float pixelRadiusError,
                                                             float tripletRadius) {
    float tripletInvRadiusErrorBound = 1.59294f;
    float pixelInvRadiusErrorBound = 0.255181f;

    if (pixelRadius > 2.0f * kR1GeVf)  //as good as not having selections
    {
      pixelInvRadiusErrorBound = 2.2091f;
      tripletInvRadiusErrorBound = 2.3548f;
    }

    float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius;
    float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f);

    float pixelRadiusInvMax =
        alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError));
    float pixelRadiusInvMin =
        alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError));
    pixelRadiusInvMin = alpaka::math::max(acc, pixelRadiusInvMin, 0.0f);

    return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax);
  };

  // Endcap-endcap-endcap (EEE) variant; loosest bounds, with the same clamp of the pixel
  // 1/R lower edge at 0.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionEEE(TAcc const& acc,
                                                             float pixelRadius,
                                                             float pixelRadiusError,
                                                             float tripletRadius) {
    float tripletInvRadiusErrorBound = 1.7006f;
    float pixelInvRadiusErrorBound = 0.26367f;

    if (pixelRadius > 2.0f * kR1GeVf)  //as good as not having selections
    {
      pixelInvRadiusErrorBound = 2.286f;
      tripletInvRadiusErrorBound = 2.436f;
    }

    float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius;
    float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f);

    float pixelRadiusInvMax =
        alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError));
    float pixelRadiusInvMin =
        alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError));
    pixelRadiusInvMin = alpaka::math::max(acc, 0.0f, pixelRadiusInvMin);

    return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax);
  };

  // Dispatch to the right radius criterion based on which of the three triplet modules
  // (innermost first) sit in the endcap. Checked innermost-out, so e.g. lower==Endcap
  // implies all three are endcap (EEE).
  // NOTE(review): lowerModuleIndex is declared int16_t while the sibling parameters and all
  // other module indices in this file are uint16_t — looks like an oversight; confirm before
  // relying on indices >= 32768.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterion(TAcc const& acc,
                                                          Modules const& modulesInGPU,
                                                          float pixelRadius,
                                                          float pixelRadiusError,
                                                          float tripletRadius,
                                                          int16_t lowerModuleIndex,
                                                          uint16_t middleModuleIndex,
                                                          uint16_t upperModuleIndex) {
    if (modulesInGPU.subdets[lowerModuleIndex] == Endcap) {
      return passRadiusCriterionEEE(acc, pixelRadius, pixelRadiusError, tripletRadius);
    } else if (modulesInGPU.subdets[middleModuleIndex] == Endcap) {
      return passRadiusCriterionBEE(acc, pixelRadius, pixelRadiusError, tripletRadius);
    } else if (modulesInGPU.subdets[upperModuleIndex] == Endcap) {
      return passRadiusCriterionBBE(acc, pixelRadius, pixelRadiusError, tripletRadius);
    } else {
      return passRadiusCriterionBBB(acc, pixelRadius, pixelRadiusError, tripletRadius);
    }
  };

  // RMS of the r-z residuals of the three triplet anchor hits w.r.t. the helix defined by the
  // pixel-segment momentum (px, py, pz) and charge, normalised by the module resolution
  // (PS vs 2S pitch). Inputs rt/x/y/z are in cm; the math below works in metres.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RZChiSquared(TAcc const& acc,
                                                              Modules const& modulesInGPU,
                                                              const uint16_t* lowerModuleIndices,
                                                              const float* rtPix,
                                                              const float* xPix,
                                                              const float* yPix,
                                                              const float* zPix,
                                                              const float* rts,
                                                              const float* xs,
                                                              const float* ys,
                                                              const float* zs,
                                                              float pixelSegmentPt,
                                                              float pixelSegmentPx,
                                                              float
pixelSegmentPy,
                                                              float pixelSegmentPz,
                                                              int pixelSegmentCharge) {
    float residual = 0;
    float error2 = 0;
    float RMSE = 0;

    float Px = pixelSegmentPx, Py = pixelSegmentPy, Pz = pixelSegmentPz;
    int charge = pixelSegmentCharge;
    // Reference point: the OUTER pixel hit (index 1), converted from cm to m.
    float x1 = xPix[1] / 100;
    float y1 = yPix[1] / 100;
    float z1 = zPix[1] / 100;
    float r1 = rtPix[1] / 100;

    float a = -2.f * k2Rinv1GeVf * 100 * charge;  // multiply by 100 to make the correct length units

    for (size_t i = 0; i < Params_T3::kLayers; i++) {
      float zsi = zs[i] / 100;
      float rtsi = rts[i] / 100;
      uint16_t lowerModuleIndex = lowerModuleIndices[i];
      const int moduleType = modulesInGPU.moduleType[lowerModuleIndex];
      const int moduleSide = modulesInGPU.sides[lowerModuleIndex];
      const int moduleSubdet = modulesInGPU.subdets[lowerModuleIndex];

      // calculation is detailed documented here https://indico.cern.ch/event/1185895/contributions/4982756/attachments/2526561/4345805/helix%20pT3%20summarize.pdf
      // NOTE(review): diffr is set only in the Endcap branch and diffz only in the Barrel
      // branch; a module in neither subdet would read an uninitialized value below —
      // presumably lowerModuleIndices are always Barrel or Endcap here. Confirm.
      float diffr, diffz;
      float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz);

      float rou = a / p;
      if (moduleSubdet == Endcap) {
        // Endcap: the disk fixes z; propagate the helix to that z and take the rt residual.
        float s = (zsi - z1) * p / Pz;
        float x = x1 + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s));
        float y = y1 + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s));
        diffr = alpaka::math::abs(acc, rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100;
      }

      if (moduleSubdet == Barrel) {
        // Barrel: the layer fixes rt; solve the helix-cylinder intersection (quadratic in
        // sin(rou*s)) and take the smaller of the two candidate z residuals.
        float paraA = r1 * r1 + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y1 * Px - x1 * Py) / a - rtsi * rtsi;
        float paraB = 2 * (x1 * Px + y1 * Py) / a;
        float paraC = 2 * (y1 * Px - x1 * Py) / a + 2 * (Px * Px + Py * Py) / (a * a);
        float A = paraB * paraB + paraC * paraC;
        float B = 2 * paraA * paraB;
        float C = paraA * paraA - paraC * paraC;
        float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A);
        float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A);
        float solz1 = alpaka::math::asin(acc, sol1) / rou * Pz / p + z1;
        float solz2 = alpaka::math::asin(acc, sol2) / rou * Pz / p + z1;
        float diffz1 = alpaka::math::abs(acc, solz1 - zsi) * 100;
        float diffz2 = alpaka::math::abs(acc, solz2 - zsi) * 100;
        diffz = alpaka::math::min(acc, diffz1, diffz2);
      }

      residual = moduleSubdet == Barrel ? diffz : diffr;

      //PS Modules
      if (moduleType == 0) {
        error2 = kPixelPSZpitch * kPixelPSZpitch;
      } else  //2S modules
      {
        error2 = kStrip2SZpitch * kStrip2SZpitch;
      }

      //special dispensation to tilted PS modules!
      if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) {
        float drdz = modulesInGPU.drdzs[lowerModuleIndex];
        error2 /= (1 + drdz * drdz);
      }
      RMSE += (residual * residual) / error2;
    }

    RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE);  // Divided by the degree of freedom 5.

    return RMSE;
  };

  // Full pT3 (pixel segment + outer-tracker triplet) selection. Runs the two pixel-vs-triplet
  // segment compatibility checks, the radius-matching criterion, and (for pT < 5 GeV, unless
  // runChiSquaredCuts is false) the r-z and r-phi chi-squared cuts. Outputs (radii, centers,
  // chi-squareds) are written through the reference parameters; returns true when the
  // candidate passes every cut.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc,
                                                                 Modules const& modulesInGPU,
                                                                 ObjectRanges const& rangesInGPU,
                                                                 MiniDoublets const& mdsInGPU,
                                                                 Segments const& segmentsInGPU,
                                                                 Triplets const& tripletsInGPU,
                                                                 unsigned int pixelSegmentIndex,
                                                                 unsigned int tripletIndex,
                                                                 float& pixelRadius,
                                                                 float& tripletRadius,
                                                                 float& centerX,
                                                                 float& centerY,
                                                                 float& rzChiSquared,
                                                                 float& rPhiChiSquared,
                                                                 float& rPhiChiSquaredInwards,
                                                                 bool runChiSquaredCuts = true) {
    //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet
    uint16_t pixelModuleIndex = segmentsInGPU.innerLowerModuleIndices[pixelSegmentIndex];

    uint16_t lowerModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex];
    uint16_t middleModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 1];
    uint16_t upperModuleIndex = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * tripletIndex + 2];

    {
      // pixel segment vs inner segment of the triplet
      if (not runPixelTrackletDefaultAlgopT3(acc,
                                             modulesInGPU,
                                             rangesInGPU,
                                             mdsInGPU,
                                             segmentsInGPU,
                                             pixelModuleIndex,
                                             lowerModuleIndex,
                                             middleModuleIndex,
                                             pixelSegmentIndex,
                                             tripletsInGPU.segmentIndices[Params_LS::kLayers * tripletIndex]))
        return false;

      //pixel segment vs outer segment of triplet
      if (not runPixelTrackletDefaultAlgopT3(acc,
                                             modulesInGPU,
                                             rangesInGPU,
                                             mdsInGPU,
                                             segmentsInGPU,
                                             pixelModuleIndex,
                                             middleModuleIndex,
                                             upperModuleIndex,
                                             pixelSegmentIndex,
                                             tripletsInGPU.segmentIndices[Params_LS::kLayers * tripletIndex + 1]))
        return false;
    }

    //pt matching between the pixel ptin and the triplet circle pt
    unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex];
    float pixelSegmentPt = segmentsInGPU.ptIn[pixelSegmentArrayIndex];
    float pixelSegmentPtError = segmentsInGPU.ptErr[pixelSegmentArrayIndex];
    float pixelSegmentPx = segmentsInGPU.px[pixelSegmentArrayIndex];
    float pixelSegmentPy = segmentsInGPU.py[pixelSegmentArrayIndex];
    float pixelSegmentPz = segmentsInGPU.pz[pixelSegmentArrayIndex];
    int pixelSegmentCharge = segmentsInGPU.charge[pixelSegmentArrayIndex];

    float pixelG = segmentsInGPU.circleCenterX[pixelSegmentArrayIndex];
    float pixelF = segmentsInGPU.circleCenterY[pixelSegmentArrayIndex];
    float pixelRadiusPCA = segmentsInGPU.circleRadius[pixelSegmentArrayIndex];

    unsigned int pixelInnerMDIndex = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelSegmentIndex];
    unsigned int pixelOuterMDIndex = segmentsInGPU.mdIndices[Params_pLS::kLayers * pixelSegmentIndex + 1];

    pixelRadius = pixelSegmentPt * kR1GeVf;
    float pixelRadiusError = pixelSegmentPtError * kR1GeVf;
    unsigned int tripletInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex];
    unsigned int tripletOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * tripletIndex + 1];

    unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * tripletInnerSegmentIndex];
    unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * tripletInnerSegmentIndex + 1];
    unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * tripletOuterSegmentIndex + 1];

    float xs[Params_T3::kLayers] = {
        mdsInGPU.anchorX[firstMDIndex], mdsInGPU.anchorX[secondMDIndex], mdsInGPU.anchorX[thirdMDIndex]};
    float ys[Params_T3::kLayers] = {
        mdsInGPU.anchorY[firstMDIndex], mdsInGPU.anchorY[secondMDIndex], mdsInGPU.anchorY[thirdMDIndex]};

    float g, f;
    tripletRadius = tripletsInGPU.circleRadius[tripletIndex];
    g = tripletsInGPU.circleCenterX[tripletIndex];
    f = tripletsInGPU.circleCenterY[tripletIndex];

    if (not passRadiusCriterion(acc,
                                modulesInGPU,
                                pixelRadius,
                                pixelRadiusError,
                                tripletRadius,
                                lowerModuleIndex,
                                middleModuleIndex,
                                upperModuleIndex))
      return false;

    uint16_t lowerModuleIndices[Params_T3::kLayers] = {lowerModuleIndex, middleModuleIndex, upperModuleIndex};

    // Chi-squared cuts are only applied for low-pT candidates (< 5 GeV).
    if (runChiSquaredCuts and pixelSegmentPt < 5.0f) {
      float rts[Params_T3::kLayers] = {
          mdsInGPU.anchorRt[firstMDIndex], mdsInGPU.anchorRt[secondMDIndex], mdsInGPU.anchorRt[thirdMDIndex]};
      float zs[Params_T3::kLayers] = {
          mdsInGPU.anchorZ[firstMDIndex], mdsInGPU.anchorZ[secondMDIndex], mdsInGPU.anchorZ[thirdMDIndex]};
      float rtPix[Params_pLS::kLayers] = {mdsInGPU.anchorRt[pixelInnerMDIndex], mdsInGPU.anchorRt[pixelOuterMDIndex]};
      float xPix[Params_pLS::kLayers] = {mdsInGPU.anchorX[pixelInnerMDIndex], mdsInGPU.anchorX[pixelOuterMDIndex]};
      float yPix[Params_pLS::kLayers] = {mdsInGPU.anchorY[pixelInnerMDIndex], mdsInGPU.anchorY[pixelOuterMDIndex]};
      float zPix[Params_pLS::kLayers] = {mdsInGPU.anchorZ[pixelInnerMDIndex], mdsInGPU.anchorZ[pixelOuterMDIndex]};

      rzChiSquared = computePT3RZChiSquared(acc,
                                            modulesInGPU,
                                            lowerModuleIndices,
                                            rtPix,
                                            xPix,
                                            yPix,
                                            zPix,
                                            rts,
                                            xs,
                                            ys,
                                            zs,
                                            pixelSegmentPt,
                                            pixelSegmentPx,
                                            pixelSegmentPy,
                                            pixelSegmentPz,
                                            pixelSegmentCharge);
      if (not passPT3RZChiSquaredCuts(
              modulesInGPU, lowerModuleIndex,
middleModuleIndex, upperModuleIndex, rzChiSquared))
        return false;
    } else {
      rzChiSquared = -1;  // sentinel: cut not evaluated for high-pT candidates
    }

    rPhiChiSquared =
        computePT3RPhiChiSquared(acc, modulesInGPU, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys);

    if (runChiSquaredCuts and pixelSegmentPt < 5.0f) {
      if (not passPT3RPhiChiSquaredCuts(
              modulesInGPU, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquared))
        return false;
    }

    float xPix[Params_pLS::kLayers] = {mdsInGPU.anchorX[pixelInnerMDIndex], mdsInGPU.anchorX[pixelOuterMDIndex]};
    float yPix[Params_pLS::kLayers] = {mdsInGPU.anchorY[pixelInnerMDIndex], mdsInGPU.anchorY[pixelOuterMDIndex]};
    rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix);

    if (runChiSquaredCuts and pixelSegmentPt < 5.0f) {
      if (not passPT3RPhiChiSquaredInwardsCuts(
              modulesInGPU, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquaredInwards))
        return false;
    }
    centerX = 0;
    centerY = 0;
    return true;
  };

  // Kernel: build pT3 objects by pairing every surviving pixel segment (pLS) with every
  // compatible outer-tracker triplet (T3) reachable through the pixel->module connection map.
  // Work split: dim-1 threads over pixel segments, dim-0 blocks over connected modules,
  // dim-2 threads over the module's triplets. Passing candidates are appended to
  // pixelTripletsInGPU via atomics.
  struct CreatePixelTripletsInGPUFromMapv2 {
    template <typename TAcc>
    ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                  Modules modulesInGPU,
                                  ObjectRanges rangesInGPU,
                                  MiniDoublets mdsInGPU,
                                  Segments segmentsInGPU,
                                  Triplets tripletsInGPU,
                                  PixelTriplets pixelTripletsInGPU,
                                  unsigned int* connectedPixelSize,
                                  unsigned int* connectedPixelIndex,
                                  unsigned int nPixelSegments) const {
      // NOTE(review): the template arguments of getIdx/getWorkDiv were lost in this patch
      // extraction; restored to the conventional Grid/Blocks and Grid/Threads pairs — verify
      // against the original source.
      auto const globalBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
      auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);
      auto const gridBlockExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc);
      auto const gridThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);

      for (unsigned int i_pLS = globalThreadIdx[1]; i_pLS < nPixelSegments; i_pLS += gridThreadExtent[1]) {
        auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS];

        for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max;
             iLSModule += gridBlockExtent[0]) {
          uint16_t tripletLowerModuleIndex =
              modulesInGPU
                  .connectedPixels[iLSModule];  //connected pixels will have the appropriate lower module index by default!
#ifdef WARNINGS
          if (tripletLowerModuleIndex >= *modulesInGPU.nLowerModules) {
            printf("tripletLowerModuleIndex %d >= modulesInGPU.nLowerModules %d \n",
                   tripletLowerModuleIndex,
                   *modulesInGPU.nLowerModules);
            continue;  //sanity check
          }
#endif
          //Removes 2S-2S :FIXME: filter these out in the pixel map
          if (modulesInGPU.moduleType[tripletLowerModuleIndex] == TwoS)
            continue;

          uint16_t pixelModuleIndex = *modulesInGPU.nLowerModules;
          unsigned int nOuterTriplets = tripletsInGPU.nTriplets[tripletLowerModuleIndex];
          if (nOuterTriplets == 0)
            continue;

          unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + i_pLS;

          if (segmentsInGPU.isDup[i_pLS])
            continue;
          if (segmentsInGPU.partOfPT5[i_pLS])
            continue;  //don't make pT3s for those pixels that are part of pT5

          // layer2_adjustment selects the MD that lies in layer 2, used below for the
          // candidate's eta/phi; triplets not starting in layer 1 or 2 are skipped.
          short layer2_adjustment;
          if (modulesInGPU.layers[tripletLowerModuleIndex] == 1) {
            layer2_adjustment = 1;
          }  //get upper segment to be in second layer
          else if (modulesInGPU.layers[tripletLowerModuleIndex] == 2) {
            layer2_adjustment = 0;
          }  // get lower segment to be in second layer
          else {
            continue;
          }

          //fetch the triplet
          for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets;
               outerTripletArrayIndex += gridThreadExtent[2]) {
            unsigned int outerTripletIndex =
                rangesInGPU.tripletModuleIndices[tripletLowerModuleIndex] + outerTripletArrayIndex;
            if (modulesInGPU.moduleType[tripletsInGPU.lowerModuleIndices[3 * outerTripletIndex + 1]] == TwoS)
              continue;  //REMOVES PS-2S

            if (tripletsInGPU.partOfPT5[outerTripletIndex])
              continue;  //don't create pT3s for T3s accounted in pT5s

            float pixelRadius, tripletRadius, rPhiChiSquared, rzChiSquared, rPhiChiSquaredInwards, centerX, centerY;
            bool success = runPixelTripletDefaultAlgo(acc,
                                                      modulesInGPU,
                                                      rangesInGPU,
                                                      mdsInGPU,
                                                      segmentsInGPU,
                                                      tripletsInGPU,
                                                      pixelSegmentIndex,
                                                      outerTripletIndex,
                                                      pixelRadius,
                                                      tripletRadius,
                                                      centerX,
                                                      centerY,
                                                      rzChiSquared,
                                                      rPhiChiSquared,
                                                      rPhiChiSquaredInwards);

            if (success) {
              float phi =
                  mdsInGPU.anchorPhi[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * outerTripletIndex] +
                                                             layer2_adjustment]];
              float eta =
                  mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * outerTripletIndex] +
                                                             layer2_adjustment]];
              float eta_pix = segmentsInGPU.eta[i_pLS];
              float phi_pix = segmentsInGPU.phi[i_pLS];
              float pt = segmentsInGPU.ptIn[i_pLS];
              float score = rPhiChiSquared + rPhiChiSquaredInwards;
              // Reserve a slot atomically; the occupancy counter is bumped even when the
              // buffer is full so the overflow can be reported.
              unsigned int totOccupancyPixelTriplets = alpaka::atomicAdd(
                  acc, pixelTripletsInGPU.totOccupancyPixelTriplets, 1u, alpaka::hierarchy::Threads{});
              if (totOccupancyPixelTriplets >= n_max_pixel_triplets) {
#ifdef WARNINGS
                printf("Pixel Triplet excess alert!\n");
#endif
              } else {
                unsigned int pixelTripletIndex =
                    alpaka::atomicAdd(acc, pixelTripletsInGPU.nPixelTriplets, 1u, alpaka::hierarchy::Threads{});
                addPixelTripletToMemory(mdsInGPU,
                                        segmentsInGPU,
                                        tripletsInGPU,
                                        pixelTripletsInGPU,
                                        pixelSegmentIndex,
                                        outerTripletIndex,
                                        pixelRadius,
                                        tripletRadius,
                                        centerX,
                                        centerY,
                                        rPhiChiSquared,
                                        rPhiChiSquaredInwards,
                                        rzChiSquared,
                                        pixelTripletIndex,
                                        pt,
                                        eta,
                                        phi,
                                        eta_pix,
                                        phi_pix,
                                        score);
                tripletsInGPU.partOfPT3[outerTripletIndex] = true;
              }
            }
          }  // for outerTripletArrayIndex
        }  // for iLSModule < iLSModule_max
      }  // for i_pLS
    }
  };

  // Iteratively refine betaIn/betaOut (and the derived pt_beta) for a pT3 candidate.
  // For pixel-seeded candidates (lIn == 0) only betaOut receives a single correction.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationspT3(TAcc const& acc,
                                                                float& betaIn,
                                                                float& betaOut,
                                                                float betaAv,
                                                                float& pt_beta,
                                                                float sdIn_dr,
                                                                float sdOut_dr,
                                                                float dr,
                                                                float lIn) {
    if (lIn == 0) {
      betaOut += alpaka::math::copysign(
          acc,
          alpaka::math::asin(
              acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
          betaOut);
      return;
    }

    // Two-pass refinement when betaIn and betaOut agree in sign and pt_beta is well defined.
    if (betaIn * betaOut > 0.f and
        (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or
         (lIn >= 11 and alpaka::math::abs(acc, pt_beta) <
                            8.f * kPt_betaMax)))  //and the pt_beta is well-defined; less strict for endcap-endcap
    {
      const float betaInUpd =
          betaIn +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
              betaIn);  //FIXME: need a faster version
      const float betaOutUpd =
          betaOut +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
              betaOut);  //FIXME: need a faster version
      betaAv = 0.5f * (betaInUpd + betaOutUpd);

      //1st update
      const float pt_beta_inv =
          1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv));  //get a better pt estimate

      betaIn += alpaka::math::copysign(
          acc,
          alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)),
          betaIn);  //FIXME: need a faster version
      betaOut += alpaka::math::copysign(
          acc,
          alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)),
          betaOut);  //FIXME: need a faster version
      //update the av and pt
      betaAv = 0.5f * (betaIn + betaOut);
      //2nd update
      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
    } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) &&
               alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax)  //use betaIn sign as ref
    {
      // Fallback branch: betaOut is much smaller than betaIn, so betaIn's sign is used as the
      // reference for BOTH copysign corrections below (hence the betaIn arguments).
      const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn);

      const float betaInUpd =
          betaIn +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)),
              betaIn);  //FIXME: need a faster version
      const float betaOutUpd =
          betaOut +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc,
                  alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)),
              betaIn);  //FIXME: need a faster version
      betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn))
                   ? (0.5f * (betaInUpd + betaOutUpd))
                   : betaInUpd;

      //1st update
      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
      betaIn += alpaka::math::copysign(
          acc,
          alpaka::math::asin(
              acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
          betaIn);  //FIXME: need a faster version
      betaOut += alpaka::math::copysign(
          acc,
          alpaka::math::asin(
              acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
          betaIn);  //FIXME: need a faster version
      //update the av and pt
      betaAv = 0.5f * (betaIn + betaOut);
      //2nd update
      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
    }
  }

  // Pixel-segment + outer-tracker segment compatibility when both outer modules are in the
  // Barrel (PPBB). Applies z-window, pointing, delta-phi and delta-beta cuts; returns true
  // when the pairing is compatible.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc,
                                                                Modules const& modulesInGPU,
                                                                ObjectRanges const& rangesInGPU,
                                                                MiniDoublets const& mdsInGPU,
                                                                Segments const& segmentsInGPU,
                                                                uint16_t pixelModuleIndex,
                                                                uint16_t outerInnerLowerModuleIndex,
                                                                uint16_t outerOuterLowerModuleIndex,
                                                                unsigned int innerSegmentIndex,
                                                                unsigned int outerSegmentIndex,
                                                                unsigned int firstMDIndex,
                                                                unsigned int secondMDIndex,
                                                                unsigned int thirdMDIndex,
                                                                unsigned int fourthMDIndex) {
    float dPhi, betaIn, betaOut, pt_beta, zLo, zHi, zLoPointed, zHiPointed, dPhiCut, betaOutCut;

    bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS);

    float rt_InLo = mdsInGPU.anchorRt[firstMDIndex];
    float rt_InUp = mdsInGPU.anchorRt[secondMDIndex];
    float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex];
    float rt_OutUp = mdsInGPU.anchorRt[fourthMDIndex];

    float z_InUp =
mdsInGPU.anchorZ[secondMDIndex]; + float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + + float x_InLo = mdsInGPU.anchorX[firstMDIndex]; + float x_InUp = mdsInGPU.anchorX[secondMDIndex]; + float x_OutLo = mdsInGPU.anchorX[thirdMDIndex]; + float x_OutUp = mdsInGPU.anchorX[fourthMDIndex]; + + float y_InLo = mdsInGPU.anchorY[firstMDIndex]; + float y_InUp = mdsInGPU.anchorY[secondMDIndex]; + float y_OutLo = mdsInGPU.anchorY[thirdMDIndex]; + float y_OutUp = mdsInGPU.anchorY[fourthMDIndex]; + + float rt_InOut = rt_InUp; + + if (alpaka::math::abs(acc, deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > 0.5f * float(M_PI)) + return false; + + unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex]; + float ptIn = segmentsInGPU.ptIn[pixelSegmentArrayIndex]; + float ptSLo = ptIn; + float px = segmentsInGPU.px[pixelSegmentArrayIndex]; + float py = segmentsInGPU.py[pixelSegmentArrayIndex]; + float pz = segmentsInGPU.pz[pixelSegmentArrayIndex]; + float ptErr = segmentsInGPU.ptErr[pixelSegmentArrayIndex]; + float etaErr = segmentsInGPU.etaErr[pixelSegmentArrayIndex]; + ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); + ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float rtRatio_OutLoInOut = + rt_OutLo / rt_InOut; // Outer segment beginning rt divided by inner segment beginning rt; + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + const float zpitch_InLo = 0.05f; + const float zpitch_InOut = 0.05f; + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + zHi = z_InUp + (z_InUp + kDeltaZLum) * (rtRatio_OutLoInOut - 1.f) * (z_InUp < 0.f ? 
1.f : dzDrtScale) + + (zpitch_InOut + zpitch_OutLo); + zLo = z_InUp + (z_InUp - kDeltaZLum) * (rtRatio_OutLoInOut - 1.f) * (z_InUp > 0.f ? 1.f : dzDrtScale) - + (zpitch_InOut + zpitch_OutLo); //slope-correction only on outer end + + if ((z_OutLo < zLo) || (z_OutLo > zHi)) + return false; + + const float cosh2Eta = 1.f + (pz * pz) / (ptIn * ptIn); + + const float drt_OutLo_InUp = (rt_OutLo - rt_InUp); + + const float r3_InUp = alpaka::math::sqrt(acc, z_InUp * z_InUp + rt_InUp * rt_InUp); + + float drt_InSeg = rt_InOut - rt_InLo; + + const float thetaMuls2 = + (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InUp) / 50.f) * (r3_InUp / rt_InUp); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float dzErr = (drt_OutLo_InUp * drt_OutLo_InUp) * (etaErr * etaErr) * cosh2Eta; + dzErr += 0.03f * 0.03f; // Approximately account for IT module size + dzErr *= 9.f; // 3 sigma + dzErr += muls2 * (drt_OutLo_InUp * drt_OutLo_InUp) / 3.f * cosh2Eta; + dzErr += zGeom * zGeom; + dzErr = alpaka::math::sqrt(acc, dzErr); + + const float dzDrIn = pz / ptIn; + const float zWindow = dzErr / drt_InSeg * drt_OutLo_InUp + zGeom; + const float dzMean = dzDrIn * drt_OutLo_InUp * + (1.f + drt_OutLo_InUp * drt_OutLo_InUp * 4 * k2Rinv1GeVf * k2Rinv1GeVf / ptIn / ptIn / + 24.f); // with curved path correction + // Constructing upper and lower bound + zLoPointed = z_InUp + dzMean - zWindow; + zHiPointed = z_InUp + dzMean + zWindow; + + if ((z_OutLo < zLoPointed) || (z_OutLo > zHiPointed)) + return false; + + const float pvOffset = 0.1f / rt_OutLo; + dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + //no dphipos cut + float midPointX = 0.5f * (x_InLo + x_OutLo); + float midPointY = 0.5f * (y_InLo + y_OutLo); + + float diffX = x_OutLo - x_InLo; + float diffY = y_OutLo - y_InLo; + + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + //lots of array accesses below 
this... + + float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = x_OutUp - x_InUp; + float tl_axis_y = y_OutUp - y_InUp; + + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo, + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo); + + tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp; + + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex], + tl_axis_highEdge_x, + 
tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = + alpaka::math::sqrt(acc, (x_InUp - x_InLo) * (x_InUp - x_InLo) + (y_InUp - y_InLo) * (y_InUp - y_InLo)); + + //no betaIn cut for the pixels + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = ptIn; + + int lIn = 0; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = + alpaka::math::sqrt(acc, (x_OutUp - x_OutLo) * (x_OutUp - x_OutLo) + (y_OutUp - y_OutLo) * (y_OutUp - y_OutLo)); + float sdOut_d = rt_OutUp - rt_OutLo; + + runDeltaBetaIterationspT3(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_ptBetaMax = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = + (alpaka::math::sqrt(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + + mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + + mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The 
real beta cut
      if (alpaka::math::abs(acc, betaOut) >= betaOutCut)
        return false;

      const float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg);
      const float dBetaCut2 =
          (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 +
           0.25f *
               (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) *
               (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)));
      float dBeta = betaIn - betaOut;
      return dBeta * dBeta <= dBetaCut2;
    }

  // Compatibility test between a pixel (inner) segment and an outer-tracker
  // segment whose outer doublet sits in the endcaps (PP = pixel-pixel inner
  // hits, EE = endcap-endcap outer hits).  Applies the sequence of geometric
  // selections (z-side consistency, rt window, rt-z pointing, delta-phi, and
  // the delta-beta consistency cuts) and returns true only if all pass.
  // dPhi/betaIn/betaOut/pt_beta/rtLo/rtHi/dPhiCut/betaOutCut are scratch
  // locals; only the boolean result is visible to the caller.
  // NOTE(review): the template parameter list was lost in transit; restored
  // as <typename TAcc> — confirm against the upstream header.
  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc,
                                                                Modules const& modulesInGPU,
                                                                ObjectRanges const& rangesInGPU,
                                                                MiniDoublets const& mdsInGPU,
                                                                Segments const& segmentsInGPU,
                                                                uint16_t pixelModuleIndex,
                                                                uint16_t outerInnerLowerModuleIndex,
                                                                uint16_t outerOuterLowerModuleIndex,
                                                                unsigned int innerSegmentIndex,
                                                                unsigned int outerSegmentIndex,
                                                                unsigned int firstMDIndex,
                                                                unsigned int secondMDIndex,
                                                                unsigned int thirdMDIndex,
                                                                unsigned int fourthMDIndex) {
    float dPhi, betaIn, betaOut, pt_beta, rtLo, rtHi, dPhiCut, betaOutCut;

    bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS);

    float z_InUp = mdsInGPU.anchorZ[secondMDIndex];
    float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex];

    // Cut #1 (implicit): inner and outer anchors must be on the same endcap side.
    if (z_InUp * z_OutLo <= 0)
      return false;

    float rt_InLo = mdsInGPU.anchorRt[firstMDIndex];
    float rt_InUp = mdsInGPU.anchorRt[secondMDIndex];
    float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex];
    float rt_OutUp = mdsInGPU.anchorRt[fourthMDIndex];

    float x_InLo = mdsInGPU.anchorX[firstMDIndex];
    float x_InUp = mdsInGPU.anchorX[secondMDIndex];
    float x_OutLo = mdsInGPU.anchorX[thirdMDIndex];
    float x_OutUp = mdsInGPU.anchorX[fourthMDIndex];

    float y_InLo = mdsInGPU.anchorY[firstMDIndex];
    float y_InUp = mdsInGPU.anchorY[secondMDIndex];
    float y_OutLo = mdsInGPU.anchorY[thirdMDIndex];
    float y_OutUp = mdsInGPU.anchorY[fourthMDIndex];

    // Pixel-segment quantities are stored per pixel module; recover the local index.
    unsigned int pixelSegmentArrayIndex = innerSegmentIndex - rangesInGPU.segmentModuleIndices[pixelModuleIndex];

    float ptIn = segmentsInGPU.ptIn[pixelSegmentArrayIndex];
    float ptSLo = ptIn;
    float px = segmentsInGPU.px[pixelSegmentArrayIndex];
    float py = segmentsInGPU.py[pixelSegmentArrayIndex];
    float pz = segmentsInGPU.pz[pixelSegmentArrayIndex];
    float ptErr = segmentsInGPU.ptErr[pixelSegmentArrayIndex];
    float etaErr = segmentsInGPU.etaErr[pixelSegmentArrayIndex];

    // Conservative lower pT estimate (10-sigma down-fluctuation), clamped to [ptCut, 10].
    ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo));
    ptSLo = alpaka::math::min(acc, 10.0f, ptSLo);

    const float zpitch_InLo = 0.05f;
    float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch);
    float zGeom = zpitch_InLo + zpitch_OutLo;

    const float slope = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax));
    const float dzDrtScale = alpaka::math::tan(acc, slope) / slope;  //FIXME: need approximate value

    const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp);
    bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS;

    const float rtGeom1 = isOutSgInnerMDPS
                              ? kPixelPSZpitch
                              : kStrip2SZpitch;  //FIXME: make this chosen by configuration for lay11,12 full PS
    const float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InUp);  //used in B-E region
    rtLo = rt_InUp * (1.f + (z_OutLo - z_InUp - zGeom1) / (z_InUp + zGeom1 + dLum) / dzDrtScale) -
           rtGeom1;  //slope correction only on the lower end

    // Guard against the denominator changing sign (would flip the window).
    float zInForHi = z_InUp - zGeom1 - dLum;
    if (zInForHi * z_InUp < 0)
      zInForHi = alpaka::math::copysign(acc, 0.1f, z_InUp);
    rtHi = rt_InUp * (1.f + (z_OutLo - z_InUp + zGeom1) / zInForHi) + rtGeom1;

    // Cut #2: rt condition
    if ((rt_OutLo < rtLo) || (rt_OutLo > rtHi))
      return false;

    const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InUp);
    const float cosh2Eta = 1.f + (pz * pz) / (ptIn * ptIn);
    const float multDzDr2 = (dzOutInAbs * dzOutInAbs) * cosh2Eta / ((cosh2Eta - 1.f) * (cosh2Eta - 1.f));
    const float r3_InUp = alpaka::math::sqrt(acc, z_InUp * z_InUp + rt_InUp * rt_InUp);
    // Multiple-scattering angle estimate squared.
    const float thetaMuls2 =
        (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InUp) / 50.f) * (r3_InUp / rt_InUp);
    const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f;

    float drtErr = (etaErr * etaErr) * multDzDr2;
    drtErr += 0.03f * 0.03f;  // Approximately account for IT module size
    drtErr *= 9.f;            // 3 sigma
    drtErr += muls2 * multDzDr2 / 3.f * cosh2Eta;
    drtErr = alpaka::math::sqrt(acc, drtErr);
    const float drtDzIn = alpaka::math::abs(acc, ptIn / pz);

    const float drt_OutLo_InUp = (rt_OutLo - rt_InUp);  // drOutIn

    const float rtWindow = drtErr + rtGeom1;
    const float drtMean = drtDzIn * dzOutInAbs *
                          (1.f - drt_OutLo_InUp * drt_OutLo_InUp * 4 * k2Rinv1GeVf * k2Rinv1GeVf / ptIn / ptIn /
                                     24.f);  // with curved path correction
    const float rtLo_point = rt_InUp + drtMean - rtWindow;
    const float rtHi_point = rt_InUp + drtMean + rtWindow;

    // Cut #3: rt-z pointed
    if ((rt_OutLo < rtLo_point) || (rt_OutLo > rtHi_point))
      return false;

    const float alpha1GeV_OutLo =
        alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax));
    const float pvOffset = 0.1f / rt_OutLo;
    dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset);

    float midPointX = 0.5f * (x_InLo + x_OutLo);
    float midPointY = 0.5f * (y_InLo + y_OutLo);

    float diffX = x_OutLo - x_InLo;
    float diffY = y_OutLo - y_InLo;

    dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY);

    // Cut #5: deltaPhiChange
    if (alpaka::math::abs(acc, dPhi) > dPhiCut)
      return false;

    float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]);
    float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]);

    // 2S endcap modules have long strips: track the high/low strip edges too.
    bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and
                          modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS;

    float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge;

    alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo);
    alpha_OutUp_highEdge = alpha_OutUp;
    alpha_OutUp_lowEdge = alpha_OutUp;

    float tl_axis_x = x_OutUp - x_InUp;
    float tl_axis_y = y_OutUp - y_InUp;

    float tl_axis_highEdge_x = tl_axis_x;
    float tl_axis_highEdge_y = tl_axis_y;

    float tl_axis_lowEdge_x = tl_axis_x;
    float tl_axis_lowEdge_y = tl_axis_y;

    betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y);
    float betaInRHmin = betaIn;
    float betaInRHmax = betaIn;

    betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y);
    float betaOutRHmin = betaOut;
    float betaOutRHmax = betaOut;

    if (isEC_lastLayer) {
      alpha_OutUp_highEdge = deltaPhi(acc,
                                      mdsInGPU.anchorHighEdgeX[fourthMDIndex],
                                      mdsInGPU.anchorHighEdgeY[fourthMDIndex],
                                      mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_OutLo,
                                      mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_OutLo);
      alpha_OutUp_lowEdge = deltaPhi(acc,
                                     mdsInGPU.anchorLowEdgeX[fourthMDIndex],
                                     mdsInGPU.anchorLowEdgeY[fourthMDIndex],
                                     mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_OutLo,
                                     mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_OutLo);

      tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - x_InUp;
      tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - y_InUp;
      tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - x_InUp;
      tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - y_InUp;

      betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc,
                                                      mdsInGPU.anchorHighEdgeX[fourthMDIndex],
                                                      mdsInGPU.anchorHighEdgeY[fourthMDIndex],
                                                      tl_axis_highEdge_x,
                                                      tl_axis_highEdge_y);
      betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc,
                                                     mdsInGPU.anchorLowEdgeX[fourthMDIndex],
                                                     mdsInGPU.anchorLowEdgeY[fourthMDIndex],
                                                     tl_axis_lowEdge_x,
                                                     tl_axis_lowEdge_y);
    }

    //beta computation
    float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y);
    //no betaIn cut for the pixels
    const float rt_InSeg =
        alpaka::math::sqrt(acc, (x_InUp - x_InLo) * (x_InUp - x_InLo) + (y_InUp - y_InLo) * (y_InUp - y_InLo));

    float betaAv = 0.5f * (betaIn + betaOut);
    pt_beta = ptIn;

    int lIn = 0;
    int lOut = isEC_lastLayer ? 11 : 5;
    float sdOut_dr =
        alpaka::math::sqrt(acc, (x_OutUp - x_OutLo) * (x_OutUp - x_OutLo) + (y_OutUp - y_OutLo) * (y_OutUp - y_OutLo));
    float sdOut_d = rt_OutUp - rt_OutLo;

    // Iteratively refine betaIn/betaOut/pt_beta (in/out parameters).
    runDeltaBetaIterationspT3(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn);

    const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0)
                                 ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax))
                                 : 0.;  //mean value of min,max is the old betaIn
    const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0)
                                  ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax))
                                  : 0.;
    betaInRHmin *= betaInMMSF;
    betaInRHmax *= betaInMMSF;
    betaOutRHmin *= betaOutMMSF;
    betaOutRHmax *= betaOutMMSF;

    float min_ptBeta_ptBetaMax = alpaka::math::min(
        acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax);  //need to confirm the range-out value of 7 GeV
    const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax);

    const float alphaInAbsReg =
        alpaka::math::max(acc,
                          alpaka::math::abs(acc, alpha_InLo),
                          alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax)));
    const float alphaOutAbsReg =
        alpaka::math::max(acc,
                          alpaka::math::abs(acc, alpha_OutLo),
                          alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)));
    // Luminous-region contribution only applies to endcap (layer code >= 11) legs.
    const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp);
    const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo);
    const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum);

    const float sinDPhi = alpaka::math::sin(acc, dPhi);
    const float dBetaRIn2 = 0;  // TODO-RH

    float dBetaROut = 0;
    if (isEC_lastLayer) {
      // Strip-length (rt spread between high and low edges) contribution.
      dBetaROut =
          (alpaka::math::sqrt(acc,
                              mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] +
                                  mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) -
           alpaka::math::sqrt(acc,
                              mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] +
                                  mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) *
          sinDPhi / drt_tl_axis;
    }

    const float dBetaROut2 = dBetaROut * dBetaROut;

    betaOutCut =
        alpaka::math::asin(
            acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax))  //FIXME: need faster version
        + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2);

    //Cut #6: The real beta cut
    if (alpaka::math::abs(acc, betaOut) >= betaOutCut)
      return false;

    float drt_InSeg = rt_InUp - rt_InLo;

    const float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg);
    const float dBetaCut2 =
        (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 +
         0.25f *
             (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) *
             (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)));
    float dBeta = betaIn - betaOut;
    // Cut #7: in/out beta consistency within the combined resolution budget.
    return dBeta * dBeta <= dBetaCut2;
  }

}  // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst
#endif
diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h
new file mode 100644
index 0000000000000..1b75100c874e8
--- /dev/null
+++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h
@@ -0,0 +1,2766 @@
#ifndef RecoTracker_LSTCore_src_alpaka_Quintuplet_h
#define RecoTracker_LSTCore_src_alpaka_Quintuplet_h

#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

#include "RecoTracker/LSTCore/interface/alpaka/Constants.h"
#include "RecoTracker/LSTCore/interface/Module.h"
#include "RecoTracker/LSTCore/interface/EndcapGeometry.h"

#include "NeuralNetwork.h"
#include "Segment.h"
#include "MiniDoublet.h"
#include "Hit.h"
#include "ObjectRanges.h"
#include "Triplet.h"

namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {
  // Structure-of-arrays view over the quintuplet (T5) collection: raw device
  // pointers bound to the buffers owned by a QuintupletsBuffer via setData().
  struct Quintuplets {
    unsigned int* tripletIndices;       // 2 entries per T5: inner and outer triplet
    uint16_t* lowerModuleIndices;       // Params_T5::kLayers entries per T5
    unsigned int* nQuintuplets;         // per lower module counter
    unsigned int* totOccupancyQuintuplets;
    unsigned int* nMemoryLocations;

    FPX* innerRadius;
    FPX* bridgeRadius;
    FPX* outerRadius;
    FPX* pt;
    FPX* eta;
    FPX* phi;
    FPX* score_rphisum;
    uint8_t* layer;
    char* isDup;
    bool* TightCutFlag;
    bool* partOfPT5;

    float* regressionRadius;
    float* regressionG;
    float* regressionF;

    uint8_t* logicalLayers;             // Params_T5::kLayers entries per T5
    unsigned int* hitIndices;           // Params_T5::kHits entries per T5
    float*
rzChiSquared;
    float* chiSquared;
    float* nonAnchorChiSquared;

    // Bind every SoA pointer to the corresponding buffer of a QuintupletsBuffer.
    // NOTE(review): template parameter list reconstructed as <typename TBuff>;
    // confirm against the upstream header (angle brackets were lost in transit).
    template <typename TBuff>
    void setData(TBuff& buf) {
      tripletIndices = buf.tripletIndices_buf.data();
      lowerModuleIndices = buf.lowerModuleIndices_buf.data();
      nQuintuplets = buf.nQuintuplets_buf.data();
      totOccupancyQuintuplets = buf.totOccupancyQuintuplets_buf.data();
      nMemoryLocations = buf.nMemoryLocations_buf.data();
      innerRadius = buf.innerRadius_buf.data();
      bridgeRadius = buf.bridgeRadius_buf.data();
      outerRadius = buf.outerRadius_buf.data();
      pt = buf.pt_buf.data();
      eta = buf.eta_buf.data();
      phi = buf.phi_buf.data();
      score_rphisum = buf.score_rphisum_buf.data();
      layer = buf.layer_buf.data();
      isDup = buf.isDup_buf.data();
      TightCutFlag = buf.TightCutFlag_buf.data();
      partOfPT5 = buf.partOfPT5_buf.data();
      regressionRadius = buf.regressionRadius_buf.data();
      regressionG = buf.regressionG_buf.data();
      regressionF = buf.regressionF_buf.data();
      logicalLayers = buf.logicalLayers_buf.data();
      hitIndices = buf.hitIndices_buf.data();
      rzChiSquared = buf.rzChiSquared_buf.data();
      chiSquared = buf.chiSquared_buf.data();
      nonAnchorChiSquared = buf.nonAnchorChiSquared_buf.data();
    }
  };

  // Owner of the device memory backing the quintuplet collection.  Allocates
  // one buffer per SoA column sized for nTotalQuintuplets (counters sized for
  // nLowerModules) and zero-initializes the counters and flags.
  // NOTE(review): all template argument lists below (Buf<...> element types and
  // allocBufWrapper<...>) were reconstructed from the Quintuplets pointer
  // types; the mangled transport stripped the angle brackets — confirm upstream.
  template <typename TDev>
  struct QuintupletsBuffer {
    Buf<TDev, unsigned int[]> tripletIndices_buf;
    Buf<TDev, uint16_t[]> lowerModuleIndices_buf;
    Buf<TDev, unsigned int[]> nQuintuplets_buf;
    Buf<TDev, unsigned int[]> totOccupancyQuintuplets_buf;
    Buf<TDev, unsigned int[]> nMemoryLocations_buf;

    Buf<TDev, FPX[]> innerRadius_buf;
    Buf<TDev, FPX[]> bridgeRadius_buf;
    Buf<TDev, FPX[]> outerRadius_buf;
    Buf<TDev, FPX[]> pt_buf;
    Buf<TDev, FPX[]> eta_buf;
    Buf<TDev, FPX[]> phi_buf;
    Buf<TDev, FPX[]> score_rphisum_buf;
    Buf<TDev, uint8_t[]> layer_buf;
    Buf<TDev, char[]> isDup_buf;
    Buf<TDev, bool[]> TightCutFlag_buf;
    Buf<TDev, bool[]> partOfPT5_buf;

    Buf<TDev, float[]> regressionRadius_buf;
    Buf<TDev, float[]> regressionG_buf;
    Buf<TDev, float[]> regressionF_buf;

    Buf<TDev, uint8_t[]> logicalLayers_buf;
    Buf<TDev, unsigned int[]> hitIndices_buf;
    Buf<TDev, float[]> rzChiSquared_buf;
    Buf<TDev, float[]> chiSquared_buf;
    Buf<TDev, float[]> nonAnchorChiSquared_buf;

    Quintuplets data_;

    template <typename TDevAcc, typename TQueue>
    QuintupletsBuffer(unsigned int nTotalQuintuplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue)
        : tripletIndices_buf(allocBufWrapper<unsigned int>(devAccIn, 2 * nTotalQuintuplets, queue)),
          lowerModuleIndices_buf(allocBufWrapper<uint16_t>(devAccIn, Params_T5::kLayers * nTotalQuintuplets, queue)),
          nQuintuplets_buf(allocBufWrapper<unsigned int>(devAccIn, nLowerModules, queue)),
          totOccupancyQuintuplets_buf(allocBufWrapper<unsigned int>(devAccIn, nLowerModules, queue)),
          nMemoryLocations_buf(allocBufWrapper<unsigned int>(devAccIn, 1, queue)),
          innerRadius_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          bridgeRadius_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          outerRadius_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          pt_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          eta_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          phi_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          score_rphisum_buf(allocBufWrapper<FPX>(devAccIn, nTotalQuintuplets, queue)),
          layer_buf(allocBufWrapper<uint8_t>(devAccIn, nTotalQuintuplets, queue)),
          isDup_buf(allocBufWrapper<char>(devAccIn, nTotalQuintuplets, queue)),
          TightCutFlag_buf(allocBufWrapper<bool>(devAccIn, nTotalQuintuplets, queue)),
          partOfPT5_buf(allocBufWrapper<bool>(devAccIn, nTotalQuintuplets, queue)),
          regressionRadius_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)),
          regressionG_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)),
          regressionF_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)),
          logicalLayers_buf(allocBufWrapper<uint8_t>(devAccIn, Params_T5::kLayers * nTotalQuintuplets, queue)),
          hitIndices_buf(allocBufWrapper<unsigned int>(devAccIn, Params_T5::kHits * nTotalQuintuplets, queue)),
          rzChiSquared_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)),
          chiSquared_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)),
          nonAnchorChiSquared_buf(allocBufWrapper<float>(devAccIn, nTotalQuintuplets, queue)) {
      // Counters and flags start cleared; the payload columns are written
      // only for accepted quintuplets, so they are left uninitialized.
      alpaka::memset(queue, nQuintuplets_buf, 0u);
      alpaka::memset(queue, totOccupancyQuintuplets_buf, 0u);
      alpaka::memset(queue, isDup_buf, 0u);
      alpaka::memset(queue, TightCutFlag_buf, false);
      alpaka::memset(queue, partOfPT5_buf, false);
    }

    inline Quintuplets const* data() const { return &data_; }
    inline void setData(QuintupletsBuffer& buf) { data_.setData(buf); }
  };

  // True if the half-open-ish intervals [firstMin, firstMax) and
  // [secondMin, secondMax) overlap.
  // NOTE(review): the boundary treatment is asymmetric (<= on the first
  // comparison only) — presumably intentional tie-breaking; confirm before
  // relying on exact-boundary behavior.
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlap(float firstMin,
                                                           float firstMax,
                                                           float secondMin,
                                                           float secondMax) {
    return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax));
  }

  // Write one accepted quintuplet into the SoA collection at quintupletIndex.
  // Copies the constituent triplet indices, the five lower-module indices, the
  // fitted radii/kinematics, and flattens the two triplets' logical layers and
  // hit indices (the shared middle hits/layer are taken once).  pt/eta/phi and
  // the radii stored as FPX go through __F2H half-precision conversion.
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(Triplets const& tripletsInGPU,
                                                            Quintuplets& quintupletsInGPU,
                                                            unsigned int innerTripletIndex,
                                                            unsigned int outerTripletIndex,
                                                            uint16_t lowerModule1,
                                                            uint16_t lowerModule2,
                                                            uint16_t lowerModule3,
                                                            uint16_t lowerModule4,
                                                            uint16_t lowerModule5,
                                                            float innerRadius,
                                                            float bridgeRadius,
                                                            float outerRadius,
                                                            float regressionG,
                                                            float regressionF,
                                                            float regressionRadius,
                                                            float rzChiSquared,
                                                            float rPhiChiSquared,
                                                            float nonAnchorChiSquared,
                                                            float pt,
                                                            float eta,
                                                            float phi,
                                                            float scores,
                                                            uint8_t layer,
                                                            unsigned int quintupletIndex,
                                                            bool TightCutFlag) {
    quintupletsInGPU.tripletIndices[2 * quintupletIndex] = innerTripletIndex;
    quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1] = outerTripletIndex;

    quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex] = lowerModule1;
    quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 1] = lowerModule2;
    quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 2] = lowerModule3;
    quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 3] = lowerModule4;
    quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex + 4] = lowerModule5;
    quintupletsInGPU.innerRadius[quintupletIndex] = __F2H(innerRadius);
    quintupletsInGPU.outerRadius[quintupletIndex] = __F2H(outerRadius);
    quintupletsInGPU.pt[quintupletIndex] = __F2H(pt);
    quintupletsInGPU.eta[quintupletIndex] = __F2H(eta);
    quintupletsInGPU.phi[quintupletIndex] = __F2H(phi);
    quintupletsInGPU.score_rphisum[quintupletIndex] = __F2H(scores);
    quintupletsInGPU.layer[quintupletIndex] = layer;
    quintupletsInGPU.isDup[quintupletIndex] = 0;  // duplicates are flagged later
    quintupletsInGPU.TightCutFlag[quintupletIndex] = TightCutFlag;
    quintupletsInGPU.regressionRadius[quintupletIndex] = regressionRadius;
    quintupletsInGPU.regressionG[quintupletIndex] = regressionG;
    quintupletsInGPU.regressionF[quintupletIndex] = regressionF;
    // Logical layers: 3 from the inner triplet, last 2 from the outer triplet
    // (the outer triplet's first layer is shared with the inner one).
    quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex] =
        tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex];
    quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 1] =
        tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex + 1];
    quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 2] =
        tripletsInGPU.logicalLayers[Params_T3::kLayers * innerTripletIndex + 2];
    quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 3] =
        tripletsInGPU.logicalLayers[Params_T3::kLayers * outerTripletIndex + 1];
    quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex + 4] =
        tripletsInGPU.logicalLayers[Params_T3::kLayers * outerTripletIndex + 2];

    // Hits: all 6 of the inner triplet, then the outer triplet's last 4
    // (its first 2 hits are shared with the inner triplet's last doublet).
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex] =
        tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 1] =
        tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 1];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 2] =
        tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 2];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 3] =
        tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 3];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 4] =
        tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 4];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 5] =
        tripletsInGPU.hitIndices[Params_T3::kHits * innerTripletIndex + 5];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 6] =
        tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 2];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 7] =
        tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 3];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 8] =
        tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 4];
    quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex + 9] =
        tripletsInGPU.hitIndices[Params_T3::kHits * outerTripletIndex + 5];
    quintupletsInGPU.bridgeRadius[quintupletIndex] = bridgeRadius;
    quintupletsInGPU.rzChiSquared[quintupletIndex] = rzChiSquared;
    quintupletsInGPU.chiSquared[quintupletIndex] = rPhiChiSquared;
    quintupletsInGPU.nonAnchorChiSquared[quintupletIndex] = nonAnchorChiSquared;
  }

  //90% constraint
  // Layer-combination-dependent chi-squared selection.  The thresholds are
  // tuned per (layer1..layer5) combination; combinations not listed are
  // accepted unconditionally (return true).
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(Modules const& modulesInGPU,
                                                               uint16_t lowerModuleIndex1,
                                                               uint16_t lowerModuleIndex2,
                                                               uint16_t lowerModuleIndex3,
                                                               uint16_t lowerModuleIndex4,
                                                               uint16_t lowerModuleIndex5,
                                                               float chiSquared) {
    // Using lstLayer numbering convention defined in ModuleMethods.h
    const int layer1 = modulesInGPU.lstLayers[lowerModuleIndex1];
    const int layer2 = modulesInGPU.lstLayers[lowerModuleIndex2];
    const int layer3 = modulesInGPU.lstLayers[lowerModuleIndex3];
    const int layer4 = modulesInGPU.lstLayers[lowerModuleIndex4];
    const int layer5 = modulesInGPU.lstLayers[lowerModuleIndex5];

    if (layer1 == 7 and layer2 == 8 and layer3 == 9) {
      if (layer4 == 10 and layer5 == 11) {
        return chiSquared < 0.01788f;
      } else if (layer4 == 10 and layer5 == 16) {
        return chiSquared < 0.04725f;
      } else if (layer4 == 15 and layer5 == 16) {
        return chiSquared < 0.04725f;
      }
    } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) {
      if (layer4 == 9 and layer5 == 10) {
        return chiSquared < 0.01788f;
      } else if (layer4 == 9 and layer5 == 15) {
        return chiSquared < 0.08234f;
      }
    } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) {
      if (layer4 == 8 and layer5 == 9) {
        return chiSquared < 0.02360f;
      } else if (layer4 == 8 and layer5 == 14) {
        return chiSquared < 0.07167f;
      } else if (layer4 == 13 and layer5 == 14) {
        return chiSquared < 0.08234f;
      }
    } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) {
      if (layer4 == 7 and layer5 == 8) {
        return chiSquared < 0.01026f;
      } else if (layer4 == 7 and layer5 == 13) {
        return chiSquared < 0.06238f;
      } else if (layer4 == 12 and layer5 == 13) {
        return chiSquared < 0.06238f;
      }
    } else if (layer1 == 1 and layer2 == 2 and layer3 == 3 and layer4 == 4) {
      if (layer5 == 5) {
        return chiSquared < 0.04725f;
      } else if (layer5 == 12) {
        return chiSquared < 0.09461f;
      }
    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) {
      if (layer4 == 9 and layer5 == 10) {
        return chiSquared < 0.00512f;
      }
      if (layer4 == 9 and layer5 == 15) {
        return chiSquared < 0.04112f;
      } else if (layer4 == 14 and layer5 == 15) {
        return chiSquared < 0.06238f;
      }
    } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) {
      if (layer4 == 8 and layer5 == 14) {
        return chiSquared < 0.07167f;
      } else if (layer4 == 13 and layer5 == 14) {
        return chiSquared < 0.06238f;
      }
    } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) {
      if (layer4 == 5 and layer5 == 6) {
        return chiSquared < 0.08234f;
      } else if (layer4 == 5 and layer5 == 12) {
        return chiSquared < 0.10870f;
      } else if (layer4 == 12 and layer5 == 13) {
        return chiSquared < 0.10870f;
      }
    } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) {
      return chiSquared < 0.09461f;
    } else if (layer1 == 3 and layer2 == 4 and layer3 == 5 and layer4 == 12 and layer5 == 13) {
      return chiSquared < 0.09461f;
    }

    // Unlisted layer combinations are not constrained.
    return true;
  }

  //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt
  // NOTE(review): template parameter list reconstructed as <typename TAcc>;
  // confirm against the upstream header.
  template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex, + unsigned int fifthMDIndex, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float& rzChiSquared, + float inner_pt, + float innerRadius, + float g, + float f, + bool& TightCutFlag) { + //(g,f) is the center of the circle fitted by the innermost 3 points on x,y coordinates + const float& rt1 = mdsInGPU.anchorRt[firstMDIndex] / 100; //in the unit of m instead of cm + const float& rt2 = mdsInGPU.anchorRt[secondMDIndex] / 100; + const float& rt3 = mdsInGPU.anchorRt[thirdMDIndex] / 100; + const float& rt4 = mdsInGPU.anchorRt[fourthMDIndex] / 100; + const float& rt5 = mdsInGPU.anchorRt[fifthMDIndex] / 100; + + const float& z1 = mdsInGPU.anchorZ[firstMDIndex] / 100; + const float& z2 = mdsInGPU.anchorZ[secondMDIndex] / 100; + const float& z3 = mdsInGPU.anchorZ[thirdMDIndex] / 100; + const float& z4 = mdsInGPU.anchorZ[fourthMDIndex] / 100; + const float& z5 = mdsInGPU.anchorZ[fifthMDIndex] / 100; + + // Using lst_layer numbering convention defined in ModuleMethods.h + const int layer1 = modulesInGPU.lstLayers[lowerModuleIndex1]; + const int layer2 = modulesInGPU.lstLayers[lowerModuleIndex2]; + const int layer3 = modulesInGPU.lstLayers[lowerModuleIndex3]; + const int layer4 = modulesInGPU.lstLayers[lowerModuleIndex4]; + const int layer5 = modulesInGPU.lstLayers[lowerModuleIndex5]; + + //slope computed using the internal T3s + const int moduleType1 = modulesInGPU.moduleType[lowerModuleIndex1]; //0 is ps, 1 is 2s + const int moduleType2 = modulesInGPU.moduleType[lowerModuleIndex2]; + const int moduleType3 = modulesInGPU.moduleType[lowerModuleIndex3]; + const int moduleType4 = modulesInGPU.moduleType[lowerModuleIndex4]; + 
const int moduleType5 = modulesInGPU.moduleType[lowerModuleIndex5]; + + const float& x1 = mdsInGPU.anchorX[firstMDIndex] / 100; + const float& x2 = mdsInGPU.anchorX[secondMDIndex] / 100; + const float& x3 = mdsInGPU.anchorX[thirdMDIndex] / 100; + const float& x4 = mdsInGPU.anchorX[fourthMDIndex] / 100; + const float& y1 = mdsInGPU.anchorY[firstMDIndex] / 100; + const float& y2 = mdsInGPU.anchorY[secondMDIndex] / 100; + const float& y3 = mdsInGPU.anchorY[thirdMDIndex] / 100; + const float& y4 = mdsInGPU.anchorY[fourthMDIndex] / 100; + + float residual = 0; + float error2 = 0; + float x_center = g / 100, y_center = f / 100; + float x_init = mdsInGPU.anchorX[thirdMDIndex] / 100; + float y_init = mdsInGPU.anchorY[thirdMDIndex] / 100; + float z_init = mdsInGPU.anchorZ[thirdMDIndex] / 100; + float rt_init = mdsInGPU.anchorRt[thirdMDIndex] / 100; //use the second MD as initial point + + if (moduleType3 == 1) // 1: if MD3 is in 2s layer + { + x_init = mdsInGPU.anchorX[secondMDIndex] / 100; + y_init = mdsInGPU.anchorY[secondMDIndex] / 100; + z_init = mdsInGPU.anchorZ[secondMDIndex] / 100; + rt_init = mdsInGPU.anchorRt[secondMDIndex] / 100; + } + + // start from a circle of inner T3. + // to determine the charge + int charge = 0; + float slope3c = (y3 - y_center) / (x3 - x_center); + float slope1c = (y1 - y_center) / (x1 - x_center); + // these 4 "if"s basically separate the x-y plane into 4 quarters. It determines geometrically how a circle and line slope goes and their positions, and we can get the charges correspondingly. 
+ if ((y3 - y_center) > 0 && (y1 - y_center) > 0) { + if (slope1c > 0 && slope3c < 0) + charge = -1; // on x axis of a quarter, 3 hits go anti-clockwise + else if (slope1c < 0 && slope3c > 0) + charge = 1; // on x axis of a quarter, 3 hits go clockwise + else if (slope3c > slope1c) + charge = -1; + else if (slope3c < slope1c) + charge = 1; + } else if ((y3 - y_center) < 0 && (y1 - y_center) < 0) { + if (slope1c < 0 && slope3c > 0) + charge = 1; + else if (slope1c > 0 && slope3c < 0) + charge = -1; + else if (slope3c > slope1c) + charge = -1; + else if (slope3c < slope1c) + charge = 1; + } else if ((y3 - y_center) < 0 && (y1 - y_center) > 0) { + if ((x3 - x_center) > 0 && (x1 - x_center) > 0) + charge = 1; + else if ((x3 - x_center) < 0 && (x1 - x_center) < 0) + charge = -1; + } else if ((y3 - y_center) > 0 && (y1 - y_center) < 0) { + if ((x3 - x_center) > 0 && (x1 - x_center) > 0) + charge = -1; + else if ((x3 - x_center) < 0 && (x1 - x_center) < 0) + charge = 1; + } + + float pseudo_phi = alpaka::math::atan( + acc, (y_init - y_center) / (x_init - x_center)); //actually represent pi/2-phi, wrt helix axis z + float Pt = inner_pt, Px = Pt * alpaka::math::abs(acc, alpaka::math::sin(acc, pseudo_phi)), + Py = Pt * alpaka::math::abs(acc, cos(pseudo_phi)); + + // Above line only gives you the correct value of Px and Py, but signs of Px and Py calculated below. + // We look at if the circle is clockwise or anti-clock wise, to make it simpler, we separate the x-y plane into 4 quarters. 
+ if (x_init > x_center && y_init > y_center) //1st quad + { + if (charge == 1) + Py = -Py; + if (charge == -1) + Px = -Px; + } + if (x_init < x_center && y_init > y_center) //2nd quad + { + if (charge == -1) { + Px = -Px; + Py = -Py; + } + } + if (x_init < x_center && y_init < y_center) //3rd quad + { + if (charge == 1) + Px = -Px; + if (charge == -1) + Py = -Py; + } + if (x_init > x_center && y_init < y_center) //4th quad + { + if (charge == 1) { + Px = -Px; + Py = -Py; + } + } + + // But if the initial T5 curve goes across quarters(i.e. cross axis to separate the quarters), need special redeclaration of Px,Py signs on these to avoid errors + if (moduleType3 == 0) { // 0 is ps + if (x4 < x3 && x3 < x2) + Px = -alpaka::math::abs(acc, Px); + else if (x4 > x3 && x3 > x2) + Px = alpaka::math::abs(acc, Px); + if (y4 < y3 && y3 < y2) + Py = -alpaka::math::abs(acc, Py); + else if (y4 > y3 && y3 > y2) + Py = alpaka::math::abs(acc, Py); + } else if (moduleType3 == 1) // 1 is 2s + { + if (x3 < x2 && x2 < x1) + Px = -alpaka::math::abs(acc, Px); + else if (x3 > x2 && x2 > x1) + Px = alpaka::math::abs(acc, Px); + if (y3 < y2 && y2 < y1) + Py = -alpaka::math::abs(acc, Py); + else if (y3 > y2 && y2 > y1) + Py = alpaka::math::abs(acc, Py); + } + + //to get Pz, we use pt/pz=ds/dz, ds is the arclength between MD1 and MD3. 
+ float AO = alpaka::math::sqrt(acc, (x1 - x_center) * (x1 - x_center) + (y1 - y_center) * (y1 - y_center)); + float BO = + alpaka::math::sqrt(acc, (x_init - x_center) * (x_init - x_center) + (y_init - y_center) * (y_init - y_center)); + float AB2 = (x1 - x_init) * (x1 - x_init) + (y1 - y_init) * (y1 - y_init); + float dPhi = alpaka::math::acos(acc, (AO * AO + BO * BO - AB2) / (2 * AO * BO)); + float ds = innerRadius / 100 * dPhi; + + float Pz = (z_init - z1) / ds * Pt; + float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); + + float a = -2.f * k2Rinv1GeVf * 100 * charge; // multiply by 100 to make the correct length units + + float zsi, rtsi; + int layeri, moduleTypei; + rzChiSquared = 0; + for (size_t i = 2; i < 6; i++) { + if (i == 2) { + zsi = z2; + rtsi = rt2; + layeri = layer2; + moduleTypei = moduleType2; + } else if (i == 3) { + zsi = z3; + rtsi = rt3; + layeri = layer3; + moduleTypei = moduleType3; + } else if (i == 4) { + zsi = z4; + rtsi = rt4; + layeri = layer4; + moduleTypei = moduleType4; + } else if (i == 5) { + zsi = z5; + rtsi = rt5; + layeri = layer5; + moduleTypei = moduleType5; + } + + if (moduleType3 == 0) { //0: ps + if (i == 3) + continue; + } else { + if (i == 2) + continue; + } + + // calculation is copied from PixelTriplet.cc computePT3RZChiSquared + float diffr = 0, diffz = 0; + + float rou = a / p; + // for endcap + float s = (zsi - z_init) * p / Pz; + float x = x_init + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s)); + float y = y_init + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s)); + diffr = (rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100; + + // for barrel + if (layeri <= 6) { + float paraA = + rt_init * rt_init + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y_init * Px - x_init * Py) / a - rtsi * rtsi; + float paraB = 2 * (x_init * Px + y_init * Py) / a; + float paraC = 2 * (y_init * Px - x_init * Py) / a + 2 * (Px * Px + Py * Py) / 
(a * a); + float A = paraB * paraB + paraC * paraC; + float B = 2 * paraA * paraB; + float C = paraA * paraA - paraC * paraC; + float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float solz1 = alpaka::math::asin(acc, sol1) / rou * Pz / p + z_init; + float solz2 = alpaka::math::asin(acc, sol2) / rou * Pz / p + z_init; + float diffz1 = (solz1 - zsi) * 100; + float diffz2 = (solz2 - zsi) * 100; + // Alpaka : Needs to be moved over + if (alpaka::math::isnan(acc, diffz1)) + diffz = diffz2; + else if (alpaka::math::isnan(acc, diffz2)) + diffz = diffz1; + else { + diffz = (alpaka::math::abs(acc, diffz1) < alpaka::math::abs(acc, diffz2)) ? diffz1 : diffz2; + } + } + residual = (layeri > 6) ? diffr : diffz; + + //PS Modules + if (moduleTypei == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //check the tilted module, side: PosZ, NegZ, Center(for not tilted) + float drdz; + short side, subdets; + if (i == 2) { + drdz = alpaka::math::abs(acc, modulesInGPU.drdzs[lowerModuleIndex2]); + side = modulesInGPU.sides[lowerModuleIndex2]; + subdets = modulesInGPU.subdets[lowerModuleIndex2]; + } + if (i == 3) { + drdz = alpaka::math::abs(acc, modulesInGPU.drdzs[lowerModuleIndex3]); + side = modulesInGPU.sides[lowerModuleIndex3]; + subdets = modulesInGPU.subdets[lowerModuleIndex3]; + } + if (i == 2 || i == 3) { + residual = (layeri <= 6 && ((side == Center) or (drdz < 1))) ? diffz : diffr; + float projection_missing2 = 1.f; + if (drdz < 1) + projection_missing2 = + ((subdets == Endcap) or (side == Center)) ? 1.f : 1.f / (1 + drdz * drdz); // cos(atan(drdz)), if dr/dz<1 + if (drdz > 1) + projection_missing2 = ((subdets == Endcap) or (side == Center)) + ? 
1.f + : (drdz * drdz) / (1 + drdz * drdz); //sin(atan(drdz)), if dr/dz>1 + error2 = error2 * projection_missing2; + } + rzChiSquared += 12 * (residual * residual) / error2; + } + // for set rzchi2 cut + // if the 5 points are linear, helix calculation gives nan + // Alpaka : Needs to be moved over + if (inner_pt > 100 || alpaka::math::isnan(acc, rzChiSquared)) { + float slope; + if (moduleType1 == 0 and moduleType2 == 0 and moduleType3 == 1) //PSPS2S + { + slope = (z2 - z1) / (rt2 - rt1); + } else { + slope = (z3 - z1) / (rt3 - rt1); + } + float residual4_linear = (layer4 <= 6) ? ((z4 - z1) - slope * (rt4 - rt1)) : ((rt4 - rt1) - (z4 - z1) / slope); + float residual5_linear = (layer4 <= 6) ? ((z5 - z1) - slope * (rt5 - rt1)) : ((rt5 - rt1) - (z5 - z1) / slope); + + // creating a chi squared type quantity + // 0-> PS, 1->2S + residual4_linear = (moduleType4 == 0) ? residual4_linear / kPixelPSZpitch : residual4_linear / kStrip2SZpitch; + residual5_linear = (moduleType5 == 0) ? residual5_linear / kPixelPSZpitch : residual5_linear / kStrip2SZpitch; + residual4_linear = residual4_linear * 100; + residual5_linear = residual5_linear * 100; + + rzChiSquared = 12 * (residual4_linear * residual4_linear + residual5_linear * residual5_linear); + return rzChiSquared < 4.677f; + } + + // when building T5, apply 99% chi2 cuts as default, and add to pT5 collection. But when adding T5 to TC collections, apply 95% cut to reduce the fake rate + TightCutFlag = false; + // The category numbers are related to module regions and layers, decoding of the region numbers can be found here in slide 2 table. https://github.com/SegmentLinking/TrackLooper/files/11420927/part.2.pdf + // The commented numbers after each case is the region code, and can look it up from the table to see which category it belongs to. 
For example, //0 means T5 built with Endcap 1,2,3,4,5 ps modules + if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 11) //0 + { + if (rzChiSquared < 94.470f) + TightCutFlag = true; + return true; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 16) //1 + { + if (rzChiSquared < 22.099f) + TightCutFlag = true; + return rzChiSquared < 37.956f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 15 and layer5 == 16) //2 + { + if (rzChiSquared < 7.992f) + TightCutFlag = true; + return rzChiSquared < 11.622f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8 and layer4 == 9) { + if (layer5 == 10) //3 + { + if (rzChiSquared < 111.390f) + TightCutFlag = true; + return true; + } + if (layer5 == 15) //4 + { + if (rzChiSquared < 18.351f) + TightCutFlag = true; + return rzChiSquared < 37.941f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 8 and layer5 == 9) //5 + { + if (rzChiSquared < 116.148f) + TightCutFlag = true; + return true; + } + if (layer4 == 8 and layer5 == 14) //6 + { + if (rzChiSquared < 19.352f) + TightCutFlag = true; + return rzChiSquared < 52.561f; + } else if (layer4 == 13 and layer5 == 14) //7 + { + if (rzChiSquared < 10.392f) + TightCutFlag = true; + return rzChiSquared < 13.76f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 7 and layer5 == 8) //8 + { + if (rzChiSquared < 27.824f) + TightCutFlag = true; + return rzChiSquared < 44.247f; + } else if (layer4 == 7 and layer5 == 13) //9 + { + if (rzChiSquared < 18.145f) + TightCutFlag = true; + return rzChiSquared < 33.752f; + } else if (layer4 == 12 and layer5 == 13) //10 + { + if (rzChiSquared < 13.308f) + TightCutFlag = true; + return rzChiSquared < 21.213f; + } else if (layer4 == 4 and layer5 == 5) //11 + { + if (rzChiSquared < 15.627f) + TightCutFlag = true; + return rzChiSquared < 29.035f; + } else if (layer4 == 4 and layer5 == 12) //12 + { + if 
(rzChiSquared < 14.64f) + TightCutFlag = true; + return rzChiSquared < 23.037f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 15) //14 + { + if (rzChiSquared < 24.662f) + TightCutFlag = true; + return rzChiSquared < 41.036f; + } else if (layer4 == 14 and layer5 == 15) //15 + { + if (rzChiSquared < 8.866f) + TightCutFlag = true; + return rzChiSquared < 14.092f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 == 8 and layer5 == 14) //16 + { + if (rzChiSquared < 23.730f) + TightCutFlag = true; + return rzChiSquared < 23.748f; + } + if (layer4 == 13 and layer5 == 14) //17 + { + if (rzChiSquared < 10.772f) + TightCutFlag = true; + return rzChiSquared < 17.945f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 5 and layer5 == 6) //18 + { + if (rzChiSquared < 6.065f) + TightCutFlag = true; + return rzChiSquared < 8.803f; + } else if (layer4 == 5 and layer5 == 12) //19 + { + if (rzChiSquared < 5.693f) + TightCutFlag = true; + return rzChiSquared < 7.930f; + } + + else if (layer4 == 12 and layer5 == 13) //20 + { + if (rzChiSquared < 5.473f) + TightCutFlag = true; + return rzChiSquared < 7.626f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(Triplets const& tripletsInGPU, + Segments const& segmentsInGPU, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex) { + unsigned int innerOuterSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; + unsigned int outerInnerSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; + unsigned int innerOuterOuterMiniDoubletIndex = + segmentsInGPU.mdIndices[2 * innerOuterSegmentIndex + 1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = + segmentsInGPU.mdIndices[2 * outerInnerSegmentIndex]; //outer triplet inner segment inner MD index + + return (innerOuterOuterMiniDoubletIndex == 
outerInnerInnerMiniDoubletIndex); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeErrorInRadius(TAcc const& acc, + float* x1Vec, + float* y1Vec, + float* x2Vec, + float* y2Vec, + float* x3Vec, + float* y3Vec, + float& minimumRadius, + float& maximumRadius) { + //brute force + float candidateRadius; + float g, f; + minimumRadius = lst_INF; + maximumRadius = 0.f; + for (size_t i = 0; i < 3; i++) { + float x1 = x1Vec[i]; + float y1 = y1Vec[i]; + for (size_t j = 0; j < 3; j++) { + float x2 = x2Vec[j]; + float y2 = y2Vec[j]; + for (size_t k = 0; k < 3; k++) { + float x3 = x3Vec[k]; + float y3 = y3Vec[k]; + candidateRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, g, f); + maximumRadius = alpaka::math::max(acc, candidateRadius, maximumRadius); + minimumRadius = alpaka::math::min(acc, candidateRadius, minimumRadius); + } + } + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE12378(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.178f; + float bridgeInvRadiusErrorBound = 0.507f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + /*bounds for high Pt taken from : 
http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBB(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.1512f; + float bridgeInvRadiusErrorBound = 0.1781f; + + if (innerRadius > 2.0f / (2.f * k2Rinv1GeVf)) { + innerInvRadiusErrorBound = 0.4449f; + bridgeInvRadiusErrorBound = 0.4033f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.1781f; + float bridgeInvRadiusErrorBound = 0.2167f; + + if (innerRadius > 2.0f / (2.f * k2Rinv1GeVf)) { + innerInvRadiusErrorBound = 0.4750f; + bridgeInvRadiusErrorBound = 0.3903f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, 
bridgeInvRadiusMin, bridgeInvRadiusMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE23478(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.2097f; + float bridgeInvRadiusErrorBound = 0.8557f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE34578(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.066f; + float bridgeInvRadiusErrorBound = 0.617f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE 
bool matchRadiiBBEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.6376f; + float bridgeInvRadiusErrorBound = 2.1381f; + + if (innerRadius > 2.0f / (2.f * k2Rinv1GeVf)) //as good as no selections! + { + innerInvRadiusErrorBound = 12.9173f; + bridgeInvRadiusErrorBound = 5.1700f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBEEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float innerRadiusMin2S, + float innerRadiusMax2S, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 3.7280f; + + if (innerRadius > 2.0f / (2.f * k2Rinv1GeVf)) { + innerInvRadiusErrorBound = 23.2713f; + bridgeInvRadiusErrorBound = 21.7980f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return 
checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0 / innerRadiusMax2S), + alpaka::math::max(acc, innerInvRadiusMax, 1.0 / innerRadiusMin2S), + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0 / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0 / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiEEEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float innerRadiusMin2S, + float innerRadiusMax2S, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 2.2091f; + + if (innerRadius > 2.0f / (2.f * k2Rinv1GeVf)) { + innerInvRadiusErrorBound = 22.5226f; + bridgeInvRadiusErrorBound = 21.0966f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0 / innerRadiusMax2S), + alpaka::math::max(acc, innerInvRadiusMax, 1.0 / innerRadiusMin2S), + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0 / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0 / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, + Modules const& modulesInGPU, + const uint16_t* lowerModuleIndices, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + unsigned int nPoints = 5, + bool anchorHits = true) { + /* + Bool anchorHits required to deal with a weird edge case wherein + the hits ultimately used in the regression are anchor hits, but the + lower modules need not all be 
Pixel Modules (in case of PS). Similarly, + when we compute the chi squared for the non-anchor hits, the "partner module" + need not always be a PS strip module, but all non-anchor hits sit on strip + modules. + */ + + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + float inv3 = kStripPSZpitch / kWidth2S; + for (size_t i = 0; i < nPoints; i++) { + moduleType = modulesInGPU.moduleType[lowerModuleIndices[i]]; + moduleSubdet = modulesInGPU.subdets[lowerModuleIndices[i]]; + moduleSide = modulesInGPU.sides[lowerModuleIndices[i]]; + const float& drdz = modulesInGPU.drdzs[lowerModuleIndices[i]]; + slopes[i] = modulesInGPU.dxdys[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + + if (anchorHits) { + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } else { + delta2[i] = (inv3 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side! 
+ */ + if (anchorHits) { + delta2[i] = inv2; + } else { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 500.f * inv1; + isFlat[i] = false; + } else { +#ifdef WARNINGS + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); +#endif + } + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusUsingRegression(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float& g, + float& f, + float* sigmas2, + float& chiSquared) { + float radius = 0.f; + + // Some extra variables + // the two variables will be called x1 and x2, and y (which is x^2 + y^2) + + float sigmaX1Squared = 0.f; + float sigmaX2Squared = 0.f; + float sigmaX1X2 = 0.f; + float sigmaX1y = 0.f; + float sigmaX2y = 0.f; + float sigmaY = 0.f; + float sigmaX1 = 0.f; + float sigmaX2 = 0.f; + float sigmaOne = 0.f; + + float xPrime, yPrime, absArctanSlope, angleM; + for (size_t i = 0; i < nPoints; i++) { + // Computing sigmas is a very tricky affair + // if the module is tilted or endcap, we need to use the slopes properly! + + absArctanSlope = + ((slopes[i] != lst_INF) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); + + if (xs[i] > 0 and ys[i] > 0) { + angleM = 0.5f * float(M_PI) - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + 0.5f * float(M_PI); + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + 0.5f * float(M_PI)); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(0.5f * float(M_PI) - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigmas2[i] = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + + sigmaX1Squared += (xs[i] * xs[i]) / sigmas2[i]; + sigmaX2Squared += (ys[i] * ys[i]) / sigmas2[i]; + sigmaX1X2 += (xs[i] * ys[i]) / sigmas2[i]; + sigmaX1y += (xs[i] * (xs[i] * xs[i] + ys[i] * ys[i])) / sigmas2[i]; + sigmaX2y += (ys[i] * (xs[i] * xs[i] + ys[i] * ys[i])) / sigmas2[i]; + sigmaY += (xs[i] * xs[i] + ys[i] * ys[i]) / sigmas2[i]; + sigmaX1 += xs[i] / sigmas2[i]; + sigmaX2 += ys[i] / sigmas2[i]; + sigmaOne += 1.0f / sigmas2[i]; + } + float denominator = (sigmaX1X2 - sigmaX1 * sigmaX2) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX1Squared - sigmaX1 * sigmaX1) * (sigmaX2Squared - sigmaX2 * sigmaX2); + + float twoG = ((sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX1y - sigmaX1 * sigmaY) * (sigmaX2Squared - sigmaX2 * sigmaX2)) / + denominator; + float twoF = ((sigmaX1y - sigmaX1 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1Squared - sigmaX1 * sigmaX1)) / + denominator; + + float c = -(sigmaY - twoG * sigmaX1 - twoF * sigmaX2) / sigmaOne; + g = 0.5f * twoG; + f = 0.5f * twoF; + if (g * g + f * f - c < 0) { +#ifdef WARNINGS + printf("FATAL! 
r^2 < 0!\n"); +#endif + chiSquared = -1; + return -1; + } + + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + // compute chi squared + chiSquared = 0.f; + for (size_t i = 0; i < nPoints; i++) { + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / sigmas2[i]; + } + return radius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquared(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + // given values of (g, f, radius) and a set of points (and its uncertainties) + // compute chi squared + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = + ((slopes[i] != lst_INF) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) : 0.5f * float(M_PI)); + if (xs[i] > 0 and ys[i] > 0) { + angleM = 0.5f * float(M_PI) - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + 0.5f * float(M_PI); + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + 0.5f * float(M_PI)); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(0.5f * float(M_PI) - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; + } + return chiSquared; + } + + template + 
  // Iteratively refine the in/out bending angles (betaIn, betaOut) and the
  // pT estimate pt_beta for a T5 candidate, using the drift lengths of the
  // inner/outer segments (sdIn_dr, sdOut_dr) and the overall dr. lIn encodes
  // the inner layer (lIn >= 11 is treated as endcap below; lIn == 0 is a
  // special single-update case). betaIn/betaOut/pt_beta are updated in place.
  ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationsT5(TAcc const& acc,
                                                               float& betaIn,
                                                               float& betaOut,
                                                               float betaAv,
                                                               float& pt_beta,
                                                               float sdIn_dr,
                                                               float sdOut_dr,
                                                               float dr,
                                                               float lIn) {
    // lIn == 0: only correct betaOut once and stop.
    if (lIn == 0) {
      betaOut += alpaka::math::copysign(
          acc,
          alpaka::math::asin(
              acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
          betaOut);
      return;
    }

    // Case 1: betaIn and betaOut agree in sign and pt_beta is moderate.
    if (betaIn * betaOut > 0.f and
        (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or
         (lIn >= 11 and alpaka::math::abs(acc, pt_beta) <
                            8.f * kPt_betaMax)))  //and the pt_beta is well-defined; less strict for endcap-endcap
    {
      const float betaInUpd =
          betaIn +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
              betaIn);  //FIXME: need a faster version
      const float betaOutUpd =
          betaOut +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
              betaOut);  //FIXME: need a faster version
      betaAv = 0.5f * (betaInUpd + betaOutUpd);

      //1st update
      const float pt_beta_inv =
          1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv));  //get a better pt estimate

      betaIn += alpaka::math::copysign(
          acc,
          alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)),
          betaIn);  //FIXME: need a faster version
      betaOut += alpaka::math::copysign(
          acc,
          alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)),
          betaOut);  //FIXME: need a faster version
      //update the av and pt
      betaAv = 0.5f * (betaIn + betaOut);
      //2nd update
      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
    }
    // Case 2: barrel-seeded (lIn < 11) with |betaOut| much smaller than |betaIn|:
    // the in-angle is trusted and drives the sign of the corrections.
    else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) &&
             alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax)  //use betaIn sign as ref
    {
      const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn);

      const float betaInUpd =
          betaIn +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)),
              betaIn);  //FIXME: need a faster version
      const float betaOutUpd =
          betaOut +
          alpaka::math::copysign(
              acc,
              alpaka::math::asin(
                  acc,
                  alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)),
              betaIn);  //FIXME: need a faster version
              // NOTE(review): betaIn (not betaOut) is the copysign reference
              // here, consistent with the "use betaIn sign as ref" comment
              // above — presumably intentional; confirm against upstream.
      betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn))
                   ? (0.5f * (betaInUpd + betaOutUpd))
                   : betaInUpd;

      //1st update
      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
      betaIn += alpaka::math::copysign(
          acc,
          alpaka::math::asin(
              acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
          betaIn);  //FIXME: need a faster version
      betaOut += alpaka::math::copysign(
          acc,
          alpaka::math::asin(
              acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
          betaIn);  //FIXME: need a faster version
          // NOTE(review): betaIn used as sign reference again — see note above.
      //update the av and pt
      betaAv = 0.5f * (betaIn + betaOut);
      //2nd update
      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
    }
  }

  template <typename TAcc>
  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc,
                                                                   Modules const& modulesInGPU,
                                                                   MiniDoublets const& mdsInGPU,
                                                                   Segments const& segmentsInGPU,
                                                                   uint16_t innerInnerLowerModuleIndex,
                                                                   uint16_t innerOuterLowerModuleIndex,
                                                                   uint16_t outerInnerLowerModuleIndex,
                                                                   uint16_t outerOuterLowerModuleIndex,
                                                                   unsigned int innerSegmentIndex,
                                                                   unsigned int outerSegmentIndex,
                                                                   unsigned int firstMDIndex,
                                                                   unsigned int secondMDIndex,
                                                                   unsigned int thirdMDIndex,
unsigned int fourthMDIndex) { + bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS); + + float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; + float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex]; + + float z_InLo = mdsInGPU.anchorZ[firstMDIndex]; + float z_InOut = mdsInGPU.anchorZ[secondMDIndex]; + float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + + float zHi = z_InLo + (z_InLo + kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + + (zpitch_InLo + zpitch_OutLo); + float zLo = z_InLo + (z_InLo - kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 
1.f : dzDrtScale) - + (zpitch_InLo + zpitch_OutLo); + + //Cut 1 - z compatibility + if ((z_OutLo < zLo) || (z_OutLo > zHi)) + return false; + + float drt_OutLo_InLo = (rt_OutLo - rt_InLo); + float r3_InLo = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + float drt_InSeg = rt_InOut - rt_InLo; + float dz_InSeg = z_InOut - z_InLo; + float dr3_InSeg = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + float coshEta = dr3_InSeg / drt_InSeg; + float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; + + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (r3_InLo / rt_InLo); + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + dzErr += muls2 * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; + const float zWindow = + dzErr / drt_InSeg * drt_OutLo_InLo + + (zpitch_InLo + zpitch_OutLo); //FIXME for ptCut lower than ~0.8 need to add curv path correction + float zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; + float zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ? 
1.f : dzDrtScale) + zWindow; + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + if ((z_OutLo < zLoPointed) || (z_OutLo > zHiPointed)) + return false; + + float pvOffset = 0.1f / rt_OutLo; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]); + // Cut #3: FIXME:deltaPhiPos can be tighter + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]); + float midPointY = 0.5f * (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]); + float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #4: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions + + float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float alpha_OutLo = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]); + + bool isEC_lastLayer = modulesInGPU.subdets[outerOuterLowerModuleIndex] == Endcap and + modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + + alpha_OutUp = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorPhi[fourthMDIndex]); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - 
mdsInGPU.anchorY[firstMDIndex]; + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + alpha_OutUp_lowEdge = phi_mpi_pi(acc, + phi(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex], + mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) - + mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + + tl_axis_highEdge_x = mdsInGPU.anchorHighEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + tl_axis_highEdge_y = mdsInGPU.anchorHighEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + tl_axis_lowEdge_x = mdsInGPU.anchorLowEdgeX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + tl_axis_lowEdge_y = mdsInGPU.anchorLowEdgeY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + + betaOutRHmin = + -alpha_OutUp_highEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mdsInGPU.anchorHighEdgePhi[fourthMDIndex]); + betaOutRHmax = + -alpha_OutUp_lowEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mdsInGPU.anchorLowEdgePhi[fourthMDIndex]); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + float corrF = 1.f; + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = + 
alpaka::math::sqrt(acc, + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + float betaInCut = + alpaka::math::asin( + acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / drt_InSeg); + + //Cut #5: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = drt_tl_axis * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + int lIn = 5; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = alpaka::math::sqrt(acc, + (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) * + (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) + + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) * + (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex])); + float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex]; + + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.f; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.f; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confimm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi = alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = + (alpaka::math::sqrt(acc, + mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] + + mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] + + mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + float betaOutCut = + alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if 
// Quintuplet compatibility test for the Barrel-Barrel-Endcap-Endcap (and
// barrel-inner / endcap-outer mixed) module configuration. Mirrors the BBBB
// version but with endcap-specific rt window cuts in place of the z window cuts.
// Returns true only if all cuts pass.
template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc,
                                                                 Modules const& modulesInGPU,
                                                                 MiniDoublets const& mdsInGPU,
                                                                 Segments const& segmentsInGPU,
                                                                 uint16_t innerInnerLowerModuleIndex,
                                                                 uint16_t innerOuterLowerModuleIndex,
                                                                 uint16_t outerInnerLowerModuleIndex,
                                                                 uint16_t outerOuterLowerModuleIndex,
                                                                 unsigned int innerSegmentIndex,
                                                                 unsigned int outerSegmentIndex,
                                                                 unsigned int firstMDIndex,
                                                                 unsigned int secondMDIndex,
                                                                 unsigned int thirdMDIndex,
                                                                 unsigned int fourthMDIndex) {
  bool isPS_InLo = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS);
  bool isPS_OutLo = (modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS);

  float rt_InLo = mdsInGPU.anchorRt[firstMDIndex];
  float rt_InOut = mdsInGPU.anchorRt[secondMDIndex];
  float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex];

  float z_InLo = mdsInGPU.anchorZ[firstMDIndex];
  float z_InOut = mdsInGPU.anchorZ[secondMDIndex];
  float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex];

  float alpha1GeV_OutLo =
      alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax));

  float dzDrtScale =
      alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo;  // The track can bend in r-z plane slightly
  float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch);
  float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch);
  float zGeom = zpitch_InLo + zpitch_OutLo;

  // Cut #0: Preliminary (Only here in endcap case) — both MDs must be on the same side.
  if (z_InLo * z_OutLo <= 0)
    return false;

  float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo);
  bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS;
  float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch : kStrip2SZpitch;
  float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InLo);
  float rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) -
               rtGeom1;  //slope correction only on the lower end
  float rtOut = rt_OutLo;

  //Cut #1: rt condition
  if (rtOut < rtLo)
    return false;

  float zInForHi = z_InLo - zGeom1 - dLum;
  if (zInForHi * z_InLo < 0) {
    // Guard against sign flip / division through zero when the corrected z crosses the origin.
    zInForHi = alpaka::math::copysign(acc, 0.1f, z_InLo);
  }
  float rtHi = rt_InLo * (1.f + (z_OutLo - z_InLo + zGeom1) / zInForHi) + rtGeom1;

  //Cut #2: rt condition
  if ((rt_OutLo < rtLo) || (rt_OutLo > rtHi))
    return false;

  float rIn = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo);
  const float drtSDIn = rt_InOut - rt_InLo;
  const float dzSDIn = z_InOut - z_InLo;
  const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) -
                        alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo);

  const float coshEta = dr3SDIn / drtSDIn;  //direction estimate
  const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo);
  const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f);
  const float zGeom1_another = kPixelPSZpitch;
  float kZ = (z_OutLo - z_InLo) / dzSDIn;
  float drtErr =
      zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ);
  const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (rIn / rt_InLo);
  const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f;
  drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta;
  drtErr = alpaka::math::sqrt(acc, drtErr);
  // NOTE(review): drtErr is computed but not used below, and Cut #3 re-checks the
  // rtLo/rtHi bounds already tested in Cuts #1/#2 (only the kZ < 0 test is new) —
  // verify against upstream whether a pointed rt window using drtErr was intended.

  //Cut #3: rt-z pointed
  if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi))
    return false;

  const float pvOffset = 0.1f / rt_OutLo;
  float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset);

  float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]);

  //Cut #4: deltaPhiPos can be tighter
  if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut)
    return false;

  float midPointX = 0.5f * (mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]);
  float midPointY = 0.5f * (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]);
  float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex];
  float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex];

  float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY);
  // Cut #5: deltaPhiChange
  if (alpaka::math::abs(acc, dPhi) > dPhiCut)
    return false;

  float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]);
  float sdIn_alpha_min = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]);
  float sdIn_alpha_max = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]);
  float sdOut_alpha = sdIn_alpha;

  float sdOut_alphaOut = phi_mpi_pi(acc,
                                    phi(acc,
                                        mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex],
                                        mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) -
                                        mdsInGPU.anchorPhi[fourthMDIndex]);

  float sdOut_alphaOut_min = phi_mpi_pi(
      acc, __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMins[outerSegmentIndex]));
  float sdOut_alphaOut_max = phi_mpi_pi(
      acc, __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]) - __H2F(segmentsInGPU.dPhiMaxs[outerSegmentIndex]));

  float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex];
  float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex];

  float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]);

  float betaInRHmin = betaIn;
  float betaInRHmax = betaIn;
  float betaOut =
      -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]);

  float betaOutRHmin = betaOut;
  float betaOutRHmax = betaOut;

  bool isEC_secondLayer = (modulesInGPU.subdets[innerOuterLowerModuleIndex] == Endcap) and
                          (modulesInGPU.moduleType[innerOuterLowerModuleIndex] == TwoS);

  if (isEC_secondLayer) {
    betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha;
    betaInRHmax = betaIn - sdIn_alpha_max + sdIn_alpha;
  }

  betaOutRHmin = betaOut - sdOut_alphaOut_min + sdOut_alphaOut;
  betaOutRHmax = betaOut - sdOut_alphaOut_max + sdOut_alphaOut;

  // Order the RH min/max pairs by absolute value.
  float swapTemp;
  if (alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) {
    swapTemp = betaOutRHmin;
    betaOutRHmin = betaOutRHmax;
    betaOutRHmax = swapTemp;
  }

  if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) {
    swapTemp = betaInRHmin;
    betaInRHmin = betaInRHmax;
    betaInRHmax = swapTemp;
  }

  float sdIn_dr = alpaka::math::sqrt(acc,
                                     (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) *
                                             (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) +
                                         (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) *
                                             (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]));
  float sdIn_d = rt_InOut - rt_InLo;

  float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y);
  const float corrF = 1.f;
  float betaInCut =
      alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
      (0.02f / sdIn_d);

  //Cut #6: first beta cut
  if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut)
    return false;

  float betaAv = 0.5f * (betaIn + betaOut);
  float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);

  float lIn = 5;   // barrel inner layer code
  float lOut = 11; // endcap outer layer code

  float sdOut_dr = alpaka::math::sqrt(acc,
                                      (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) *
                                              (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) +
                                          (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) *
                                              (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]));
  float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex];

  runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn);

  const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0)
                               ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax))
                               : 0.;  //mean value of min,max is the old betaIn
  const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0)
                                ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax))
                                : 0.;
  betaInRHmin *= betaInMMSF;
  betaInRHmax *= betaInMMSF;
  betaOutRHmin *= betaOutMMSF;
  betaOutRHmax *= betaOutMMSF;

  float min_ptBeta_maxPtBeta = alpaka::math::min(
      acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax);  //need to confirm the range-out value of 7 GeV
  const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta);

  const float alphaInAbsReg =
      alpaka::math::max(acc,
                        alpaka::math::abs(acc, sdIn_alpha),
                        alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)));
  const float alphaOutAbsReg =
      alpaka::math::max(acc,
                        alpaka::math::abs(acc, sdOut_alpha),
                        alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)));
  const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo);
  const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo);
  const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum);
  const float sinDPhi = alpaka::math::sin(acc, dPhi);

  const float dBetaRIn2 = 0;  // TODO-RH
  float dBetaROut = 0;
  if (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == TwoS) {
    dBetaROut =
        (alpaka::math::sqrt(acc,
                            mdsInGPU.anchorHighEdgeX[fourthMDIndex] * mdsInGPU.anchorHighEdgeX[fourthMDIndex] +
                                mdsInGPU.anchorHighEdgeY[fourthMDIndex] * mdsInGPU.anchorHighEdgeY[fourthMDIndex]) -
         alpaka::math::sqrt(acc,
                            mdsInGPU.anchorLowEdgeX[fourthMDIndex] * mdsInGPU.anchorLowEdgeX[fourthMDIndex] +
                                mdsInGPU.anchorLowEdgeY[fourthMDIndex] * mdsInGPU.anchorLowEdgeY[fourthMDIndex])) *
        sinDPhi / dr;
  }

  const float dBetaROut2 = dBetaROut * dBetaROut;
  float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
                     (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2);

  //Cut #6: The real beta cut (NOTE(review): numbering duplicates the first beta cut above)
  if (alpaka::math::abs(acc, betaOut) >= betaOutCut)
    return false;

  float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d);
  float dBetaCut2 =
      (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 +
       0.25f *
           (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) *
           (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)));
  float dBeta = betaIn - betaOut;
  //Cut #7: Cut on dBeta
  return dBeta * dBeta <= dBetaCut2;
}
// Quintuplet compatibility test for the all-endcap (EEEE) module configuration.
// Mirrors the BBEE version but with endcap-only rt windows, an optional PS-PS
// pointed-rt cut, and segment-level dPhi quantities taken from the stored
// half-precision segment fields. Returns true only if all cuts pass.
template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc,
                                                                 Modules const& modulesInGPU,
                                                                 MiniDoublets const& mdsInGPU,
                                                                 Segments const& segmentsInGPU,
                                                                 uint16_t innerInnerLowerModuleIndex,
                                                                 uint16_t innerOuterLowerModuleIndex,
                                                                 uint16_t outerInnerLowerModuleIndex,
                                                                 uint16_t outerOuterLowerModuleIndex,
                                                                 unsigned int innerSegmentIndex,
                                                                 unsigned int outerSegmentIndex,
                                                                 unsigned int firstMDIndex,
                                                                 unsigned int secondMDIndex,
                                                                 unsigned int thirdMDIndex,
                                                                 unsigned int fourthMDIndex) {
  float rt_InLo = mdsInGPU.anchorRt[firstMDIndex];
  float rt_InOut = mdsInGPU.anchorRt[secondMDIndex];
  float rt_OutLo = mdsInGPU.anchorRt[thirdMDIndex];

  float z_InLo = mdsInGPU.anchorZ[firstMDIndex];
  float z_InOut = mdsInGPU.anchorZ[secondMDIndex];
  float z_OutLo = mdsInGPU.anchorZ[thirdMDIndex];

  float alpha1GeV_OutLo =
      alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax));

  float dzDrtScale =
      alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo;  // The track can bend in r-z plane slightly

  // Cut #0: Preliminary (Only here in endcap case) — both MDs must be on the same side.
  if ((z_InLo * z_OutLo) <= 0)
    return false;

  float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo);
  bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerInnerLowerModuleIndex] == PS;
  bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS;

  // Geometric rt margin depends on how many of the two inner modules are PS.
  float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * kPixelPSZpitch
                 : (isInSgInnerMDPS or isOutSgInnerMDPS) ? kPixelPSZpitch + kStrip2SZpitch
                                                         : 2.f * kStrip2SZpitch;

  float dz = z_OutLo - z_InLo;
  float rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom;  //slope correction only on the lower end

  float rtOut = rt_OutLo;

  //Cut #1: rt condition

  float rtHi = rt_InLo * (1.f + dz / (z_InLo - dLum)) + rtGeom;

  if ((rtOut < rtLo) || (rtOut > rtHi))
    return false;

  bool isInSgOuterMDPS = modulesInGPU.moduleType[innerOuterLowerModuleIndex] == PS;

  const float drtSDIn = rt_InOut - rt_InLo;
  const float dzSDIn = z_InOut - z_InLo;
  const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) -
                        alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo);
  float coshEta = dr3SDIn / drtSDIn;  //direction estimate
  float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo);
  float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f);

  float kZ = (z_OutLo - z_InLo) / dzSDIn;
  float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f);

  float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f;

  float drtErr =
      alpaka::math::sqrt(acc,
                         kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) +
                             muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta);

  float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn);
  float rtWindow = drtErr + rtGeom;
  float rtLo_point = rt_InLo + drtMean / dzDrtScale - rtWindow;
  float rtHi_point = rt_InLo + drtMean + rtWindow;

  // Cut #3: rt-z pointed
  // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765

  if (isInSgInnerMDPS and isInSgOuterMDPS)  // If both PS then we can point
  {
    if (kZ < 0 || rtOut < rtLo_point || rtOut > rtHi_point)
      return false;
  }

  float pvOffset = 0.1f / rtOut;
  float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset);

  float deltaPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[secondMDIndex]);

  // Cut #4: deltaPhiPos
  if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut)
    return false;

  float midPointX = 0.5f * (mdsInGPU.anchorX[firstMDIndex] + mdsInGPU.anchorX[thirdMDIndex]);
  float midPointY = 0.5f * (mdsInGPU.anchorY[firstMDIndex] + mdsInGPU.anchorY[thirdMDIndex]);
  float diffX = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex];
  float diffY = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex];

  float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY);

  // Cut #5: deltaPhiChange
  if (alpaka::math::abs(acc, dPhi) > dPhiCut)
    return false;

  float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]);
  float sdOut_alpha = sdIn_alpha;  //weird
  float sdOut_dPhiPos = phi_mpi_pi(acc, mdsInGPU.anchorPhi[fourthMDIndex] - mdsInGPU.anchorPhi[thirdMDIndex]);

  float sdOut_dPhiChange = __H2F(segmentsInGPU.dPhiChanges[outerSegmentIndex]);
  float sdOut_dPhiChange_min = __H2F(segmentsInGPU.dPhiChangeMins[outerSegmentIndex]);
  float sdOut_dPhiChange_max = __H2F(segmentsInGPU.dPhiChangeMaxs[outerSegmentIndex]);

  float sdOut_alphaOutRHmin = phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos);
  float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos);
  float sdOut_alphaOut = phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos);

  float tl_axis_x = mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[firstMDIndex];
  float tl_axis_y = mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[firstMDIndex];

  float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]);

  float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]);
  float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]);
  float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha;
  float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha;

  float betaOut =
      -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[fourthMDIndex]);

  float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut;
  float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut;

  // Order the RH min/max pairs by absolute value.
  float swapTemp;
  if (alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) {
    swapTemp = betaOutRHmin;
    betaOutRHmin = betaOutRHmax;
    betaOutRHmax = swapTemp;
  }

  if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) {
    swapTemp = betaInRHmin;
    betaInRHmin = betaInRHmax;
    betaInRHmax = swapTemp;
  }
  float sdIn_dr = alpaka::math::sqrt(acc,
                                     (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) *
                                             (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) +
                                         (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) *
                                             (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]));
  float sdIn_d = rt_InOut - rt_InLo;

  float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y);
  const float corrF = 1.f;
  float betaInCut =
      alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
      (0.02f / sdIn_d);

  //Cut #6: first beta cut
  if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut)
    return false;

  float betaAv = 0.5f * (betaIn + betaOut);
  float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);

  int lIn = 11;   //endcap
  int lOut = 13;  //endcap

  float sdOut_dr = alpaka::math::sqrt(acc,
                                      (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) *
                                              (mdsInGPU.anchorX[fourthMDIndex] - mdsInGPU.anchorX[thirdMDIndex]) +
                                          (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]) *
                                              (mdsInGPU.anchorY[fourthMDIndex] - mdsInGPU.anchorY[thirdMDIndex]));
  float sdOut_d = mdsInGPU.anchorRt[fourthMDIndex] - mdsInGPU.anchorRt[thirdMDIndex];

  runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn);

  const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0)
                               ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax))
                               : 0.;  //mean value of min,max is the old betaIn
  const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0)
                                ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax))
                                : 0.;
  betaInRHmin *= betaInMMSF;
  betaInRHmax *= betaInMMSF;
  betaOutRHmin *= betaOutMMSF;
  betaOutRHmax *= betaOutMMSF;

  float min_ptBeta_maxPtBeta = alpaka::math::min(
      acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax);  //need to confirm the range-out value of 7 GeV
  const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta);

  const float alphaInAbsReg =
      alpaka::math::max(acc,
                        alpaka::math::abs(acc, sdIn_alpha),
                        alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)));
  const float alphaOutAbsReg =
      alpaka::math::max(acc,
                        alpaka::math::abs(acc, sdOut_alpha),
                        alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax)));
  const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo);
  const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo);
  const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum);

  const float dBetaRIn2 = 0;  // TODO-RH

  float dBetaROut2 = 0;  //TODO-RH
  float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) +
                     (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2);

  //Cut #6: The real beta cut (NOTE(review): numbering duplicates the first beta cut above)
  if (alpaka::math::abs(acc, betaOut) >= betaOutCut)
    return false;

  float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d);
  float dBetaCut2 =
      (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 +
       0.25f *
           (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) *
           (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)));
  float dBeta = betaIn - betaOut;
  //Cut #7: Cut on dBeta
  return dBeta * dBeta <= dBetaCut2;
}
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut2 = 0; //TODO-RH + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if (alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + //Cut #7: Cut on dBeta + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + short innerInnerLowerModuleSubdet = modulesInGPU.subdets[innerInnerLowerModuleIndex]; + short innerOuterLowerModuleSubdet = modulesInGPU.subdets[innerOuterLowerModuleIndex]; + short outerInnerLowerModuleSubdet = modulesInGPU.subdets[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + + if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + 
outerInnerLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Barrel) { + return runQuintupletDefaultAlgoBBBB(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBEE(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBBB(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Endcap and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBEE(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Endcap and innerOuterLowerModuleSubdet == Endcap and + 
outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoEEEE(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } + + return false; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, + Modules& modulesInGPU, + MiniDoublets& mdsInGPU, + Segments& segmentsInGPU, + Triplets& tripletsInGPU, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float& innerRadius, + float& outerRadius, + float& bridgeRadius, + float& regressionG, + float& regressionF, + float& regressionRadius, + float& rzChiSquared, + float& chiSquared, + float& nonAnchorChiSquared, + bool& TightCutFlag) { + unsigned int firstSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex]; + unsigned int secondSegmentIndex = tripletsInGPU.segmentIndices[2 * innerTripletIndex + 1]; + unsigned int thirdSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex]; + unsigned int fourthSegmentIndex = tripletsInGPU.segmentIndices[2 * outerTripletIndex + 1]; + + unsigned int innerOuterOuterMiniDoubletIndex = + segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = + segmentsInGPU.mdIndices[2 * thirdSegmentIndex]; //outer triplet inner segment inner MD index + + //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! 
+ if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) + return false; + + unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * firstSegmentIndex]; + unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex]; + unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * secondSegmentIndex + 1]; + unsigned int fourthMDIndex = segmentsInGPU.mdIndices[2 * thirdSegmentIndex + 1]; + unsigned int fifthMDIndex = segmentsInGPU.mdIndices[2 * fourthSegmentIndex + 1]; + + if (not runQuintupletAlgoSelector(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + firstSegmentIndex, + thirdSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex)) + return false; + + if (not runQuintupletAlgoSelector(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex4, + lowerModuleIndex5, + firstSegmentIndex, + fourthSegmentIndex, + firstMDIndex, + secondMDIndex, + fourthMDIndex, + fifthMDIndex)) + return false; + + float x1 = mdsInGPU.anchorX[firstMDIndex]; + float x2 = mdsInGPU.anchorX[secondMDIndex]; + float x3 = mdsInGPU.anchorX[thirdMDIndex]; + float x4 = mdsInGPU.anchorX[fourthMDIndex]; + float x5 = mdsInGPU.anchorX[fifthMDIndex]; + + float y1 = mdsInGPU.anchorY[firstMDIndex]; + float y2 = mdsInGPU.anchorY[secondMDIndex]; + float y3 = mdsInGPU.anchorY[thirdMDIndex]; + float y4 = mdsInGPU.anchorY[fourthMDIndex]; + float y5 = mdsInGPU.anchorY[fifthMDIndex]; + + //construct the arrays + float x1Vec[] = {x1, x1, x1}; + float y1Vec[] = {y1, y1, y1}; + float x2Vec[] = {x2, x2, x2}; + float y2Vec[] = {y2, y2, y2}; + float x3Vec[] = {x3, x3, x3}; + float y3Vec[] = {y3, y3, y3}; + + if (modulesInGPU.subdets[lowerModuleIndex1] == Endcap and modulesInGPU.moduleType[lowerModuleIndex1] == TwoS) { + x1Vec[1] = mdsInGPU.anchorLowEdgeX[firstMDIndex]; + x1Vec[2] = mdsInGPU.anchorHighEdgeX[firstMDIndex]; + + 
y1Vec[1] = mdsInGPU.anchorLowEdgeY[firstMDIndex]; + y1Vec[2] = mdsInGPU.anchorHighEdgeY[firstMDIndex]; + } + if (modulesInGPU.subdets[lowerModuleIndex2] == Endcap and modulesInGPU.moduleType[lowerModuleIndex2] == TwoS) { + x2Vec[1] = mdsInGPU.anchorLowEdgeX[secondMDIndex]; + x2Vec[2] = mdsInGPU.anchorHighEdgeX[secondMDIndex]; + + y2Vec[1] = mdsInGPU.anchorLowEdgeY[secondMDIndex]; + y2Vec[2] = mdsInGPU.anchorHighEdgeY[secondMDIndex]; + } + if (modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.moduleType[lowerModuleIndex3] == TwoS) { + x3Vec[1] = mdsInGPU.anchorLowEdgeX[thirdMDIndex]; + x3Vec[2] = mdsInGPU.anchorHighEdgeX[thirdMDIndex]; + + y3Vec[1] = mdsInGPU.anchorLowEdgeY[thirdMDIndex]; + y3Vec[2] = mdsInGPU.anchorHighEdgeY[thirdMDIndex]; + } + + float innerRadiusMin2S, innerRadiusMax2S; + computeErrorInRadius(acc, x1Vec, y1Vec, x2Vec, y2Vec, x3Vec, y3Vec, innerRadiusMin2S, innerRadiusMax2S); + + for (int i = 0; i < 3; i++) { + x1Vec[i] = x4; + y1Vec[i] = y4; + } + if (modulesInGPU.subdets[lowerModuleIndex4] == Endcap and modulesInGPU.moduleType[lowerModuleIndex4] == TwoS) { + x1Vec[1] = mdsInGPU.anchorLowEdgeX[fourthMDIndex]; + x1Vec[2] = mdsInGPU.anchorHighEdgeX[fourthMDIndex]; + + y1Vec[1] = mdsInGPU.anchorLowEdgeY[fourthMDIndex]; + y1Vec[2] = mdsInGPU.anchorHighEdgeY[fourthMDIndex]; + } + + float bridgeRadiusMin2S, bridgeRadiusMax2S; + computeErrorInRadius(acc, x2Vec, y2Vec, x3Vec, y3Vec, x1Vec, y1Vec, bridgeRadiusMin2S, bridgeRadiusMax2S); + + for (int i = 0; i < 3; i++) { + x2Vec[i] = x5; + y2Vec[i] = y5; + } + if (modulesInGPU.subdets[lowerModuleIndex5] == Endcap and modulesInGPU.moduleType[lowerModuleIndex5] == TwoS) { + x2Vec[1] = mdsInGPU.anchorLowEdgeX[fifthMDIndex]; + x2Vec[2] = mdsInGPU.anchorHighEdgeX[fifthMDIndex]; + + y2Vec[1] = mdsInGPU.anchorLowEdgeY[fifthMDIndex]; + y2Vec[2] = mdsInGPU.anchorHighEdgeY[fifthMDIndex]; + } + + float outerRadiusMin2S, outerRadiusMax2S; + computeErrorInRadius(acc, x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, 
y2Vec, outerRadiusMin2S, outerRadiusMax2S); + + float g, f; + outerRadius = tripletsInGPU.circleRadius[outerTripletIndex]; + bridgeRadius = computeRadiusFromThreeAnchorHits(acc, x2, y2, x3, y3, x4, y4, g, f); + innerRadius = tripletsInGPU.circleRadius[innerTripletIndex]; + g = tripletsInGPU.circleCenterX[innerTripletIndex]; + f = tripletsInGPU.circleCenterY[innerTripletIndex]; + +#ifdef USE_RZCHI2 + float inner_pt = 2 * k2Rinv1GeVf * innerRadius; + + if (not passT5RZConstraint(acc, + modulesInGPU, + mdsInGPU, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex, + fifthMDIndex, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rzChiSquared, + inner_pt, + innerRadius, + g, + f, + TightCutFlag)) + return false; +#else + rzChiSquared = -1; +#endif + if (innerRadius < 0.95f * ptCut / (2.f * k2Rinv1GeVf)) + return false; + + //split by category + bool matchedRadii; + if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and modulesInGPU.subdets[lowerModuleIndex2] == Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == Barrel and modulesInGPU.subdets[lowerModuleIndex4] == Barrel and + modulesInGPU.subdets[lowerModuleIndex5] == Barrel) { + matchedRadii = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius); + } else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == Barrel and + modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius); + } else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == Barrel and + modulesInGPU.subdets[lowerModuleIndex4] == Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + if 
(modulesInGPU.layers[lowerModuleIndex1] == 1) { + matchedRadii = + matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else if (modulesInGPU.layers[lowerModuleIndex1] == 2) { + matchedRadii = + matchRadiiBBBEE23478(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else { + matchedRadii = + matchRadiiBBBEE34578(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } + } + + else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and modulesInGPU.subdets[lowerModuleIndex2] == Barrel and + modulesInGPU.subdets[lowerModuleIndex3] == Endcap and modulesInGPU.subdets[lowerModuleIndex4] == Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else if (modulesInGPU.subdets[lowerModuleIndex1] == Barrel and + modulesInGPU.subdets[lowerModuleIndex2] == Endcap and + modulesInGPU.subdets[lowerModuleIndex3] == Endcap and + modulesInGPU.subdets[lowerModuleIndex4] == Endcap and + modulesInGPU.subdets[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBEEEE(acc, + innerRadius, + bridgeRadius, + outerRadius, + innerRadiusMin2S, + innerRadiusMax2S, + bridgeRadiusMin2S, + bridgeRadiusMax2S); + } else { + matchedRadii = matchRadiiEEEEE(acc, + innerRadius, + bridgeRadius, + outerRadius, + innerRadiusMin2S, + innerRadiusMax2S, + bridgeRadiusMin2S, + bridgeRadiusMax2S); + } + + //compute regression radius right here - this computation is expensive!!! 
+ if (not matchedRadii) + return false; + + float xVec[] = {x1, x2, x3, x4, x5}; + float yVec[] = {y1, y2, y3, y4, y5}; + const uint16_t lowerModuleIndices[] = { + lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + // 5 categories for sigmas + float sigmas2[5], delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + + computeSigmasForRegression(acc, modulesInGPU, lowerModuleIndices, delta1, delta2, slopes, isFlat); + regressionRadius = computeRadiusUsingRegression(acc, + Params_T5::kLayers, + xVec, + yVec, + delta1, + delta2, + slopes, + isFlat, + regressionG, + regressionF, + sigmas2, + chiSquared); + +#ifdef USE_T5_DNN + unsigned int mdIndices[] = {firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex}; + float inference = t5dnn::runInference(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + xVec, + yVec, + mdIndices, + lowerModuleIndices, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius); + TightCutFlag = TightCutFlag and (inference > t5dnn::kLSTWp2); // T5-in-TC cut + if (inference <= t5dnn::kLSTWp2) // T5-building cut + return false; +#endif + +#ifdef USE_RPHICHI2 + // extra chi squared cuts! + if (regressionRadius < 5.0f / (2.f * k2Rinv1GeVf)) { + if (not passChiSquaredConstraint(modulesInGPU, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + chiSquared)) + return false; + } +#endif + + //compute the other chisquared + //non anchor is always shifted for tilted and endcap! 
+ float nonAnchorDelta1[Params_T5::kLayers], nonAnchorDelta2[Params_T5::kLayers], nonAnchorSlopes[Params_T5::kLayers]; + float nonAnchorxs[] = {mdsInGPU.outerX[firstMDIndex], + mdsInGPU.outerX[secondMDIndex], + mdsInGPU.outerX[thirdMDIndex], + mdsInGPU.outerX[fourthMDIndex], + mdsInGPU.outerX[fifthMDIndex]}; + float nonAnchorys[] = {mdsInGPU.outerY[firstMDIndex], + mdsInGPU.outerY[secondMDIndex], + mdsInGPU.outerY[thirdMDIndex], + mdsInGPU.outerY[fourthMDIndex], + mdsInGPU.outerY[fifthMDIndex]}; + + computeSigmasForRegression(acc, + modulesInGPU, + lowerModuleIndices, + nonAnchorDelta1, + nonAnchorDelta2, + nonAnchorSlopes, + isFlat, + Params_T5::kLayers, + false); + nonAnchorChiSquared = computeChiSquared(acc, + Params_T5::kLayers, + nonAnchorxs, + nonAnchorys, + nonAnchorDelta1, + nonAnchorDelta2, + nonAnchorSlopes, + isFlat, + regressionG, + regressionF, + regressionRadius); + return true; + } + + struct CreateQuintupletsInGPUv2 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU, + uint16_t nEligibleT5Modules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int iter = globalThreadIdx[0]; iter < nEligibleT5Modules; iter += gridThreadExtent[0]) { + uint16_t lowerModule1 = rangesInGPU.indicesOfEligibleT5Modules[iter]; + short layer2_adjustment; + int layer = modulesInGPU.layers[lowerModule1]; + if (layer == 1) { + layer2_adjustment = 1; + } // get upper segment to be in second layer + else if (layer == 2) { + layer2_adjustment = 0; + } // get lower segment to be in second layer + else { + continue; + } + unsigned int nInnerTriplets = tripletsInGPU.nTriplets[lowerModule1]; + for (unsigned int innerTripletArrayIndex = globalThreadIdx[1]; innerTripletArrayIndex < nInnerTriplets; + innerTripletArrayIndex += 
gridThreadExtent[1]) { + unsigned int innerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule1] + innerTripletArrayIndex; + uint16_t lowerModule2 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * innerTripletIndex + 1]; + uint16_t lowerModule3 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * innerTripletIndex + 2]; + unsigned int nOuterTriplets = tripletsInGPU.nTriplets[lowerModule3]; + for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; + outerTripletArrayIndex += gridThreadExtent[2]) { + unsigned int outerTripletIndex = rangesInGPU.tripletModuleIndices[lowerModule3] + outerTripletArrayIndex; + uint16_t lowerModule4 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * outerTripletIndex + 1]; + uint16_t lowerModule5 = tripletsInGPU.lowerModuleIndices[Params_T3::kLayers * outerTripletIndex + 2]; + + float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, + chiSquared, nonAnchorChiSquared; //required for making distributions + + bool TightCutFlag = false; + bool success = runQuintupletDefaultAlgo(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + lowerModule1, + lowerModule2, + lowerModule3, + lowerModule4, + lowerModule5, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius, + regressionG, + regressionF, + regressionRadius, + rzChiSquared, + chiSquared, + nonAnchorChiSquared, + TightCutFlag); + + if (success) { + int totOccupancyQuintuplets = alpaka::atomicAdd( + acc, &quintupletsInGPU.totOccupancyQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyQuintuplets >= rangesInGPU.quintupletModuleOccupancy[lowerModule1]) { +#ifdef WARNINGS + printf("Quintuplet excess alert! 
Module index = %d\n", lowerModule1); +#endif + } else { + int quintupletModuleIndex = alpaka::atomicAdd( + acc, &quintupletsInGPU.nQuintuplets[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + //this if statement should never get executed! + if (rangesInGPU.quintupletModuleIndices[lowerModule1] == -1) { +#ifdef WARNINGS + printf("Quintuplets : no memory for module at module index = %d\n", lowerModule1); +#endif + } else { + unsigned int quintupletIndex = + rangesInGPU.quintupletModuleIndices[lowerModule1] + quintupletModuleIndex; + float phi = + mdsInGPU.anchorPhi[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + + layer2_adjustment]]]; + float eta = + mdsInGPU.anchorEta[segmentsInGPU.mdIndices[2 * tripletsInGPU.segmentIndices[2 * innerTripletIndex + + layer2_adjustment]]]; + float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; + float scores = chiSquared + nonAnchorChiSquared; + addQuintupletToMemory(tripletsInGPU, + quintupletsInGPU, + innerTripletIndex, + outerTripletIndex, + lowerModule1, + lowerModule2, + lowerModule3, + lowerModule4, + lowerModule5, + innerRadius, + bridgeRadius, + outerRadius, + regressionG, + regressionF, + regressionRadius, + rzChiSquared, + chiSquared, + nonAnchorChiSquared, + pt, + eta, + phi, + scores, + layer, + quintupletIndex, + TightCutFlag); + + tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex]] = true; + tripletsInGPU.partOfT5[quintupletsInGPU.tripletIndices[2 * quintupletIndex + 1]] = true; + } + } + } + } + } + } + } + }; + + struct CreateEligibleModulesListForQuintupletsGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = 
alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nEligibleT5Modulesx = alpaka::declareSharedVar(acc); + int& nTotalQuintupletsx = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalQuintupletsx = 0; + nEligibleT5Modulesx = 0; + } + alpaka::syncBlockThreads(acc); + + // Create variables outside of the for loop. + int occupancy, category_number, eta_number; + + for (int i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + // Condition for a quintuple to exist for a module + // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap + short module_rings = modulesInGPU.rings[i]; + short module_layers = modulesInGPU.layers[i]; + short module_subdets = modulesInGPU.subdets[i]; + float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + + if (tripletsInGPU.nTriplets[i] == 0) + continue; + if (module_subdets == Barrel and module_layers >= 3) + continue; + if (module_subdets == Endcap and module_layers > 1) + continue; + + int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{}); + + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + if (category_number == 0 && eta_number 
== 0) + occupancy = 336; + else if (category_number == 0 && eta_number == 1) + occupancy = 414; + else if (category_number == 0 && eta_number == 2) + occupancy = 231; + else if (category_number == 0 && eta_number == 3) + occupancy = 146; + else if (category_number == 3 && eta_number == 1) + occupancy = 0; + else if (category_number == 3 && eta_number == 2) + occupancy = 191; + else if (category_number == 3 && eta_number == 3) + occupancy = 106; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createEligibleModulesListForQuintupletsGPU! Module index = %i\n", i); +#endif + } + + int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, occupancy, alpaka::hierarchy::Threads{}); + rangesInGPU.quintupletModuleIndices[i] = nTotQ; + rangesInGPU.indicesOfEligibleT5Modules[nEligibleT5Modules] = i; + rangesInGPU.quintupletModuleOccupancy[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + *rangesInGPU.nEligibleT5Modules = static_cast(nEligibleT5Modulesx); + *rangesInGPU.device_nTotalQuints = static_cast(nTotalQuintupletsx); + } + } + }; + + struct AddQuintupletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + if (quintupletsInGPU.nQuintuplets[i] == 0 or rangesInGPU.quintupletModuleIndices[i] == -1) { + rangesInGPU.quintupletRanges[i * 2] = -1; + rangesInGPU.quintupletRanges[i * 2 + 1] = -1; + } else { + rangesInGPU.quintupletRanges[i * 2] = 
rangesInGPU.quintupletModuleIndices[i]; + rangesInGPU.quintupletRanges[i * 2 + 1] = + rangesInGPU.quintupletModuleIndices[i] + quintupletsInGPU.nQuintuplets[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h new file mode 100644 index 0000000000000..bc2d1d82a5fc9 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -0,0 +1,1010 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Segment_h +#define RecoTracker_LSTCore_src_alpaka_Segment_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" + +#include "MiniDoublet.h" +#include "Hit.h" +#include "ObjectRanges.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + struct Segments { + FPX* dPhis; + FPX* dPhiMins; + FPX* dPhiMaxs; + FPX* dPhiChanges; + FPX* dPhiChangeMins; + FPX* dPhiChangeMaxs; + uint16_t* innerLowerModuleIndices; + uint16_t* outerLowerModuleIndices; + unsigned int* seedIdx; + unsigned int* mdIndices; + unsigned int* nMemoryLocations; + unsigned int* innerMiniDoubletAnchorHitIndices; + unsigned int* outerMiniDoubletAnchorHitIndices; + int* charge; + int* superbin; + unsigned int* nSegments; //number of segments per inner lower module + unsigned int* totOccupancySegments; //number of segments per inner lower module + uint4* pLSHitsIdxs; + PixelType* pixelType; + char* isQuad; + char* isDup; + bool* partOfPT5; + float* ptIn; + float* ptErr; + float* px; + float* py; + float* pz; + float* etaErr; + float* eta; + float* phi; + float* score; + float* circleCenterX; + float* circleCenterY; + float* circleRadius; + + template + void setData(TBuff& buf) { + dPhis = buf.dPhis_buf.data(); + dPhiMins = buf.dPhiMins_buf.data(); + dPhiMaxs = buf.dPhiMaxs_buf.data(); + dPhiChanges = 
buf.dPhiChanges_buf.data(); + dPhiChangeMins = buf.dPhiChangeMins_buf.data(); + dPhiChangeMaxs = buf.dPhiChangeMaxs_buf.data(); + innerLowerModuleIndices = buf.innerLowerModuleIndices_buf.data(); + outerLowerModuleIndices = buf.outerLowerModuleIndices_buf.data(); + seedIdx = buf.seedIdx_buf.data(); + mdIndices = buf.mdIndices_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + innerMiniDoubletAnchorHitIndices = buf.innerMiniDoubletAnchorHitIndices_buf.data(); + outerMiniDoubletAnchorHitIndices = buf.outerMiniDoubletAnchorHitIndices_buf.data(); + charge = buf.charge_buf.data(); + superbin = buf.superbin_buf.data(); + nSegments = buf.nSegments_buf.data(); + totOccupancySegments = buf.totOccupancySegments_buf.data(); + pLSHitsIdxs = buf.pLSHitsIdxs_buf.data(); + pixelType = buf.pixelType_buf.data(); + isQuad = buf.isQuad_buf.data(); + isDup = buf.isDup_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + ptIn = buf.ptIn_buf.data(); + ptErr = buf.ptErr_buf.data(); + px = buf.px_buf.data(); + py = buf.py_buf.data(); + pz = buf.pz_buf.data(); + etaErr = buf.etaErr_buf.data(); + eta = buf.eta_buf.data(); + phi = buf.phi_buf.data(); + score = buf.score_buf.data(); + circleCenterX = buf.circleCenterX_buf.data(); + circleCenterY = buf.circleCenterY_buf.data(); + circleRadius = buf.circleRadius_buf.data(); + } + }; + + template + struct SegmentsBuffer { + Buf dPhis_buf; + Buf dPhiMins_buf; + Buf dPhiMaxs_buf; + Buf dPhiChanges_buf; + Buf dPhiChangeMins_buf; + Buf dPhiChangeMaxs_buf; + Buf innerLowerModuleIndices_buf; + Buf outerLowerModuleIndices_buf; + Buf seedIdx_buf; + Buf mdIndices_buf; + Buf nMemoryLocations_buf; + Buf innerMiniDoubletAnchorHitIndices_buf; + Buf outerMiniDoubletAnchorHitIndices_buf; + Buf charge_buf; + Buf superbin_buf; + Buf nSegments_buf; + Buf totOccupancySegments_buf; + Buf pLSHitsIdxs_buf; + Buf pixelType_buf; + Buf isQuad_buf; + Buf isDup_buf; + Buf partOfPT5_buf; + Buf ptIn_buf; + Buf ptErr_buf; + Buf px_buf; + Buf py_buf; + Buf 
pz_buf; + Buf etaErr_buf; + Buf eta_buf; + Buf phi_buf; + Buf score_buf; + Buf circleCenterX_buf; + Buf circleCenterY_buf; + Buf circleRadius_buf; + + Segments data_; + + template + SegmentsBuffer(unsigned int nMemoryLocationsIn, + uint16_t nLowerModules, + unsigned int maxPixelSegments, + TDevAcc const& devAccIn, + TQueue& queue) + : dPhis_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + dPhiMins_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + dPhiMaxs_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + dPhiChanges_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + dPhiChangeMins_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + dPhiChangeMaxs_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + innerLowerModuleIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + outerLowerModuleIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + seedIdx_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + mdIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn * 2, queue)), + nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), + innerMiniDoubletAnchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + outerMiniDoubletAnchorHitIndices_buf(allocBufWrapper(devAccIn, nMemoryLocationsIn, queue)), + charge_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + superbin_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + nSegments_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), + totOccupancySegments_buf(allocBufWrapper(devAccIn, nLowerModules + 1, queue)), + pLSHitsIdxs_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + pixelType_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + isQuad_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + isDup_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + partOfPT5_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + ptIn_buf(allocBufWrapper(devAccIn, 
maxPixelSegments, queue)), + ptErr_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + px_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + py_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + pz_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + etaErr_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + eta_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + phi_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + score_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + circleCenterX_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + circleCenterY_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)), + circleRadius_buf(allocBufWrapper(devAccIn, maxPixelSegments, queue)) { + alpaka::memset(queue, nSegments_buf, 0u); + alpaka::memset(queue, totOccupancySegments_buf, 0u); + alpaka::memset(queue, partOfPT5_buf, false); + alpaka::memset(queue, pLSHitsIdxs_buf, 0u); + } + + inline Segments const* data() const { return &data_; } + inline void setData(SegmentsBuffer& buf) { data_.setData(buf); } + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(Modules const& modulesInGPU, + unsigned int moduleIndex) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + short subdet = modulesInGPU.subdets[moduleIndex]; + short layer = modulesInGPU.layers[moduleIndex]; + short side = modulesInGPU.sides[moduleIndex]; + short rod = modulesInGPU.rods[moduleIndex]; + + return (subdet == Barrel) && (((side != Center) && (layer == 3)) || + ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float isTighterTiltedModules_seg(short subdet, short 
layer, short side, short rod) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + return (subdet == Barrel) && (((side != Center) && (layer == 3)) || + ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { + static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + static constexpr float miniDeltaEndcap[5][15] = { + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; + + unsigned int iL = layer - 1; + unsigned int iR = ring - 1; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules_seg(subdet, layer, side, rod)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE 
float moduleGapSize_seg(Modules const& modulesInGPU, unsigned int moduleIndex) { + static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + static constexpr float miniDeltaEndcap[5][15] = { + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; + + unsigned int iL = modulesInGPU.layers[moduleIndex] - 1; + unsigned int iR = modulesInGPU.rings[moduleIndex] - 1; + short subdet = modulesInGPU.subdets[moduleIndex]; + short side = modulesInGPU.sides[moduleIndex]; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules_seg(modulesInGPU, moduleIndex)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, + float* dAlphaThresholdValues, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + float xIn, + float yIn, + float zIn, + float rtIn, + float xOut, + float yOut, + float zOut, + float rtOut, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex) { + float sdMuls = 
(modulesInGPU.subdets[innerLowerModuleIndex] == Barrel) + ? kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; + + //more accurate then outer rt - inner rt + float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn)); + + const float dAlpha_Bfield = + alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + bool isInnerTilted = + modulesInGPU.subdets[innerLowerModuleIndex] == Barrel and modulesInGPU.sides[innerLowerModuleIndex] != Center; + bool isOuterTilted = + modulesInGPU.subdets[outerLowerModuleIndex] == Barrel and modulesInGPU.sides[outerLowerModuleIndex] != Center; + + float drdzInner = modulesInGPU.drdzs[innerLowerModuleIndex]; + float drdzOuter = modulesInGPU.drdzs[outerLowerModuleIndex]; + float innerModuleGapSize = moduleGapSize_seg(modulesInGPU, innerLowerModuleIndex); + float outerModuleGapSize = moduleGapSize_seg(modulesInGPU, outerLowerModuleIndex); + const float innerminiTilt2 = isInnerTilted + ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / + (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) + : 0; + + const float outerminiTilt2 = isOuterTilted + ? 
((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzOuter * drdzOuter) / + (1.f + drdzOuter * drdzOuter) / (outerModuleGapSize * outerModuleGapSize)) + : 0; + + float miniDelta = innerModuleGapSize; + + float sdLumForInnerMini2; + float sdLumForOuterMini2; + + if (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel) { + sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + } else { + sdLumForInnerMini2 = (mdsInGPU.dphis[innerMDIndex] * mdsInGPU.dphis[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mdsInGPU.dzs[innerMDIndex] * mdsInGPU.dzs[innerMDIndex]); + } + + if (modulesInGPU.subdets[outerLowerModuleIndex] == Barrel) { + sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + } else { + sdLumForOuterMini2 = (mdsInGPU.dphis[outerMDIndex] * mdsInGPU.dphis[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mdsInGPU.dzs[outerMDIndex] * mdsInGPU.dzs[outerMDIndex]); + } + + // Unique stuff for the segment dudes alone + float dAlpha_res_inner = + 0.02f / miniDelta * + (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + float dAlpha_res_outer = + 0.02f / miniDelta * + (modulesInGPU.subdets[outerLowerModuleIndex] == Barrel ? 
1.0f : alpaka::math::abs(acc, zOut) / rtOut); + + float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; + + if (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel and modulesInGPU.sides[innerLowerModuleIndex] == Center) { + dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } else { + dAlphaThresholdValues[0] = + dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); + } + + if (modulesInGPU.subdets[outerLowerModuleIndex] == Barrel and modulesInGPU.sides[outerLowerModuleIndex] == Center) { + dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } else { + dAlphaThresholdValues[1] = + dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForOuterMini2); + } + + //Inner to outer + dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments& segmentsInGPU, + unsigned int lowerMDIndex, + unsigned int upperMDIndex, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDAnchorHitIndex, + unsigned int outerMDAnchorHitIndex, + float dPhi, + float dPhiMin, + float dPhiMax, + float dPhiChange, + float dPhiChangeMin, + float dPhiChangeMax, + unsigned int idx) { + segmentsInGPU.mdIndices[idx * 2] = lowerMDIndex; + segmentsInGPU.mdIndices[idx * 2 + 1] = upperMDIndex; + segmentsInGPU.innerLowerModuleIndices[idx] = innerLowerModuleIndex; + segmentsInGPU.outerLowerModuleIndices[idx] = outerLowerModuleIndex; + segmentsInGPU.innerMiniDoubletAnchorHitIndices[idx] = innerMDAnchorHitIndex; + segmentsInGPU.outerMiniDoubletAnchorHitIndices[idx] = outerMDAnchorHitIndex; + + segmentsInGPU.dPhis[idx] = __F2H(dPhi); + segmentsInGPU.dPhiMins[idx] = __F2H(dPhiMin); + segmentsInGPU.dPhiMaxs[idx] = __F2H(dPhiMax); + 
segmentsInGPU.dPhiChanges[idx] = __F2H(dPhiChange); + segmentsInGPU.dPhiChangeMins[idx] = __F2H(dPhiChangeMin); + segmentsInGPU.dPhiChangeMaxs[idx] = __F2H(dPhiChangeMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, + Segments& segmentsInGPU, + MiniDoublets const& mdsInGPU, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + uint16_t pixelModuleIndex, + unsigned int hitIdxs[4], + unsigned int innerAnchorHitIndex, + unsigned int outerAnchorHitIndex, + float dPhiChange, + unsigned int idx, + unsigned int pixelSegmentArrayIndex, + float score) { + segmentsInGPU.mdIndices[idx * 2] = innerMDIndex; + segmentsInGPU.mdIndices[idx * 2 + 1] = outerMDIndex; + segmentsInGPU.innerLowerModuleIndices[idx] = pixelModuleIndex; + segmentsInGPU.outerLowerModuleIndices[idx] = pixelModuleIndex; + segmentsInGPU.innerMiniDoubletAnchorHitIndices[idx] = innerAnchorHitIndex; + segmentsInGPU.outerMiniDoubletAnchorHitIndices[idx] = outerAnchorHitIndex; + segmentsInGPU.dPhiChanges[idx] = __F2H(dPhiChange); + segmentsInGPU.isDup[pixelSegmentArrayIndex] = false; + segmentsInGPU.score[pixelSegmentArrayIndex] = score; + + segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].x = hitIdxs[0]; + segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].y = hitIdxs[1]; + segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].z = hitIdxs[2]; + segmentsInGPU.pLSHitsIdxs[pixelSegmentArrayIndex].w = hitIdxs[3]; + + //computing circle parameters + /* + The two anchor hits are r3PCA and r3LH. 
p3PCA pt, eta, phi is hitIndex1 x, y, z + */ + float circleRadius = mdsInGPU.outerX[innerMDIndex] / (2 * k2Rinv1GeVf); + float circlePhi = mdsInGPU.outerZ[innerMDIndex]; + float candidateCenterXs[] = {mdsInGPU.anchorX[innerMDIndex] + circleRadius * alpaka::math::sin(acc, circlePhi), + mdsInGPU.anchorX[innerMDIndex] - circleRadius * alpaka::math::sin(acc, circlePhi)}; + float candidateCenterYs[] = {mdsInGPU.anchorY[innerMDIndex] - circleRadius * alpaka::math::cos(acc, circlePhi), + mdsInGPU.anchorY[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; + + //check which of the circles can accommodate r3LH better (we won't get perfect agreement) + float bestChiSquared = lst_INF; + float chiSquared; + size_t bestIndex; + for (size_t i = 0; i < 2; i++) { + chiSquared = + alpaka::math::abs(acc, + alpaka::math::sqrt(acc, + (mdsInGPU.anchorX[outerMDIndex] - candidateCenterXs[i]) * + (mdsInGPU.anchorX[outerMDIndex] - candidateCenterXs[i]) + + (mdsInGPU.anchorY[outerMDIndex] - candidateCenterYs[i]) * + (mdsInGPU.anchorY[outerMDIndex] - candidateCenterYs[i])) - + circleRadius); + if (chiSquared < bestChiSquared) { + bestChiSquared = chiSquared; + bestIndex = i; + } + } + segmentsInGPU.circleCenterX[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; + segmentsInGPU.circleCenterY[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; + segmentsInGPU.circleRadius[pixelSegmentArrayIndex] = circleRadius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + float sdMuls = (modulesInGPU.subdets[innerLowerModuleIndex] == Barrel) + ? 
kMiniMulsPtScaleBarrel[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modulesInGPU.layers[innerLowerModuleIndex] - 1] * 3.f / ptCut; + + float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut; + + xIn = mdsInGPU.anchorX[innerMDIndex]; + yIn = mdsInGPU.anchorY[innerMDIndex]; + zIn = mdsInGPU.anchorZ[innerMDIndex]; + rtIn = mdsInGPU.anchorRt[innerMDIndex]; + + xOut = mdsInGPU.anchorX[outerMDIndex]; + yOut = mdsInGPU.anchorY[outerMDIndex]; + zOut = mdsInGPU.anchorZ[outerMDIndex]; + rtOut = mdsInGPU.anchorRt[outerMDIndex]; + + float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + float sdPVoff = 0.1f / rtOut; + float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope; //FIXME: need appropriate value + + const float zGeom = modulesInGPU.layers[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + + float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + zGeom; //slope-correction only on outer end + float zHi = zIn + (zIn + kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn < 0.f ? 
1.f : dzDrtScale) + zGeom; + + if ((zOut < zLo) || (zOut > zHi)) + return false; + + float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); + + dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + + if (alpaka::math::abs(acc, dPhi) > sdCut) + return false; + + dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mdsInGPU.anchorPhi[innerMDIndex]); + + if (alpaka::math::abs(acc, dPhiChange) > sdCut) + return false; + + float dAlphaThresholdValues[3]; + dAlphaThreshold(acc, + dAlphaThresholdValues, + modulesInGPU, + mdsInGPU, + xIn, + yIn, + zIn, + rtIn, + xOut, + yOut, + zOut, + rtOut, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex); + + float innerMDAlpha = mdsInGPU.dphichanges[innerMDIndex]; + float outerMDAlpha = mdsInGPU.dphichanges[outerMDIndex]; + float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange; + float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange; + float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha; + + float dAlphaInnerMDSegmentThreshold = dAlphaThresholdValues[0]; + float dAlphaOuterMDSegmentThreshold = dAlphaThresholdValues[1]; + float dAlphaInnerMDOuterMDThreshold = dAlphaThresholdValues[2]; + + if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold) + return false; + if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) + return false; + return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + float xIn, yIn, zIn, rtIn, xOut, yOut, 
zOut, rtOut; + + xIn = mdsInGPU.anchorX[innerMDIndex]; + yIn = mdsInGPU.anchorY[innerMDIndex]; + zIn = mdsInGPU.anchorZ[innerMDIndex]; + rtIn = mdsInGPU.anchorRt[innerMDIndex]; + + xOut = mdsInGPU.anchorX[outerMDIndex]; + yOut = mdsInGPU.anchorY[outerMDIndex]; + zOut = mdsInGPU.anchorZ[outerMDIndex]; + rtOut = mdsInGPU.anchorRt[outerMDIndex]; + + bool outerLayerEndcapTwoS = (modulesInGPU.subdets[outerLowerModuleIndex] == Endcap) && + (modulesInGPU.moduleType[outerLowerModuleIndex] == TwoS); + + float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + float disks2SMinRadius = 60.f; + + float rtGeom = ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius) + ? (2.f * kPixelPSZpitch) + : ((rtIn < disks2SMinRadius || rtOut < disks2SMinRadius) ? (kPixelPSZpitch + kStrip2SZpitch) + : (2.f * kStrip2SZpitch))); + + //cut 0 - z compatibility + if (zIn * zOut < 0) + return false; + + float dz = zOut - zIn; + // Alpaka: Needs to be moved over + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + float drtDzScale = sdSlope / alpaka::math::tan(acc, sdSlope); + + float rtLo = alpaka::math::max( + acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom); //rt should increase + float rtHi = rtIn * (zOut - dLum) / (zIn - dLum) + + rtGeom; //dLum for luminous; rGeom for measurement size; no tanTheta_loc(pt) correction + + // Completeness + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + dPhi = phi_mpi_pi(acc, mdsInGPU.anchorPhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + + float sdCut = sdSlope; + if (outerLayerEndcapTwoS) { + float dPhiPos_high = phi_mpi_pi(acc, mdsInGPU.anchorHighEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + float dPhiPos_low = phi_mpi_pi(acc, mdsInGPU.anchorLowEdgePhi[outerMDIndex] - mdsInGPU.anchorPhi[innerMDIndex]); + + dPhiMax = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? 
dPhiPos_high : dPhiPos_low; + dPhiMin = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_low : dPhiPos_high; + } else { + dPhiMax = dPhi; + dPhiMin = dPhi; + } + if (alpaka::math::abs(acc, dPhi) > sdCut) + return false; + + float dzFrac = dz / zIn; + dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + dPhiChangeMin = dPhiMin / dzFrac * (1.f + dzFrac); + dPhiChangeMax = dPhiMax / dzFrac * (1.f + dzFrac); + + if (alpaka::math::abs(acc, dPhiChange) > sdCut) + return false; + + float dAlphaThresholdValues[3]; + dAlphaThreshold(acc, + dAlphaThresholdValues, + modulesInGPU, + mdsInGPU, + xIn, + yIn, + zIn, + rtIn, + xOut, + yOut, + zOut, + rtOut, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex); + + float innerMDAlpha = mdsInGPU.dphichanges[innerMDIndex]; + float outerMDAlpha = mdsInGPU.dphichanges[outerMDIndex]; + float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange; + float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange; + float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha; + + float dAlphaInnerMDSegmentThreshold = dAlphaThresholdValues[0]; + float dAlphaOuterMDSegmentThreshold = dAlphaThresholdValues[1]; + float dAlphaInnerMDOuterMDThreshold = dAlphaThresholdValues[2]; + + if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold) + return false; + if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) + return false; + return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + if 
(modulesInGPU.subdets[innerLowerModuleIndex] == Barrel and + modulesInGPU.subdets[outerLowerModuleIndex] == Barrel) { + return runSegmentDefaultAlgoBarrel(acc, + modulesInGPU, + mdsInGPU, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax); + } else { + return runSegmentDefaultAlgoEndcap(acc, + modulesInGPU, + mdsInGPU, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax); + } + } + + struct CreateSegmentsInGPUv2 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const blockThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const blockThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t innerLowerModuleIndex = globalBlockIdx[2]; innerLowerModuleIndex < (*modulesInGPU.nLowerModules); + innerLowerModuleIndex += gridBlockExtent[2]) { + unsigned int nInnerMDs = mdsInGPU.nMDs[innerLowerModuleIndex]; + if (nInnerMDs == 0) + continue; + + unsigned int nConnectedModules = modulesInGPU.nConnectedModules[innerLowerModuleIndex]; + + for (uint16_t outerLowerModuleArrayIdx = blockThreadIdx[1]; outerLowerModuleArrayIdx < nConnectedModules; + outerLowerModuleArrayIdx += blockThreadExtent[1]) { + uint16_t outerLowerModuleIndex = + modulesInGPU.moduleMap[innerLowerModuleIndex * max_connected_modules + outerLowerModuleArrayIdx]; + + unsigned int nOuterMDs = mdsInGPU.nMDs[outerLowerModuleIndex]; + + unsigned int limit = nInnerMDs * nOuterMDs; + + if (limit == 0) + continue; + for (unsigned int hitIndex = blockThreadIdx[2]; hitIndex < limit; hitIndex += blockThreadExtent[2]) { + unsigned int innerMDArrayIdx = hitIndex / nOuterMDs; + unsigned int 
outerMDArrayIdx = hitIndex % nOuterMDs; + if (outerMDArrayIdx >= nOuterMDs) + continue; + + unsigned int innerMDIndex = rangesInGPU.mdRanges[innerLowerModuleIndex * 2] + innerMDArrayIdx; + unsigned int outerMDIndex = rangesInGPU.mdRanges[outerLowerModuleIndex * 2] + outerMDArrayIdx; + + float dPhi, dPhiMin, dPhiMax, dPhiChange, dPhiChangeMin, dPhiChangeMax; + + unsigned int innerMiniDoubletAnchorHitIndex = mdsInGPU.anchorHitIndices[innerMDIndex]; + unsigned int outerMiniDoubletAnchorHitIndex = mdsInGPU.anchorHitIndices[outerMDIndex]; + dPhiMin = 0; + dPhiMax = 0; + dPhiChangeMin = 0; + dPhiChangeMax = 0; + if (runSegmentDefaultAlgo(acc, + modulesInGPU, + mdsInGPU, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax)) { + unsigned int totOccupancySegments = alpaka::atomicAdd( + acc, &segmentsInGPU.totOccupancySegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancySegments) >= rangesInGPU.segmentModuleOccupancy[innerLowerModuleIndex]) { +#ifdef WARNINGS + printf("Segment excess alert! 
Module index = %d\n", innerLowerModuleIndex); +#endif + } else { + unsigned int segmentModuleIdx = alpaka::atomicAdd( + acc, &segmentsInGPU.nSegments[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int segmentIdx = rangesInGPU.segmentModuleIndices[innerLowerModuleIndex] + segmentModuleIdx; + + addSegmentToMemory(segmentsInGPU, + innerMDIndex, + outerMDIndex, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMiniDoubletAnchorHitIndex, + outerMiniDoubletAnchorHitIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax, + segmentIdx); + } + } + } + } + } + } + }; + + struct CreateSegmentArrayRanges { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + MiniDoublets mdsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nTotalSegments = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalSegments = 0; + } + alpaka::syncBlockThreads(acc); + + // Create variables outside of the for loop. 
+ int occupancy, category_number, eta_number; + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + if (modulesInGPU.nConnectedModules[i] == 0) { + rangesInGPU.segmentModuleIndices[i] = nTotalSegments; + rangesInGPU.segmentModuleOccupancy[i] = 0; + continue; + } + + short module_rings = modulesInGPU.rings[i]; + short module_layers = modulesInGPU.layers[i]; + short module_subdets = modulesInGPU.subdets[i]; + float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + if (category_number == 0 && eta_number == 0) + occupancy = 572; + else if (category_number == 0 && eta_number == 1) + occupancy = 300; + else if (category_number == 0 && eta_number == 2) + occupancy = 183; + else if (category_number == 0 && eta_number == 3) + occupancy = 62; + else if (category_number == 1 && eta_number == 0) + occupancy = 191; + else if (category_number == 1 && eta_number == 1) + occupancy = 128; + else if (category_number == 2 && eta_number == 1) + occupancy = 107; + else if (category_number == 2 && eta_number == 2) + occupancy = 102; + else if (category_number == 3 && eta_number == 1) + occupancy = 64; + else if (category_number == 3 && eta_number 
== 2) + occupancy = 79; + else if (category_number == 3 && eta_number == 3) + occupancy = 85; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createSegmentArrayRanges! Module index = %i\n", i); +#endif + } + + int nTotSegs = alpaka::atomicAdd(acc, &nTotalSegments, occupancy, alpaka::hierarchy::Threads{}); + rangesInGPU.segmentModuleIndices[i] = nTotSegs; + rangesInGPU.segmentModuleOccupancy[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + rangesInGPU.segmentModuleIndices[*modulesInGPU.nLowerModules] = nTotalSegments; + *rangesInGPU.device_nTotalSegs = nTotalSegments; + } + } + }; + + struct AddSegmentRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + if (segmentsInGPU.nSegments[i] == 0) { + rangesInGPU.segmentRanges[i * 2] = -1; + rangesInGPU.segmentRanges[i * 2 + 1] = -1; + } else { + rangesInGPU.segmentRanges[i * 2] = rangesInGPU.segmentModuleIndices[i]; + rangesInGPU.segmentRanges[i * 2 + 1] = rangesInGPU.segmentModuleIndices[i] + segmentsInGPU.nSegments[i] - 1; + } + } + } + }; + + struct AddPixelSegmentToEventKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + Hits hitsInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + unsigned int* hitIndices0, + unsigned int* hitIndices1, + unsigned int* hitIndices2, + unsigned int* hitIndices3, + float* dPhiChange, + 
uint16_t pixelModuleIndex, + int size) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int tid = globalThreadIdx[2]; tid < size; tid += gridThreadExtent[2]) { + unsigned int innerMDIndex = rangesInGPU.miniDoubletModuleIndices[pixelModuleIndex] + 2 * (tid); + unsigned int outerMDIndex = rangesInGPU.miniDoubletModuleIndices[pixelModuleIndex] + 2 * (tid) + 1; + unsigned int pixelSegmentIndex = rangesInGPU.segmentModuleIndices[pixelModuleIndex] + tid; + + addMDToMemory(acc, + mdsInGPU, + hitsInGPU, + modulesInGPU, + hitIndices0[tid], + hitIndices1[tid], + pixelModuleIndex, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + innerMDIndex); + addMDToMemory(acc, + mdsInGPU, + hitsInGPU, + modulesInGPU, + hitIndices2[tid], + hitIndices3[tid], + pixelModuleIndex, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + outerMDIndex); + + //in outer hits - pt, eta, phi + float slope = alpaka::math::sinh(acc, hitsInGPU.ys[mdsInGPU.outerHitIndices[innerMDIndex]]); + float intercept = hitsInGPU.zs[mdsInGPU.anchorHitIndices[innerMDIndex]] - + slope * hitsInGPU.rts[mdsInGPU.anchorHitIndices[innerMDIndex]]; + float score_lsq = (hitsInGPU.rts[mdsInGPU.anchorHitIndices[outerMDIndex]] * slope + intercept) - + (hitsInGPU.zs[mdsInGPU.anchorHitIndices[outerMDIndex]]); + score_lsq = score_lsq * score_lsq; + + unsigned int hits1[Params_pLS::kHits]; + hits1[0] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[innerMDIndex]]; + hits1[1] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[outerMDIndex]]; + hits1[2] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[innerMDIndex]]; + hits1[3] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[outerMDIndex]]; + addPixelSegmentToMemory(acc, + segmentsInGPU, + mdsInGPU, + innerMDIndex, + outerMDIndex, + pixelModuleIndex, + hits1, + hitIndices0[tid], + hitIndices2[tid], + dPhiChange[tid], + pixelSegmentIndex, + tid, + score_lsq); + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git 
a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h new file mode 100644 index 0000000000000..16f36df3257cd --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -0,0 +1,590 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h +#define RecoTracker_LSTCore_src_alpaka_TrackCandidate_h + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" + +#include "Triplet.h" +#include "Segment.h" +#include "MiniDoublet.h" +#include "PixelTriplet.h" +#include "Quintuplet.h" +#include "Hit.h" +#include "ObjectRanges.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + struct TrackCandidates { + short* trackCandidateType; // 4-T5 5-pT3 7-pT5 8-pLS + unsigned int* directObjectIndices; // Will hold direct indices to each type containers + unsigned int* objectIndices; // Will hold tracklet and triplet indices - check the type!! + unsigned int* nTrackCandidates; + unsigned int* nTrackCandidatespT3; + unsigned int* nTrackCandidatespT5; + unsigned int* nTrackCandidatespLS; + unsigned int* nTrackCandidatesT5; + + uint8_t* logicalLayers; + unsigned int* hitIndices; + int* pixelSeedIndex; + uint16_t* lowerModuleIndices; + + FPX* centerX; + FPX* centerY; + FPX* radius; + + template + void setData(TBuff& buf) { + trackCandidateType = buf.trackCandidateType_buf.data(); + directObjectIndices = buf.directObjectIndices_buf.data(); + objectIndices = buf.objectIndices_buf.data(); + nTrackCandidates = buf.nTrackCandidates_buf.data(); + nTrackCandidatespT3 = buf.nTrackCandidatespT3_buf.data(); + nTrackCandidatespT5 = buf.nTrackCandidatespT5_buf.data(); + nTrackCandidatespLS = buf.nTrackCandidatespLS_buf.data(); + nTrackCandidatesT5 = buf.nTrackCandidatesT5_buf.data(); + + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + pixelSeedIndex = buf.pixelSeedIndex_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + + 
centerX = buf.centerX_buf.data(); + centerY = buf.centerY_buf.data(); + radius = buf.radius_buf.data(); + } + }; + + template + struct TrackCandidatesBuffer { + Buf trackCandidateType_buf; + Buf directObjectIndices_buf; + Buf objectIndices_buf; + Buf nTrackCandidates_buf; + Buf nTrackCandidatespT3_buf; + Buf nTrackCandidatespT5_buf; + Buf nTrackCandidatespLS_buf; + Buf nTrackCandidatesT5_buf; + + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf pixelSeedIndex_buf; + Buf lowerModuleIndices_buf; + + Buf centerX_buf; + Buf centerY_buf; + Buf radius_buf; + + TrackCandidates data_; + + template + TrackCandidatesBuffer(unsigned int maxTrackCandidates, TDevAcc const& devAccIn, TQueue& queue) + : trackCandidateType_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), + directObjectIndices_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), + objectIndices_buf(allocBufWrapper(devAccIn, 2 * maxTrackCandidates, queue)), + nTrackCandidates_buf(allocBufWrapper(devAccIn, 1, queue)), + nTrackCandidatespT3_buf(allocBufWrapper(devAccIn, 1, queue)), + nTrackCandidatespT5_buf(allocBufWrapper(devAccIn, 1, queue)), + nTrackCandidatespLS_buf(allocBufWrapper(devAccIn, 1, queue)), + nTrackCandidatesT5_buf(allocBufWrapper(devAccIn, 1, queue)), + logicalLayers_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), + hitIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kHits * maxTrackCandidates, queue)), + pixelSeedIndex_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), + lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_pT5::kLayers * maxTrackCandidates, queue)), + centerX_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), + centerY_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)), + radius_buf(allocBufWrapper(devAccIn, maxTrackCandidates, queue)) { + alpaka::memset(queue, nTrackCandidates_buf, 0u); + alpaka::memset(queue, nTrackCandidatesT5_buf, 0u); + alpaka::memset(queue, nTrackCandidatespT3_buf, 0u); + 
alpaka::memset(queue, nTrackCandidatespT5_buf, 0u); + alpaka::memset(queue, nTrackCandidatespLS_buf, 0u); + alpaka::memset(queue, logicalLayers_buf, 0u); + alpaka::memset(queue, lowerModuleIndices_buf, 0u); + alpaka::memset(queue, hitIndices_buf, 0u); + alpaka::memset(queue, pixelSeedIndex_buf, 0); + } + + inline TrackCandidates const* data() const { return &data_; } + inline void setData(TrackCandidatesBuffer& buf) { data_.setData(buf); } + }; + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + unsigned int trackletIndex, + unsigned int trackCandidateIndex, + uint4 hitIndices, + int pixelSeedIndex) { + trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = 8; // type for pLS + trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = trackletIndex; + trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + + trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = trackletIndex; + trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = trackletIndex; + + trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 0] = + hitIndices.x; // Order explanation in https://github.com/SegmentLinking/TrackLooper/issues/267 + trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 1] = hitIndices.z; + trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 2] = hitIndices.y; + trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + 3] = hitIndices.w; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& trackCandidatesInGPU, + short trackCandidateType, + unsigned int innerTrackletIndex, + unsigned int outerTrackletIndex, + uint8_t* logicalLayerIndices, + uint16_t* lowerModuleIndices, + unsigned int* hitIndices, + int pixelSeedIndex, + float centerX, + float centerY, + float radius, + unsigned int trackCandidateIndex, + unsigned int directObjectIndex) { + 
trackCandidatesInGPU.trackCandidateType[trackCandidateIndex] = trackCandidateType; + trackCandidatesInGPU.directObjectIndices[trackCandidateIndex] = directObjectIndex; + trackCandidatesInGPU.pixelSeedIndex[trackCandidateIndex] = pixelSeedIndex; + + trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex] = innerTrackletIndex; + trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex + 1] = outerTrackletIndex; + + size_t limits = trackCandidateType == 7 + ? Params_pT5::kLayers + : Params_pT3::kLayers; // 7 means pT5, Params_pT3::kLayers = Params_T5::kLayers = 5 + + //send the starting pointer to the logicalLayer and hitIndices + for (size_t i = 0; i < limits; i++) { + trackCandidatesInGPU.logicalLayers[Params_pT5::kLayers * trackCandidateIndex + i] = logicalLayerIndices[i]; + trackCandidatesInGPU.lowerModuleIndices[Params_pT5::kLayers * trackCandidateIndex + i] = lowerModuleIndices[i]; + } + for (size_t i = 0; i < 2 * limits; i++) { + trackCandidatesInGPU.hitIndices[Params_pT5::kHits * trackCandidateIndex + i] = hitIndices[i]; + } + trackCandidatesInGPU.centerX[trackCandidateIndex] = __F2H(centerX); + trackCandidatesInGPU.centerY[trackCandidateIndex] = __F2H(centerY); + trackCandidatesInGPU.radius[trackCandidateIndex] = __F2H(radius); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits(unsigned int ix, + unsigned int jx, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Hits const& hitsInGPU) { + int phits1[Params_pLS::kHits]; + int phits2[Params_pLS::kHits]; + + phits1[0] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * ix]]]; + phits1[1] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * ix + 1]]]; + phits1[2] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * ix]]]; + phits1[3] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * ix + 1]]]; + + phits2[0] = hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * jx]]]; + phits2[1] = 
hitsInGPU.idxs[mdsInGPU.anchorHitIndices[segmentsInGPU.mdIndices[2 * jx + 1]]]; + phits2[2] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * jx]]]; + phits2[3] = hitsInGPU.idxs[mdsInGPU.outerHitIndices[segmentsInGPU.mdIndices[2 * jx + 1]]]; + + int npMatched = 0; + + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + if (phits1[i] == -1) + continue; + + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits2[j] == -1) + continue; + + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) + npMatched++; + } + return npMatched; + } + + struct CrossCleanpT3 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + PixelTriplets pixelTripletsInGPU, + Segments segmentsInGPU, + PixelQuintuplets pixelQuintupletsInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelTriplets = *pixelTripletsInGPU.nPixelTriplets; + for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[2]) { + if (pixelTripletsInGPU.isDup[pixelTripletIndex]) + continue; + + // Cross cleaning step + float eta1 = __H2F(pixelTripletsInGPU.eta_pix[pixelTripletIndex]); + float phi1 = __H2F(pixelTripletsInGPU.phi_pix[pixelTripletIndex]); + + int pixelModuleIndex = *modulesInGPU.nLowerModules; + unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; + + unsigned int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + for (unsigned int pixelQuintupletIndex = globalThreadIdx[1]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[1]) { + unsigned int pLS_jx = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; + float eta2 = segmentsInGPU.eta[pLS_jx - prefix]; + float phi2 = segmentsInGPU.phi[pLS_jx - prefix]; + float dEta = alpaka::math::abs(acc, 
(eta1 - eta2)); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-5f) + pixelTripletsInGPU.isDup[pixelTripletIndex] = true; + } + } + } + }; + + struct CrossCleanT5 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + Quintuplets quintupletsInGPU, + PixelQuintuplets pixelQuintupletsInGPU, + PixelTriplets pixelTripletsInGPU, + ObjectRanges rangesInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int innerInnerInnerLowerModuleArrayIndex = globalThreadIdx[0]; + innerInnerInnerLowerModuleArrayIndex < *(modulesInGPU.nLowerModules); + innerInnerInnerLowerModuleArrayIndex += gridThreadExtent[0]) { + if (rangesInGPU.quintupletModuleIndices[innerInnerInnerLowerModuleArrayIndex] == -1) + continue; + + unsigned int nQuints = quintupletsInGPU.nQuintuplets[innerInnerInnerLowerModuleArrayIndex]; + for (unsigned int innerObjectArrayIndex = globalThreadIdx[1]; innerObjectArrayIndex < nQuints; + innerObjectArrayIndex += gridThreadExtent[1]) { + unsigned int quintupletIndex = + rangesInGPU.quintupletModuleIndices[innerInnerInnerLowerModuleArrayIndex] + innerObjectArrayIndex; + + // Don't add duplicate T5s or T5s that are accounted in pT5s + if (quintupletsInGPU.isDup[quintupletIndex] or quintupletsInGPU.partOfPT5[quintupletIndex]) + continue; +#ifdef Crossclean_T5 + unsigned int loop_bound = *pixelQuintupletsInGPU.nPixelQuintuplets + *pixelTripletsInGPU.nPixelTriplets; + // Cross cleaning step + float eta1 = __H2F(quintupletsInGPU.eta[quintupletIndex]); + float phi1 = __H2F(quintupletsInGPU.phi[quintupletIndex]); + + for (unsigned int jx = globalThreadIdx[2]; jx < loop_bound; jx += gridThreadExtent[2]) { + float eta2, phi2; + if (jx < *pixelQuintupletsInGPU.nPixelQuintuplets) { + eta2 = __H2F(pixelQuintupletsInGPU.eta[jx]); + phi2 = __H2F(pixelQuintupletsInGPU.phi[jx]); + } else { + eta2 = 
__H2F(pixelTripletsInGPU.eta[jx - *pixelQuintupletsInGPU.nPixelQuintuplets]); + phi2 = __H2F(pixelTripletsInGPU.phi[jx - *pixelQuintupletsInGPU.nPixelQuintuplets]); + } + + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-3f) + quintupletsInGPU.isDup[quintupletIndex] = true; + } +#endif + } + } + } + }; + + struct CrossCleanpLS { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + PixelTriplets pixelTripletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + MiniDoublets mdsInGPU, + Hits hitsInGPU, + Quintuplets quintupletsInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int pixelModuleIndex = *modulesInGPU.nLowerModules; + unsigned int nPixels = segmentsInGPU.nSegments[pixelModuleIndex]; + for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; + pixelArrayIndex += gridThreadExtent[2]) { + if (!segmentsInGPU.isQuad[pixelArrayIndex] || segmentsInGPU.isDup[pixelArrayIndex]) + continue; + + float eta1 = segmentsInGPU.eta[pixelArrayIndex]; + float phi1 = segmentsInGPU.phi[pixelArrayIndex]; + unsigned int prefix = rangesInGPU.segmentModuleIndices[pixelModuleIndex]; + + unsigned int nTrackCandidates = *(trackCandidatesInGPU.nTrackCandidates); + for (unsigned int trackCandidateIndex = globalThreadIdx[1]; trackCandidateIndex < nTrackCandidates; + trackCandidateIndex += gridThreadExtent[1]) { + short type = trackCandidatesInGPU.trackCandidateType[trackCandidateIndex]; + unsigned int innerTrackletIdx = trackCandidatesInGPU.objectIndices[2 * trackCandidateIndex]; + if (type == 4) // T5 + { + unsigned int quintupletIndex = innerTrackletIdx; // T5 index + float eta2 = __H2F(quintupletsInGPU.eta[quintupletIndex]); + float phi2 = __H2F(quintupletsInGPU.phi[quintupletIndex]); + float dEta = 
alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-3f) + segmentsInGPU.isDup[pixelArrayIndex] = true; + } + if (type == 5) // pT3 + { + int pLSIndex = pixelTripletsInGPU.pixelSegmentIndices[innerTrackletIdx]; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mdsInGPU, segmentsInGPU, hitsInGPU); + if (npMatched > 0) + segmentsInGPU.isDup[pixelArrayIndex] = true; + + int pT3Index = innerTrackletIdx; + float eta2 = __H2F(pixelTripletsInGPU.eta_pix[pT3Index]); + float phi2 = __H2F(pixelTripletsInGPU.phi_pix[pT3Index]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 0.000001f) + segmentsInGPU.isDup[pixelArrayIndex] = true; + } + if (type == 7) // pT5 + { + unsigned int pLSIndex = innerTrackletIdx; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mdsInGPU, segmentsInGPU, hitsInGPU); + if (npMatched > 0) { + segmentsInGPU.isDup[pixelArrayIndex] = true; + } + + float eta2 = segmentsInGPU.eta[pLSIndex - prefix]; + float phi2 = segmentsInGPU.phi[pLSIndex - prefix]; + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 0.000001f) + segmentsInGPU.isDup[pixelArrayIndex] = true; + } + } + } + } + }; + + struct AddpT3asTrackCandidatesInGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + PixelTriplets pixelTripletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelTriplets = 
*pixelTripletsInGPU.nPixelTriplets; + unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; + for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[0]) { + if ((pixelTripletsInGPU.isDup[pixelTripletIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx >= n_max_pixel_track_candidates) // This is done before any non-pixel TCs are added + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = pT3"); +#endif + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT3, 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelTripletsInGPU.pixelRadius[pixelTripletIndex]) + + __H2F(pixelTripletsInGPU.tripletRadius[pixelTripletIndex])); + unsigned int pT3PixelIndex = pixelTripletsInGPU.pixelSegmentIndices[pixelTripletIndex]; + addTrackCandidateToMemory(trackCandidatesInGPU, + 5 /*track candidate type pT3=5*/, + pixelTripletIndex, + pixelTripletIndex, + &pixelTripletsInGPU.logicalLayers[Params_pT3::kLayers * pixelTripletIndex], + &pixelTripletsInGPU.lowerModuleIndices[Params_pT3::kLayers * pixelTripletIndex], + &pixelTripletsInGPU.hitIndices[Params_pT3::kHits * pixelTripletIndex], + segmentsInGPU.seedIdx[pT3PixelIndex - pLS_offset], + __H2F(pixelTripletsInGPU.centerX[pixelTripletIndex]), + __H2F(pixelTripletsInGPU.centerY[pixelTripletIndex]), + radius, + trackCandidateIdx, + pixelTripletIndex); + } + } + } + }; + + struct AddT5asTrackCandidateInGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + Quintuplets quintupletsInGPU, + TrackCandidates trackCandidatesInGPU, + ObjectRanges rangesInGPU) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto 
const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int idx = globalThreadIdx[1]; idx < nLowerModules; idx += gridThreadExtent[1]) { + if (rangesInGPU.quintupletModuleIndices[idx] == -1) + continue; + + unsigned int nQuints = quintupletsInGPU.nQuintuplets[idx]; + for (unsigned int jdx = globalThreadIdx[2]; jdx < nQuints; jdx += gridThreadExtent[2]) { + unsigned int quintupletIndex = rangesInGPU.quintupletModuleIndices[idx] + jdx; + if (quintupletsInGPU.isDup[quintupletIndex] or quintupletsInGPU.partOfPT5[quintupletIndex]) + continue; + if (!(quintupletsInGPU.TightCutFlag[quintupletIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatespT5 - + *trackCandidatesInGPU.nTrackCandidatespT3 >= + n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs + { +#ifdef WARNINGS + printf("Track Candidate excess alert! 
Type = T5"); +#endif + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + break; + } else { + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatesT5, 1u, alpaka::hierarchy::Threads{}); + addTrackCandidateToMemory(trackCandidatesInGPU, + 4 /*track candidate type T5=4*/, + quintupletIndex, + quintupletIndex, + &quintupletsInGPU.logicalLayers[Params_T5::kLayers * quintupletIndex], + &quintupletsInGPU.lowerModuleIndices[Params_T5::kLayers * quintupletIndex], + &quintupletsInGPU.hitIndices[Params_T5::kHits * quintupletIndex], + -1 /*no pixel seed index for T5s*/, + quintupletsInGPU.regressionG[quintupletIndex], + quintupletsInGPU.regressionF[quintupletIndex], + quintupletsInGPU.regressionRadius[quintupletIndex], + trackCandidateIdx, + quintupletIndex); + } + } + } + } + }; + + struct AddpLSasTrackCandidateInGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + bool tc_pls_triplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixels = segmentsInGPU.nSegments[nLowerModules]; + for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; + pixelArrayIndex += gridThreadExtent[2]) { + if ((tc_pls_triplets ? 0 : !segmentsInGPU.isQuad[pixelArrayIndex]) || (segmentsInGPU.isDup[pixelArrayIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - *trackCandidatesInGPU.nTrackCandidatesT5 >= + n_max_pixel_track_candidates) // T5 TCs have already been added + { +#ifdef WARNINGS + printf("Track Candidate excess alert! 
Type = pLS"); +#endif + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespLS, 1u, alpaka::hierarchy::Threads{}); + addpLSTrackCandidateToMemory(trackCandidatesInGPU, + pixelArrayIndex, + trackCandidateIdx, + segmentsInGPU.pLSHitsIdxs[pixelArrayIndex], + segmentsInGPU.seedIdx[pixelArrayIndex]); + } + } + } + }; + + struct AddpT5asTrackCandidateInGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + PixelQuintuplets pixelQuintupletsInGPU, + TrackCandidates trackCandidatesInGPU, + Segments segmentsInGPU, + ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int nPixelQuintuplets = *pixelQuintupletsInGPU.nPixelQuintuplets; + unsigned int pLS_offset = rangesInGPU.segmentModuleIndices[nLowerModules]; + for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[0]) { + if (pixelQuintupletsInGPU.isDup[pixelQuintupletIndex]) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet + { +#ifdef WARNINGS + printf("Track Candidate excess alert! 
Type = pT5"); +#endif + alpaka::atomicSub(acc, trackCandidatesInGPU.nTrackCandidates, 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, trackCandidatesInGPU.nTrackCandidatespT5, 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelQuintupletsInGPU.pixelRadius[pixelQuintupletIndex]) + + __H2F(pixelQuintupletsInGPU.quintupletRadius[pixelQuintupletIndex])); + unsigned int pT5PixelIndex = pixelQuintupletsInGPU.pixelIndices[pixelQuintupletIndex]; + addTrackCandidateToMemory( + trackCandidatesInGPU, + 7 /*track candidate type pT5=7*/, + pT5PixelIndex, + pixelQuintupletsInGPU.T5Indices[pixelQuintupletIndex], + &pixelQuintupletsInGPU.logicalLayers[Params_pT5::kLayers * pixelQuintupletIndex], + &pixelQuintupletsInGPU.lowerModuleIndices[Params_pT5::kLayers * pixelQuintupletIndex], + &pixelQuintupletsInGPU.hitIndices[Params_pT5::kHits * pixelQuintupletIndex], + segmentsInGPU.seedIdx[pT5PixelIndex - pLS_offset], + __H2F(pixelQuintupletsInGPU.centerX[pixelQuintupletIndex]), + __H2F(pixelQuintupletsInGPU.centerY[pixelQuintupletIndex]), + radius, + trackCandidateIdx, + pixelQuintupletIndex); + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h new file mode 100644 index 0000000000000..5e1b352748573 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -0,0 +1,1049 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Triplet_h +#define RecoTracker_LSTCore_src_alpaka_Triplet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Constants.h" +#include "RecoTracker/LSTCore/interface/Module.h" + +#include "Segment.h" +#include "MiniDoublet.h" +#include "Hit.h" +#include "ObjectRanges.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + struct Triplets { + unsigned int* segmentIndices; + uint16_t* lowerModuleIndices; //3 of them + unsigned int* 
nTriplets; + unsigned int* totOccupancyTriplets; + unsigned int* nMemoryLocations; + uint8_t* logicalLayers; + unsigned int* hitIndices; + FPX* betaIn; + float* circleRadius; + float* circleCenterX; + float* circleCenterY; + bool* partOfPT5; + bool* partOfT5; + bool* partOfPT3; + +#ifdef CUT_VALUE_DEBUG + //debug variables + float* zOut; + float* rtOut; + float* betaInCut; +#endif + template + void setData(TBuff& buf) { + segmentIndices = buf.segmentIndices_buf.data(); + lowerModuleIndices = buf.lowerModuleIndices_buf.data(); + nTriplets = buf.nTriplets_buf.data(); + totOccupancyTriplets = buf.totOccupancyTriplets_buf.data(); + nMemoryLocations = buf.nMemoryLocations_buf.data(); + logicalLayers = buf.logicalLayers_buf.data(); + hitIndices = buf.hitIndices_buf.data(); + betaIn = buf.betaIn_buf.data(); + circleRadius = buf.circleRadius_buf.data(); + circleCenterX = buf.circleCenterX_buf.data(); + circleCenterY = buf.circleCenterY_buf.data(); + partOfPT5 = buf.partOfPT5_buf.data(); + partOfT5 = buf.partOfT5_buf.data(); + partOfPT3 = buf.partOfPT3_buf.data(); +#ifdef CUT_VALUE_DEBUG + zOut = buf.zOut_buf.data(); + rtOut = buf.rtOut_buf.data(); + betaInCut = buf.betaInCut_buf.data(); +#endif + } + }; + + template + struct TripletsBuffer { + Buf segmentIndices_buf; + Buf lowerModuleIndices_buf; + Buf nTriplets_buf; + Buf totOccupancyTriplets_buf; + Buf nMemoryLocations_buf; + Buf logicalLayers_buf; + Buf hitIndices_buf; + Buf betaIn_buf; + Buf circleRadius_buf; + Buf circleCenterX_buf; + Buf circleCenterY_buf; + Buf partOfPT5_buf; + Buf partOfT5_buf; + Buf partOfPT3_buf; + +#ifdef CUT_VALUE_DEBUG + Buf zOut_buf; + Buf rtOut_buf; + Buf deltaPhiPos_buf; + Buf deltaPhi_buf; + Buf zLo_buf; + Buf zHi_buf; + Buf zLoPointed_buf; + Buf zHiPointed_buf; + Buf dPhiCut_buf; + Buf betaInCut_buf; + Buf rtLo_buf; + Buf rtHi_buf; +#endif + + Triplets data_; + + template + TripletsBuffer(unsigned int maxTriplets, unsigned int nLowerModules, TDevAcc const& devAccIn, TQueue& queue) + : 
segmentIndices_buf(allocBufWrapper(devAccIn, 2 * maxTriplets, queue)), + lowerModuleIndices_buf(allocBufWrapper(devAccIn, Params_T3::kLayers * maxTriplets, queue)), + nTriplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), + totOccupancyTriplets_buf(allocBufWrapper(devAccIn, nLowerModules, queue)), + nMemoryLocations_buf(allocBufWrapper(devAccIn, 1, queue)), + logicalLayers_buf(allocBufWrapper(devAccIn, maxTriplets * Params_T3::kLayers, queue)), + hitIndices_buf(allocBufWrapper(devAccIn, maxTriplets * Params_T3::kHits, queue)), + betaIn_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + circleRadius_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + circleCenterX_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + circleCenterY_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + partOfPT5_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + partOfT5_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + partOfPT3_buf(allocBufWrapper(devAccIn, maxTriplets, queue)) +#ifdef CUT_VALUE_DEBUG + , + zOut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + rtOut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + deltaPhiPos_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + deltaPhi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + zLo_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + zHi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + zLoPointed_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + zHiPointed_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + dPhiCut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + betaInCut_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + rtLo_buf(allocBufWrapper(devAccIn, maxTriplets, queue)), + rtHi_buf(allocBufWrapper(devAccIn, maxTriplets, queue)) +#endif + { + alpaka::memset(queue, nTriplets_buf, 0u); + alpaka::memset(queue, totOccupancyTriplets_buf, 0u); + alpaka::memset(queue, partOfPT5_buf, false); + alpaka::memset(queue, partOfT5_buf, false); + alpaka::memset(queue, 
partOfPT3_buf, false); + } + + inline Triplets const* data() const { return &data_; } + inline void setData(TripletsBuffer& buf) { data_.setData(buf); } + }; + +#ifdef CUT_VALUE_DEBUG + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets& tripletsInGPU, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + float zOut, + float rtOut, + float betaIn, + float betaInCut, + float circleRadius, + float circleCenterX, + float circleCenterY, + unsigned int tripletIndex) +#else + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTripletToMemory(Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + Triplets& tripletsInGPU, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + float betaIn, + float circleRadius, + float circleCenterX, + float circleCenterY, + unsigned int tripletIndex) +#endif + { + tripletsInGPU.segmentIndices[tripletIndex * 2] = innerSegmentIndex; + tripletsInGPU.segmentIndices[tripletIndex * 2 + 1] = outerSegmentIndex; + tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers] = innerInnerLowerModuleIndex; + tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers + 1] = middleLowerModuleIndex; + tripletsInGPU.lowerModuleIndices[tripletIndex * Params_T3::kLayers + 2] = outerOuterLowerModuleIndex; + + tripletsInGPU.betaIn[tripletIndex] = __F2H(betaIn); + tripletsInGPU.circleRadius[tripletIndex] = circleRadius; + tripletsInGPU.circleCenterX[tripletIndex] = circleCenterX; + tripletsInGPU.circleCenterY[tripletIndex] = circleCenterY; + tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers] = + modulesInGPU.layers[innerInnerLowerModuleIndex] + 
(modulesInGPU.subdets[innerInnerLowerModuleIndex] == 4) * 6; + tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 1] = + modulesInGPU.layers[middleLowerModuleIndex] + (modulesInGPU.subdets[middleLowerModuleIndex] == 4) * 6; + tripletsInGPU.logicalLayers[tripletIndex * Params_T3::kLayers + 2] = + modulesInGPU.layers[outerOuterLowerModuleIndex] + (modulesInGPU.subdets[outerOuterLowerModuleIndex] == 4) * 6; + //get the hits + unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex]; + unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex + 1]; + unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex + 1]; + + tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits] = mdsInGPU.anchorHitIndices[firstMDIndex]; + tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 1] = mdsInGPU.outerHitIndices[firstMDIndex]; + tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 2] = mdsInGPU.anchorHitIndices[secondMDIndex]; + tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 3] = mdsInGPU.outerHitIndices[secondMDIndex]; + tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 4] = mdsInGPU.anchorHitIndices[thirdMDIndex]; + tripletsInGPU.hitIndices[tripletIndex * Params_T3::kHits + 5] = mdsInGPU.outerHitIndices[thirdMDIndex]; +#ifdef CUT_VALUE_DEBUG + tripletsInGPU.zOut[tripletIndex] = zOut; + tripletsInGPU.rtOut[tripletIndex] = rtOut; + tripletsInGPU.betaInCut[tripletIndex] = betaInCut; +#endif + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex) { + //get the rt and z + const float& r1 = mdsInGPU.anchorRt[firstMDIndex]; + const float& r2 = 
mdsInGPU.anchorRt[secondMDIndex]; + const float& r3 = mdsInGPU.anchorRt[thirdMDIndex]; + + const float& z1 = mdsInGPU.anchorZ[firstMDIndex]; + const float& z2 = mdsInGPU.anchorZ[secondMDIndex]; + const float& z3 = mdsInGPU.anchorZ[thirdMDIndex]; + + // Using lst_layer numbering convention defined in ModuleMethods.h + const int layer1 = modulesInGPU.lstLayers[innerInnerLowerModuleIndex]; + const int layer2 = modulesInGPU.lstLayers[middleLowerModuleIndex]; + const int layer3 = modulesInGPU.lstLayers[outerOuterLowerModuleIndex]; + + const float residual = z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1); + + if (layer1 == 12 and layer2 == 13 and layer3 == 14) { + return false; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return alpaka::math::abs(acc, residual) < 0.53f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 13 and layer2 == 14 and layer3 == 15) { + return false; + } else if (layer1 == 14 and layer2 == 15 and layer3 == 16) { + return false; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return alpaka::math::abs(acc, residual) < 1.21f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return alpaka::math::abs(acc, residual) < 1.f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return alpaka::math::abs(acc, residual) < 1.f; + } else if (layer1 == 3 and layer2 == 4 and layer3 == 5) { + return alpaka::math::abs(acc, residual) < 2.7f; + } else if (layer1 == 4 and layer2 == 5 and layer3 == 6) { + return alpaka::math::abs(acc, residual) < 3.06f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 9 and layer2 == 10 and layer3 == 11) { + return 
alpaka::math::abs(acc, residual) < 1; + } else { + return alpaka::math::abs(acc, residual) < 5; + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + unsigned int innerSegmentIndex, + float& betaIn, + float& betaInCut) { + bool isPSIn = (modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); + bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS); + + float rtIn = mdsInGPU.anchorRt[firstMDIndex]; + float rtMid = mdsInGPU.anchorRt[secondMDIndex]; + rtOut = mdsInGPU.anchorRt[thirdMDIndex]; + + float zIn = mdsInGPU.anchorZ[firstMDIndex]; + float zMid = mdsInGPU.anchorZ[secondMDIndex]; + zOut = mdsInGPU.anchorZ[thirdMDIndex]; + + float alpha1GeVOut = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float rtRatio_OutIn = rtOut / rtIn; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = alpaka::math::tan(acc, alpha1GeVOut) / alpha1GeVOut; // The track can bend in r-z plane slightly + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); + + const float zHi = + zIn + (zIn + kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); + const float zLo = zIn + (zIn - kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 
1.f : dzDrtScale) - + (zpitchIn + zpitchOut); //slope-correction only on outer end + + //Cut 1 - z compatibility + if ((zOut < zLo) || (zOut > zHi)) + return false; + + float drt_OutIn = (rtOut - rtIn); + + float r3In = alpaka::math::sqrt(acc, zIn * zIn + rtIn * rtIn); + float drt_InSeg = rtMid - rtIn; + float dz_InSeg = zMid - zIn; + float dr3_InSeg = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + float coshEta = dr3_InSeg / drt_InSeg; + float dzErr = (zpitchIn + zpitchOut) * (zpitchIn + zpitchOut) * 2.f; + + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f) * (r3In / rtIn); + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + dzErr += muls2 * drt_OutIn * drt_OutIn / 3.f * coshEta * coshEta; + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutIn; + const float zWindow = dzErr / drt_InSeg * drt_OutIn + + (zpitchIn + zpitchOut); //FIXME for ptCut lower than ~0.8 need to add curv path correction + const float zLoPointed = zIn + dzMean * (zIn > 0.f ? 1.f : dzDrtScale) - zWindow; + const float zHiPointed = zIn + dzMean * (zIn < 0.f ? 
1.f : dzDrtScale) + zWindow; + + // Constructing upper and lower bound + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + if ((zOut < zLoPointed) || (zOut > zHiPointed)) + return false; + + // raw betaIn value without any correction, based on the mini-doublet hit positions + float alpha_InLo = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = + alpaka::math::sqrt(acc, + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / drt_InSeg); + + //Cut #3: first beta cut + return alpaka::math::abs(acc, betaIn) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + uint16_t innerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + bool isPSIn = 
(modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS); + bool isPSOut = (modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS); + + float rtIn = mdsInGPU.anchorRt[firstMDIndex]; + float rtMid = mdsInGPU.anchorRt[secondMDIndex]; + rtOut = mdsInGPU.anchorRt[thirdMDIndex]; + + float zIn = mdsInGPU.anchorZ[firstMDIndex]; + float zMid = mdsInGPU.anchorZ[secondMDIndex]; + zOut = mdsInGPU.anchorZ[thirdMDIndex]; + + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitchIn + zpitchOut; + + // Cut #0: Preliminary (Only here in endcap case) + if (zIn * zOut <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgInnerMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS; + float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; + float zGeom1 = alpaka::math::copysign(acc, zGeom, zIn); + float rtLo = rtIn * (1.f + (zOut - zIn - zGeom1) / (zIn + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + + //Cut #1: rt condition + float zInForHi = zIn - zGeom1 - dLum; + if (zInForHi * zIn < 0) { + zInForHi = alpaka::math::copysign(acc, 0.1f, zIn); + } + float rtHi = rtIn * (1.f + (zOut - zIn + zGeom1) / zInForHi) + rtGeom1; + + //Cut #2: rt condition + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + float rIn = alpaka::math::sqrt(acc, zIn * zIn + rtIn * rtIn); + + const float drtSDIn = rtMid - rtIn; + const float dzSDIn = zMid - zIn; + const float dr3SDIn = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + const float coshEta = dr3SDIn / drtSDIn; //direction estimate + const float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); + const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + const float zGeom1_another = kPixelPSZpitch; + const float kZ = (zOut - zIn) / dzSDIn; + float drtErr = + zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); + const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2 * (rtOut - rtIn) / 50.f) * (rIn / rtIn); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; + drtErr = alpaka::math::sqrt(acc, drtErr); + + //Cut #3: rt-z pointed + + if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi)) + return false; + + float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; + float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + + float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + + float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + + betaIn = sdIn_alpha - 
phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + float swapTemp; + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + + float sdIn_dr = alpaka::math::sqrt(acc, + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #4: first beta cut + return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + float rtIn = mdsInGPU.anchorRt[firstMDIndex]; + float rtMid = mdsInGPU.anchorRt[secondMDIndex]; + rtOut = mdsInGPU.anchorRt[thirdMDIndex]; + + float zIn = mdsInGPU.anchorZ[firstMDIndex]; + float zMid = mdsInGPU.anchorZ[secondMDIndex]; + zOut = mdsInGPU.anchorZ[thirdMDIndex]; + + float alpha1GeV_Out = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, 
alpha1GeV_Out) / alpha1GeV_Out; // The track can bend in r-z plane slightly + + // Cut #0: Preliminary (Only here in endcap case) + if (zIn * zOut <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modulesInGPU.moduleType[innerInnerLowerModuleIndex] == PS; + + float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgOuterMDPS) ? kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; + + float dz = zOut - zIn; + const float rtLo = rtIn * (1.f + dz / (zIn + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end + const float rtHi = rtIn * (1.f + dz / (zIn - dLum)) + rtGeom; + + //Cut #1: rt condition + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + bool isInSgOuterMDPS = modulesInGPU.moduleType[outerOuterLowerModuleIndex] == PS; + + float drtSDIn = rtMid - rtIn; + float dzSDIn = zMid - zIn; + float dr3SDIn = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + float coshEta = dr3SDIn / drtSDIn; //direction estimate + float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); + float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + + float kZ = (zOut - zIn) / dzSDIn; + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f); + + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + + float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); + float rtWindow = drtErr + rtGeom; + float rtLo_point = rtIn + drtMean / dzDrtScale - rtWindow; + float rtHi_point = rtIn + drtMean + rtWindow; + + // Cut #3: rt-z pointed + // 
https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 + + if (isInSgInnerMDPS and isInSgOuterMDPS) // If both PS then we can point + { + if ((kZ < 0) || (rtOut < rtLo_point) || (rtOut > rtHi_point)) + return false; + } + + float rt_InLo = mdsInGPU.anchorRt[firstMDIndex]; + float rt_InOut = mdsInGPU.anchorRt[secondMDIndex]; + float sdIn_alpha = __H2F(segmentsInGPU.dPhiChanges[innerSegmentIndex]); + + float tl_axis_x = mdsInGPU.anchorX[thirdMDIndex] - mdsInGPU.anchorX[firstMDIndex]; + float tl_axis_y = mdsInGPU.anchorY[thirdMDIndex] - mdsInGPU.anchorY[firstMDIndex]; + + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mdsInGPU.anchorPhi[firstMDIndex]); + + float sdIn_alphaRHmin = __H2F(segmentsInGPU.dPhiChangeMins[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segmentsInGPU.dPhiChangeMaxs[innerSegmentIndex]); + float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; + float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; + + float swapTemp; + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + float sdIn_dr = alpaka::math::sqrt(acc, + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) * + (mdsInGPU.anchorX[secondMDIndex] - mdsInGPU.anchorX[firstMDIndex]) + + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex]) * + (mdsInGPU.anchorY[secondMDIndex] - mdsInGPU.anchorY[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #4: first beta cut + return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, + Modules const& 
modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + uint16_t innerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + short innerInnerLowerModuleSubdet = modulesInGPU.subdets[innerInnerLowerModuleIndex]; + short middleLowerModuleSubdet = modulesInGPU.subdets[middleLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modulesInGPU.subdets[outerOuterLowerModuleIndex]; + + if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and + outerOuterLowerModuleSubdet == Barrel) { + return passPointingConstraintBBB(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerSegmentIndex, + betaIn, + betaInCut); + } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintBBE(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Endcap and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintBBE(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerOuterLowerModuleIndex, + 
innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + + } + + else if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintEEE(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + } + return false; // failsafe + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusFromThreeAnchorHits( + TAcc const& acc, float x1, float y1, float x2, float y2, float x3, float y3, float& g, float& f) { + float radius = 0.f; + + //(g,f) -> center + //first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3) + + float denomInv = 1.0f / ((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3)); + + float xy1sqr = x1 * x1 + y1 * y1; + + float xy2sqr = x2 * x2 + y2 * y2; + + float xy3sqr = x3 * x3 + y3 * y3; + + g = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv; + + f = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv; + + float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv; + + if (((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) || (g * g + f * f - c < 0)) { +#ifdef WARNINGS + printf("three collinear points or FATAL! 
r^2 < 0!\n"); +#endif + radius = -1.f; + } else + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + + return radius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, + Modules const& modulesInGPU, + MiniDoublets const& mdsInGPU, + Segments const& segmentsInGPU, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& zOut, + float& rtOut, + float& betaIn, + float& betaInCut, + float& circleRadius, + float& circleCenterX, + float& circleCenterY) { + //this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here! + if (segmentsInGPU.mdIndices[2 * innerSegmentIndex + 1] != segmentsInGPU.mdIndices[2 * outerSegmentIndex]) + return false; + + unsigned int firstMDIndex = segmentsInGPU.mdIndices[2 * innerSegmentIndex]; + unsigned int secondMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex]; + unsigned int thirdMDIndex = segmentsInGPU.mdIndices[2 * outerSegmentIndex + 1]; + + if (not(passRZConstraint(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex))) + return false; + if (not(passPointingConstraint(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + middleLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut))) + return false; + + float x1 = mdsInGPU.anchorX[firstMDIndex]; + float x2 = mdsInGPU.anchorX[secondMDIndex]; + float x3 = mdsInGPU.anchorX[thirdMDIndex]; + float y1 = mdsInGPU.anchorY[firstMDIndex]; + float y2 = mdsInGPU.anchorY[secondMDIndex]; + float y3 = mdsInGPU.anchorY[thirdMDIndex]; + + circleRadius = 
computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, circleCenterX, circleCenterY); + return true; + } + + struct CreateTripletsInGPUv2 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + MiniDoublets mdsInGPU, + Segments segmentsInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU, + uint16_t* index_gpu, + uint16_t nonZeroModules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t innerLowerModuleArrayIdx = globalThreadIdx[0]; innerLowerModuleArrayIdx < nonZeroModules; + innerLowerModuleArrayIdx += gridThreadExtent[0]) { + uint16_t innerInnerLowerModuleIndex = index_gpu[innerLowerModuleArrayIdx]; + if (innerInnerLowerModuleIndex >= *modulesInGPU.nLowerModules) + continue; + + uint16_t nConnectedModules = modulesInGPU.nConnectedModules[innerInnerLowerModuleIndex]; + if (nConnectedModules == 0) + continue; + + unsigned int nInnerSegments = segmentsInGPU.nSegments[innerInnerLowerModuleIndex]; + for (unsigned int innerSegmentArrayIndex = globalThreadIdx[1]; innerSegmentArrayIndex < nInnerSegments; + innerSegmentArrayIndex += gridThreadExtent[1]) { + unsigned int innerSegmentIndex = + rangesInGPU.segmentRanges[innerInnerLowerModuleIndex * 2] + innerSegmentArrayIndex; + + // middle lower module - outer lower module of inner segment + uint16_t middleLowerModuleIndex = segmentsInGPU.outerLowerModuleIndices[innerSegmentIndex]; + + unsigned int nOuterSegments = segmentsInGPU.nSegments[middleLowerModuleIndex]; + for (unsigned int outerSegmentArrayIndex = globalThreadIdx[2]; outerSegmentArrayIndex < nOuterSegments; + outerSegmentArrayIndex += gridThreadExtent[2]) { + unsigned int outerSegmentIndex = + rangesInGPU.segmentRanges[2 * middleLowerModuleIndex] + outerSegmentArrayIndex; + + uint16_t outerOuterLowerModuleIndex = segmentsInGPU.outerLowerModuleIndices[outerSegmentIndex]; + + float zOut, rtOut, betaIn, betaInCut, circleRadius, 
circleCenterX, circleCenterY; + + bool success = runTripletConstraintsAndAlgo(acc, + modulesInGPU, + mdsInGPU, + segmentsInGPU, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + zOut, + rtOut, + betaIn, + betaInCut, + circleRadius, + circleCenterX, + circleCenterY); + + if (success) { + unsigned int totOccupancyTriplets = + alpaka::atomicAdd(acc, + &tripletsInGPU.totOccupancyTriplets[innerInnerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancyTriplets) >= + rangesInGPU.tripletModuleOccupancy[innerInnerLowerModuleIndex]) { +#ifdef WARNINGS + printf("Triplet excess alert! Module index = %d\n", innerInnerLowerModuleIndex); +#endif + } else { + unsigned int tripletModuleIndex = alpaka::atomicAdd( + acc, &tripletsInGPU.nTriplets[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int tripletIndex = + rangesInGPU.tripletModuleIndices[innerInnerLowerModuleIndex] + tripletModuleIndex; +#ifdef CUT_VALUE_DEBUG + addTripletToMemory(modulesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + innerSegmentIndex, + outerSegmentIndex, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + zOut, + rtOut, + betaIn, + betaInCut, + circleRadius, + circleCenterX, + circleCenterY, + tripletIndex); +#else + addTripletToMemory(modulesInGPU, + mdsInGPU, + segmentsInGPU, + tripletsInGPU, + innerSegmentIndex, + outerSegmentIndex, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + betaIn, + circleRadius, + circleCenterX, + circleCenterY, + tripletIndex); +#endif + } + } + } + } + } + } + }; + + struct CreateTripletArrayRanges { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + ObjectRanges rangesInGPU, + Segments segmentsInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + 
ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nTotalTriplets = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalTriplets = 0; + } + alpaka::syncBlockThreads(acc); + + // Create variables outside of the for loop. + int occupancy, category_number, eta_number; + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + if (segmentsInGPU.nSegments[i] == 0) { + rangesInGPU.tripletModuleIndices[i] = nTotalTriplets; + rangesInGPU.tripletModuleOccupancy[i] = 0; + continue; + } + + short module_rings = modulesInGPU.rings[i]; + short module_layers = modulesInGPU.layers[i]; + short module_subdets = modulesInGPU.subdets[i]; + float module_eta = alpaka::math::abs(acc, modulesInGPU.eta[i]); + + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + if (category_number == 0 && eta_number == 0) + occupancy = 543; + else if (category_number == 0 && eta_number == 1) + occupancy = 235; + else if (category_number == 0 && eta_number == 2) + occupancy = 88; + else if (category_number == 0 && eta_number 
== 3) + occupancy = 46; + else if (category_number == 1 && eta_number == 0) + occupancy = 755; + else if (category_number == 1 && eta_number == 1) + occupancy = 347; + else if (category_number == 2 && eta_number == 1) + occupancy = 0; + else if (category_number == 2 && eta_number == 2) + occupancy = 0; + else if (category_number == 3 && eta_number == 1) + occupancy = 38; + else if (category_number == 3 && eta_number == 2) + occupancy = 46; + else if (category_number == 3 && eta_number == 3) + occupancy = 39; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createTripletArrayRanges! Module index = %i\n", i); +#endif + } + + rangesInGPU.tripletModuleOccupancy[i] = occupancy; + unsigned int nTotT = alpaka::atomicAdd(acc, &nTotalTriplets, occupancy, alpaka::hierarchy::Threads{}); + rangesInGPU.tripletModuleIndices[i] = nTotT; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + *rangesInGPU.device_nTotalTrips = nTotalTriplets; + } + } + }; + + struct AddTripletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Modules modulesInGPU, + Triplets tripletsInGPU, + ObjectRanges rangesInGPU) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < *modulesInGPU.nLowerModules; i += gridThreadExtent[0]) { + if (tripletsInGPU.nTriplets[i] == 0) { + rangesInGPU.tripletRanges[i * 2] = -1; + rangesInGPU.tripletRanges[i * 2 + 1] = -1; + } else { + rangesInGPU.tripletRanges[i * 2] = rangesInGPU.tripletModuleIndices[i]; + rangesInGPU.tripletRanges[i * 2 + 1] = rangesInGPU.tripletModuleIndices[i] + tripletsInGPU.nTriplets[i] - 1; + } + } + } + }; +} // namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/standalone/.gitignore b/RecoTracker/LSTCore/standalone/.gitignore new file mode 100644 index 0000000000000..29e86cb6b932a --- /dev/null +++ b/RecoTracker/LSTCore/standalone/.gitignore @@ -0,0 +1,43 @@ +mtv +*~ +results/ +*.o +debug.root +*.pdf +plots/ +plots_*/ +scripts/moduleconnection*.txt +*.root +.make.log* +bin/doAnalysis +bin/lst +bin/lst_cuda +bin/lst_cpu +bin/lst_rocm +code/rooutil/librooutil.so +code/rooutil/rooutil.so +.gitversion.txt +efficiency/doAnalysis +.jobs.txt +efficiency/results* +efficiencies/ +efficiency/bin/createEffNumDenPlots +efficiency/bin/createPerfNumDenHists +efficiency/compare +efficiency/summary +*.txt +*.pyc +output* +movetoweb.sh +*.nvvp +*.ipynb +*.log +*.nsys-rep +*.sqlite +*.ncu-rep +*.swp + +*.nfs* +.directoryhash +performance/ +notebooks/ diff --git a/RecoTracker/LSTCore/standalone/LST/.gitignore b/RecoTracker/LSTCore/standalone/LST/.gitignore new file mode 100644 index 0000000000000..32429d8358fb5 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/LST/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +.vscode/ diff --git a/RecoTracker/LSTCore/standalone/LST/Makefile b/RecoTracker/LSTCore/standalone/LST/Makefile new file mode 100644 index 0000000000000..ba5e19e6a2779 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/LST/Makefile @@ -0,0 +1,150 @@ +# +# stuff to make +# + +CCSOURCES=$(wildcard ../../src/*.cc) +ALPAKACCSOURCES=$(filter-out ../../src.alpaka/LST.dev.cc, $(wildcard ../../src/alpaka/*.dev.cc)) +CCOBJECTS_CPU=$(patsubst ../../src/alpaka/%.dev.cc, %_cpu.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_cpu.o, $(CCSOURCES)) +CCOBJECTS_CUDA=$(patsubst ../../src/alpaka/%.dev.cc, %_cuda.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_cuda.o, $(CCSOURCES)) +CCOBJECTS_ROCM=$(patsubst ../../src/alpaka/%.dev.cc, %_rocm.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_rocm.o, $(CCSOURCES)) + +LSTSOURCES=../../src/alpaka/LST.dev.cc 
+LSTOBJECTS_CPU=$(patsubst ../../src/alpaka/%.dev.cc, %_cpu.o, $(LSTSOURCES)) +LSTOBJECTS_CUDA=$(patsubst ../../src/alpaka/%.dev.cc, %_cuda.o, $(LSTSOURCES)) +LSTOBJECTS_ROCM=$(patsubst ../../src/alpaka/%.dev.cc, %_rocm.o, $(LSTSOURCES)) + +# Default to CPU and CUDA backends +ifeq ($(BACKEND),) + LIB_CPU=liblst_cpu.so + LIB_CUDA=liblst_cuda.so +endif + +ifneq ($(findstring cpu,$(BACKEND)),) + LIB_CPU=liblst_cpu.so +endif +ifneq ($(findstring cuda,$(BACKEND)),) + LIB_CUDA=liblst_cuda.so +endif +ifneq ($(findstring rocm,$(BACKEND)),) + LIB_ROCM=liblst_rocm.so +endif +ifneq ($(findstring all,$(BACKEND)),) + LIB_CPU=liblst_cpu.so + LIB_CUDA=liblst_cuda.so + LIB_ROCM=liblst_rocm.so +endif + +LIBS=$(LIB_CPU) $(LIB_CUDA) $(LIB_ROCM) + +# +# flags to keep track of +# + +# Different architectures to optimize for +GENCODE_CUDA := -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_89,code=[sm_89,compute_89] + +CXX = g++ +CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Wshadow -Woverloaded-virtual -fPIC -fopenmp -I.. +CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Wshadow --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. +CXXFLAGS_ROCM = -O3 -g -Wall -Wshadow -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. 
+CMSSWINCLUDE := -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src +ifdef CMSSW_RELEASE_BASE +CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src +endif +ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++17 ${CMSSWINCLUDE} +ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED +ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY --expt-relaxed-constexpr +ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG +ROOTINCLUDE = -I$(ROOT_ROOT)/include +ROOTCFLAGS = -pthread -m64 $(ROOTINCLUDE) +PRINTFLAG = -DT4FromT3 +DUPLICATES = -DDUP_pLS -DDUP_T5 -DDUP_pT5 -DDUP_pT3 -DCrossclean_T5 -DCrossclean_pT3 #-DFP16_Base +CACHEFLAG = +PTCUTFLAG = +LSTWARNINGSFLAG = +CMSSW_WERRORS_CPU = -Werror=pointer-arith -Werror=overlength-strings -Werror=return-type -Werror=missing-braces -Werror=unused-value -Werror=unused-label \ + -Werror=address -Werror=format -Werror=sign-compare -Werror=write-strings -Werror=delete-non-virtual-dtor -Werror=strict-aliasing -Werror=narrowing \ + -Werror=unused-but-set-variable -Werror=reorder -Werror=unused-variable -Werror=conversion-null -Werror=return-local-addr -Wnon-virtual-dtor -Werror=switch \ + -Werror=main -Werror=overflow -Werror=format-contains-nul -Werror=type-limits -Wreturn-type -Wextra -Wpessimizing-move -Wclass-memaccess -Wunused \ + -Wparentheses -Wno-vla -Wno-non-template-friend -Wno-long-long -Wno-cast-function-type -Wno-unused-but-set-parameter -Wno-ignored-qualifiers \ + -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-attributes +CMSSW_WERRORS_CUDA = $(patsubst %,-Xcompiler %,$(CMSSW_WERRORS_CPU)) +CMSSW_WERRORS_ROCM = $(CMSSW_WERRORS_CPU) +CACHEFLAG_FLAGS = -DCACHE_ALLOC +T5CUTFLAGS = $(T5DNNFLAG) $(T5RZCHI2FLAG) $(T5RPHICHI2FLAG) + +LD_CPU = g++ +SOFLAGS_CPU = -g -shared -fPIC +ALPAKABACKEND_CPU = $(ALPAKASERIAL) +COMPILE_CMD_CPU = $(LD_CPU) -c + +LD_CUDA = nvcc +SOFLAGS_CUDA = -g -shared --compiler-options -fPIC --cudart shared 
$(GENCODE_CUDA) +ALPAKABACKEND_CUDA = $(ALPAKACUDA) +COMPILE_CMD_CUDA = $(LD_CUDA) -x cu + +LD_ROCM = hipcc +SOFLAGS_ROCM = -g -shared -fPIC +ALPAKABACKEND_ROCM = $(ALPAKAROCM) +COMPILE_CMD_ROCM = $(LD_ROCM) -c + +CUTVALUEFLAG = +CUTVALUEFLAG_FLAGS = -DCUT_VALUE_DEBUG + +%_cpu.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(PRINTFLAG) $(CACHEFLAG) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(DUPLICATES) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< -o $@ + +%_cuda.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(PRINTFLAG) $(CACHEFLAG) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(DUPLICATES) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(PRINTFLAG) $(CACHEFLAG) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(DUPLICATES) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +%_cpu.o: ../../src/%.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(PRINTFLAG) $(CACHEFLAG) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(DUPLICATES) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< -o $@ + +%_cuda.o: ../../src/%.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(PRINTFLAG) $(CACHEFLAG) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(DUPLICATES) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/%.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(PRINTFLAG) $(CACHEFLAG) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(DUPLICATES) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +$(LIB_CPU): $(CCOBJECTS_CPU) $(LSTOBJECTS_CPU) + $(LD_CPU) $(SOFLAGS_CPU) $^ -o $@ + +$(LIB_CUDA): $(CCOBJECTS_CUDA) $(LSTOBJECTS_CUDA) + $(LD_CUDA) $(SOFLAGS_CUDA) $^ -o $@ + 
+$(LIB_ROCM): $(CCOBJECTS_ROCM) $(LSTOBJECTS_ROCM) + $(LD_ROCM) $(SOFLAGS_ROCM) $^ -o $@ + +explicit: $(LIBS) + +explicit_cache: CACHEFLAG += $(CACHEFLAG_FLAGS) +explicit_cache: $(LIBS) + +explicit_cache_cutvalue: CUTVALUEFLAG = $(CUTVALUEFLAG_FLAGS) +explicit_cache_cutvalue: CACHEFLAG += $(CACHEFLAG_FLAGS) +explicit_cache_cutvalue: $(LIBS) + +clean: + rm -f *.opp + rm -f *.o + rm -f *.d + rm -f *.so + +.PHONY: clean explicit explicit_cache explicit_cache_cutvalue format check check-fix + +format: + clang-format --style=file:../.clang-format -i *.cc *.h + +# Collect all the include paths from the compiler. +# The .../gcc/x86_64-redhat-linux-gnu/*/include path is excluded since .../gcc/x86_64-redhat-linux-gnu/*/include-fixed should be used instead. +TIDYINCLUDEFLAGS := $(shell g++ -E -x c++ - -v < /dev/null 2>&1 | awk '/#include <...>/,/^End of search/{if (/^ / && !/x86_64-redhat-linux-gnu\/[0-9.]+\/include$$/) print "-I"$$1}' | tr '\n' ' ') +TIDYFLAGS := --language=c++ $(CXXFLAGS_CPU) $(ALPAKAINCLUDE) $(ALPAKASERIAL) $(ROOTCFLAGS) $(PRINTFLAG) $(DUPLICATED) $(CACHEFLAG_FLAGS) $(TIDYINCLUDEFLAGS) + +check: + clang-tidy --config-file=../.clang-tidy *.cc *.h -- $(TIDYFLAGS) + +check-fix: + clang-tidy --config-file=../.clang-tidy --format-style=file:../.clang-format --fix --fix-errors --fix-notes *.cc *.h -- $(TIDYFLAGS) diff --git a/RecoTracker/LSTCore/standalone/Makefile b/RecoTracker/LSTCore/standalone/Makefile new file mode 100644 index 0000000000000..efcd2483c5eba --- /dev/null +++ b/RecoTracker/LSTCore/standalone/Makefile @@ -0,0 +1,78 @@ +# Simple makefile + +EXES := bin/lst_cpu bin/lst_cuda + +SOURCES=$(wildcard code/core/*.cc) +OBJECTS_CPU=$(SOURCES:.cc=_cpu.o) +OBJECTS_CUDA=$(SOURCES:.cc=_cuda.o) +OBJECTS_ROCM=$(SOURCES:.cc=_rocm.o) +OBJECTS=$(OBJECTS_CPU) $(OBJECTS_CUDA) $(OBJECTS_ROCM) + +CXX = g++ +CXXFLAGS = -g -O2 -Wall -fPIC -Wshadow -Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++17 +INCLUDEFLAGS= -ILST -I$(shell pwd) -Icode 
-Icode/core -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include $(shell rooutil-config --include) -I$(shell root-config --incdir) -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src -I../interface/ -I../interface/alpaka/ -I../src/ -I../src/alpaka/ +ifdef CMSSW_RELEASE_BASE +INCLUDEFLAGS:= ${INCLUDEFLAGS} -I${CMSSW_RELEASE_BASE}/src +endif +LDFLAGS = -g -O2 $(LSTLIB) -L${TRACKLOOPERDIR}/LST $(shell rooutil-config --libs) $(shell root-config --libs) +LDFLAGS_CUDA= -L${CUDA_HOME}/lib64 -lcudart +LDFLAGS_ROCM= -L${ROCM_ROOT}/lib -lamdhip64 +ALPAKAFLAGS = -DALPAKA_DEBUG=0 +CUDAINCLUDE = -I${CUDA_HOME}/include +ROCMINCLUDE = -I${ROCM_ROOT}/include +ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED +ALPAKA_CUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY +ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ +EXTRAFLAGS = -ITMultiDrawTreePlayer -Wunused-variable -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -fopenmp +DOQUINTUPLET = +PTCUTFLAG = +CUTVALUEFLAG = +CUTVALUEFLAG_FLAGS = -DCUT_VALUE_DEBUG + +PRIMITIVEFLAG = +PRIMITIVEFLAG_FLAGS = -DPRIMITIVE_STUDY + +all: rooutil efficiency $(EXES) + +cutvalue: CUTVALUEFLAG = ${CUTVALUEFLAG_FLAGS} +cutvalue: rooutil efficiency $(EXES) + +primitive: PRIMITIVEFLAG = ${PRIMITIVEFLAG_FLAGS} +primitive: rooutil efficiency $(EXES) + +cutvalue_primitive: CUTVALUEFLAG = ${CUTVALUEFLAG_FLAGS} +cutvalue_primitive: PRIMITIVEFLAG = ${PRIMITIVEFLAG_FLAGS} +cutvalue_primitive: rooutil efficiency $(EXES) + + +bin/lst_cpu: LSTLIB=-llst_cpu +bin/lst_cpu: bin/lst_cpu.o $(OBJECTS_CPU) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CPU) -o $@ +bin/lst_cuda: LSTLIB=-llst_cuda +bin/lst_cuda: bin/lst_cuda.o $(OBJECTS_CUDA) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) 
$(DOQUINTUPLET) $(ALPAKA_CUDA) $(LDFLAGS_CUDA) -o $@ +bin/lst_rocm: LSTLIB=-llst_rocm +bin/lst_rocm: bin/lst_rocm.o $(OBJECTS_ROCM) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_ROCM) $(LDFLAGS_ROCM) -o $@ + +%_cpu.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CPU) $< -c -o $@ +%_cuda.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CUDA) $(CUDAINCLUDE) $< -c -o $@ +%_rocm.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_ROCM) $(ROCMINCLUDE) $< -c -o $@ + +rooutil: + $(MAKE) -C code/rooutil/ + +efficiency: rooutil + $(MAKE) -C efficiency/ + +clean: + rm -f $(OBJECTS) bin/*.o $(EXES) bin/lst + rm -f code/rooutil/*.so code/rooutil/*.o + rm -f bin/lst.o + rm -f LST/*.o + cd efficiency/ && make clean + +.PHONY: rooutil efficiency diff --git a/RecoTracker/LSTCore/standalone/README.md b/RecoTracker/LSTCore/standalone/README.md new file mode 100644 index 0000000000000..02fbef943f697 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/README.md @@ -0,0 +1,291 @@ +# TrackLooper + + +## Quick Start + + +### Setting up LSTPerformanceWeb (only for lnx7188 and lnx4555) + +For lnx7188 and lnx4555 this needs to be done once + + cd /cdat/tem/${USER}/ + git clone git@github.com:SegmentLinking/LSTPerformanceWeb.git + +### Setting up container (only for lnx7188) + +For lnx7188 this needs to be done before compiling or running the code: + + singularity shell --nv --bind /mnt/data1:/data --bind /data2/segmentlinking/ --bind /opt --bind /nfs --bind /mnt --bind /usr/local/cuda/bin/ --bind /cvmfs /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmssw/el8:x86_64 + +### Setting up 
the code + + git clone git@github.com:SegmentLinking/TrackLooper.git + cd TrackLooper/ + # Source one of the commands below, depending on the site + source setup.sh # if on UCSD or Cornell + source setup_hpg.sh # if on Florida + +### Running the code + + sdl_make_tracklooper -mc + sdl_ -i PU200 -o LSTNtuple.root + createPerfNumDenHists -i LSTNtuple.root -o LSTNumDen.root + lst_plot_performance.py LSTNumDen.root -t "myTag" + # python3 efficiency/python/lst_plot_performance.py LSTNumDen.root -t "myTag" # if you are on cgpu-1 or Cornell + +The above can be even simplified + + sdl_run -f -mc -s PU200 -n -1 -t myTag + +The `-f` flag can be omitted when the code has already been compiled. If multiple backends were compiled, then the `-b` flag can be used to specify a backend. For example + + sdl_run -b cpu -s PU200 -n -1 -t myTag + +## Command explanations + +Compile the code with option flags. If none of `C,G,R,A` are used, then it defaults to compiling for CUDA and CPU. + + sdl_make_tracklooper -mc + -m: make clean binaries + -c: run with the cmssw caching allocator + -C: compile CPU backend + -G: compile CUDA backend + -R: compile ROCm backend + -A: compile all backends + -h: show help screen with all options + +Run the code + + sdl_ -n -v -w -s -i -o + + -i: PU200; muonGun, etc + -n: number of events; default: all + -v: 0-no printout; 1- timing printout only; 2- multiplicity printout; default: 0 + -s: number of streams/events in flight; default: 1 + -w: 0- no writeout; 1- minimum writeout; default: 1 + -o: provide an output root file name (e.g. LSTNtuple.root); default: debug.root + -l: add lower level object (pT3, pT5, T5, etc.) branches to the output + +Plotting numerators and denominators of performance plots + + createPerfNumDenHists -i -o [-g -n ] + + -i: Path to LSTNtuple.root + -o: provide an output root file name (e.g. 
num_den_hist.root) + -n: (optional) number of events + -g: (optional) comma separated pdgids to add more efficiency plots with different sim particle slices + +Plotting performance plots + + lst_plot_performance.py num_den_hist.root -t "mywork" + +There are several options you can provide to restrict the number of plots being produced. +And by default, it creates a certain set of objects. +One can specify the type, range, metric, etc. +To see the full information type + + lst_plot_performance.py --help + +To give an example of plotting efficiency, object type of lower level T5, for |eta| < 2.5 only. + + lst_plot_performance.py num_den_hist.root -t "mywork" -m eff -o T5_lower -s loweta + +NOTE: in order to plot lower level object, ```-l``` option must have been used during ```sdl``` step! + +When running on ```cgpu-1``` remember to specify python3 as there is no python. +The shebang on the ```lst_plot_performance.py``` is not updated as ```lnx7188``` works with python2... + +Comparing two different runs + + lst_plot_performance.py \ + num_den_hist_1.root \ # Reference + num_den_hist_2.root \ # New work + -L BaseLine,MyNewWork \ # Labeling + -t "mywork" \ + --compare + +## CMSSW Integration +This is a complete set of instructions on how the TrackLooper code +can be linked as an external tool in CMSSW: + +### Build TrackLooper +```bash +git clone git@github.com:SegmentLinking/TrackLooper.git +cd TrackLooper/ +# Source one of the commands below, depending on the site +source setup.sh # if on UCSD or Cornell +source setup_hpg.sh # if on Florida +sdl_make_tracklooper -mc +cd .. 
+``` + +### Set up `TrackLooper` as an external +```bash +mkdir workingFolder # Create the folder you will be working in +cd workingFolder +cmsrel CMSSW_14_1_0_pre3 +cd CMSSW_14_1_0_pre3/src +cmsenv +git cms-init +git remote add SegLink git@github.com:SegmentLinking/cmssw.git +git fetch SegLink CMSSW_14_1_0_pre3_LST_X +git cms-addpkg RecoTracker Configuration +git checkout CMSSW_14_1_0_pre3_LST_X +#To include both the CPU library and GPU library into CMSSW, create 3 xml files (headers file has no library). +#Before writing the following xml file, check that libsdl_cpu.so and libsdl_gpu.so can be found under the ../../../TrackLooper/SDL/ folder. +cat <lst_headers.xml + + + + + + + +EOF +cat <lst_cpu.xml + + + + + + + + + +EOF +cat <lst_cuda.xml + + + + + + + + + +EOF +scram setup lst_headers.xml +scram setup lst_cpu.xml +scram setup lst_cuda.xml +cmsenv +git cms-checkdeps -a -A +scram b -j 12 +``` + +### Run the LST reconstruction in CMSSW +A simple test configuration of the LST reconstruction can be run with the command: +```bash +cmsRun RecoTracker/LST/test/LSTAlpakaTester.py +``` + +For a more complete workflow, one can run a modified version of the 21034.1 workflow. +To get the commands of this workflow, one can run: +```bash +runTheMatrix.py -w upgrade -n -e -l 21034.1 +``` + +For convenience, the workflow has been run for 100 events and the output is stored here: +```bash +/data2/segmentlinking/CMSSW_14_1_0_pre0/step2_21034.1_100Events.root +``` + +For enabling the LST reconstruction in the CMSSW tracking workflow, a modified step3 needs to be run. +This is based on the step3 command of the 21034.1 workflow with the following changes: + - Remove the `--pileup_input` and `--pileup` flags. + - The number of threads and streams for the job can be optionally controlled by the `--nThreads` and `--nStreams` command line options respectively (`1` ends up being the actual default value for both, and more info can be found by running `cmsDriver.py --help`). 
+ - Add at the end of the command: `--procModifiers gpu,trackingLST,trackingIters01 --no_exec` + +Run the command and modify the output configuration file with the following: + - If one wants to run a CPU version, remove the ```gpu``` in the line defining the `process` object: + ```python + process = cms.Process('RECO',...,gpu,...) + ``` + - Add the following lines below the part where the import of the standard configurations happens: + ```python + process.load('Configuration.StandardSequences.Accelerators_cff') + process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi") + ``` + - Modify the input and output file names accordingly, as well as the number of events. + +Then, run the configuration file with `cmsRun`. + +To get the DQM files, one would have to run step4 of the 21034.1 workflow with the following modifications: + - Add `--no_exec` to the end of the command and then run it. + - Modify the output configuration file by changing the input file (the one containing `inDQM` from the previous step) and number of events accordingly. + +Running the configuration file with `cmsRun`, the output file will have a name starting with `DQM`. The name is the same every time this step runs, +so it is good practice to rename the file, e.g. to `tracking_Iters01LST.root`. +The MTV plots can be produced with the command: +```bash +makeTrackValidationPlots.py --extended tracking_Iters01LST.root +``` +Comparison plots can be made by including multiple ROOT files as arguments. + +**Note:** In case one wants to run step2 as well, similar modifications as in step4 (`--no_exec` flag and input file/number of events) need to be applied. Moreover, the PU files had better be modified to point to local ones. 
This can be done by inserting a dummy file when running the command (set the argument of the `--pileup_input` flag to `file:file.root`), and then change the PU input files in the configuration to the following line (by means of replacing the corresponding line in the configuration): +```python +process.mix.input.fileNames = cms.untracked.vstring(['file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/066fc95d-1cef-4469-9e08-3913973cd4ce.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/07928a25-231b-450d-9d17-e20e751323a1.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/26bd8fb0-575e-4201-b657-94cdcb633045.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/4206a9c5-44c2-45a5-aab2-1a8a6043a08a.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/55a372bf-a234-4111-8ce0-ead6157a1810.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/59ad346c-f405-4288-96d7-795f81c43fe8.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/7280f5ec-b71d-4579-a730-7ce2de0ff906.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/b93adc85-715f-477a-afc9-65f3241933ee.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/c7a0aa46-f55c-4b01-977f-34a397b71fba.root', 
'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/e77fa467-97cb-4943-884f-6965b4eb0390.root']) +``` + +### Inclusion of LST in other CMSSW packages +Including the line +``` + +``` +in the relevant package `BuildFile.xml` allows for +including our headers in the code of that package. + +## Running LST in a CVMFS-less setup + +The setup scripts included in this repository assume that the [CernVM File System (CVMFS)](https://cernvm.cern.ch/fs/) is installed. This provides a convenient way to fetch the required dependencies, but it is not necessary to run LST in standalone mode. Here, we briefly describe how to build and run it when CVMFS is not available. + +The necessary dependencies are CUDA, ROOT, the Boost libraries, Alpaka, and some CMSSW headers. CUDA, ROOT, and Boost are fairly standard libraries and are available from multiple package managers. For the remaining necessary headers you will need to clone the [Alpaka](https://github.com/alpaka-group/alpaka) and [CMSSW](https://github.com/cms-sw/cmssw) repositories. The Alpaka repository is reasonably sized, but the CMSSW one is extremely large, especially considering that we only need a tiny fraction of its files to build LST. We can get only the Alpaka interface headers from CMSSW by running the following commands. + +``` bash +git clone --filter=blob:none --no-checkout --depth 1 --sparse --branch CMSSW_14_1_X https://github.com/cms-sw/cmssw.git +cd cmssw +git sparse-checkout add HeterogeneousCore/AlpakaInterface +git checkout +``` + +Then all that is left to do is set some environment variables. We give an example of how to do this in lnx7188/cgpu-1. + +```bash +# These two lines are only needed to set the right version of gcc and nvcc. They are not needed for standard installations. 
+export PATH=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/bin:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/external/el8_amd64_gcc12/bin:$PATH +export LD_LIBRARY_PATH=/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/biglib/el8_amd64_gcc12:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/lib/el8_amd64_gcc12:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/external/el8_amd64_gcc12/lib:/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/lib64:/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/lib:$LD_LIBRARY_PATH + +# These are the lines that you need to manually change for a CVMFS-less setup. +# In this example we use cvmfs paths since that is where the dependencies are in lnx7188/cgpu1, but they can point to local directories. +export BOOST_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/boost/1.80.0-60a217837b5db1cff00c7d88ec42f53a +export ALPAKA_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/alpaka/1.1.0-7d0324257db47fde2d27987e7ff98fb4 +export CUDA_HOME=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/cuda/12.4.1-06cde0cd9f95a73a1ea05c8535f60bde +export ROOT_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/lcg/root/6.30.07-21947a33e64ceb827a089697ad72e468 +export CMSSW_BASE=/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3 + +# These lines are needed to account for some extra environment variables that are exported in the setup script. +export LD_LIBRARY_PATH=$PWD/SDL/cuda:$PWD/SDL/cpu:$PWD:$LD_LIBRARY_PATH +export PATH=$PWD/bin:$PATH +export PATH=$PWD/efficiency/bin:$PATH +export PATH=$PWD/efficiency/python:$PATH +export TRACKLOOPERDIR=$PWD +export TRACKINGNTUPLEDIR=/data2/segmentlinking/CMSSW_12_2_0_pre2/ +export LSTOUTPUTDIR=. +source $PWD/code/rooutil/thisrooutil.sh + +# After this, you can compile and run LST as usual. 
+sdl_run -f -mc -s PU200 -n -1 -t myTag +``` + +## Code formatting and checking + +The makefile in the `SDL` directory includes phony targets to run `clang-format` and `clang-tidy` on the code using the formatting and checks used in CMSSW. The following are the available commands. + +- `make format` + Formats the code in the `SDL` directory using `clang-format` following the rules specified in `.clang-format`. +- `make check` + Runs `clang-tidy` on the code in the `SDL` directory to perform the checks specified in `.clang-tidy`. +- `make check-fix` + Same as `make check`, but fixes the issues that it knows how to fix. + \ No newline at end of file diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc new file mode 100644 index 0000000000000..c0e52d0a0d194 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -0,0 +1,524 @@ +#include "lst.h" + +#include + +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::Event; +using namespace ::lst; + +//___________________________________________________________________________________________________________________________________________________________________________________________ +int main(int argc, char **argv) { + //******************************************************************************** + // + // 0. Preliminary operations + // + //******************************************************************************** + + // Checking the TRACKLOOPERDIR is set + ana.track_looper_dir_path = gSystem->Getenv("TRACKLOOPERDIR"); + if (ana.track_looper_dir_path.IsNull()) { + RooUtil::error( + "TRACKLOOPERDIR is not set! Did you run $ source setup.sh from TrackLooper/ main repository directory?"); + } + RooUtil::print(TString::Format("TRACKLOOPERDIR=%s", ana.track_looper_dir_path.Data())); + + // Write the command line used to run it + // N.B. 
This needs to be before the argument parsing as it will change some values + std::vector allArgs(argv, argv + argc); + ana.full_cmd_line = ""; + for (auto &str : allArgs) { + ana.full_cmd_line += TString::Format(" %s", str.c_str()); + } + + //******************************************************************************** + // + // 1. Parsing options + // + //******************************************************************************** + + // cxxopts is just a tool to parse argc, and argv easily + + // Grand option setting + cxxopts::Options options("\n $ lst", + "\n **********************\n * *\n * " + "Looper *\n * *\n **********************\n"); + + // Read the options + options.add_options()("m,mode", "Run mode (NOT DEFINED)", cxxopts::value()->default_value("5"))( + "i,input", + "Comma separated input file list OR if just a directory is provided it will glob all in the directory BUT must " + "end with '/' for the path", + cxxopts::value()->default_value("muonGun"))( + "t,tree", + "Name of the tree in the root file to open and loop over", + cxxopts::value()->default_value("trackingNtuple/tree"))( + "o,output", "Output file name", cxxopts::value())( + "N,nmatch", "N match for MTV-like matching", cxxopts::value()->default_value("9"))( + "n,nevents", "N events to loop over", cxxopts::value()->default_value("-1"))( + "x,event_index", "specific event index to process", cxxopts::value()->default_value("-1"))( + "g,pdg_id", "The simhit pdgId match option", cxxopts::value()->default_value("0"))( + "v,verbose", + "Verbose mode (0: no print, 1: only final timing, 2: object multiplitcity", + cxxopts::value()->default_value("0"))( + "w,write_ntuple", "Write Ntuple", cxxopts::value()->default_value("1"))( + "s,streams", "Set number of streams", cxxopts::value()->default_value("1"))( + "d,debug", "Run debug job. i.e. 
overrides output option to 'debug.root' and 'recreate's the file.")( + "l,lower_level", "write lower level objects ntuple results")("G,gnn_ntuple", "write gnn input variable ntuple")( + "j,nsplit_jobs", "Enable splitting jobs by N blocks (--job_index must be set)", cxxopts::value())( + "I,job_index", + "job_index of split jobs (--nsplit_jobs must be set. index starts from 0. i.e. 0, 1, 2, 3, etc...)", + cxxopts::value())("3,tc_pls_triplets", "Allow triplet pLSs in TC collection")( + "2,no_pls_dupclean", "Disable pLS duplicate cleaning (both steps)")("h,help", "Print help"); + + auto result = options.parse(argc, argv); + + // NOTE: When an option was provided (e.g. -i or --input), then the result.count("