From a96a2c72916dbf8ff3dffbe9ff58e9767d90062c Mon Sep 17 00:00:00 2001
From: Matevz Tadel <mtadel@ucsd.edu>
Date: Thu, 16 Dec 2021 23:15:47 -0800
Subject: [PATCH] Import mkFit from trackreco/mkFit to cmssw.

This commit is squashed from many commits made during the code review.
Below is a list of changes (in addition to overall cleanup):

- Build subdirs of MkFitCore/src/, put Ice source back in Ice/. Move CMS-2017 geom test to MkFitCMS.

- Move files with stand-alone steering code out of attic/.

- Use std::aligned_alloc in place of _mm_malloc.

- Reenable standalone quality and root-based validation.
  So far only works for standalone runBtpCe_MultiIter() - MIMI
  even though the modifications have also been made in legacy
  best-hit, standard, and clone-engine steering functions.

- Remove standalone externals from MkFitCore.

- Remove RecoTracker/MkFitCore/standalone/dust-chest/ directory that contained nlohmann and
  SMatrix externals.

- Move them to a new github repo: trackreco/mkFit-external

- In standalone configure, clone the mkFit-external repo into the build directory.

- Modify Makefiles to include from this new directory instead of dusty-chest/.

- Make Pool<T> return unique_ptr directly, reduce its public
  interface, follow CMS' naming convention, and align naming a bit closer
  to edm::ReusableObjectHolder.

- refactor LayerNumberConverter

- Remove std::vector inheritance of CombCandidate - use a std::vector member.

- Switch to regular std::clamp

- Significant cleanup of Config.h/cc

- Split Config.h/cc into standalone/ConfigStandalone.h/cc
  Move as much standalone stuff as possible.

- Move TrackExtra class out of Track.h/cc into standalone/TrackExtra.h/cc

- Move Conformal-fit stuff into standalone as well.

- Makefiles -- distclean removes libs and executables.

- Remove some more Config variables related to standalone sim.

- Remove usage of unnamed union/struct for bitfield initialization.

- LayerNumberConverter.h private data

- Rename most functions to start with a lower-case letter.

- HitStructures: classes LayerOfHits and EventOfHits
  - make all members private
  - proper pub/priv order
  - remove commented code
  - improve comments

- Remove unused Track::clone()

- HitStructures: classes TrackCand, CombCandidate, EventOfCombCandidates
  - make all members private, write accessors as needed
  - proper pub/priv order
  - remove commented code
  - improve comments

- Mostly miscellaneous review followup:
  - commented out code cleanup
  - drop const in returned values
  - enum-> constexpr in Matriplex.h
  - constexpr for 1620

- Remove more standalone vars, typedefs, includes from Config.h.

- include CMS_SA_ALLOW, with support for MKFIT_STANDALONE

- Move Matriplex/test to standalone/test, move define ASSUME_ALIGNED to MatriplexCommon.h

- Fixup CandCloner
  - remove unused code
  - move impl of begin/end funcs to .cc, use dprint
  - make members private

- Remove MkFitCMS/standalone/CMS-2017/. This contained data and code
  to generate CMS-2017.acc geometry description.

- Move plugin loader from TrackerInfo to ConfigStandalone

- Do not dlclose geometry/config plugin library.

- Move standalone auto-generated files out of the cmssw repo (to trackreco/mkFit-external).

- Move material constants to class MaterialEffects, make a const global instance of it in namespace Config.

- LayerInfo privatization + related changes and removal of some members obsoleted by layer-plan (sibling/next layer).

- TrackerInfo privatization.

- Use unique_ptr for JSON object, and remove release_json() as
  unnecessary

- default LayerInfo::m_is_stereo = false

- Split Config in Const / Config, remove unused useCMSGeom and
  nParams. Config now has to be turned into a class, some variables
  would be better off being put into IterationConfig or similar.

- Remove Config::nTotalLayers, build writeMemoryFile.

- Put propagation-flags into PropagationConfig class. Access through
  const& singleton pattern.

- Move Config::json_verbose to standalone, put json functions into
  JsonConfig class.

- Move Config::silent to standalone and a member of MkBuilder.

- Final cleanup of Config.h
  - add bool argument to PropagationConfig::set_as_default(bool force)
  - move includePCA to ConfigStandalone (for cmd-line option processing) and
    introduce new member bool PropagationConfig::backward_fit_to_pca
  - if-def MKFIT_STANDALONE numThreadsFinder numThreadsEvents numSeedsPerTask

- if-def MKFIT_STANDALONE bool removeDuplicates and useHitsForDuplicates

- use edm::isFinite with non-standalone build

- Fixofy MkBase / MkFitter / MkFinder
  - members use m_ prefix
  - privatize/protectify data members, put them to the end
  - remove some comments

- On Slava's request remove from backward-fit outlier/nan-fit hit
  rejection proto-code and discussion about why it should be
  implemented the way it was and what to do with overlaps when primary
  hit gets rejected.

- drop cmsDeltaRad and related commented-out code for rough
  propagation window from nominal r/z to the actual expected hit (this
  was set up for the CMS geometry and was replaced by the per-layer
  windows; we are not going back from this setup)

- use TrackAlgorithm instead of literals

- Code cleanup:
  - move some aged standalone code out of StdStandaloneSeqs.cc
  - remove unneeded 1.02 allocation scale in LayerOfHits
  - remove unused eta regions from TrackerInfo - superseded by
    seed-partitioning function, layer-plan and steering-params.
---
 RecoTracker/MkFit/BuildFile.xml               |    3 +-
 RecoTracker/MkFit/plugins/BuildFile.xml       |    1 -
 .../MkFit/plugins/MkFitEventOfHitsProducer.cc |   16 +-
 .../MkFit/plugins/MkFitGeometryESProducer.cc  |    6 +-
 .../plugins/MkFitIterationConfigESProducer.cc |   85 +-
 .../MkFit/plugins/MkFitOutputConverter.cc     |    6 +-
 RecoTracker/MkFit/plugins/MkFitProducer.cc    |   27 +-
 .../MkFit/plugins/MkFitSeedConverter.cc       |    4 +-
 RecoTracker/MkFit/plugins/convertHits.h       |    4 +-
 .../plugins/createPhase1TrackerGeometry.cc    |   23 +-
 .../createPhase1TrackerGeometryAutoGen.acc    |  830 ++---
 .../MkFit/src/ES_MkFitIterationConfig.cc      |    2 +-
 RecoTracker/MkFit/src/MkFitEventOfHits.cc     |    2 +-
 RecoTracker/MkFit/src/MkFitGeometry.cc        |    4 +-
 RecoTracker/MkFit/src/MkFitHitWrapper.cc      |    4 +-
 RecoTracker/MkFit/src/MkFitOutputWrapper.cc   |    2 +-
 RecoTracker/MkFit/src/MkFitSeedWrapper.cc     |    2 +-
 RecoTracker/MkFitCMS/BuildFile.xml            |    5 +
 .../MkFitCMS/interface/LayerNumberConverter.h |  116 +
 RecoTracker/MkFitCMS/interface/MkStdSeqs.h    |  138 +
 RecoTracker/MkFitCMS/interface/runFunctions.h |   26 +
 RecoTracker/MkFitCMS/src/MkStdSeqs.cc         |  579 ++++
 RecoTracker/MkFitCMS/src/runFunctions.cc      |  114 +
 .../MkFitCMS/standalone/Geoms/CMS-2017.cc     |    7 +
 .../MkFitCMS/standalone/Geoms/Makefile        |   48 +
 RecoTracker/MkFitCMS/standalone/Makefile      |   73 +
 .../MkFitCMS/standalone/MkStandaloneSeqs.cc   |  404 +++
 .../MkFitCMS/standalone/MkStandaloneSeqs.h    |   53 +
 .../MkFitCMS/standalone/buildtestMPlex.cc     |  569 ++++
 .../MkFitCMS/standalone/buildtestMPlex.h      |   32 +
 RecoTracker/MkFitCMS/standalone/deadmodules.h |  716 ++++
 RecoTracker/MkFitCMS/standalone/mkFit.cc      | 1027 ++++++
 .../standalone/tkNtuple/DictsLinkDef.h        |   21 +
 .../MkFitCMS/standalone/tkNtuple/Makefile     |   57 +
 .../standalone/tkNtuple/WriteMemoryFile.cc    | 1257 +++++++
 RecoTracker/MkFitCore/BuildFile.xml           |    8 +
 RecoTracker/MkFitCore/interface/Config.h      |  168 +
 .../MkFitCore/interface/ConfigWrapper.h       |   16 +
 RecoTracker/MkFitCore/interface/Hit.h         |  279 ++
 .../MkFitCore/interface/HitStructures.h       |  882 +++++
 .../MkFitCore/interface/IterationConfig.h     |  373 +++
 .../MkFitCore/interface/MatrixSTypes.h        |   48 +
 RecoTracker/MkFitCore/interface/MkBuilder.h   |  186 +
 .../MkFitCore/interface/MkBuilderWrapper.h    |   29 +
 .../MkFitCore/interface/SteeringParams.h      |  149 +
 RecoTracker/MkFitCore/interface/Track.h       |  683 ++++
 RecoTracker/MkFitCore/interface/TrackerInfo.h |  173 +
 RecoTracker/MkFitCore/interface/binnor.h      |  246 ++
 .../MkFitCore/interface/cms_common_macros.h   |   10 +
 RecoTracker/MkFitCore/src/CCSErr.ah           |  208 ++
 RecoTracker/MkFitCore/src/CCSErrTransp.ah     |  147 +
 RecoTracker/MkFitCore/src/CandCloner.cc       |  240 ++
 RecoTracker/MkFitCore/src/CandCloner.h        |   75 +
 RecoTracker/MkFitCore/src/CartesianErr.ah     |  201 ++
 .../MkFitCore/src/CartesianErrTransp.ah       |  145 +
 RecoTracker/MkFitCore/src/Config.cc           |   48 +
 RecoTracker/MkFitCore/src/ConfigWrapper.cc    |   20 +
 RecoTracker/MkFitCore/src/Debug.h             |   98 +
 RecoTracker/MkFitCore/src/FindingFoos.cc      |   20 +
 RecoTracker/MkFitCore/src/FindingFoos.h       |   38 +
 RecoTracker/MkFitCore/src/Hit.cc              |   16 +
 RecoTracker/MkFitCore/src/HitStructures.cc    |  624 ++++
 RecoTracker/MkFitCore/src/Ice/IceFPU.h        |  278 ++
 .../MkFitCore/src/Ice/IceMemoryMacros.h       |  123 +
 .../MkFitCore/src/Ice/IcePreprocessor.h       |   23 +
 .../MkFitCore/src/Ice/IceRevisitedRadix.cc    |  547 +++
 .../MkFitCore/src/Ice/IceRevisitedRadix.h     |   73 +
 RecoTracker/MkFitCore/src/Ice/IceTypes.h      |  119 +
 RecoTracker/MkFitCore/src/IterationConfig.cc  |  686 ++++
 RecoTracker/MkFitCore/src/K62HC.ah            |  131 +
 RecoTracker/MkFitCore/src/KH.ah               |  140 +
 RecoTracker/MkFitCore/src/KHC.ah              |  186 +
 RecoTracker/MkFitCore/src/KalmanGain.ah       |  147 +
 RecoTracker/MkFitCore/src/KalmanGain62.ah     |   87 +
 RecoTracker/MkFitCore/src/KalmanHTG.ah        |   64 +
 RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc |  902 +++++
 RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h  |  114 +
 .../MkFitCore/src/KalmanUtilsMPlex.icc        |  205 ++
 RecoTracker/MkFitCore/src/MaterialEffects.cc  |   51 +
 RecoTracker/MkFitCore/src/MaterialEffects.h   |  443 +++
 .../MkFitCore/src/Matriplex/GenMPlexOps.pl    |  407 +++
 RecoTracker/MkFitCore/src/Matriplex/GenMul.pm |  870 +++++
 RecoTracker/MkFitCore/src/Matriplex/Makefile  |   31 +
 .../MkFitCore/src/Matriplex/Matriplex.h       |  484 +++
 .../src/Matriplex/MatriplexCommon.cc          |    7 +
 .../MkFitCore/src/Matriplex/MatriplexCommon.h |   87 +
 .../MkFitCore/src/Matriplex/MatriplexSym.h    |  467 +++
 .../MkFitCore/src/Matriplex/MatriplexVector.h |  154 +
 .../MkFitCore/src/Matriplex/gen_mul.pl        |  242 ++
 .../MkFitCore/src/Matriplex/intr_sym_3x3.ah   |   57 +
 .../MkFitCore/src/Matriplex/intr_sym_6x6.ah   |  330 ++
 .../MkFitCore/src/Matriplex/std_sym_3x3.ah    |    9 +
 .../MkFitCore/src/Matriplex/std_sym_6x6.ah    |   36 +
 RecoTracker/MkFitCore/src/MatriplexPackers.h  |  142 +
 RecoTracker/MkFitCore/src/Matrix.h            |   73 +
 RecoTracker/MkFitCore/src/MkBase.h            |  100 +
 RecoTracker/MkFitCore/src/MkBuilder.cc        | 1362 ++++++++
 RecoTracker/MkFitCore/src/MkBuilderWrapper.cc |   10 +
 RecoTracker/MkFitCore/src/MkFinder.cc         | 1769 ++++++++++
 RecoTracker/MkFitCore/src/MkFinder.h          |  324 ++
 RecoTracker/MkFitCore/src/MkFitter.cc         |  385 +++
 RecoTracker/MkFitCore/src/MkFitter.h          |   93 +
 RecoTracker/MkFitCore/src/MultHelixProp.ah    |  300 ++
 .../MkFitCore/src/MultHelixPropEndcap.ah      |  254 ++
 .../MkFitCore/src/MultHelixPropTransp.ah      |  232 ++
 .../src/MultHelixPropTranspEndcap.ah          |  209 ++
 RecoTracker/MkFitCore/src/Pool.h              |   54 +
 RecoTracker/MkFitCore/src/ProjectResErr.ah    |   64 +
 .../MkFitCore/src/ProjectResErrTransp.ah      |   55 +
 RecoTracker/MkFitCore/src/PropagationMPlex.cc |  920 +++++
 RecoTracker/MkFitCore/src/PropagationMPlex.h  |   86 +
 .../MkFitCore/src/PropagationMPlex.icc        |  291 ++
 RecoTracker/MkFitCore/src/Track.cc            |  424 +++
 RecoTracker/MkFitCore/src/TrackerInfo.cc      |   62 +
 .../MkFitCore/src/upParam_MultKalmanGain.ah   |  142 +
 .../src/upParam_kalmanGain_x_propErr.ah       |  168 +
 .../src/upParam_propErrT_x_simil_propErr.ah   |  186 +
 .../MkFitCore/src/upParam_simil_x_propErr.ah  |  201 ++
 .../MkFitCore/standalone/CFMatrix33Vector3.ah |   48 +
 .../MkFitCore/standalone/ConfigStandalone.cc  |  135 +
 .../MkFitCore/standalone/ConfigStandalone.h   |  188 ++
 .../standalone/ConformalUtilsMPlex.cc         |  274 ++
 .../standalone/ConformalUtilsMPlex.h          |   19 +
 RecoTracker/MkFitCore/standalone/Event.cc     |  987 ++++++
 RecoTracker/MkFitCore/standalone/Event.h      |  123 +
 .../MkFitCore/standalone/Geoms/CylCowWLids.cc |  180 +
 .../MkFitCore/standalone/Geoms/Makefile       |   48 +
 RecoTracker/MkFitCore/standalone/Makefile     |   82 +
 .../MkFitCore/standalone/Makefile.config      |  192 ++
 RecoTracker/MkFitCore/standalone/README.md    |  863 +++++
 .../standalone/README_multipleIterations.txt  |   83 +
 .../MkFitCore/standalone/TTreeValidation.cc   | 2978 +++++++++++++++++
 .../MkFitCore/standalone/TTreeValidation.h    |  421 +++
 .../MkFitCore/standalone/TrackExtra.cc        |  513 +++
 RecoTracker/MkFitCore/standalone/TrackExtra.h |  131 +
 .../MkFitCore/standalone/Validation.cc        |   16 +
 RecoTracker/MkFitCore/standalone/Validation.h |   82 +
 .../MkFitCore/standalone/attic/BestCands.h    |  233 ++
 .../MkFitCore/standalone/attic/align_alloc.h  |  112 +
 .../standalone/attic/fittestMPlex.cc          |  105 +
 .../MkFitCore/standalone/attic/fittestMPlex.h |   12 +
 .../standalone/attic/seedtestMPlex.cc         |  151 +
 .../standalone/attic/seedtestMPlex.h          |   16 +
 .../standalone/cmssw-trackerinfo-desc.txt     |   80 +
 .../code-mod-tools/re-identifier-do-it.pl     |   71 +
 .../code-mod-tools/re-identifier.pl           |  102 +
 .../code-mod-tools/re-include-guard.pl        |   57 +
 .../standalone/code-mod-tools/re-include.pl   |   85 +
 RecoTracker/MkFitCore/standalone/configure    |   44 +
 .../MkFitCore/standalone/index-desc.txt       |  143 +
 .../MkFitCore/standalone/plotting/Common.hh   |  304 ++
 .../standalone/plotting/PlotBenchmarks.cpp    |  212 ++
 .../standalone/plotting/PlotBenchmarks.hh     |   32 +
 .../plotting/PlotMEIFBenchmarks.cpp           |  146 +
 .../standalone/plotting/PlotMEIFBenchmarks.hh |   67 +
 .../standalone/plotting/PlotValidation.cpp    | 1137 +++++++
 .../standalone/plotting/PlotValidation.hh     |  160 +
 .../standalone/plotting/PlotsFromDump.cpp     |  102 +
 .../standalone/plotting/PlotsFromDump.hh      |   93 +
 .../standalone/plotting/StackValidation.cpp   |  303 ++
 .../standalone/plotting/StackValidation.hh    |   79 +
 .../standalone/plotting/benchmarkPlots.sh     |   41 +
 .../standalone/plotting/compareTotals.py      |  531 +++
 .../standalone/plotting/makeBenchmarkPlots.C  |    6 +
 .../standalone/plotting/makeBenchmarkPlots.py |  201 ++
 .../plotting/makeMEIFBenchmarkPlots.C         |    6 +
 .../plotting/makeMEIFBenchmarkPlots.py        |  121 +
 .../standalone/plotting/makePlotsFromDump.C   |    6 +
 .../standalone/plotting/makePlotsFromDump.py  |   64 +
 .../standalone/plotting/makeStressPlot.sh     |   23 +
 .../plotting/makeThroughputPlots.sh           |   12 +
 .../standalone/plotting/makeValidation.C      |    9 +
 .../standalone/plotting/plotStress.C          |  150 +
 .../standalone/plotting/plotThroughput.py     |  166 +
 .../standalone/plotting/runValidation.C       |   27 +
 .../standalone/plotting/textDumpPlots.sh      |   28 +
 .../MkFitCore/standalone/test/CylCowWLids.C   |   77 +
 .../standalone/test/DumpHitSearchStats.icc    |   88 +
 .../standalone/test/Matriplex/GMtest.cxx      |  227 ++
 .../standalone/test/Matriplex/GMtest.pl       |   65 +
 .../standalone/test/Matriplex/m512_test.cxx   |   44 +
 .../MkFitCore/standalone/test/binnor_demo.cxx |  381 +++
 .../test/config-parse/ConfigLinkDef.h         |    5 +
 .../standalone/test/config-parse/Makefile     |   13 +
 .../standalone/test/config-parse/dump_vars.C  |   51 +
 .../standalone/test/config-parse/extracto.pl  |   19 +
 .../standalone/test/config-parse/test.json    |   29 +
 ...n-cmssw-10mu-fulldet-build-extrectracks.sh |   31 +
 .../validation-cmssw-10mu-fulldet-build.sh    |   39 +
 .../validation-cmssw-benchmarks-multiiter.sh  |  237 ++
 .../validation-cmssw-benchmarks.sh            |  255 ++
 ...-cmssw-ttbar-fulldet-build-extrectracks.sh |   40 +
 .../validation-cmssw-ttbar-fulldet-build.sh   |   48 +
 .../validation-toymc-fulldet-build.sh         |   26 +
 .../val_scripts/validationMIC-build-10mu.sh   |   49 +
 .../val_scripts/validationMIC-build-PU70.sh   |   49 +
 .../MkFitCore/standalone/validation-desc.txt  |  549 +++
 .../standalone/web/README_WEBPLOTS.md         |  133 +
 .../standalone/web/collectBenchmarks-multi.sh |  103 +
 .../standalone/web/collectBenchmarks.sh       |  153 +
 .../MkFitCore/standalone/web/copyphp.sh       |    6 +
 .../MkFitCore/standalone/web/index.php        |   84 +
 .../MkFitCore/standalone/web/makereadable.sh  |    6 +
 .../standalone/web/move-benchmarks.sh         |   40 +
 .../web/move-cmsswval-10mu-extrectracks.sh    |   47 +
 .../standalone/web/move-cmsswval-10mu.sh      |   55 +
 .../web/move-cmsswval-ttbar-extrectracks.sh   |   44 +
 .../standalone/web/move-cmsswval-ttbar.sh     |   50 +
 .../MkFitCore/standalone/web/move-toymcval.sh |   36 +
 .../standalone/web/tarAndSendToLXPLUS.sh      |   63 +
 ...chmark-cmssw-ttbar-fulldet-build-remote.sh |   62 +
 .../benchmark-cmssw-ttbar-fulldet-build.sh    |  162 +
 .../xeon_scripts/benchmarkMIC-build.sh        |   83 +
 .../standalone/xeon_scripts/check.sh          |    8 +
 .../xeon_scripts/common-variables.sh          |  212 ++
 .../xeon_scripts/data-dir-location.sh         |   19 +
 .../standalone/xeon_scripts/debug-test.sh     |   42 +
 .../xeon_scripts/generateToyMCsamples.sh      |   32 +
 .../standalone/xeon_scripts/init-env.sh       |    7 +
 .../standalone/xeon_scripts/init-gcc10-env.sh |    7 +
 .../standalone/xeon_scripts/runBenchmark.sh   |  117 +
 .../xeon_scripts/stress-test-common.sh        |  144 +
 .../xeon_scripts/stress-test-main.sh          |  218 ++
 .../xeon_scripts/tarAndSendToRemote.sh        |  103 +
 .../xeon_scripts/throughput-test-common.sh    |   60 +
 .../xeon_scripts/throughput-test-main.sh      |  178 +
 .../standalone/xeon_scripts/trashSKL-SP.sh    |   18 +
 227 files changed, 43391 insertions(+), 622 deletions(-)
 create mode 100644 RecoTracker/MkFitCMS/BuildFile.xml
 create mode 100644 RecoTracker/MkFitCMS/interface/LayerNumberConverter.h
 create mode 100644 RecoTracker/MkFitCMS/interface/MkStdSeqs.h
 create mode 100644 RecoTracker/MkFitCMS/interface/runFunctions.h
 create mode 100644 RecoTracker/MkFitCMS/src/MkStdSeqs.cc
 create mode 100644 RecoTracker/MkFitCMS/src/runFunctions.cc
 create mode 100644 RecoTracker/MkFitCMS/standalone/Geoms/CMS-2017.cc
 create mode 100644 RecoTracker/MkFitCMS/standalone/Geoms/Makefile
 create mode 100644 RecoTracker/MkFitCMS/standalone/Makefile
 create mode 100644 RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.cc
 create mode 100644 RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h
 create mode 100644 RecoTracker/MkFitCMS/standalone/buildtestMPlex.cc
 create mode 100644 RecoTracker/MkFitCMS/standalone/buildtestMPlex.h
 create mode 100644 RecoTracker/MkFitCMS/standalone/deadmodules.h
 create mode 100644 RecoTracker/MkFitCMS/standalone/mkFit.cc
 create mode 100644 RecoTracker/MkFitCMS/standalone/tkNtuple/DictsLinkDef.h
 create mode 100644 RecoTracker/MkFitCMS/standalone/tkNtuple/Makefile
 create mode 100644 RecoTracker/MkFitCMS/standalone/tkNtuple/WriteMemoryFile.cc
 create mode 100644 RecoTracker/MkFitCore/BuildFile.xml
 create mode 100644 RecoTracker/MkFitCore/interface/Config.h
 create mode 100644 RecoTracker/MkFitCore/interface/ConfigWrapper.h
 create mode 100644 RecoTracker/MkFitCore/interface/Hit.h
 create mode 100644 RecoTracker/MkFitCore/interface/HitStructures.h
 create mode 100644 RecoTracker/MkFitCore/interface/IterationConfig.h
 create mode 100644 RecoTracker/MkFitCore/interface/MatrixSTypes.h
 create mode 100644 RecoTracker/MkFitCore/interface/MkBuilder.h
 create mode 100644 RecoTracker/MkFitCore/interface/MkBuilderWrapper.h
 create mode 100644 RecoTracker/MkFitCore/interface/SteeringParams.h
 create mode 100644 RecoTracker/MkFitCore/interface/Track.h
 create mode 100644 RecoTracker/MkFitCore/interface/TrackerInfo.h
 create mode 100644 RecoTracker/MkFitCore/interface/binnor.h
 create mode 100644 RecoTracker/MkFitCore/interface/cms_common_macros.h
 create mode 100644 RecoTracker/MkFitCore/src/CCSErr.ah
 create mode 100644 RecoTracker/MkFitCore/src/CCSErrTransp.ah
 create mode 100644 RecoTracker/MkFitCore/src/CandCloner.cc
 create mode 100644 RecoTracker/MkFitCore/src/CandCloner.h
 create mode 100644 RecoTracker/MkFitCore/src/CartesianErr.ah
 create mode 100644 RecoTracker/MkFitCore/src/CartesianErrTransp.ah
 create mode 100644 RecoTracker/MkFitCore/src/Config.cc
 create mode 100644 RecoTracker/MkFitCore/src/ConfigWrapper.cc
 create mode 100644 RecoTracker/MkFitCore/src/Debug.h
 create mode 100644 RecoTracker/MkFitCore/src/FindingFoos.cc
 create mode 100644 RecoTracker/MkFitCore/src/FindingFoos.h
 create mode 100644 RecoTracker/MkFitCore/src/Hit.cc
 create mode 100644 RecoTracker/MkFitCore/src/HitStructures.cc
 create mode 100644 RecoTracker/MkFitCore/src/Ice/IceFPU.h
 create mode 100644 RecoTracker/MkFitCore/src/Ice/IceMemoryMacros.h
 create mode 100644 RecoTracker/MkFitCore/src/Ice/IcePreprocessor.h
 create mode 100644 RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.cc
 create mode 100644 RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.h
 create mode 100644 RecoTracker/MkFitCore/src/Ice/IceTypes.h
 create mode 100644 RecoTracker/MkFitCore/src/IterationConfig.cc
 create mode 100644 RecoTracker/MkFitCore/src/K62HC.ah
 create mode 100644 RecoTracker/MkFitCore/src/KH.ah
 create mode 100644 RecoTracker/MkFitCore/src/KHC.ah
 create mode 100644 RecoTracker/MkFitCore/src/KalmanGain.ah
 create mode 100644 RecoTracker/MkFitCore/src/KalmanGain62.ah
 create mode 100644 RecoTracker/MkFitCore/src/KalmanHTG.ah
 create mode 100644 RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc
 create mode 100644 RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h
 create mode 100644 RecoTracker/MkFitCore/src/KalmanUtilsMPlex.icc
 create mode 100644 RecoTracker/MkFitCore/src/MaterialEffects.cc
 create mode 100644 RecoTracker/MkFitCore/src/MaterialEffects.h
 create mode 100755 RecoTracker/MkFitCore/src/Matriplex/GenMPlexOps.pl
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/GenMul.pm
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/Makefile
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/Matriplex.h
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.cc
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.h
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/MatriplexSym.h
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/MatriplexVector.h
 create mode 100755 RecoTracker/MkFitCore/src/Matriplex/gen_mul.pl
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/intr_sym_3x3.ah
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/intr_sym_6x6.ah
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/std_sym_3x3.ah
 create mode 100644 RecoTracker/MkFitCore/src/Matriplex/std_sym_6x6.ah
 create mode 100644 RecoTracker/MkFitCore/src/MatriplexPackers.h
 create mode 100644 RecoTracker/MkFitCore/src/Matrix.h
 create mode 100644 RecoTracker/MkFitCore/src/MkBase.h
 create mode 100644 RecoTracker/MkFitCore/src/MkBuilder.cc
 create mode 100644 RecoTracker/MkFitCore/src/MkBuilderWrapper.cc
 create mode 100644 RecoTracker/MkFitCore/src/MkFinder.cc
 create mode 100644 RecoTracker/MkFitCore/src/MkFinder.h
 create mode 100644 RecoTracker/MkFitCore/src/MkFitter.cc
 create mode 100644 RecoTracker/MkFitCore/src/MkFitter.h
 create mode 100644 RecoTracker/MkFitCore/src/MultHelixProp.ah
 create mode 100644 RecoTracker/MkFitCore/src/MultHelixPropEndcap.ah
 create mode 100644 RecoTracker/MkFitCore/src/MultHelixPropTransp.ah
 create mode 100644 RecoTracker/MkFitCore/src/MultHelixPropTranspEndcap.ah
 create mode 100644 RecoTracker/MkFitCore/src/Pool.h
 create mode 100644 RecoTracker/MkFitCore/src/ProjectResErr.ah
 create mode 100644 RecoTracker/MkFitCore/src/ProjectResErrTransp.ah
 create mode 100644 RecoTracker/MkFitCore/src/PropagationMPlex.cc
 create mode 100644 RecoTracker/MkFitCore/src/PropagationMPlex.h
 create mode 100644 RecoTracker/MkFitCore/src/PropagationMPlex.icc
 create mode 100644 RecoTracker/MkFitCore/src/Track.cc
 create mode 100644 RecoTracker/MkFitCore/src/TrackerInfo.cc
 create mode 100644 RecoTracker/MkFitCore/src/upParam_MultKalmanGain.ah
 create mode 100644 RecoTracker/MkFitCore/src/upParam_kalmanGain_x_propErr.ah
 create mode 100644 RecoTracker/MkFitCore/src/upParam_propErrT_x_simil_propErr.ah
 create mode 100644 RecoTracker/MkFitCore/src/upParam_simil_x_propErr.ah
 create mode 100644 RecoTracker/MkFitCore/standalone/CFMatrix33Vector3.ah
 create mode 100644 RecoTracker/MkFitCore/standalone/ConfigStandalone.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/ConfigStandalone.h
 create mode 100644 RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.h
 create mode 100644 RecoTracker/MkFitCore/standalone/Event.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/Event.h
 create mode 100644 RecoTracker/MkFitCore/standalone/Geoms/CylCowWLids.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/Geoms/Makefile
 create mode 100644 RecoTracker/MkFitCore/standalone/Makefile
 create mode 100644 RecoTracker/MkFitCore/standalone/Makefile.config
 create mode 100644 RecoTracker/MkFitCore/standalone/README.md
 create mode 100644 RecoTracker/MkFitCore/standalone/README_multipleIterations.txt
 create mode 100644 RecoTracker/MkFitCore/standalone/TTreeValidation.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/TTreeValidation.h
 create mode 100644 RecoTracker/MkFitCore/standalone/TrackExtra.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/TrackExtra.h
 create mode 100644 RecoTracker/MkFitCore/standalone/Validation.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/Validation.h
 create mode 100644 RecoTracker/MkFitCore/standalone/attic/BestCands.h
 create mode 100644 RecoTracker/MkFitCore/standalone/attic/align_alloc.h
 create mode 100644 RecoTracker/MkFitCore/standalone/attic/fittestMPlex.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/attic/fittestMPlex.h
 create mode 100644 RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.cc
 create mode 100644 RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.h
 create mode 100644 RecoTracker/MkFitCore/standalone/cmssw-trackerinfo-desc.txt
 create mode 100644 RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier-do-it.pl
 create mode 100644 RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier.pl
 create mode 100755 RecoTracker/MkFitCore/standalone/code-mod-tools/re-include-guard.pl
 create mode 100755 RecoTracker/MkFitCore/standalone/code-mod-tools/re-include.pl
 create mode 100755 RecoTracker/MkFitCore/standalone/configure
 create mode 100644 RecoTracker/MkFitCore/standalone/index-desc.txt
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/Common.hh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.cpp
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.hh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.cpp
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.hh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotValidation.cpp
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotValidation.hh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.cpp
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.hh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/StackValidation.cpp
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/StackValidation.hh
 create mode 100755 RecoTracker/MkFitCore/standalone/plotting/benchmarkPlots.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/compareTotals.py
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.C
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.py
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.C
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.py
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.C
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.py
 create mode 100755 RecoTracker/MkFitCore/standalone/plotting/makeStressPlot.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/plotting/makeThroughputPlots.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/makeValidation.C
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/plotStress.C
 create mode 100755 RecoTracker/MkFitCore/standalone/plotting/plotThroughput.py
 create mode 100644 RecoTracker/MkFitCore/standalone/plotting/runValidation.C
 create mode 100755 RecoTracker/MkFitCore/standalone/plotting/textDumpPlots.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/test/CylCowWLids.C
 create mode 100644 RecoTracker/MkFitCore/standalone/test/DumpHitSearchStats.icc
 create mode 100644 RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.cxx
 create mode 100755 RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.pl
 create mode 100644 RecoTracker/MkFitCore/standalone/test/Matriplex/m512_test.cxx
 create mode 100644 RecoTracker/MkFitCore/standalone/test/binnor_demo.cxx
 create mode 100644 RecoTracker/MkFitCore/standalone/test/config-parse/ConfigLinkDef.h
 create mode 100644 RecoTracker/MkFitCore/standalone/test/config-parse/Makefile
 create mode 100644 RecoTracker/MkFitCore/standalone/test/config-parse/dump_vars.C
 create mode 100755 RecoTracker/MkFitCore/standalone/test/config-parse/extracto.pl
 create mode 100644 RecoTracker/MkFitCore/standalone/test/config-parse/test.json
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build-extrectracks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks-multiiter.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build-extrectracks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validation-toymc-fulldet-build.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-10mu.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-PU70.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/validation-desc.txt
 create mode 100644 RecoTracker/MkFitCore/standalone/web/README_WEBPLOTS.md
 create mode 100755 RecoTracker/MkFitCore/standalone/web/collectBenchmarks-multi.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/collectBenchmarks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/copyphp.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/web/index.php
 create mode 100755 RecoTracker/MkFitCore/standalone/web/makereadable.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/move-benchmarks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu-extrectracks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar-extrectracks.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/move-toymcval.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/web/tarAndSendToLXPLUS.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/benchmarkMIC-build.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/check.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/common-variables.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/data-dir-location.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/debug-test.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/generateToyMCsamples.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/xeon_scripts/init-env.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/xeon_scripts/init-gcc10-env.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/runBenchmark.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-common.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-main.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/tarAndSendToRemote.sh
 create mode 100644 RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-common.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-main.sh
 create mode 100755 RecoTracker/MkFitCore/standalone/xeon_scripts/trashSKL-SP.sh
diff --git a/RecoTracker/MkFit/BuildFile.xml b/RecoTracker/MkFit/BuildFile.xml
index 177da9dffc6df..7c1e113bb9cad 100644
--- a/RecoTracker/MkFit/BuildFile.xml
+++ b/RecoTracker/MkFit/BuildFile.xml
@@ -3,7 +3,8 @@
 <use name="DataFormats/TrackerCommon"/>
 <use name="Geometry/TrackerGeometryBuilder"/>
 <use name="RecoTracker/TkDetLayers"/>
-<use name="mkfit"/>
+<use name="RecoTracker/MkFitCore"/>
+<use name="RecoTracker/MkFitCMS"/>
 <use name="rootcore"/>
 <export>
   <lib name="RecoTrackerMkFit"/>
diff --git a/RecoTracker/MkFit/plugins/BuildFile.xml b/RecoTracker/MkFit/plugins/BuildFile.xml
index 6967de7612951..c993b66b41afa 100644
--- a/RecoTracker/MkFit/plugins/BuildFile.xml
+++ b/RecoTracker/MkFit/plugins/BuildFile.xml
@@ -29,7 +29,6 @@
   <use name="TrackingTools/TrackFitters"/>
   <use name="TrackingTools/TrajectoryState"/>
   <use name="TrackingTools/TransientTrackingRecHit"/>
-  <use name="mkfit"/>
   <use name="rootmath"/>
   <flags EDM_PLUGIN="1"/>
 </library>
diff --git a/RecoTracker/MkFit/plugins/MkFitEventOfHitsProducer.cc b/RecoTracker/MkFit/plugins/MkFitEventOfHitsProducer.cc
index ea23319cdbf4b..e3b800314a2a0 100644
--- a/RecoTracker/MkFit/plugins/MkFitEventOfHitsProducer.cc
+++ b/RecoTracker/MkFit/plugins/MkFitEventOfHitsProducer.cc
@@ -25,9 +25,9 @@
 #include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h"
 
 // mkFit includes
-#include "mkFit/HitStructures.h"
-#include "mkFit/MkStdSeqs.h"
-#include "LayerNumberConverter.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCMS/interface/MkStdSeqs.h"
+#include "RecoTracker/MkFitCMS/interface/LayerNumberConverter.h"
 
 class MkFitEventOfHitsProducer : public edm::global::EDProducer<> {
 public:
@@ -95,7 +95,7 @@ void MkFitEventOfHitsProducer::produce(edm::StreamID iID, edm::Event& iEvent, co
   const auto& mkFitGeom = iSetup.getData(mkFitGeomToken_);
 
   auto eventOfHits = std::make_unique<mkfit::EventOfHits>(mkFitGeom.trackerInfo());
-  mkfit::StdSeq::Cmssw_LoadHits_Begin(*eventOfHits, {&pixelHits.hits(), &stripHits.hits()});
+  mkfit::StdSeq::cmssw_LoadHits_Begin(*eventOfHits, {&pixelHits.hits(), &stripHits.hits()});
 
   if (usePixelQualityDB_ || useStripStripQualityDB_) {
     std::vector<mkfit::DeadVec> deadvectors(mkFitGeom.layerNumberConverter().nLayers());
@@ -168,16 +168,16 @@ void MkFitEventOfHitsProducer::produce(edm::StreamID iID, edm::Event& iEvent, co
         }
       }
     }
-    mkfit::StdSeq::LoadDeads(*eventOfHits, deadvectors);
+    mkfit::StdSeq::loadDeads(*eventOfHits, deadvectors);
   }
 
   fill(iEvent.get(pixelClusterIndexToHitToken_).hits(), *eventOfHits, mkFitGeom);
   fill(iEvent.get(stripClusterIndexToHitToken_).hits(), *eventOfHits, mkFitGeom);
 
-  mkfit::StdSeq::Cmssw_LoadHits_End(*eventOfHits);
+  mkfit::StdSeq::cmssw_LoadHits_End(*eventOfHits);
 
   auto const bs = iEvent.get(beamSpotToken_);
-  eventOfHits->SetBeamSpot(
+  eventOfHits->setBeamSpot(
       mkfit::BeamSpot(bs.x0(), bs.y0(), bs.z0(), bs.sigmaZ(), bs.BeamWidthX(), bs.BeamWidthY(), bs.dxdz(), bs.dydz()));
 
   iEvent.emplace(putToken_, std::move(eventOfHits));
@@ -190,7 +190,7 @@ void MkFitEventOfHitsProducer::fill(const std::vector<const TrackingRecHit*>& hi
     const auto* hit = hits[i];
     if (hit != nullptr) {
       const auto ilay = mkFitGeom.mkFitLayerNumber(hit->geographicalId());
-      eventOfHits[ilay].RegisterHit(i);
+      eventOfHits[ilay].registerHit(i);
     }
   }
 }
diff --git a/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc b/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc
index 04d25b3f8bbac..65ec712382df5 100644
--- a/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc
+++ b/RecoTracker/MkFit/plugins/MkFitGeometryESProducer.cc
@@ -13,9 +13,9 @@
 #include "createPhase1TrackerGeometry.h"
 
 // mkFit includes
-#include "ConfigWrapper.h"
-#include "TrackerInfo.h"
-#include "mkFit/IterationConfig.h"
+#include "RecoTracker/MkFitCore/interface/ConfigWrapper.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
 
 #include <atomic>
 
diff --git a/RecoTracker/MkFit/plugins/MkFitIterationConfigESProducer.cc b/RecoTracker/MkFit/plugins/MkFitIterationConfigESProducer.cc
index 5f876e95bfff0..a1f134a3ef3a6 100644
--- a/RecoTracker/MkFit/plugins/MkFitIterationConfigESProducer.cc
+++ b/RecoTracker/MkFit/plugins/MkFitIterationConfigESProducer.cc
@@ -6,10 +6,10 @@
 #include "RecoTracker/MkFit/interface/MkFitGeometry.h"
 
 // mkFit includes
-#include "Track.h"
-#include "TrackerInfo.h"
-#include "mkFit/HitStructures.h"
-#include "mkFit/IterationConfig.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
 
 namespace {
   using namespace mkfit;
@@ -26,7 +26,7 @@ namespace {
       const bool z_dir_pos = S.pz() > 0;
 
       const auto &hot = S.getLastHitOnTrack();
-      const float eta = eoh[hot.layer].GetHit(hot.index).eta();
+      const float eta = eoh[hot.layer].refHit(hot.index).eta();
 
       // Region to be defined by propagation / intersection tests
       TrackerInfo::EtaRegion reg;
@@ -42,39 +42,39 @@ namespace {
       constexpr int tecp1_id = 27;
       constexpr int tecn1_id = 54;
 
-      const LayerInfo &tib1 = trk_info.m_layers[tib1_id];
-      const LayerInfo &tob1 = trk_info.m_layers[tob1_id];
+      const LayerInfo &tib1 = trk_info.layer(tib1_id);
+      const LayerInfo &tob1 = trk_info.layer(tob1_id);
 
-      const LayerInfo &tecp1 = trk_info.m_layers[tecp1_id];
-      const LayerInfo &tecn1 = trk_info.m_layers[tecn1_id];
+      const LayerInfo &tecp1 = trk_info.layer(tecp1_id);
+      const LayerInfo &tecn1 = trk_info.layer(tecn1_id);
 
       const LayerInfo &tec_first = z_dir_pos ? tecp1 : tecn1;
 
       const float maxR = S.maxReachRadius();
       float z_at_maxr;
 
-      bool can_reach_outer_brl = S.canReachRadius(outer_brl.m_rout);
+      bool can_reach_outer_brl = S.canReachRadius(outer_brl.rout());
       float z_at_outer_brl;
       bool misses_first_tec;
       if (can_reach_outer_brl) {
-        z_at_outer_brl = S.zAtR(outer_brl.m_rout);
+        z_at_outer_brl = S.zAtR(outer_brl.rout());
         if (z_dir_pos)
-          misses_first_tec = z_at_outer_brl < tec_first.m_zmin;
+          misses_first_tec = z_at_outer_brl < tec_first.zmin();
         else
-          misses_first_tec = z_at_outer_brl > tec_first.m_zmax;
+          misses_first_tec = z_at_outer_brl > tec_first.zmax();
       } else {
         z_at_maxr = S.zAtR(maxR);
         if (z_dir_pos)
-          misses_first_tec = z_at_maxr < tec_first.m_zmin;
+          misses_first_tec = z_at_maxr < tec_first.zmin();
         else
-          misses_first_tec = z_at_maxr > tec_first.m_zmax;
+          misses_first_tec = z_at_maxr > tec_first.zmax();
       }
 
       if (misses_first_tec) {
         reg = TrackerInfo::Reg_Barrel;
       } else {
-        if ((S.canReachRadius(tib1.m_rin) && tib1.is_within_z_limits(S.zAtR(tib1.m_rin))) ||
-            (S.canReachRadius(tob1.m_rin) && tob1.is_within_z_limits(S.zAtR(tob1.m_rin)))) {
+        if ((S.canReachRadius(tib1.rin()) && tib1.is_within_z_limits(S.zAtR(tib1.rin()))) ||
+            (S.canReachRadius(tob1.rin()) && tob1.is_within_z_limits(S.zAtR(tob1.rin())))) {
           reg = z_dir_pos ? TrackerInfo::Reg_Transition_Pos : TrackerInfo::Reg_Transition_Neg;
         } else {
           reg = z_dir_pos ? TrackerInfo::Reg_Endcap_Pos : TrackerInfo::Reg_Endcap_Neg;
@@ -101,26 +101,26 @@ namespace {
     constexpr int tecp1_id = 27;
     constexpr int tecn1_id = 54;
 
-    const LayerInfo &tib1 = trk_info.m_layers[tib1_id];
-    const LayerInfo &tob1 = trk_info.m_layers[tob1_id];
+    const LayerInfo &tib1 = trk_info.layer(tib1_id);
+    const LayerInfo &tob1 = trk_info.layer(tob1_id);
 
-    const LayerInfo &tidp1 = trk_info.m_layers[tidp1_id];
-    const LayerInfo &tidn1 = trk_info.m_layers[tidn1_id];
+    const LayerInfo &tidp1 = trk_info.layer(tidp1_id);
+    const LayerInfo &tidn1 = trk_info.layer(tidn1_id);
 
-    const LayerInfo &tecp1 = trk_info.m_layers[tecp1_id];
-    const LayerInfo &tecn1 = trk_info.m_layers[tecn1_id];
+    const LayerInfo &tecp1 = trk_info.layer(tecp1_id);
+    const LayerInfo &tecn1 = trk_info.layer(tecn1_id);
 
     // Merge first two layers to account for mono/stereo coverage.
     // TrackerInfo could hold joint limits for sub-detectors.
-    const auto &L = trk_info.m_layers;
-    const float tidp_rin = std::min(L[tidp1_id].m_rin, L[tidp1_id + 1].m_rin);
-    const float tidp_rout = std::max(L[tidp1_id].m_rout, L[tidp1_id + 1].m_rout);
-    const float tecp_rin = std::min(L[tecp1_id].m_rin, L[tecp1_id + 1].m_rin);
-    const float tecp_rout = std::max(L[tecp1_id].m_rout, L[tecp1_id + 1].m_rout);
-    const float tidn_rin = std::min(L[tidn1_id].m_rin, L[tidn1_id + 1].m_rin);
-    const float tidn_rout = std::max(L[tidn1_id].m_rout, L[tidn1_id + 1].m_rout);
-    const float tecn_rin = std::min(L[tecn1_id].m_rin, L[tecn1_id + 1].m_rin);
-    const float tecn_rout = std::max(L[tecn1_id].m_rout, L[tecn1_id + 1].m_rout);
+    const auto &L = trk_info;
+    const float tidp_rin = std::min(L[tidp1_id].rin(), L[tidp1_id + 1].rin());
+    const float tidp_rout = std::max(L[tidp1_id].rout(), L[tidp1_id + 1].rout());
+    const float tecp_rin = std::min(L[tecp1_id].rin(), L[tecp1_id + 1].rin());
+    const float tecp_rout = std::max(L[tecp1_id].rout(), L[tecp1_id + 1].rout());
+    const float tidn_rin = std::min(L[tidn1_id].rin(), L[tidn1_id + 1].rin());
+    const float tidn_rout = std::max(L[tidn1_id].rout(), L[tidn1_id + 1].rout());
+    const float tecn_rin = std::min(L[tecn1_id].rin(), L[tecn1_id + 1].rin());
+    const float tecn_rout = std::max(L[tecn1_id].rout(), L[tecn1_id + 1].rout());
 
     // Bias towards more aggressive transition-region assignemnts.
     // With current tunning it seems to make things a bit worse.
@@ -153,7 +153,7 @@ namespace {
       const Track &S = in_seeds[i];
 
       const auto &hot = S.getLastHitOnTrack();
-      const float eta = eoh[hot.layer].GetHit(hot.index).eta();
+      const float eta = eoh[hot.layer].refHit(hot.index).eta();
 
       // Region to be defined by propagation / intersection tests
       TrackerInfo::EtaRegion reg;
@@ -165,14 +165,14 @@ namespace {
       const float maxR = S.maxReachRadius();
 
       if (z_dir_pos) {
-        const bool in_tib = barrel_pos_check(S, maxR, tib1.m_rin, tib1.m_zmax);
-        const bool in_tob = barrel_pos_check(S, maxR, tob1.m_rin, tob1.m_zmax);
+        const bool in_tib = barrel_pos_check(S, maxR, tib1.rin(), tib1.zmax());
+        const bool in_tob = barrel_pos_check(S, maxR, tob1.rin(), tob1.zmax());
 
         if (!in_tib && !in_tob) {
           reg = TrackerInfo::Reg_Endcap_Pos;
         } else {
-          const bool in_tid = endcap_pos_check(S, maxR, tidp_rout, tidp_rin, tidp1.m_zmin - tid_z_extra);
-          const bool in_tec = endcap_pos_check(S, maxR, tecp_rout, tecp_rin, tecp1.m_zmin - tec_z_extra);
+          const bool in_tid = endcap_pos_check(S, maxR, tidp_rout, tidp_rin, tidp1.zmin() - tid_z_extra);
+          const bool in_tec = endcap_pos_check(S, maxR, tecp_rout, tecp_rin, tecp1.zmin() - tec_z_extra);
 
           if (!in_tid && !in_tec) {
             reg = TrackerInfo::Reg_Barrel;
@@ -181,14 +181,14 @@ namespace {
           }
         }
       } else {
-        const bool in_tib = barrel_neg_check(S, maxR, tib1.m_rin, tib1.m_zmin);
-        const bool in_tob = barrel_neg_check(S, maxR, tob1.m_rin, tob1.m_zmin);
+        const bool in_tib = barrel_neg_check(S, maxR, tib1.rin(), tib1.zmin());
+        const bool in_tob = barrel_neg_check(S, maxR, tob1.rin(), tob1.zmin());
 
         if (!in_tib && !in_tob) {
           reg = TrackerInfo::Reg_Endcap_Neg;
         } else {
-          const bool in_tid = endcap_neg_check(S, maxR, tidn_rout, tidn_rin, tidn1.m_zmax + tid_z_extra);
-          const bool in_tec = endcap_neg_check(S, maxR, tecn_rout, tecn_rin, tecn1.m_zmax + tec_z_extra);
+          const bool in_tid = endcap_neg_check(S, maxR, tidn_rout, tidn_rin, tidn1.zmax() + tid_z_extra);
+          const bool in_tec = endcap_neg_check(S, maxR, tecn_rout, tecn_rin, tecn1.zmax() + tec_z_extra);
 
           if (!in_tid && !in_tec) {
             reg = TrackerInfo::Reg_Barrel;
@@ -233,7 +233,8 @@ void MkFitIterationConfigESProducer::fillDescriptions(edm::ConfigurationDescript
 
 std::unique_ptr<mkfit::IterationConfig> MkFitIterationConfigESProducer::produce(
     const TrackerRecoGeometryRecord &iRecord) {
-  auto it_conf = mkfit::ConfigJson_Load_File(configFile_);
+  mkfit::ConfigJson cj;
+  auto it_conf = cj.load_File(configFile_);
   it_conf->m_partition_seeds = partitionSeeds1;
   return it_conf;
 }
diff --git a/RecoTracker/MkFit/plugins/MkFitOutputConverter.cc b/RecoTracker/MkFit/plugins/MkFitOutputConverter.cc
index ba8e36e248a88..c2b3d8781c546 100644
--- a/RecoTracker/MkFit/plugins/MkFitOutputConverter.cc
+++ b/RecoTracker/MkFit/plugins/MkFitOutputConverter.cc
@@ -37,9 +37,9 @@
 #include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h"
 
 // mkFit indludes
-#include "LayerNumberConverter.h"
-#include "Track.h"
-#include "mkFit/HitStructures.h"
+#include "RecoTracker/MkFitCMS/interface/LayerNumberConverter.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
 
 namespace {
   template <typename T>
diff --git a/RecoTracker/MkFit/plugins/MkFitProducer.cc b/RecoTracker/MkFit/plugins/MkFitProducer.cc
index c511c38ae90d5..721a3310f0f7c 100644
--- a/RecoTracker/MkFit/plugins/MkFitProducer.cc
+++ b/RecoTracker/MkFit/plugins/MkFitProducer.cc
@@ -18,14 +18,14 @@
 #include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h"
 
 // mkFit includes
-#include "ConfigWrapper.h"
-#include "LayerNumberConverter.h"
-#include "mkFit/buildtestMPlex.h"
-#include "mkFit/IterationConfig.h"
-#include "mkFit/MkBuilderWrapper.h"
+#include "RecoTracker/MkFitCore/interface/ConfigWrapper.h"
+#include "RecoTracker/MkFitCMS/interface/LayerNumberConverter.h"
+#include "RecoTracker/MkFitCMS/interface/runFunctions.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+#include "RecoTracker/MkFitCore/interface/MkBuilderWrapper.h"
 
 // TBB includes
-#include "tbb/task_arena.h"
+#include "oneapi/tbb/task_arena.h"
 
 // std includes
 #include <functional>
@@ -54,7 +54,6 @@ class MkFitProducer : public edm::global::EDProducer<edm::StreamCache<mkfit::MkB
   const edm::ESGetToken<MkFitGeometry, TrackerRecoGeometryRecord> mkFitGeomToken_;
   const edm::ESGetToken<mkfit::IterationConfig, TrackerRecoGeometryRecord> mkFitIterConfigToken_;
   const edm::EDPutTokenT<MkFitOutputWrapper> putToken_;
-  std::function<double(mkfit::Event&, mkfit::MkBuilder&)> buildFunction_;
   const float minGoodStripCharge_;
   const bool seedCleaning_;
   const bool backwardFitInCMSSW_;
@@ -101,7 +100,7 @@ MkFitProducer::MkFitProducer(edm::ParameterSet const& iConfig)
 
   // TODO: what to do when we have multiple instances of MkFitProducer in a job?
   mkfit::MkBuilderWrapper::populate();
-  mkfit::ConfigWrapper::initializeForCMSSW(mkFitSilent_);
+  mkfit::ConfigWrapper::initializeForCMSSW();
 }
 
 void MkFitProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
@@ -134,12 +133,9 @@ void MkFitProducer::fillDescriptions(edm::ConfigurationDescriptions& description
 }
 
 std::unique_ptr<mkfit::MkBuilderWrapper> MkFitProducer::beginStream(edm::StreamID iID) const {
-  return std::make_unique<mkfit::MkBuilderWrapper>();
+  return std::make_unique<mkfit::MkBuilderWrapper>(mkFitSilent_);
 }
 
-namespace {
-  std::once_flag geometryFlag;
-}
 void MkFitProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const {
   const auto& pixelHits = iEvent.get(pixelHitsToken_);
   const auto& stripHits = iEvent.get(stripHitsToken_);
@@ -180,13 +176,6 @@ void MkFitProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::Ev
     stripClusterChargeCut(iEvent.get(stripClusterChargeToken_), stripMask);
   }
 
-  // Initialize the number of layers, has to be done exactly once in
-  // the whole program.
-  // TODO: the mechanism needs to be improved...
-  std::call_once(geometryFlag, [nlayers = mkFitGeom.layerNumberConverter().nLayers()]() {
-    mkfit::ConfigWrapper::setNTotalLayers(nlayers);
-  });
-
   // seeds need to be mutable because of the possible cleaning
   auto seeds_mutable = seeds.seeds();
   mkfit::TrackVec tracks;
diff --git a/RecoTracker/MkFit/plugins/MkFitSeedConverter.cc b/RecoTracker/MkFit/plugins/MkFitSeedConverter.cc
index fb427aa22d2ce..4f908c42cf094 100644
--- a/RecoTracker/MkFit/plugins/MkFitSeedConverter.cc
+++ b/RecoTracker/MkFit/plugins/MkFitSeedConverter.cc
@@ -31,8 +31,8 @@
 #include "Math/SMatrix.h"
 
 // mkFit includes
-#include "LayerNumberConverter.h"
-#include "Track.h"
+#include "RecoTracker/MkFitCMS/interface/LayerNumberConverter.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
 
 class MkFitSeedConverter : public edm::global::EDProducer<> {
 public:
diff --git a/RecoTracker/MkFit/plugins/convertHits.h b/RecoTracker/MkFit/plugins/convertHits.h
index f016b43b0fb0a..6560f50e208e6 100644
--- a/RecoTracker/MkFit/plugins/convertHits.h
+++ b/RecoTracker/MkFit/plugins/convertHits.h
@@ -17,8 +17,8 @@
 #include "Math/SMatrix.h"
 
 // mkFit includes
-#include "Hit.h"
-#include "mkFit/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
 
 namespace mkfit {
   template <typename Traits, typename HitCollection>
diff --git a/RecoTracker/MkFit/plugins/createPhase1TrackerGeometry.cc b/RecoTracker/MkFit/plugins/createPhase1TrackerGeometry.cc
index 9d795d48a4584..2c67582307b46 100644
--- a/RecoTracker/MkFit/plugins/createPhase1TrackerGeometry.cc
+++ b/RecoTracker/MkFit/plugins/createPhase1TrackerGeometry.cc
@@ -2,9 +2,8 @@
 // Phase1 tracker geometry
 //-------------------
 
-#include "Config.h"
-#include "Debug.h"
-#include "TrackerInfo.h"
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
 
 #include <functional>
 
@@ -16,20 +15,6 @@ namespace {
 
 namespace mkfit {
   void createPhase1TrackerGeometry(TrackerInfo &ti, bool verbose) {
-    // TODO: these writes to global variables need to be removed
-    Config::nTotalLayers = 18 + 2 * 27;
-
-    Config::useCMSGeom = true;
-
-    Config::finding_requires_propagation_to_hit_pos = true;
-    Config::finding_inter_layer_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material);
-    Config::finding_intra_layer_pflags = PropagationFlags(PF_none);
-    Config::backward_fit_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material);
-    Config::forward_fit_pflags = PropagationFlags(PF_use_param_b_field | PF_apply_material);
-    Config::seed_fit_pflags = PropagationFlags(PF_none);
-    Config::pca_prop_pflags = PropagationFlags(PF_none);
-
-    ti.set_eta_regions(0.9, 1.7, 2.45, false);
     ti.create_layers(18, 27, 27);
     createPhase1TrackerGeometryAutoGen(ti);
 
@@ -38,8 +23,8 @@ namespace mkfit {
       printf("==========================================================================================\n");
       printf("Phase1 tracker -- Create_TrackerInfo finished\n");
       printf("==========================================================================================\n");
-      for (auto &i : ti.m_layers)
-        i.print_layer();
+      for (int ii = 0; ii < ti.n_layers(); ++ii)
+        ti.layer(ii).print_layer();
       printf("==========================================================================================\n");
     }
   }
diff --git a/RecoTracker/MkFit/plugins/createPhase1TrackerGeometryAutoGen.acc b/RecoTracker/MkFit/plugins/createPhase1TrackerGeometryAutoGen.acc
index 68229b38d0f7f..8989440b6958d 100644
--- a/RecoTracker/MkFit/plugins/createPhase1TrackerGeometryAutoGen.acc
+++ b/RecoTracker/MkFit/plugins/createPhase1TrackerGeometryAutoGen.acc
@@ -2,847 +2,649 @@ void createPhase1TrackerGeometryAutoGen(TrackerInfo &ti) {
   // PIXB
 
   {
-    LayerInfo &obj = ti.m_layers[0];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(0);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(2.720, 3.390, -26.700, 26.700);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(1, 18, 45);
-    obj.m_q_bin = 2.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(2.000);
     obj.m_is_pixb_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[1];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(1);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(6.560, 7.270, -26.700, 26.700);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(2, 18, 45);
-    obj.m_q_bin = 2.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(2.000);
     obj.m_is_pixb_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[2];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(2);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(10.690, 11.360, -26.700, 26.700);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(3, 18, 45);
-    obj.m_q_bin = 2.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(2.000);
     obj.m_is_pixb_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[3];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(3);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(15.800, 16.460, -26.700, 26.690);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(4, 21, 48);
-    obj.m_q_bin = 2.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(2.000);
     obj.m_is_pixb_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
 
   // TIB
 
   {
-    LayerInfo &obj = ti.m_layers[4];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(4);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(23.450, 27.880, -66.670, 63.930);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(5, 21, 48);
-    obj.m_q_bin = 6.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(6.000);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[5];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(5);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(23.450, 27.880, -66.670, 63.930);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(6, 21, 48);
-    obj.m_q_bin = 6.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(6.000);
+    obj.set_is_stereo(true);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[6];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(6);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(31.830, 36.240, -65.890, 66.670);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(7, 21, 48);
-    obj.m_q_bin = 6.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(6.000);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[7];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(7);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(31.830, 36.240, -65.890, 66.670);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(8, 21, 48);
-    obj.m_q_bin = 6.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(6.000);
+    obj.set_is_stereo(true);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[8];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(8);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(39.880, 44.040, -66.400, 65.240);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(9, 21, 48);
-    obj.m_q_bin = 6.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(6.000);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[9];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(9);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(47.810, 51.980, -66.400, 66.400);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(10, 27, 54);
-    obj.m_q_bin = 6.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(6.000);
     obj.m_is_tib_lyr = true;
   }
 
   // TOB
 
   {
-    LayerInfo &obj = ti.m_layers[10];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(10);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(58.410, 63.430, -108.710, 108.710);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(11, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[11];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(11);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(58.410, 63.430, -108.710, 108.710);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(12, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
+    obj.set_is_stereo(true);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[12];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(12);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(66.810, 71.740, -108.700, 108.710);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(13, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[13];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(13);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(66.810, 71.740, -108.700, 108.710);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(14, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
+    obj.set_is_stereo(true);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[14];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(14);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(76.040, 80.090, -108.300, 108.290);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(15, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[15];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(15);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(84.840, 88.880, -108.290, 108.300);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(16, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[16];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(16);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(94.540, 98.570, -108.290, 108.290);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(17, 27, 54);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
     obj.m_is_tib_lyr = true;
   }
 
   {
-    LayerInfo &obj = ti.m_layers[17];
-    obj.m_layer_type = LayerInfo::Barrel;
+    LayerInfo &obj = ti.layer_nc(17);
+    obj.set_layer_type(LayerInfo::Barrel);
     obj.set_limits(106.040, 110.060, -108.290, 108.290);
-    obj.m_propagate_to = obj.r_mean();
-    obj.set_next_layers(-1, -1, -1);
-    obj.m_q_bin = 9.500;
-    obj.m_is_outer = true;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.r_mean());
+    obj.set_q_bin(9.500);
     obj.m_is_tib_lyr = true;
   }
 
   // PIXE +/-
 
   {
-    LayerInfo &obj = ti.m_layers[18];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(18);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(4.590, 16.090, 29.440, 35.210);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(4, 19, -1);
-    obj.m_q_bin = 1.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(1.000);
     obj.m_is_pixe_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[45];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(45);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(4.590, 16.090, -35.200, -29.440);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(4, -1, 46);
-    obj.m_q_bin = 1.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(1.000);
     obj.m_is_pixe_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[19];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(19);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(4.590, 16.090, 36.940, 42.700);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(4, 20, -1);
-    obj.m_q_bin = 1.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(1.000);
     obj.m_is_pixe_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[46];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(46);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(4.590, 16.090, -42.700, -36.940);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(4, -1, 47);
-    obj.m_q_bin = 1.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(1.000);
     obj.m_is_pixe_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[20];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(20);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(4.590, 16.090, 46.450, 52.200);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(4, 21, -1);
-    obj.m_q_bin = 1.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(1.000);
     obj.m_is_pixe_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[47];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(47);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(4.590, 16.090, -52.210, -46.450);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(4, -1, 48);
-    obj.m_q_bin = 1.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(1.000);
     obj.m_is_pixe_lyr = true;
-    obj.m_is_seed_lyr = true;
   }
 
   // TID +/-
 
   {
-    LayerInfo &obj = ti.m_layers[21];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(21);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(22.980, 50.440, 74.350, 84.050);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, 22, -1);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[48];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(48);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(22.980, 50.440, -84.050, -74.350);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, -1, 49);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[22];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(22);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(22.800, 42.000, 74.350, 84.050);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, 23, -1);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
+    obj.set_is_stereo(true);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[49];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(49);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(22.800, 42.000, -84.050, -74.350);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, -1, 50);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
+    obj.set_is_stereo(true);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[23];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(23);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(22.980, 50.440, 87.300, 97.000);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, 24, -1);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[50];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(50);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(22.980, 50.440, -97.000, -87.300);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, -1, 51);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[24];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(24);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(22.800, 42.000, 87.300, 97.000);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, 25, -1);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
+    obj.set_is_stereo(true);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[51];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(51);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(22.800, 42.000, -97.000, -87.300);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, -1, 52);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
+    obj.set_is_stereo(true);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[25];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(25);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(22.980, 50.440, 100.250, 109.950);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, 26, -1);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[52];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(52);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(22.980, 50.440, -109.950, -100.250);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, -1, 53);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[26];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(26);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(22.800, 42.000, 100.250, 109.950);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, 27, -1);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
+    obj.set_is_stereo(true);
     obj.m_is_tid_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[53];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(53);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(22.800, 42.000, -109.950, -100.250);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(10, -1, 54);
-    obj.m_q_bin = 5.500;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(5.500);
+    obj.set_is_stereo(true);
     obj.m_is_tid_lyr = true;
   }
 
   // TEC +/-
 
   {
-    LayerInfo &obj = ti.m_layers[27];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(27);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(23.370, 109.390, 126.380, 137.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 28, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[54];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(54);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(23.360, 109.400, -137.390, -126.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 55);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[28];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(28);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(23.300, 76.100, 126.380, 137.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 29, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.000, 59.900);
   }
   {
-    LayerInfo &obj = ti.m_layers[55];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(55);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(23.300, 76.100, -137.390, -126.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 56);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.000, 59.900);
   }
   {
-    LayerInfo &obj = ti.m_layers[29];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(29);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(23.360, 109.410, 140.380, 151.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 30, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[56];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(56);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(23.360, 109.390, -151.420, -140.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 57);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[30];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(30);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(23.300, 76.100, 140.380, 151.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 31, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.000, 59.900);
   }
   {
-    LayerInfo &obj = ti.m_layers[57];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(57);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(23.300, 76.100, -151.420, -140.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 58);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.000, 59.900);
   }
   {
-    LayerInfo &obj = ti.m_layers[31];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(31);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(23.360, 109.390, 154.380, 165.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 32, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[58];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(58);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(23.360, 109.390, -165.390, -154.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 59);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[32];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(32);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(23.300, 76.100, 154.380, 165.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 33, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.000, 59.900);
   }
   {
-    LayerInfo &obj = ti.m_layers[59];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(59);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(23.300, 76.100, -165.390, -154.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 60);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.000, 59.900);
   }
   {
-    LayerInfo &obj = ti.m_layers[33];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(33);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(32.110, 109.400, 168.380, 179.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 34, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[60];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(60);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(32.110, 109.390, -179.390, -168.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 61);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[34];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(34);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(31.600, 75.900, 168.380, 179.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 35, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.100, 59.700);
   }
   {
-    LayerInfo &obj = ti.m_layers[61];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(61);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(31.600, 75.900, -179.390, -168.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 62);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.100, 59.700);
   }
   {
-    LayerInfo &obj = ti.m_layers[35];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(35);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(32.110, 109.390, 182.370, 193.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 36, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[62];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(62);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(32.110, 109.390, -193.380, -182.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 63);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[36];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(36);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(31.600, 75.900, 182.370, 193.390);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 37, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.100, 59.700);
   }
   {
-    LayerInfo &obj = ti.m_layers[63];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(63);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(31.600, 75.900, -193.380, -182.380);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 64);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.100, 59.700);
   }
   {
-    LayerInfo &obj = ti.m_layers[37];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(37);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(32.110, 109.390, 199.870, 210.880);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 38, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[64];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(64);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(32.110, 109.390, -210.880, -199.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 65);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[38];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(38);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(31.600, 75.900, 199.870, 210.880);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 39, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.100, 59.700);
   }
   {
-    LayerInfo &obj = ti.m_layers[65];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(65);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(31.600, 75.900, -210.880, -199.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 66);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
     obj.set_r_hole_range(42.100, 59.700);
   }
   {
-    LayerInfo &obj = ti.m_layers[39];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(39);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(39.200, 109.390, 218.870, 229.860);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 40, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[66];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(66);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(39.200, 109.390, -229.860, -218.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 67);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[40];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(40);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(59.900, 75.900, 218.870, 229.860);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 41, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[67];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(67);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(59.900, 75.900, -229.860, -218.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 68);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[41];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(41);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(39.200, 109.400, 239.370, 250.360);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 42, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[68];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(68);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(39.200, 109.400, -250.360, -239.370);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 69);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[42];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(42);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(59.900, 75.900, 239.370, 250.360);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 43, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[69];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(69);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(59.900, 75.900, -250.360, -239.370);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 70);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[43];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(43);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(50.410, 109.400, 260.870, 271.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, 44, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[70];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(70);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(50.410, 109.390, -271.880, -260.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, 71);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = false;
-    obj.m_is_stereo_lyr = false;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[44];
-    obj.m_layer_type = LayerInfo::EndCapPos;
+    LayerInfo &obj = ti.layer_nc(44);
+    obj.set_layer_type(LayerInfo::EndCapPos);
     obj.set_limits(59.900, 75.900, 260.870, 271.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = true;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
   }
   {
-    LayerInfo &obj = ti.m_layers[71];
-    obj.m_layer_type = LayerInfo::EndCapNeg;
+    LayerInfo &obj = ti.layer_nc(71);
+    obj.set_layer_type(LayerInfo::EndCapNeg);
     obj.set_limits(59.900, 75.900, -271.880, -260.870);
-    obj.m_propagate_to = obj.z_mean();
-    obj.set_next_layers(-1, -1, -1);
-    obj.m_q_bin = 10.000;
-    obj.m_is_outer = true;
-    obj.m_is_stereo_lyr = true;
+    obj.set_propagate_to(obj.z_mean());
+    obj.set_q_bin(10.000);
+    obj.set_is_stereo(true);
     obj.m_is_tec_lyr = true;
   }
 }
diff --git a/RecoTracker/MkFit/src/ES_MkFitIterationConfig.cc b/RecoTracker/MkFit/src/ES_MkFitIterationConfig.cc
index 9ba5e5559c1ad..3562947eb5264 100644
--- a/RecoTracker/MkFit/src/ES_MkFitIterationConfig.cc
+++ b/RecoTracker/MkFit/src/ES_MkFitIterationConfig.cc
@@ -1,3 +1,3 @@
-#include "mkFit/IterationConfig.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
 #include "FWCore/Utilities/interface/typelookup.h"
 TYPELOOKUP_DATA_REG(mkfit::IterationConfig);
diff --git a/RecoTracker/MkFit/src/MkFitEventOfHits.cc b/RecoTracker/MkFit/src/MkFitEventOfHits.cc
index 72b6c98345dae..1070d692ad862 100644
--- a/RecoTracker/MkFit/src/MkFitEventOfHits.cc
+++ b/RecoTracker/MkFit/src/MkFitEventOfHits.cc
@@ -1,7 +1,7 @@
 #include "RecoTracker/MkFit/interface/MkFitEventOfHits.h"
 
 // mkFit includes
-#include "mkFit/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
 
 MkFitEventOfHits::MkFitEventOfHits() = default;
 MkFitEventOfHits::MkFitEventOfHits(std::unique_ptr<mkfit::EventOfHits> eoh) : eventOfHits_(std::move(eoh)) {}
diff --git a/RecoTracker/MkFit/src/MkFitGeometry.cc b/RecoTracker/MkFit/src/MkFitGeometry.cc
index 35616f90b902b..3bfb51ecfcca6 100644
--- a/RecoTracker/MkFit/src/MkFitGeometry.cc
+++ b/RecoTracker/MkFit/src/MkFitGeometry.cc
@@ -5,8 +5,8 @@
 #include "RecoTracker/TkDetLayers/interface/GeometricSearchTracker.h"
 #include "RecoTracker/MkFit/interface/MkFitGeometry.h"
 
-#include "LayerNumberConverter.h"
-#include "TrackerInfo.h"
+#include "RecoTracker/MkFitCMS/interface/LayerNumberConverter.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
 
 namespace {
   bool isPlusSide(const TrackerTopology& ttopo, DetId detid) {
diff --git a/RecoTracker/MkFit/src/MkFitHitWrapper.cc b/RecoTracker/MkFit/src/MkFitHitWrapper.cc
index ebef4972f777b..3bf8ddb6cfade 100644
--- a/RecoTracker/MkFit/src/MkFitHitWrapper.cc
+++ b/RecoTracker/MkFit/src/MkFitHitWrapper.cc
@@ -2,8 +2,8 @@
 #include "RecoTracker/MkFit/interface/MkFitHitWrapper.h"
 
 // mkFit includes
-#include "Hit.h"
-#include "mkFit/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
 
 MkFitHitWrapper::MkFitHitWrapper() = default;
 MkFitHitWrapper::~MkFitHitWrapper() = default;
diff --git a/RecoTracker/MkFit/src/MkFitOutputWrapper.cc b/RecoTracker/MkFit/src/MkFitOutputWrapper.cc
index 7ad4b457c10dc..f94dd46c59ffa 100644
--- a/RecoTracker/MkFit/src/MkFitOutputWrapper.cc
+++ b/RecoTracker/MkFit/src/MkFitOutputWrapper.cc
@@ -1,7 +1,7 @@
 #include "RecoTracker/MkFit/interface/MkFitOutputWrapper.h"
 
 // mkFit includes
-#include "Track.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
 
 MkFitOutputWrapper::MkFitOutputWrapper() = default;
 
diff --git a/RecoTracker/MkFit/src/MkFitSeedWrapper.cc b/RecoTracker/MkFit/src/MkFitSeedWrapper.cc
index 22964e21df56d..cdb6e87a7cf55 100644
--- a/RecoTracker/MkFit/src/MkFitSeedWrapper.cc
+++ b/RecoTracker/MkFit/src/MkFitSeedWrapper.cc
@@ -1,7 +1,7 @@
 #include "RecoTracker/MkFit/interface/MkFitSeedWrapper.h"
 
 // mkFit includes
-#include "Track.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
 
 MkFitSeedWrapper::MkFitSeedWrapper() = default;
 
diff --git a/RecoTracker/MkFitCMS/BuildFile.xml b/RecoTracker/MkFitCMS/BuildFile.xml
new file mode 100644
index 0000000000000..9ab03be951b1e
--- /dev/null
+++ b/RecoTracker/MkFitCMS/BuildFile.xml
@@ -0,0 +1,5 @@
+<use name="RecoTracker/MkFitCore"/>
+<flags CXXFLAGS="-fopenmp-simd"/>
+<export>
+  <lib name="RecoTrackerMkFitCMS"/>
+</export>
diff --git a/RecoTracker/MkFitCMS/interface/LayerNumberConverter.h b/RecoTracker/MkFitCMS/interface/LayerNumberConverter.h
new file mode 100644
index 0000000000000..5528318dec468
--- /dev/null
+++ b/RecoTracker/MkFitCMS/interface/LayerNumberConverter.h
@@ -0,0 +1,116 @@
+#ifndef RecoTracker_MkFitCMS_interface_LayerNumberConverter_h
+#define RecoTracker_MkFitCMS_interface_LayerNumberConverter_h
+
+namespace mkfit {
+
+  enum struct TkLayout { phase0 = 0, phase1 = 1 };  // layout selector read by LayerNumberConverter
+
+  class LayerNumberConverter {  // maps CMSSW subdetector/layer/stereo/z-side to a layer index
+  public:
+    LayerNumberConverter(TkLayout layout) : lo_(layout) {}  // stores the layout that drives all conversions
+    unsigned int nLayers() const {  // number of layers for the configured layout
+      if (lo_ == TkLayout::phase0)
+        return 69;
+      if (lo_ == TkLayout::phase1)
+        return 72;
+      return 10;  // fallback for any other TkLayout value
+    }
+    int convertLayerNumber(int det, int lay, bool useMatched, int isStereo, bool posZ) const {
+      if (det == 1 || det == 3 || det == 5) {  // BPIX, TIB, TOB: use the barrel numbering
+        return convertBarrelLayerNumber(det, lay, useMatched, isStereo);
+      } else {  // every other det value goes through the disk numbering
+        int disk = convertDiskNumber(det, lay, useMatched, isStereo);
+        if (disk < 0)
+          return -1;  // no mapping for this combination
+
+        int lOffset = 0;
+        if (lo_ == TkLayout::phase1)
+          lOffset = 1;  // phase1 shifts the numbering by one
+        disk += 17 + lOffset;  // first index after the unmatched barrel range (max 16 + lOffset)
+        if (!posZ)  // the negative-z side gets an additional offset
+          disk += 25 + 2 * lOffset;  // NOTE(review): phase0 yields 25 but disk indices span 0..25 -- confirm
+        return disk;
+      }
+      return -1;  // not reached: both branches above return
+    }
+
+    int convertBarrelLayerNumber(int cmsswdet, int cmsswlay, bool useMatched, int isStereo) const {
+      int lOffset = 0;  // extra shift added to the TIB/TOB indices below
+      if (lo_ == TkLayout::phase1)
+        lOffset = 1;
+      if (cmsswdet == 2 || cmsswdet == 4 || cmsswdet == 6)
+        return -1;  // FPIX, TID, TEC are not handled by the barrel numbering
+      if (cmsswdet == 1)
+        return cmsswlay - 1;  // BPIX: zero-based layer, lOffset not applied
+      if (useMatched) {  // matched numbering: a single index per TIB/TOB layer
+        // TIB: layers 1-2 require isStereo == -1, layers 3-4 require isStereo == 0
+        if (cmsswdet == 3) {
+          if (cmsswlay == 1 && isStereo == -1)
+            return 3 + lOffset;
+          else if (cmsswlay == 2 && isStereo == -1)
+            return 4 + lOffset;
+          else if (cmsswlay == 3 && isStereo == 0)
+            return 5 + lOffset;
+          else if (cmsswlay == 4 && isStereo == 0)
+            return 6 + lOffset;
+        }
+        // TOB: layers 1-2 require isStereo == -1, layers 3-6 require isStereo == 0
+        else if (cmsswdet == 5) {
+          if (cmsswlay == 1 && isStereo == -1)
+            return 7 + lOffset;
+          else if (cmsswlay == 2 && isStereo == -1)
+            return 8 + lOffset;
+          else if (cmsswlay >= 3 && cmsswlay <= 6 && isStereo == 0)
+            return 6 + cmsswlay + lOffset;
+        }
+        return -1;  // any other combination has no matched index
+      } else {  // unmatched numbering: layers 1-2 get one index per isStereo value (0 or 1)
+        // TIB: layers 1-2 take two indices each, layers 3-4 one each (isStereo == 0 only)
+        if (cmsswdet == 3) {
+          if ((cmsswlay == 1 || cmsswlay == 2) && (isStereo == 0 || isStereo == 1)) {
+            return 1 + cmsswlay * 2 + isStereo + lOffset;
+          } else if (cmsswlay == 3 && isStereo == 0)
+            return 7 + lOffset;
+          else if (cmsswlay == 4 && isStereo == 0)
+            return 8 + lOffset;
+        }
+        // TOB: layers 1-2 take two indices each, layers 3-6 one each (isStereo == 0 only)
+        else if (cmsswdet == 5) {
+          if ((cmsswlay == 1 || cmsswlay == 2) && (isStereo == 0 || isStereo == 1)) {
+            return 7 + cmsswlay * 2 + isStereo + lOffset;
+          } else if (cmsswlay >= 3 && cmsswlay <= 6 && isStereo == 0)
+            return 10 + cmsswlay + lOffset;
+        }
+        return -1;  // any other combination has no unmatched index
+      }
+    }
+    int convertDiskNumber(int cmsswdet, int cmsswdisk, bool useMatched, int isStereo) const {
+      if (cmsswdet == 1 || cmsswdet == 3 || cmsswdet == 5)
+        return -1;  // BPIX, TIB, TOB are not handled by the disk numbering
+      if (cmsswdet == 2)
+        return cmsswdisk - 1;  // FPIX: zero-based disk, lOffset not applied
+      int lOffset = 0;
+      if (lo_ == TkLayout::phase1)
+        lOffset = 1;
+      if (useMatched) {
+        return -1;  // no matched numbering is provided beyond FPIX
+      } else {
+        if ((isStereo != 0 && isStereo != 1) || cmsswdisk < 1)
+          return -1;  // only isStereo 0/1 and disk numbers >= 1 are accepted
+        // TID: disks 1-3, two indices per disk (isStereo 0 and 1)
+        if (cmsswdet == 4 && cmsswdisk <= 3)
+          return cmsswdisk * 2 + isStereo + lOffset;
+        // TEC: disks 1-9, two indices per disk (isStereo 0 and 1)
+        else if (cmsswdet == 6 && cmsswdisk <= 9)
+          return 6 + cmsswdisk * 2 + isStereo + lOffset;
+        return -1;  // unknown detector or disk number out of range
+      }
+    }
+
+  private:
+    TkLayout lo_;  // layout given to the constructor
+  };
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCMS/interface/MkStdSeqs.h b/RecoTracker/MkFitCMS/interface/MkStdSeqs.h
new file mode 100644
index 0000000000000..5b57e32ac17b3
--- /dev/null
+++ b/RecoTracker/MkFitCMS/interface/MkStdSeqs.h
@@ -0,0 +1,138 @@
+#ifndef RecoTracker_MkFitCMS_interface_MkStdSeqs_h
+#define RecoTracker_MkFitCMS_interface_MkStdSeqs_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+namespace mkfit {
+
+  class EventOfHits;
+  class IterationConfig;
+  class TrackerInfo;
+
+  namespace StdSeq {
+
+    void loadDeads(EventOfHits &eoh, const std::vector<DeadVec> &deadvectors);
+
+    void cmssw_LoadHits_Begin(EventOfHits &eoh, const std::vector<const HitVec *> &orig_hitvectors);
+    void cmssw_LoadHits_End(EventOfHits &eoh);
+
+    // Not used anymore. Left here if we want to experiment again with
+    // COPY_SORTED_HITS in class LayerOfHits.
+    void cmssw_Map_TrackHitIndices(const EventOfHits &eoh, TrackVec &seeds);
+    void cmssw_ReMap_TrackHitIndices(const EventOfHits &eoh, TrackVec &out_tracks);
+
+    int clean_cms_seedtracks_iter(TrackVec *seed_ptr, const IterationConfig &itrcfg, const BeamSpot &bspot);
+
+    void find_duplicates(TrackVec &tracks);
+    void remove_duplicates(TrackVec &tracks);
+
+    void find_duplicates_sharedhits(TrackVec &tracks, const float fraction);
+    void find_duplicates_sharedhits_pixelseed(TrackVec &tracks,
+                                              const float fraction,
+                                              const float drth_central,
+                                              const float drth_obarrel,
+                                              const float drth_forward);
+
+    // Hit-count quality filter that first subtracts a seed-size dependent "penalty" (meant for
+    //   strip-based seeds); this implicitly separates triplet and doublet seeds with glued layers.
+    template <class TRACK>
+    bool qfilter_n_hits(const TRACK &t, int nMinHits) {
+      const int penalty = (t.getNSeedHits() > 5) ? 3 : 2;
+      const int nAfterPenalty = t.nFoundHits() - penalty;
+      return nAfterPenalty >= nMinHits;
+    }
+
+    // Plain hit-count quality filter (no seed penalty); used with pixel-based seeds.
+    template <class TRACK>
+    bool qfilter_n_hits_pixseed(const TRACK &t, int nMinHits) {
+      return nMinHits <= t.nFoundHits();
+    }
+
+    // Layer-dependent quality filter: returns false (track rejected) when either of the two vetoes below fires.
+    template <class TRACK>
+    bool qfilter_n_layers(const TRACK &t, const BeamSpot &bspot, const TrackerInfo &trk_inf) {
+      int enhits = t.nHitsByTypeEncoded(trk_inf);  // encoded hit counts, decoded just below
+      int npixhits = t.nPixelDecoded(enhits);
+      int enlyrs = t.nLayersByTypeEncoded(trk_inf);  // encoded layer counts, decoded just below
+      int npixlyrs = t.nPixelDecoded(enlyrs);
+      int nmatlyrs = t.nTotMatchDecoded(enlyrs);
+      int llyr = t.getLastFoundHitLyr();
+      int lplyr = t.getLastFoundPixelHitLyr();
+      float invpt = t.invpT();
+      float invptmin = 1.43;  // min 1/pT (=1/0.7) for full filter on (npixhits<=3 .or. npixlyrs<=3)
+      float d0BS = t.d0BeamSpot(bspot.x, bspot.y);
+      float d0_max = 0.1;  // 1 mm
+      // NOTE(review): layer ids 2/18/45 and ranges [0,3), [18,20), [45,47) are hard-coded; presumably pixel layers - confirm.
+      bool endsInsidePix = (llyr == 2 || llyr == 18 || llyr == 45);
+      bool lastInsidePix = ((0 <= lplyr && lplyr < 3) || (18 <= lplyr && lplyr < 20) || (45 <= lplyr && lplyr < 47));
+      return !(((npixhits <= 3 || npixlyrs <= 3) && endsInsidePix &&  // veto 1: few pixel hits/layers, last hit on 2/18/45
+                (invpt < invptmin || (invpt >= invptmin && std::abs(d0BS) > d0_max))) ||  // ... and low 1/pT or large |d0|
+               ((npixlyrs <= 3 && nmatlyrs <= 6) && lastInsidePix && llyr != lplyr && std::abs(d0BS) > d0_max));  // veto 2
+    }
+
+    /// Quality filter tuned for the pixelLess iteration; applied during the forward search.
+    template <class TRACK>
+    bool qfilter_pixelLessFwd(const TRACK &t, const BeamSpot &bspot, const TrackerInfo &tk_info) {
+      const float d0_max = 0.05;       // 0.5 mm
+      const float invptmin = 1.11;     // =1/0.9
+      const float thetasymmin = 1.11;  // -> |eta|=1.45
+
+      const float d0BS = t.d0BeamSpot(bspot.x, bspot.y);
+      const float invpt = t.invpT();
+      const float thetasym = std::abs(t.theta() - Const::PIOver2);
+      // Layer and hit totals, decoded with nTotMatchDecoded() from the per-type encoded counters.
+      const int nLyrs = t.nTotMatchDecoded(t.nLayersByTypeEncoded(tk_info));
+      const int nHits = t.nTotMatchDecoded(t.nHitsByTypeEncoded(tk_info));
+      // Found hits left after the seed reduction: 2 for seeds of up to 5 hits, otherwise 3.
+      const int nAfterSeed = t.nFoundHits() - ((t.getNSeedHits() <= 5) ? 2 : 3);
+
+      const bool belowInvPtMin = invpt < invptmin;
+      const bool aboveInvPtMin = invpt > invptmin;
+      const bool longEnough = (belowInvPtMin && nAfterSeed >= 4) ||
+                              (aboveInvPtMin && thetasym <= thetasymmin && nAfterSeed >= 3) ||
+                              (aboveInvPtMin && thetasym > thetasymmin && nAfterSeed >= 4);
+      // Veto: few layers or hits, |d0| w.r.t. the beam spot above d0_max, and 1/pT below the threshold.
+      const bool vetoed = (nLyrs <= 4 || nHits <= 4) && std::abs(d0BS) > d0_max && belowInvPtMin;
+      return longEnough && !vetoed;
+    }
+
+    /// Quality filter tuned for the pixelLess iteration; applied during the backward search.
+    template <class TRACK>
+    bool qfilter_pixelLessBkwd(const TRACK &t, const BeamSpot &bspot, const TrackerInfo &tk_info) {
+      const float d0_max = 0.1;          // 1 mm
+      const float invptmin = 1.11;       // =1/0.9
+      const float thetasymmin_l = 0.80;  // -> |eta|=0.9
+      const float thetasymmin_h = 1.11;  // -> |eta|=1.45
+
+      const float d0BS = t.d0BeamSpot(bspot.x, bspot.y);
+      const float invpt = t.invpT();
+      const float thetasym = std::abs(t.theta() - Const::PIOver2);
+      const int nLyrs = t.nTotMatchDecoded(t.nLayersByTypeEncoded(tk_info));
+      const int nHits = t.nTotMatchDecoded(t.nHitsByTypeEncoded(tk_info));
+      const bool farFromBS = std::abs(d0BS) > d0_max;
+
+      // Each guard below is one of the three rejection conditions; a track passing all of them is kept.
+      if (nLyrs <= 3 || nHits <= 3)
+        return false;
+      if ((nLyrs <= 4 || nHits <= 4) && (invpt < invptmin || (thetasym > thetasymmin_l && farFromBS)))
+        return false;
+      if ((nLyrs <= 6 || nHits <= 6) && invpt > invptmin && thetasym > thetasymmin_h && farFromBS)
+        return false;
+      return true;
+    }
+
+    template <class TRACK>
+    bool qfilter_nan_n_silly(const TRACK &t) {
+      return not t.hasNanNSillyValues();  // pass only when hasNanNSillyValues() reports nothing
+    }
+
+    void find_and_remove_duplicates(TrackVec &tracks, const IterationConfig &itconf);
+
+  }  // namespace StdSeq
+
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCMS/interface/runFunctions.h b/RecoTracker/MkFitCMS/interface/runFunctions.h
new file mode 100644
index 0000000000000..e36d60be08c89
--- /dev/null
+++ b/RecoTracker/MkFitCMS/interface/runFunctions.h
@@ -0,0 +1,26 @@
+#ifndef RecoTracker_MkFitCMS_interface_runFunctions_h
+#define RecoTracker_MkFitCMS_interface_runFunctions_h
+
+#include "RecoTracker/MkFitCore/interface/Track.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+
+namespace mkfit {
+
+  class IterationConfig;
+  class MkBuilder;
+
+  // A nullptr entry in hit_masks is valid: it means that no mask is applied to the corresponding layers.
+  void run_OneIteration(const TrackerInfo &trackerInfo,
+                        const IterationConfig &itconf,
+                        const EventOfHits &eoh,
+                        const std::vector<const std::vector<bool> *> &hit_masks,
+                        MkBuilder &builder,
+                        TrackVec &seeds,
+                        TrackVec &out_tracks,
+                        bool do_seed_clean,
+                        bool do_backward_fit,
+                        bool do_remove_duplicates);
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCMS/src/MkStdSeqs.cc b/RecoTracker/MkFitCMS/src/MkStdSeqs.cc
new file mode 100644
index 0000000000000..3a6ad20c5dfdb
--- /dev/null
+++ b/RecoTracker/MkFitCMS/src/MkStdSeqs.cc
@@ -0,0 +1,579 @@
+#include "RecoTracker/MkFitCMS/interface/MkStdSeqs.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+
+#include "RecoTracker/MkFitCore/interface/binnor.h"
+
+#include "oneapi/tbb/parallel_for.h"
+
+namespace mkfit {
+
+  namespace StdSeq {
+
+    //=========================================================================
+    // Hit processing
+    //=========================================================================
+
+    void loadDeads(EventOfHits &eoh, const std::vector<DeadVec> &deadvectors) {
+      int layer = 0;  // deadvectors[k] is handed to suckInDeads() together with index k
+      for (const auto &deads : deadvectors)
+        eoh.suckInDeads(layer++, deads);
+    }
+
+    // Loading hits in CMSSW from two "large multi-layer vectors".
+    // orig_hitvectors[0] - pixels,
+    // orig_hitvectors[1] - strips.
+
+    void cmssw_LoadHits_Begin(EventOfHits &eoh, const std::vector<const HitVec *> &orig_hitvectors) {
+      eoh.reset();
+      for (int lay = 0; lay < eoh.nLayers(); ++lay) {
+        auto &&layer = eoh[lay];  // pixel layers register against vector 0, all other layers against vector 1
+        layer.beginRegistrationOfHits(layer.is_pix_lyr() ? *orig_hitvectors[0] : *orig_hitvectors[1]);
+      }
+    }
+
+    // Loop with LayerOfHits::registerHit(int idx) - it takes Hit out of original HitVec to
+    // extract phi, r/z, and calculate qphifines
+    //
+    // Something like what is done in MkFitInputConverter::convertHits
+    //
+    // Problem is I don't know layers for each large-vector;
+    // Also, layer is calculated for each detset when looping over the HitCollection
+
+    void cmssw_LoadHits_End(EventOfHits &eoh) {
+      // Call endRegistrationOfHits(false) on every layer, in layer order.
+      int lay = 0;
+      while (lay < eoh.nLayers())
+        eoh[lay++].endRegistrationOfHits(false);
+    }
+
+    //=========================================================================
+    // Hit-index mapping / remapping
+    //=========================================================================
+
+    void cmssw_Map_TrackHitIndices(const EventOfHits &eoh, TrackVec &seeds) {
+      // Every non-negative hit index is replaced by the result of its layer's getHitIndexFromOriginal().
+      for (Track &seed : seeds) {
+        for (int ih = 0; ih < seed.nTotalHits(); ++ih) {
+          const int origIdx = seed.getHitIdx(ih);
+          if (origIdx < 0)
+            continue;  // negative indices are left untouched
+          const int lyr = seed.getHitLyr(ih);
+          seed.setHitIdx(ih, eoh[lyr].getHitIndexFromOriginal(origIdx));
+        }
+      }
+    }
+
+    void cmssw_ReMap_TrackHitIndices(const EventOfHits &eoh, TrackVec &out_tracks) {
+      // Every non-negative hit index is replaced by the result of its layer's getOriginalHitIndex().
+      for (Track &trk : out_tracks) {
+        for (int ih = 0; ih < trk.nTotalHits(); ++ih) {
+          const int curIdx = trk.getHitIdx(ih);
+          if (curIdx < 0)
+            continue;  // negative indices are left untouched
+          const int lyr = trk.getHitLyr(ih);
+          trk.setHitIdx(ih, eoh[lyr].getOriginalHitIndex(curIdx));
+        }
+      }
+    }
+
+    //=========================================================================
+    // Seed cleaning (multi-iter)
+    //=========================================================================
+    int clean_cms_seedtracks_iter(TrackVec *seed_ptr, const IterationConfig &itrcfg, const BeamSpot &bspot) {
+      // Removes seeds that overlap another seed (same charge, similar pT, inside an elliptical dR-dz window) and
+      // merges hits of the dropped seed into the kept one. Returns the number of seeds left (0 if seed_ptr is null).
+      const float etamax_brl = Config::c_etamax_brl;
+      const float dpt_common = Config::c_dpt_common;
+
+      // Window sizes; suffix b/e: |eta| below / not below etamax_brl, suffix h/l: pT above / not above ptmin_hpt.
+      const float dzmax_bh = itrcfg.m_params.c_dzmax_bh;
+      const float drmax_bh = itrcfg.m_params.c_drmax_bh;
+      const float dzmax_eh = itrcfg.m_params.c_dzmax_eh;
+      const float drmax_eh = itrcfg.m_params.c_drmax_eh;
+      const float dzmax_bl = itrcfg.m_params.c_dzmax_bl;
+      const float drmax_bl = itrcfg.m_params.c_drmax_bl;
+      const float dzmax_el = itrcfg.m_params.c_dzmax_el;
+      const float drmax_el = itrcfg.m_params.c_drmax_el;
+
+      const float ptmin_hpt = itrcfg.m_params.c_ptthr_hpt;
+
+      const float dzmax2_inv_bh = 1.f / (dzmax_bh * dzmax_bh);
+      const float drmax2_inv_bh = 1.f / (drmax_bh * drmax_bh);
+      const float dzmax2_inv_eh = 1.f / (dzmax_eh * dzmax_eh);
+      const float drmax2_inv_eh = 1.f / (drmax_eh * drmax_eh);
+      const float dzmax2_inv_bl = 1.f / (dzmax_bl * dzmax_bl);
+      const float drmax2_inv_bl = 1.f / (drmax_bl * drmax_bl);
+      const float dzmax2_inv_el = 1.f / (dzmax_el * dzmax_el);
+      const float drmax2_inv_el = 1.f / (drmax_el * drmax_el);
+
+      // Merge hits from overlapping seeds?
+      // For now always true, we require extra hits after seed.
+      const bool merge_hits = true;  // itrcfg.merge_seed_hits_during_cleaning();
+
+      if (seed_ptr == nullptr)
+        return 0;
+      TrackVec &seeds = *seed_ptr;
+
+      const int ns = seeds.size();
+#ifdef DEBUG
+      std::cout << "before seed cleaning " << seeds.size() << std::endl;
+#endif
+      TrackVec cleanSeedTracks;
+      cleanSeedTracks.reserve(ns);
+      std::vector<bool> writetrack(ns, true);
+
+      const float invR1GeV = 1.f / Config::track1GeVradius;
+
+      // Per-seed quantities cached up front so that the pair loop below only reads plain arrays.
+      std::vector<int> charge(ns);
+      std::vector<float> oldPhi(ns);
+      std::vector<float> pos2(ns);
+      std::vector<float> eta(ns);
+      std::vector<float> ctheta(ns);
+      std::vector<float> invptq(ns);
+      std::vector<float> pt(ns);
+      std::vector<float> x(ns);
+      std::vector<float> y(ns);
+      std::vector<float> z(ns);
+      std::vector<float> d0(ns);
+      int i1, i2;  // i1: index of the seed that is kept, i2: index of the seed that is dropped
+
+      for (int ts = 0; ts < ns; ts++) {
+        const Track &tk = seeds[ts];
+        charge[ts] = tk.charge();
+        oldPhi[ts] = tk.momPhi();
+        pos2[ts] = std::pow(tk.x(), 2) + std::pow(tk.y(), 2);
+        eta[ts] = tk.momEta();
+        ctheta[ts] = 1.f / std::tan(tk.theta());
+        invptq[ts] = tk.charge() * tk.invpT();
+        pt[ts] = tk.pT();
+        x[ts] = tk.x();
+        y[ts] = tk.y();
+        z[ts] = tk.z();
+        d0[ts] = tk.d0BeamSpot(bspot.x, bspot.y);
+      }
+
+      for (int ts = 0; ts < ns; ts++) {
+        if (not writetrack[ts])
+          continue;  // Note: this speed up prevents transitive masking (possibly marginal gain).
+
+        const float oldPhi1 = oldPhi[ts];
+        const float pos2_first = pos2[ts];
+        const float eta1 = eta[ts];
+        const float pt1 = pt[ts];
+        const float invptq_first = invptq[ts];
+
+        // To study some more details -- need EventOfHits for this
+        int n_ovlp_hits_added = 0;
+
+        for (int tss = ts + 1; tss < ns; tss++) {
+          const float pt2 = pt[tss];
+
+          ////// Always require charge consistency. If different charge is assigned, do not remove seed-track
+          if (charge[tss] != charge[ts])
+            continue;
+
+          const float thisDPt = std::abs(pt2 - pt1);
+          ////// Require pT consistency between seeds. If dpT is large, do not remove seed-track.
+          if (thisDPt > dpt_common * (pt1))
+            continue;
+
+          const float eta2 = eta[tss];
+          const float deta2 = std::pow(eta1 - eta2, 2);
+
+          const float oldPhi2 = oldPhi[tss];
+
+          const float pos2_second = pos2[tss];
+          const float thisDXYSign05 = pos2_second > pos2_first ? -0.5f : 0.5f;
+
+          const float thisDXY = thisDXYSign05 * std::sqrt(std::pow(x[ts] - x[tss], 2) + std::pow(y[ts] - y[tss], 2));
+
+          const float invptq_second = invptq[tss];
+
+          const float newPhi1 = oldPhi1 - thisDXY * invR1GeV * invptq_first;
+          const float newPhi2 = oldPhi2 + thisDXY * invR1GeV * invptq_second;
+
+          const float dphi = cdist(std::abs(newPhi1 - newPhi2));
+
+          const float dr2 = deta2 + dphi * dphi;
+
+          const float thisDZ = z[ts] - z[tss] - thisDXY * (ctheta[ts] + ctheta[tss]);
+          const float dz2 = thisDZ * thisDZ;
+
+          ////// Reject tracks within dR-dz elliptical window.
+          ////// Adaptive thresholds, based on observation that duplicates are more abundant at large pseudo-rapidity and low track pT
+          bool overlapping = false;
+          if (std::abs(eta1) < etamax_brl) {
+            if (pt1 > ptmin_hpt) {
+              if (dz2 * dzmax2_inv_bh + dr2 * drmax2_inv_bh < 1.0f)
+                overlapping = true;
+            } else {
+              if (dz2 * dzmax2_inv_bl + dr2 * drmax2_inv_bl < 1.0f)
+                overlapping = true;
+            }
+          } else {
+            if (pt1 > ptmin_hpt) {
+              if (dz2 * dzmax2_inv_eh + dr2 * drmax2_inv_eh < 1.0f)
+                overlapping = true;
+            } else {
+              if (dz2 * dzmax2_inv_el + dr2 * drmax2_inv_el < 1.0f)
+                overlapping = true;
+            }
+          }
+
+          if (overlapping) {
+            //Drop tss when its d0 (w.r.t. the beam spot) is larger than that of ts, otherwise drop ts
+            i1 = ts;
+            i2 = tss;
+            if (d0[tss] > d0[ts])
+              writetrack[tss] = false;
+            else {
+              writetrack[ts] = false;
+              i2 = ts;
+              i1 = tss;
+            }
+            // Add hits from tk2 to the seed we are keeping.
+            // NOTE: We have a limit in Track::Status for the number of seed hits.
+            //       There is a check at entry and after adding of a new hit.
+            Track &tk = seeds[i1];
+            if (merge_hits && tk.nTotalHits() < Track::Status::kMaxSeedHits) {
+              const Track &tk2 = seeds[i2];
+              //We are not actually fitting to the extra hits; use chi2 of 0
+              float fakeChi2 = 0.0;
+
+              for (int j = 0; j < tk2.nTotalHits(); ++j) {
+                int hitidx = tk2.getHitIdx(j);
+                int hitlyr = tk2.getHitLyr(j);
+                if (hitidx >= 0) {
+                  bool unique = true;
+                  for (int i = 0; i < tk.nTotalHits(); ++i) {
+                    if ((hitidx == tk.getHitIdx(i)) && (hitlyr == tk.getHitLyr(i))) {
+                      unique = false;
+                      break;
+                    }
+                  }
+                  if (unique) {
+                    tk.addHitIdx(tk2.getHitIdx(j), tk2.getHitLyr(j), fakeChi2);
+                    ++n_ovlp_hits_added;
+                    if (tk.nTotalHits() >= Track::Status::kMaxSeedHits)
+                      break;
+                  }
+                }
+              }
+            }
+            if (n_ovlp_hits_added > 0)
+              tk.sortHitsByLayer();
+          }
+        }  //end of inner loop over tss
+
+        if (writetrack[ts]) {
+          cleanSeedTracks.emplace_back(seeds[ts]);
+        }
+      }
+
+      seeds.swap(cleanSeedTracks);
+
+#ifdef DEBUG
+      {
+        const int ns2 = seeds.size();
+        printf("Number of CMS seeds before %d --> after %d cleaning\n", ns, ns2);
+
+        for (int it = 0; it < ns2; it++) {
+          const Track &ss = seeds[it];
+          printf("  %3i q=%+i pT=%7.3f eta=% 7.3f nHits=%i label=% i\n",
+                 it,
+                 ss.charge(),
+                 ss.pT(),
+                 ss.momEta(),
+                 ss.nFoundHits(),
+                 ss.label());
+        }
+      }
+#endif
+
+#ifdef DEBUG
+      std::cout << "AFTER seed cleaning " << seeds.size() << std::endl;
+#endif
+
+      return seeds.size();
+    }
+
+    //=========================================================================
+    // Duplicate cleaning
+    //=========================================================================
+
+    void find_duplicates(TrackVec &tracks) {  // flags duplicates via setDuplicateValue(); nothing is erased here
+      const auto ntracks = tracks.size();
+      float eta1, phi1, pt1, deta, dphi, dr2;
+
+      if (ntracks == 0) {
+        return;  // also keeps the unsigned "ntracks - 1" below from wrapping around
+      }
+      for (auto itrack = 0U; itrack < ntracks - 1; itrack++) {
+        auto &track = tracks[itrack];
+        using Algo = TrackBase::TrackAlgorithm;
+        auto const algo = track.algorithm();
+        if (algo == Algo::pixelLessStep || algo == Algo::tobTecStep)
+          continue;  // tracks of these two algorithms are never used as the first track of a pair
+        eta1 = track.momEta();
+        phi1 = track.momPhi();
+        pt1 = track.pT();
+        for (auto jtrack = itrack + 1; jtrack < ntracks; jtrack++) {
+          auto &track2 = tracks[jtrack];
+          if (track.label() == track2.label())
+            continue;  // same label: pair is not compared
+          if (track.algoint() != track2.algoint())
+            continue;  // only tracks with the same algoint() are compared
+
+          deta = std::abs(track2.momEta() - eta1);
+          if (deta > Config::maxdEta)
+            continue;
+
+          dphi = std::abs(squashPhiMinimal(phi1 - track2.momPhi()));
+          if (dphi > Config::maxdPhi)
+            continue;
+
+          float maxdR = Config::maxdR;
+          float maxdRSquared = maxdR * maxdR;
+          if (std::abs(eta1) > 2.5f)
+            maxdRSquared *= 16.0f;  // i.e. 4 x maxdR
+          else if (std::abs(eta1) > 1.44f)
+            maxdRSquared *= 9.0f;  // i.e. 3 x maxdR
+          dr2 = dphi * dphi + deta * deta;
+          if (dr2 < maxdRSquared) {
+            //Keep the track with the higher score; on equal scores the first track of the pair is flagged
+            if (track.score() > track2.score())
+              track2.setDuplicateValue(true);
+            else
+              track.setDuplicateValue(true);
+            continue;
+          } else {
+            if (pt1 == 0)
+              continue;  // guards the 1 / pt1 below
+            if (track2.pT() == 0)
+              continue;  // guards the 1 / track2.pT() below
+
+            if (std::abs((1 / track2.pT()) - (1 / pt1)) < Config::maxdPt) {
+              if (Config::useHitsForDuplicates) {
+                float numHitsShared = 0;
+                for (int ihit2 = 0; ihit2 < track2.nTotalHits(); ihit2++) {
+                  const int hitidx2 = track2.getHitIdx(ihit2);
+                  const int hitlyr2 = track2.getHitLyr(ihit2);
+                  if (hitidx2 >= 0) {
+                    auto const it = std::find_if(track.beginHitsOnTrack(),
+                                                 track.endHitsOnTrack(),
+                                                 [&hitidx2, &hitlyr2](const HitOnTrack &element) {
+                                                   return (element.index == hitidx2 && element.layer == hitlyr2);
+                                                 });
+                    if (it != track.endHitsOnTrack())
+                      numHitsShared++;
+                  }
+                }
+                // NOTE(review): the division below has a zero denominator if either track has no found hits - confirm.
+                float fracHitsShared = numHitsShared / std::min(track.nFoundHits(), track2.nFoundHits());
+                //Only remove one of the tracks if they share at least X% of the hits (denominator is the shorter track)
+                if (fracHitsShared < Config::minFracHitsShared)
+                  continue;
+              }
+              //Keep the track with the higher score; on equal scores the first track of the pair is flagged
+              if (track.score() > track2.score())
+                track2.setDuplicateValue(true);
+              else
+                track.setDuplicateValue(true);
+            }  //end of if dPt
+          }    //end of else
+        }      //end of loop over track2
+      }        //end of loop over track1
+    }
+
+    void remove_duplicates(TrackVec &tracks) {  // erases every track whose duplicate flag is set
+      const auto isDuplicate = [](const auto &track) { return track.getDuplicateValue(); };  // by reference: no Track copy
+      tracks.erase(std::remove_if(tracks.begin(), tracks.end(), isDuplicate), tracks.end());
+    }
+
+    //=========================================================================
+    // SHARED HITS DUPLICATE CLEANING
+    //=========================================================================
+
+    void find_duplicates_sharedhits(TrackVec &tracks, const float fraction) {  // flags duplicates, then erases them
+      const auto ntracks = tracks.size();
+
+      std::vector<float> ctheta(ntracks);  // cot(theta) of every track, computed once
+      for (auto itrack = 0U; itrack < ntracks; itrack++) {
+        auto &trk = tracks[itrack];
+        ctheta[itrack] = 1.f / std::tan(trk.theta());
+      }
+
+      for (auto itrack = 0U; itrack < ntracks; itrack++) {
+        auto &trk = tracks[itrack];
+        auto phi1 = trk.momPhi();
+        auto ctheta1 = ctheta[itrack];
+
+        for (auto jtrack = itrack + 1; jtrack < ntracks; jtrack++) {
+          auto &track2 = tracks[jtrack];
+          auto sharedCount = 0;  // hits (same layer and index) present on both tracks
+          auto sharedFirst = 0;  // 1 when the very first hit of both tracks is the same hit
+
+          auto dctheta = std::abs(ctheta[jtrack] - ctheta1);
+          if (dctheta > 1.)
+            continue;  // coarse pre-selection in cot(theta)
+
+          auto dphi = std::abs(squashPhiMinimal(phi1 - track2.momPhi()));
+          if (dphi > 1.)
+            continue;  // coarse pre-selection in phi
+
+          for (int i = 0; i < trk.nTotalHits(); ++i) {
+            if (trk.getHitIdx(i) < 0)
+              continue;
+            const int a = trk.getHitLyr(i);
+            const int b = trk.getHitIdx(i);
+            for (int j = 0; j < track2.nTotalHits(); ++j) {
+              if (track2.getHitIdx(j) < 0)
+                continue;
+              const int c = track2.getHitLyr(j);
+              const int d = track2.getHitIdx(j);
+              if (a == c && b == d)
+                sharedCount += 1;
+              if (a == c && b == d && j == 0 && i == 0)
+                sharedFirst += 1;
+            }
+          }
+          // A shared first hit is taken out of both the shared count and the length of the shorter track.
+          if ((sharedCount - sharedFirst) >=
+              ((std::min(trk.nFoundHits(), track2.nFoundHits()) - sharedFirst) * (fraction))) {
+            if (trk.score() > track2.score())
+              track2.setDuplicateValue(true);
+            else
+              trk.setDuplicateValue(true);
+          }
+        }
+      }
+      const auto isDuplicate = [](const auto &track) { return track.getDuplicateValue(); };  // by reference: no Track copy
+      tracks.erase(std::remove_if(tracks.begin(), tracks.end(), isDuplicate), tracks.end());
+    }
+
+    void find_duplicates_sharedhits_pixelseed(TrackVec &tracks,
+                                              const float fraction,        // shared-hit fraction that flags a duplicate
+                                              const float drth_central,    // dR threshold used by default
+                                              const float drth_obarrel,    // dR threshold for |ctheta| > Config::maxcth_ob
+                                              const float drth_forward) {  // dR threshold for |ctheta| > Config::maxcth_fw
+      const auto ntracks = tracks.size();
+
+      std::vector<float> ctheta(ntracks);  // cot(theta) of every track, computed once
+      for (auto itrack = 0U; itrack < ntracks; itrack++) {
+        auto &trk = tracks[itrack];
+        ctheta[itrack] = 1.f / std::tan(trk.theta());
+      }
+
+      float phi1, invpt1, dctheta, ctheta1, dphi, dr2;
+      for (auto itrack = 0U; itrack < ntracks; itrack++) {
+        auto &trk = tracks[itrack];
+        phi1 = trk.momPhi();
+        invpt1 = trk.invpT();
+        ctheta1 = ctheta[itrack];
+        for (auto jtrack = itrack + 1; jtrack < ntracks; jtrack++) {
+          auto &track2 = tracks[jtrack];
+          if (trk.label() == track2.label())
+            continue;
+
+          dctheta = std::abs(ctheta[jtrack] - ctheta1);
+
+          if (dctheta > Config::maxdcth)
+            continue;
+
+          dphi = std::abs(squashPhiMinimal(phi1 - track2.momPhi()));
+
+          if (dphi > Config::maxdphi)
+            continue;
+
+          float maxdRSquared = drth_central * drth_central;
+          if (std::abs(ctheta1) > Config::maxcth_fw)
+            maxdRSquared = drth_forward * drth_forward;
+          else if (std::abs(ctheta1) > Config::maxcth_ob)
+            maxdRSquared = drth_obarrel * drth_obarrel;
+          dr2 = dphi * dphi + dctheta * dctheta;
+          if (dr2 < maxdRSquared) {
+            //Close in (phi, cot(theta)): keep the track with the higher score, no hit comparison needed
+            if (trk.score() > track2.score())
+              track2.setDuplicateValue(true);
+            else
+              trk.setDuplicateValue(true);
+            continue;
+          }
+
+          if (std::abs(track2.invpT() - invpt1) > Config::maxd1pt)
+            continue;
+
+          auto sharedCount = 0;  // hits (same layer and index) present on both tracks
+          auto sharedFirst = 0;  // 1 when the very first hit of both tracks is the same hit
+          const auto minFoundHits = std::min(trk.nFoundHits(), track2.nFoundHits());
+
+          for (int i = 0; i < trk.nTotalHits(); ++i) {
+            if (trk.getHitIdx(i) < 0)
+              continue;
+            const int a = trk.getHitLyr(i);
+            const int b = trk.getHitIdx(i);
+            for (int j = 0; j < track2.nTotalHits(); ++j) {
+              if (track2.getHitIdx(j) < 0)
+                continue;
+              const int c = track2.getHitLyr(j);
+              const int d = track2.getHitIdx(j);
+
+              //this is to count once shared matched hits (may be done more properly...)
+              if (a == c && b == d)
+                sharedCount += 1;
+              if (j == 0 && i == 0 && a == c && b == d)
+                sharedFirst += 1;
+              // Threshold reached: further counting cannot change the decision taken below, so stop early.
+              if ((sharedCount - sharedFirst) >= ((minFoundHits - sharedFirst) * fraction))
+                break;
+            }
+            if ((sharedCount - sharedFirst) >= ((minFoundHits - sharedFirst) * fraction))
+              break;
+          }
+
+          //selection here - the 'fraction' argument sets the share of common hits that labels a duplicate
+          if ((sharedCount - sharedFirst) >= ((minFoundHits - sharedFirst) * fraction)) {
+            if (trk.score() > track2.score())
+              track2.setDuplicateValue(true);
+            else
+              trk.setDuplicateValue(true);
+          }
+        }
+      }  //end loop one over tracks
+
+      //removal here; the predicate takes the track by const reference so that no Track is copied
+      const auto isDuplicate = [](const auto &track) { return track.getDuplicateValue(); };
+      tracks.erase(std::remove_if(tracks.begin(), tracks.end(), isDuplicate), tracks.end());
+    }
+
+    //=========================================================================
+    //
+    //=========================================================================
+
+    void find_and_remove_duplicates(TrackVec &tracks, const IterationConfig &itconf) {
+#ifdef DEBUG
+      std::cout << " find_and_remove_duplicates: input track size " << tracks.size() << std::endl;
+#endif
+      const auto &par = itconf.m_params;
+      // The tight cleaning wins whenever it is requested; otherwise the quality-filter flag picks shared-hits cleaning.
+      if (itconf.m_requires_dupclean_tight) {
+        find_duplicates_sharedhits_pixelseed(
+            tracks, par.fracSharedHits, par.drth_central, par.drth_obarrel, par.drth_forward);
+      } else if (itconf.m_requires_quality_filter) {
+        find_duplicates_sharedhits(tracks, par.fracSharedHits);
+      } else {
+        // Generic path: flag first, erase afterwards.
+        find_duplicates(tracks);
+        remove_duplicates(tracks);
+      }
+
+#ifdef DEBUG
+      std::cout << " find_and_remove_duplicates: output track size " << tracks.size() << std::endl;
+      for (auto const &trk : tracks) {
+        std::cout << trk.parameters() << std::endl;
+      }
+#endif
+    }
+
+  }  // namespace StdSeq
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCMS/src/runFunctions.cc b/RecoTracker/MkFitCMS/src/runFunctions.cc
new file mode 100644
index 0000000000000..c9dede55ac499
--- /dev/null
+++ b/RecoTracker/MkFitCMS/src/runFunctions.cc
@@ -0,0 +1,114 @@
+#include "RecoTracker/MkFitCMS/interface/runFunctions.h"
+#include "RecoTracker/MkFitCore/interface/MkBuilder.h"
+#include "RecoTracker/MkFitCMS/interface/MkStdSeqs.h"
+
+#include "oneapi/tbb/parallel_for.h"
+
+#include <memory>
+
+namespace mkfit {
+
+  //==============================================================================
+  // run_OneIteration
+  //
+  // One-stop function for running track building from CMSSW.
+  //==============================================================================
+
+  struct IterationMaskIfcCmssw : public IterationMaskIfcBase {
+    // Both members are references, so the tracker info and the mask vector must outlive this object.
+    const TrackerInfo &m_trk_info;
+    const std::vector<const std::vector<bool> *> &m_mask_vector;
+    IterationMaskIfcCmssw(const TrackerInfo &ti, const std::vector<const std::vector<bool> *> &maskvec)
+        : m_trk_info(ti), m_mask_vector(maskvec) {}
+    // Pixel layers get entry 0 of the mask vector, all other layers get entry 1.
+    const std::vector<bool> *get_mask_for_layer(int layer) const override {
+      return m_mask_vector[m_trk_info.layer(layer).is_pix_lyr() ? 0 : 1];
+    }
+  };
+
+  void run_OneIteration(const TrackerInfo &trackerInfo,
+                        const IterationConfig &itconf,
+                        const EventOfHits &eoh,
+                        const std::vector<const std::vector<bool> *> &hit_masks,
+                        MkBuilder &builder,
+                        TrackVec &seeds,
+                        TrackVec &out_tracks,
+                        bool do_seed_clean,
+                        bool do_backward_fit,
+                        bool do_remove_duplicates) {
+    // Job setup: the builder receives a pointer to a job that refers to the local mask interface.
+    IterationMaskIfcCmssw it_mask_ifc(trackerInfo, hit_masks);
+    MkJob job({trackerInfo, itconf, eoh, &it_mask_ifc});
+    builder.begin_event(&job, nullptr, __func__);
+
+    // Seed cleaning not done on pixelLess / tobTec iters
+    if (do_seed_clean && itconf.m_requires_dupclean_tight)
+      StdSeq::clean_cms_seedtracks_iter(&seeds, itconf, eoh.refBeamSpot());
+
+    // Check nans in seeds -- this should not be needed when Slava fixes
+    // the track parameter coordinate transformation.
+    builder.seed_post_cleaning(seeds);
+
+    if (itconf.m_requires_seed_hit_sorting) {
+      // sort seed hits for the matched hits (I hope it works here)
+      for (auto &seed : seeds)
+        seed.sortHitsByLayer();
+    }
+
+    builder.find_tracks_load_seeds(seeds);
+    builder.findTracksCloneEngine();
+
+    using Algo = TrackBase::TrackAlgorithm;
+    const Algo algo = Algo(itconf.m_track_algorithm);
+    const bool use_qfilter = itconf.m_requires_quality_filter;
+    // Quality filter applied right after forward building; the choice depends on the iteration algorithm.
+    if (use_qfilter) {
+      switch (algo) {
+        case Algo::detachedTripletStep:
+          break;  // this iteration is filtered only after the backward fit, see below
+        case Algo::pixelPairStep:
+          builder.filter_comb_cands([&](const TrackCand &t) { return StdSeq::qfilter_n_hits_pixseed(t, 3); });
+          break;
+        case Algo::pixelLessStep:
+          builder.filter_comb_cands(
+              [&](const TrackCand &t) { return StdSeq::qfilter_pixelLessFwd(t, eoh.refBeamSpot(), trackerInfo); });
+          break;
+        default:
+          builder.filter_comb_cands(
+              [&](const TrackCand &t) { return StdSeq::qfilter_n_hits(t, itconf.m_params.minHitsQF); });
+      }
+    }
+
+    if (do_backward_fit) {
+      if (itconf.m_backward_search)
+        builder.compactifyHitStorageForBestCand(itconf.m_backward_drop_seed_hits, itconf.m_backward_fit_min_hits);
+
+      builder.backwardFit();
+
+      if (itconf.m_backward_search) {
+        builder.beginBkwSearch();
+        builder.findTracksCloneEngine(SteeringParams::IT_BkwSearch);
+        builder.endBkwSearch();
+      }
+
+      // Post-fit quality filters exist only for these two iterations.
+      if (use_qfilter && algo == Algo::detachedTripletStep) {
+        builder.filter_comb_cands(
+            [&](const TrackCand &t) { return StdSeq::qfilter_n_layers(t, eoh.refBeamSpot(), trackerInfo); });
+      } else if (use_qfilter && algo == Algo::pixelLessStep) {
+        builder.filter_comb_cands(
+            [&](const TrackCand &t) { return StdSeq::qfilter_pixelLessBkwd(t, eoh.refBeamSpot(), trackerInfo); });
+      }
+    }
+
+    // Final NaN/silly-value filter, then export the best candidates and optionally remove duplicates.
+    builder.filter_comb_cands([&](const TrackCand &t) { return StdSeq::qfilter_nan_n_silly(t); });
+    builder.export_best_comb_cands(out_tracks, true);
+    if (do_remove_duplicates)
+      StdSeq::find_and_remove_duplicates(out_tracks, itconf);
+
+    builder.end_event();
+    builder.release_memory();
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCMS/standalone/Geoms/CMS-2017.cc b/RecoTracker/MkFitCMS/standalone/Geoms/CMS-2017.cc
new file mode 100644
index 0000000000000..4dc7529961699
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/Geoms/CMS-2017.cc
@@ -0,0 +1,7 @@
+//-------------------
+// CMS 2017 geometry
+//-------------------
+
+// Redirect to external geometry creator function.
+
+#include "mkfit-geom-cms-2017/CMS-2017.cc"
diff --git a/RecoTracker/MkFitCMS/standalone/Geoms/Makefile b/RecoTracker/MkFitCMS/standalone/Geoms/Makefile
new file mode 100644
index 0000000000000..b4897929bea6c
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/Geoms/Makefile
@@ -0,0 +1,48 @@
+include ${SADIR}/Makefile.config
+
+CPPEXTRA := ${USER_CPPFLAGS} ${DEFS} -I${SRCDIR} -I../mkFit-external
+LDEXTRA  := ${USER_LDFLAGS}
+
+CPPFLAGS := ${CPPEXTRA} ${CPPFLAGS}
+CXXFLAGS += -fPIC ${USER_CXXFLAGS}
+LDFLAGS  += ${LDEXTRA}
+
+CPPFLAGS_NO_ROOT := ${CPPEXTRA} ${CPPFLAGS_NO_ROOT}
+
+.PHONY: all clean distclean echo
+
+SRCS := ${SACMS}/Geoms/CMS-2017.cc
+SRCB := $(notdir ${SRCS})
+DEPS := $(SRCB:.cc=.d)
+OBJS := $(SRCB:.cc=.o)
+
+TGTS := $(basename ${OBJS})
+TGTS := $(addprefix ../, $(addsuffix .so, ${TGTS}))
+
+vpath %.cc ${SACMS}/Geoms
+
+all: ${TGTS}
+
+%.o: %.cc %.d
+	${CXX} ${CPPFLAGS} ${CXXFLAGS} ${VEC_HOST} -c -o $@ $<
+# Dependency file for each source, produced by MAKEDEPEND; it is also a prerequisite of the object above.
+%.d: %.cc
+	${MAKEDEPEND} -o $@ $<
+# One shared library per object, written to the parent directory and linked with -lMicCore found there.
+../%.so: %.o
+	${CXX} -shared -L.. -lMicCore -o $@ $<
+
+ifeq ($(filter clean distclean, ${MAKECMDGOALS}),)
+include ${DEPS}
+endif
+
+clean:
+	-rm -f *.so *.o *.om *.d *.optrpt
+
+distclean: clean
+	-rm -f ${TGTS}
+
+echo:
+	@echo SRCS = ${SRCS}
+	@echo DEPS = ${DEPS}
+	@echo OBJS = ${OBJS}
diff --git a/RecoTracker/MkFitCMS/standalone/Makefile b/RecoTracker/MkFitCMS/standalone/Makefile
new file mode 100644
index 0000000000000..af7eacfca8507
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/Makefile
@@ -0,0 +1,73 @@
+include ${SADIR}/Makefile.config
+
+CPPFLAGS := -I. -I${SRCDIR} -I../mkFit-external ${CPPFLAGS}
+
+CMS_DIR := ${SRCDIR}/RecoTracker/MkFitCMS
+
+LIB_CMS := ../libMicCMS.so
+MAIN    := ../mkFit
+WRMEMF  := ../writeMemoryFile
+
+TGTS := ${LIB_CMS} ${MAIN} ${WRMEMF}
+
+# Every target in this file that does not produce a file of its own name is declared phony.
+.PHONY: all clean clean-local distclean echo echo_cc_defs echo-srcs echo-flags echo-tbb
+all: ${TGTS}
+
+SRCS := $(wildcard ${CMS_DIR}/src/*.cc) $(wildcard ${SACMS}/*.cc) ${SACMS}/tkNtuple/WriteMemoryFile.cc
+SRCB := $(notdir ${SRCS})
+DEPS := $(SRCB:.cc=.d)
+OBJS := $(SRCB:.cc=.o)
+
+CMS_OBJS := $(filter-out mkFit.o WriteMemoryFile.o, ${OBJS})
+
+vpath %.cc ${CMS_DIR}/src ${SACMS} ${SACMS}/tkNtuple
+
+ifeq ($(filter clean-local clean distclean, ${MAKECMDGOALS}),)
+include ${DEPS}
+endif
+
+clean-local:
+	-rm -f ${TGTS} *.d *.o *.om *.so
+	-rm -rf main.dSYM
+	-rm -rf plotting/*.so plotting/*.d plotting/*.pcm
+
+clean: clean-local
+
+distclean: clean-local
+	-rm -f *.optrpt
+	-rm -f ${TGTS}
+
+${LIB_CMS}: ${CMS_OBJS}
+	@mkdir -p $(@D)
+	${CXX} ${CXXFLAGS} ${VEC_HOST} ${CMS_OBJS} -shared -o $@ ${LDFLAGS_HOST} ${LDFLAGS}
+
+${MAIN}: ${LIB_CMS} mkFit.o
+	${CXX} ${CXXFLAGS} ${VEC_HOST} ${LDFLAGS} mkFit.o -o $@ ${LDFLAGS_HOST} -ltbb -L.. -lMicCore -lMicCMS -Wl,-rpath=.
+
+${WRMEMF}: WriteMemoryFile.o
+	${CXX} ${CXXFLAGS} ${LDFLAGS} $^ -o $@ ${LDFLAGS_HOST} -ltbb -L.. -lMicCore -Wl,-rpath=.
+
+${OBJS}: %.o: %.cc %.d
+	${CXX} ${CPPFLAGS} ${CXXFLAGS} ${VEC_HOST} -c -o $@ $<
+
+%.d: %.cc
+	${MAKEDEPEND} -o $@ $<
+
+echo:
+	@echo "CXX=${CXX}"
+	@echo SRCS = ${SRCS}
+	@echo DEPS = ${DEPS}
+	@echo OBJS = ${OBJS}
+
+echo_cc_defs:
+	${CXX} -dM -E -mavx2 - < /dev/null
+
+echo-srcs:
+	@echo ${SRCS}
+
+echo-flags:
+	@echo "CPPFLAGS=${CPPFLAGS}"
+
+echo-tbb:
+	@echo "TBB_GCC=${TBB_GCC}, TBB_PREFIX=${TBB_PREFIX}, TBB_ROOT=${TBB_ROOT}"
diff --git a/RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.cc b/RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.cc
new file mode 100644
index 0000000000000..cee972d963b9b
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.cc
@@ -0,0 +1,404 @@
+#include "RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h"
+#include "RecoTracker/MkFitCMS/interface/MkStdSeqs.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/standalone/Event.h"
+
+#include "RecoTracker/MkFitCore/src/Debug.h"
+
+#include "oneapi/tbb/parallel_for.h"
+
+namespace mkfit {
+
+  namespace StdSeq {
+
+    //=========================================================================
+    // Hit processing
+    //=========================================================================
+
+    void loadHitsAndBeamSpot(Event &ev, EventOfHits &eoh) {
+      // Reset eoh, hand it the hit vector of every layer of the event, then set its beam spot.
+      eoh.reset();
+
+      // XXXXMT: Does it really make sense to multi-thread this?
+      const tbb::blocked_range<int> all_layers(0, ev.layerHits_.size());
+      tbb::parallel_for(all_layers, [&](const tbb::blocked_range<int> &chunk) {
+        for (int lay = chunk.begin(); lay < chunk.end(); ++lay)
+          eoh.suckInHits(lay, ev.layerHits_[lay]);
+      });
+      eoh.setBeamSpot(ev.beamSpot_);
+    }
+
+    void handle_duplicates(Event *event) {
+      // Run find_duplicates on the event's track collections; a no-op unless Config::removeDuplicates is set.
+      if (!Config::removeDuplicates)
+        return;
+
+      // Candidate tracks are processed in every configuration.
+      find_duplicates(event->candidateTracks_);
+
+      // Fit tracks are processed only when a validation flag is set and the backward fit is enabled.
+      // For the MEIF benchmarks and the stress tests no validation flags are set, so only the
+      // candidate tracks are handled there.
+      const bool validating = Config::quality_val || Config::sim_val || Config::cmssw_val;
+      if (validating && Config::backwardFit)
+        find_duplicates(event->fitTracks_);
+    }
+
+    //=========================================================================
+    // Random stuff
+    //=========================================================================
+
+    void dump_simtracks(Event *event) {
+      // Ripped out of MkBuilder::begin_event, ifdefed under DEBUG
+      // Emits, through dprint, one summary line per sim track and then one line per found hit of each track.
+      std::vector<Track> &simtracks = event->simTracks_;
+
+      for (int itrack = 0; itrack < (int)simtracks.size(); ++itrack) {
+        // bool debug = true;
+        const Track &track = simtracks[itrack];  // by reference: printing does not need a copy of the track
+        // simtracks are initially written with label = index; uncomment in case tracks were edited
+        // if (track.label() != itrack) {
+        //   dprintf("Bad label for simtrack %d -- %d\n", itrack, track.label());
+        // }
+
+        dprint("MX - simtrack with nHits=" << track.nFoundHits() << " chi2=" << track.chi2() << " pT=" << track.pT()
+                                           << " phi=" << track.momPhi() << " eta=" << track.momEta());
+      }
+
+      for (int itrack = 0; itrack < (int)simtracks.size(); ++itrack) {
+        for (int ihit = 0; ihit < simtracks[itrack].nFoundHits(); ++ihit) {
+          dprint("track #" << itrack << " hit #" << ihit
+                           << " hit pos=" << simtracks[itrack].hitsVector(event->layerHits_)[ihit].position()
+                           << " phi=" << simtracks[itrack].hitsVector(event->layerHits_)[ihit].phi());
+        }
+      }
+    }
+
+    void track_print(Event *event, const Track &t, const char *pref) {
+      // Print a summary line for t, its state, and one line per hit slot (a negative index gets no hit lookup).
+      printf("%s with q=%+i pT=%7.3f eta=% 7.3f nHits=%2d  label=%4d\nState:\n",
+             pref,
+             t.charge(),
+             t.pT(),
+             t.momEta(),
+             t.nFoundHits(),
+             t.label());
+      print(t.state());
+      printf("Hits:\n");
+      for (int ih = 0; ih < t.nTotalHits(); ++ih) {
+        const int lyr = t.getHitLyr(ih);
+        const int idx = t.getHitIdx(ih);
+        if (idx < 0) {
+          printf("    hit %2d        idx=%i\n", ih, idx);
+          continue;
+        }
+        const Hit &hit = event->layerHits_[lyr][idx];
+        printf("    hit %2d lyr=%2d idx=%4d pos r=%7.3f z=% 8.3f   mc_hit=%4d mc_trk=%4d\n",
+               ih,
+               lyr,
+               idx,
+               hit.r(),
+               hit.z(),
+               hit.mcHitID(),
+               hit.mcTrackID(event->simHitsInfo_));
+      }
+    }
+
+    //------------------------------------------------------------------------------
+    // Non-ROOT validation
+    //------------------------------------------------------------------------------
+
+    void Quality::quality_val(Event *event) {
+      // Reset the counters, process every candidate track of the event, then print the totals.
+      quality_reset();
+      // label -> position of the CMSSW tracks; filled only if dumpForPlots and readCmsswTracks are both set
+      std::map<int, int> cmsswLabelToPos;
+      if (Config::dumpForPlots && Config::readCmsswTracks) {
+        const auto &cmssw = event->cmsswTracks_;
+        for (size_t pos = 0; pos < cmssw.size(); ++pos)
+          cmsswLabelToPos[cmssw[pos].label()] = pos;
+      }
+
+      auto &cands = event->candidateTracks_;
+      for (size_t icand = 0; icand < cands.size(); ++icand)
+        quality_process(event, cands[icand], icand, cmsswLabelToPos);
+      quality_print();
+    }
+
+    void Quality::quality_reset() { m_cnt = m_cnt1 = m_cnt2 = m_cnt_8 = m_cnt1_8 = m_cnt2_8 = m_cnt_nomc = 0; }
+
+    void Quality::quality_process(Event *event, Track &tkcand, const int itrack, std::map<int, int> &cmsswLabelToPos) {
+      // Match one candidate to its seed and to MC truth, update the m_cnt* counters and optionally dump a line.
+      // KPM: Do not use this method for validating CMSSW tracks if we ever build a DumbCMSSW function for them to
+      // print out... as we would need to access seeds through map of seed ids...
+      // initialize track extra (input original seed label)
+      const auto label = tkcand.label();
+      TrackExtra extra(label);
+
+      // track_print(event, tkcand, "XXX");
+
+      // the seed is taken at the candidate's own position (itrack); set matching seed hits
+      const auto &seed = event->seedTracks_[itrack];
+      extra.findMatchingSeedHits(tkcand, seed, event->layerHits_);
+
+      // set mcTrackID through 50% hit matching after seed
+      extra.setMCTrackIDInfo(
+          tkcand, event->layerHits_, event->simHitsInfo_, event->simTracks_, false, (Config::seedInput == simSeeds));
+      const int mctrk = extra.mcTrackID();
+
+      //  int mctrk = tkcand.label(); // assumes 100% "efficiency"
+
+      const float pT = tkcand.pT();
+      float pTmc = 0.f, etamc = 0.f, phimc = 0.f;
+      float pTr = 0.f;
+      int nfoundmc = -1;
+
+      if (mctrk < 0 || static_cast<size_t>(mctrk) >= event->simTracks_.size()) {
+        ++m_cnt_nomc;
+        dprint("XX bad track idx " << mctrk << ", orig label was " << label);
+      } else {
+        auto &simtrack = event->simTracks_[mctrk];
+        pTmc = simtrack.pT();
+        etamc = simtrack.momEta();
+        phimc = simtrack.momPhi();
+        pTr = pT / pTmc;
+
+        nfoundmc = simtrack.nUniqueLayers();
+
+        ++m_cnt;
+        if (pTr > 0.9 && pTr < 1.1)
+          ++m_cnt1;
+        if (pTr > 0.8 && pTr < 1.2)
+          ++m_cnt2;
+
+        if (tkcand.nFoundHits() >= 0.8f * nfoundmc) {
+          ++m_cnt_8;
+          if (pTr > 0.9 && pTr < 1.1)
+            ++m_cnt1_8;
+          if (pTr > 0.8 && pTr < 1.2)
+            ++m_cnt2_8;
+        }
+
+        // perl -ne 'print if m/FOUND_LABEL\s+[-\d]+/o;' | sort -k2 -n
+        // grep "FOUND_LABEL" | sort -n -k 8,8 -k 2,2
+        // printf("FOUND_LABEL %6d  pT_mc= %8.2f eta_mc= %8.2f event= %d\n", label, pTmc, etamc, event->evtID());
+      }
+
+#ifdef SELECT_SEED_LABEL
+      if (label == SELECT_SEED_LABEL)
+        track_print(event, tkcand, "MkBuilder::quality_process SELECT_SEED_LABEL:");
+#endif
+
+      float pTcmssw = 0.f, etacmssw = 0.f, phicmssw = 0.f;
+      int nfoundcmssw = -1;
+      if (Config::dumpForPlots && Config::readCmsswTracks) {
+        if (const auto pos = cmsswLabelToPos.find(label); pos != cmsswLabelToPos.end()) {
+          auto &cmsswtrack = event->cmsswTracks_[pos->second];
+          pTcmssw = cmsswtrack.pT();
+          etacmssw = cmsswtrack.momEta();
+          phicmssw = cmsswtrack.swimPhiToR(tkcand.x(), tkcand.y());  // to get rough estimate of diff in phi
+          nfoundcmssw = cmsswtrack.nUniqueLayers();
+        }
+      }
+
+      if (!Config::silent && Config::dumpForPlots) {
+        std::lock_guard<std::mutex> printlock(Event::printmutex);
+        printf(
+            "MX - found track with chi2= %6.3f nFoundHits= %2d pT= %7.4f eta= %7.4f phi= %7.4f nfoundmc= %2d pTmc= "
+            "%7.4f etamc= %7.4f phimc= %7.4f nfoundcmssw= %2d pTcmssw= %7.4f etacmssw= %7.4f phicmssw= %7.4f lab= %d\n",
+            tkcand.chi2(),
+            tkcand.nFoundHits(),
+            pT,
+            tkcand.momEta(),
+            tkcand.momPhi(),
+            nfoundmc,
+            pTmc,
+            etamc,
+            phimc,
+            nfoundcmssw,
+            pTcmssw,
+            etacmssw,
+            phicmssw,
+            label);
+      }
+    }
+
+    void Quality::quality_print() {
+      // Two summary lines of the accumulated counters; nothing is printed when Config::silent is set.
+      if (Config::silent)
+        return;
+      std::lock_guard<std::mutex> printlock(Event::printmutex);
+      std::cout << "found tracks=" << m_cnt << "  in pT 10%=" << m_cnt1 << "  in pT 20%=" << m_cnt2
+                << "     no_mc_assoc=" << m_cnt_nomc << std::endl;
+      std::cout << "  nH >= 80% =" << m_cnt_8 << "  in pT 10%=" << m_cnt1_8 << "  in pT 20%=" << m_cnt2_8 << std::endl;
+    }
+
+    //------------------------------------------------------------------------------
+    // Root validation
+    //------------------------------------------------------------------------------
+
+    void root_val_dumb_cmssw(Event *event) {
+      // Validation in which the CMSSW tracks are copied into both the candidate and the fit collections.
+      Event &ev = *event;
+      // get labels correct first
+      ev.relabel_bad_seedtracks();
+      ev.relabel_cmsswtracks_from_seeds();
+
+      // collection cleaning
+      if (Config::nItersCMSSW > 0)
+        ev.select_tracks_iter(Config::nItersCMSSW);
+
+      // copy cmssw tracks into candidates, and candidates into fit tracks
+      ev.candidateTracks_ = ev.cmsswTracks_;
+      ev.fitTracks_ = ev.candidateTracks_;
+
+      // prep the tracks + extras, then validate
+      prep_simtracks(event);
+      prep_recotracks(event);
+      ev.validate();
+    }
+
+    void root_val(Event *event) {
+      // score seeds and candidates first
+      score_tracks(event->seedTracks_);
+      score_tracks(event->candidateTracks_);
+
+      // fit tracks: scored when Config::backwardFit is set, otherwise overwritten with the candidates
+      if (!Config::backwardFit)
+        event->fitTracks_ = event->candidateTracks_;
+      else
+        score_tracks(event->fitTracks_);
+
+      // make extras for the reco tracks, and for the cmssw tracks when validating against them
+      prep_recotracks(event);
+      if (Config::cmssw_val)
+        prep_cmsswtracks(event);
+
+      // validate
+      event->validate();
+    }
+
+    void prep_recotracks(Event *event) {
+      // Seed extras are made for sim validation (realigned) and for CMSSW validation (not realigned:
+      // seeds are not validated there, labels are used for maps --> do NOT align index and labels!).
+      const bool sim_like = Config::sim_val || Config::sim_val_for_cmssw;
+      if (sim_like || Config::cmssw_val)
+        prep_tracks(event, event->seedTracks_, event->seedTracksExtra_, sim_like);
+
+      // make extras + align index == label() for candidate and fit tracks
+      TrackVec &cands = event->candidateTracks_;
+      TrackVec &fits = event->fitTracks_;
+      prep_tracks(event, cands, event->candidateTracksExtra_, true);
+      prep_tracks(event, fits, event->fitTracksExtra_, true);
+    }
+
+    void prep_simtracks(Event *event) {
+      // First prep sim tracks (make their extras), then mark unfindable if too short
+      prep_reftracks(event, event->simTracks_, event->simTracksExtra_, false);
+
+      // Now, make sure each sim track is fully matched to at least one cmssw seed: the layers it shares with
+      // that seed must number as many as the seed's nTotalHits(). This factors out any weakness from CMSSW.
+
+      // First, make a map of [lyr][hit idx].vector(seed trk labels)
+      LayIdxIDVecMapMap seedHitIDMap;
+      std::map<int, int> labelNHitsMap;
+      std::map<int, int> labelAlgoMap;
+      std::map<int, std::vector<int>> labelSeedHitsMap;
+      for (const auto &seedtrack : event->seedTracks_) {
+        for (int ihit = 0; ihit < seedtrack.nTotalHits(); ihit++) {
+          const auto lyr = seedtrack.getHitLyr(ihit);
+          const auto idx = seedtrack.getHitIdx(ihit);
+
+          if (lyr < 0 || idx < 0)
+            continue;  // standard check
+          seedHitIDMap[lyr][idx].push_back(seedtrack.label());
+          labelSeedHitsMap[seedtrack.label()].push_back(lyr);
+        }
+        labelNHitsMap[seedtrack.label()] = seedtrack.nTotalHits();
+        labelAlgoMap[seedtrack.label()] = seedtrack.algoint();
+      }
+
+      // Then, loop over sim tracks, and add up how many lyrs they possess of a single seed track.
+      // Loop by index: isim also addresses simTracksExtra_, so it must advance for skipped tracks too.
+      for (size_t isim = 0; isim < event->simTracks_.size(); ++isim) {
+        auto &simtrack = event->simTracks_[isim];
+        if (simtrack.isNotFindable())
+          continue;  // skip ones we already know are bad
+        TrkIDLaySetMap seedIDMap;
+        for (int ihit = 0; ihit < simtrack.nTotalHits(); ihit++) {
+          const auto lyr = simtrack.getHitLyr(ihit);
+          const auto idx = simtrack.getHitIdx(ihit);
+
+          if (lyr < 0 || idx < 0)
+            continue;  // standard check
+
+          if (!seedHitIDMap.count(lyr))
+            continue;  // ensure seed hit map has at least one entry for this layer
+          if (!seedHitIDMap.at(lyr).count(idx))
+            continue;  // ensure seed hit map has at least one entry for this idx
+
+          for (const auto label : seedHitIDMap.at(lyr).at(idx)) {
+            const auto &seedLayers = labelSeedHitsMap[label];
+            if (std::find(seedLayers.begin(), seedLayers.end(), lyr) != seedLayers.end())  //seed check moved here
+              seedIDMap[label].emplace(lyr);
+          }
+        }
+
+        // now see if one of the matched seeds has as many shared layers as it has hits
+        bool isSimSeed = false;
+        for (const auto &seedIDpair : seedIDMap) {
+          if ((int)seedIDpair.second.size() == labelNHitsMap[seedIDpair.first]) {
+            isSimSeed = true;
+            if (Config::mtvRequireSeeds) {
+              simtrack.setAlgoint(labelAlgoMap[seedIDpair.first]);
+              event->simTracksExtra_[isim].addAlgo(labelAlgoMap[seedIDpair.first]);
+            }
+            // no break: every fully matched seed adds its algo to the extra
+          }
+        }
+        if (Config::mtvLikeValidation) {
+          // Apply MTV selection criteria
+          if (simtrack.prodType() != Track::ProdType::Signal || simtrack.charge() == 0 || simtrack.posR() > 2.5 ||
+              std::abs(simtrack.z()) > 30 || std::abs(simtrack.momEta()) > 3.0)
+            simtrack.setNotFindable();
+          else if (Config::mtvRequireSeeds && !isSimSeed)
+            simtrack.setNotFindable();
+        } else {
+          // set findability based on bool isSimSeed
+          if (!isSimSeed)
+            simtrack.setNotFindable();
+        }
+      }
+    }
+
+    void prep_cmsswtracks(Event *event) { prep_reftracks(event, event->cmsswTracks_, event->cmsswTracksExtra_, true); }
+
+    void prep_reftracks(Event *event, TrackVec &tracks, TrackExtraVec &extras, const bool realigntracks) {
+      // Make the extras via prep_tracks, then flag reference tracks that are too short.
+      prep_tracks(event, tracks, extras, realigntracks);
+      // a track with fewer unique layers than Config::cmsSelMinLayers is marked not findable
+      for (auto &trk : tracks) {
+        const int n_layers = trk.nUniqueLayers();
+        if (n_layers < Config::cmsSelMinLayers)
+          trk.setNotFindable();
+      }
+    }
+
+    void prep_tracks(Event *event, TrackVec &tracks, TrackExtraVec &extras, const bool realigntracks) {
+      // Append one extra per track, constructed from the track label; realign afterwards if requested.
+      for (const auto &trk : tracks)
+        extras.emplace_back(trk.label());
+      if (realigntracks)
+        event->validation_.alignTracks(tracks, extras, false);
+    }
+
+    void score_tracks(TrackVec &tracks) {
+      // assign every track the score computed for it by getScoreCand
+      for (auto &trk : tracks)
+        trk.setScore(getScoreCand(trk));
+    }
+
+  }  // namespace StdSeq
+
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h b/RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h
new file mode 100644
index 0000000000000..816f4ab84c069
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h
@@ -0,0 +1,53 @@
+#ifndef RecoTracker_MkFitCMS_standalone_MkStandaloneSeqs_h
+#define RecoTracker_MkFitCMS_standalone_MkStandaloneSeqs_h
+
+#include <vector>
+#include <map>
+
+namespace mkfit {
+
+  class EventOfHits;
+  class Track;
+  class TrackExtra;
+  typedef std::vector<Track> TrackVec;
+  typedef std::vector<TrackExtra> TrackExtraVec;
+
+  class Event;
+
+  namespace StdSeq {
+
+    void loadHitsAndBeamSpot(Event &ev, EventOfHits &eoh);
+
+    void handle_duplicates(Event *event);
+
+    void dump_simtracks(Event *event);
+    void track_print(Event *event, const Track &t, const char *pref);
+
+    // Validation quality & ROOT
+    //--------------------------
+
+    struct Quality {
+      int m_cnt = 0, m_cnt1 = 0, m_cnt2 = 0, m_cnt_8 = 0, m_cnt1_8 = 0, m_cnt2_8 = 0, m_cnt_nomc = 0;
+      // Counters updated by quality_process(); m_cnt_nomc counts candidates without a valid MC track index.
+      void quality_val(Event *event);  // reset, process every candidate track of the event, print
+      void quality_reset();            // set all counters to 0
+      void quality_process(Event *event, Track &tkcand, const int itrack, std::map<int, int> &cmsswLabelToPos);
+      void quality_print();  // print the counters unless Config::silent is set
+    };
+
+    void root_val_dumb_cmssw(Event *event);
+    void root_val(Event *event);
+
+    void prep_recotracks(Event *event);
+    void prep_simtracks(Event *event);
+    void prep_cmsswtracks(Event *event);
+    void prep_reftracks(Event *event, TrackVec &tracks, TrackExtraVec &extras, const bool realigntracks);
+    // sort hits by layer, init track extras, align track labels if true
+    void prep_tracks(Event *event, TrackVec &tracks, TrackExtraVec &extras, const bool realigntracks);
+    void score_tracks(TrackVec &tracks);  // if track score is not already assigned
+
+  }  // namespace StdSeq
+
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCMS/standalone/buildtestMPlex.cc b/RecoTracker/MkFitCMS/standalone/buildtestMPlex.cc
new file mode 100644
index 0000000000000..e6e83b8e6b441
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/buildtestMPlex.cc
@@ -0,0 +1,569 @@
+#include "RecoTracker/MkFitCMS/standalone/buildtestMPlex.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+#include "RecoTracker/MkFitCore/src/Matrix.h"
+#include "RecoTracker/MkFitCore/interface/MkBuilder.h"
+#include "RecoTracker/MkFitCMS/interface/MkStdSeqs.h"
+#include "RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h"
+
+#include "oneapi/tbb/parallel_for.h"
+
+#include <memory>
+
+namespace mkfit {
+
+  inline bool sortByHitsChi2(const std::pair<Track, TrackState> &cand1, const std::pair<Track, TrackState> &cand2) {
+    // More found hits sorts first; equal hit counts are ordered by ascending chi2.
+    const auto nHits1 = cand1.first.nFoundHits();
+    const auto nHits2 = cand2.first.nFoundHits();
+    return (nHits1 != nHits2) ? (nHits1 > nHits2) : (cand1.first.chi2() < cand2.first.chi2());
+  }
+
+  inline bool sortByPhi(const Hit &hit1, const Hit &hit2) {
+    return std::atan2(hit1.y(), hit1.x()) < std::atan2(hit2.y(), hit2.x());  // ascending atan2(y, x)
+  }
+  // Orders hits by ascending eta().
+  inline bool sortByEta(const Hit &hit1, const Hit &hit2) { return hit1.eta() < hit2.eta(); }
+  // Orders tracks by ascending momEta().
+  inline bool sortTracksByEta(const Track &track1, const Track &track2) { return track1.momEta() < track2.momEta(); }
+  // Orders tracks by ascending momPhi(); a plain '<' with no wrap-around handling.
+  inline bool sortTracksByPhi(const Track &track1, const Track &track2) { return track1.momPhi() < track2.momPhi(); }
+
+  struct sortTracksByPhiStruct {
+    const std::vector<std::vector<Track>> &m_track_candidates;  // not owned; must outlive the comparator
+    // Takes a pointer but stores a reference, so track_candidates must not be null.
+    sortTracksByPhiStruct(std::vector<std::vector<Track>> *track_candidates) : m_track_candidates(*track_candidates) {}
+    // Compares (outer index, inner index) pairs by posPhi() of the tracks they address; const as it mutates nothing.
+    bool operator()(const std::pair<int, int> &track1, const std::pair<int, int> &track2) const {
+      return m_track_candidates[track1.first][track1.second].posPhi() <
+             m_track_candidates[track2.first][track2.second].posPhi();
+    }
+  };
+
+  // Orders hits by ascending z(); within a layer with a "reasonable" geometry this is the same as ordering by eta.
+  inline bool sortByZ(const Hit &hit1, const Hit &hit2) { return hit1.z() < hit2.z(); }
+
+  //==============================================================================
+  // NaN and Silly track parameter check
+  //==============================================================================
+
+  namespace {
+
+    int check_nan_n_silly(TrackVec &tracks, const char *prefix) {
+      // Tally the tracks that Track::hasSillyValues() flags; every track is visited and
+      // prefix is forwarded unchanged to that call.
+      int n_silly = 0;
+      for (Track &trk : tracks)
+        n_silly += trk.hasSillyValues(Const::nan_n_silly_print_bad_cands_bkfit, false, prefix) ? 1 : 0;
+
+      return n_silly;
+    }
+
+    void check_nan_n_silly_candidates(Event &ev) {
+      // MIMI -- nan_n_silly_per_layer_count is in MkBuilder, could be in MkJob.
+      // if (Const::nan_n_silly_check_cands_every_layer)
+      // {
+      //   int sc = (int) ev.nan_n_silly_per_layer_count_;
+      //   if (sc > 0)
+      //     printf("Nan'n'Silly: Number of silly candidates over all layers = %d\n", sc);
+      // }
+      if (!Const::nan_n_silly_check_cands_pre_bkfit)
+        return;  // pre-backward-fit check is switched off
+      const int n_silly = check_nan_n_silly(ev.candidateTracks_, "Pre-bkfit silly check");
+      if (n_silly > 0)  // stay quiet when no candidate was flagged
+        printf("Nan'n'Silly: Number of silly pre-bkfit candidates = %d\n", n_silly);
+    }
+
+    void check_nan_n_silly_bkfit(Event &ev) {
+      if (!Const::nan_n_silly_check_cands_post_bkfit)
+        return;  // post-backward-fit check is switched off
+      const int n_silly = check_nan_n_silly(ev.fitTracks_, "Post-bkfit silly check");
+      if (n_silly > 0)  // stay quiet when no fitted track was flagged
+        printf("Nan'n'Silly: Number of silly post-bkfit candidates = %d\n", n_silly);
+    }
+
+  }  // namespace
+
+  //==============================================================================
+  // runBuildingTestPlexDumbCMSSW
+  //==============================================================================
+
+  void runBuildingTestPlexDumbCMSSW(Event &ev, const EventOfHits &eoh, MkBuilder &builder) {
+    const IterationConfig &itconf = Config::ItrInfo[0];  // always the first iteration's configuration
+    // No track finding happens here: the builder is only attached to the event around the validation call.
+    MkJob job({Config::TrkInfo, itconf, eoh});
+
+    builder.begin_event(&job, &ev, __func__);
+
+    if (Config::sim_val_for_cmssw) {
+      StdSeq::root_val_dumb_cmssw(&ev);
+    }
+
+    builder.end_event();
+  }
+
+  //==============================================================================
+  // runBuildingTestPlexBestHit
+  //==============================================================================
+
+  double runBuildingTestPlexBestHit(Event &ev, const EventOfHits &eoh, MkBuilder &builder) {
+    const IterationConfig &itconf = Config::ItrInfo[0];  // this test always uses the first iteration's config
+    // Runs findTracksBestHit() on the event's seeds and returns the seconds spent in that call alone.
+    const bool validation_on = (Config::sim_val || Config::quality_val);
+
+    if (validation_on) {
+      TrackVec seeds1;
+
+      unsigned int algorithms[] = {4};  //only initialStep
+
+      for (auto const &s : ev.seedTracks_) {
+        //keep seeds from the first iteration for processing
+        if (std::find(algorithms, algorithms + 1, s.algoint()) != algorithms + 1)
+          seeds1.push_back(s);
+      }
+      ev.seedTracks_.swap(seeds1);  //necessary for the validation - PrepareSeeds
+      ev.relabel_bad_seedtracks();  //necessary for the validation - PrepareSeeds
+    }
+
+    IterationMaskIfc mask_ifc;
+
+    // To disable hit-masks, pass nullptr in place of &mask_ifc to MkJob ctor
+    // and optionally comment out ev.fill_hitmask_bool_vectors() call.
+
+    ev.fill_hitmask_bool_vectors(itconf.m_track_algorithm, mask_ifc.m_mask_vector);
+
+    MkJob job({Config::TrkInfo, itconf, eoh, &mask_ifc});
+
+    builder.begin_event(&job, &ev, __func__);
+
+    // CCCC builder.PrepareSeeds();
+
+    // EventOfCandidates event_of_cands;
+    builder.find_tracks_load_seeds_BH(ev.seedTracks_);
+
+#ifdef USE_VTUNE_PAUSE
+    __SSC_MARK(0x111);  // use this to resume Intel SDE at the same point
+    __itt_resume();
+#endif
+
+    double time = dtime();
+
+    builder.findTracksBestHit();
+
+    time = dtime() - time;  // elapsed seconds of the call above (dtime() is based on gettimeofday())
+
+#ifdef USE_VTUNE_PAUSE
+    __itt_pause();
+    __SSC_MARK(0x222);  // use this to pause Intel SDE at the same point
+#endif
+
+    // Hack, get the tracks out.
+    ev.candidateTracks_ = builder.ref_tracks();
+
+    // For best hit, the candidateTracks_ vector is the direct input to the backward fit so only need to do find_duplicates once
+    if (Config::quality_val || Config::sim_val || Config::cmssw_val) {
+      //Mark tracks as duplicates; if within CMSSW, remove duplicate tracks before backward fit
+      if (Config::removeDuplicates) {
+        StdSeq::find_duplicates(ev.candidateTracks_);
+      }
+    }
+
+    // now do backwards fit... do we want to time this section?
+    if (Config::backwardFit) {
+      builder.backwardFitBH();
+      ev.fitTracks_ = builder.ref_tracks();
+    }
+    // Validation: quality_val wins; root_val() runs only if it is off and sim_val or cmssw_val is set.
+    if (Config::quality_val) {
+      StdSeq::Quality qval;
+      qval.quality_val(&ev);
+    } else if (Config::sim_val || Config::cmssw_val) {
+      StdSeq::root_val(&ev);
+    }
+
+    builder.end_event();
+
+    // ev.print_tracks(ev.candidateTracks_, true);
+
+    return time;
+  }
+
+  //==============================================================================
+  // runBuildTestPlex Combinatorial: Standard TBB
+  //==============================================================================
+
+  double runBuildingTestPlexStandard(Event &ev, const EventOfHits &eoh, MkBuilder &builder) {
+    const IterationConfig &itconf = Config::ItrInfo[0];  // this test always uses the first iteration's config
+    // Runs findTracksStandard() on the event's seeds and returns the seconds spent in that call alone.
+    const bool validation_on = (Config::sim_val || Config::quality_val);
+
+    if (validation_on) {
+      TrackVec seeds1;
+
+      unsigned int algorithms[] = {4};  //only initialStep
+
+      for (auto const &s : ev.seedTracks_) {
+        //keep seeds from the first iteration for processing
+        if (std::find(algorithms, algorithms + 1, s.algoint()) != algorithms + 1)
+          seeds1.push_back(s);
+      }
+      ev.seedTracks_.swap(seeds1);  //necessary for the validation - PrepareSeeds
+      ev.relabel_bad_seedtracks();  //necessary for the validation - PrepareSeeds
+    }
+
+    IterationMaskIfc mask_ifc;
+
+    // To disable hit-masks, pass nullptr in place of &mask_ifc to MkJob ctor
+    // and optionally comment out ev.fill_hitmask_bool_vectors() call.
+
+    ev.fill_hitmask_bool_vectors(itconf.m_track_algorithm, mask_ifc.m_mask_vector);
+
+    MkJob job({Config::TrkInfo, itconf, eoh, &mask_ifc});
+
+    builder.begin_event(&job, &ev, __func__);
+
+    // CCCC builder.PrepareSeeds();
+
+    builder.find_tracks_load_seeds(ev.seedTracks_);
+
+#ifdef USE_VTUNE_PAUSE
+    __SSC_MARK(0x111);  // use this to resume Intel SDE at the same point
+    __itt_resume();
+#endif
+
+    double time = dtime();
+
+    builder.findTracksStandard();
+
+    time = dtime() - time;  // elapsed seconds of the call above (dtime() is based on gettimeofday())
+
+#ifdef USE_VTUNE_PAUSE
+    __itt_pause();
+    __SSC_MARK(0x222);  // use this to pause Intel SDE at the same point
+#endif
+
+    check_nan_n_silly_candidates(ev);
+
+    // first store candidate tracks
+    builder.export_best_comb_cands(ev.candidateTracks_);
+
+    // now do backwards fit... do we want to time this section?
+    if (Config::backwardFit) {
+      // Using the TrackVec version until we home in on THE backward fit etc.
+      // builder.backwardFit();
+      builder.select_best_comb_cands();
+      builder.backwardFitBH();
+      ev.fitTracks_ = builder.ref_tracks();
+
+      check_nan_n_silly_bkfit(ev);
+    }
+
+    StdSeq::handle_duplicates(&ev);
+    // Validation: quality_val wins; root_val() runs only if it is off and sim_val or cmssw_val is set.
+    if (Config::quality_val) {
+      StdSeq::Quality qval;
+      qval.quality_val(&ev);
+    } else if (Config::sim_val || Config::cmssw_val) {
+      StdSeq::root_val(&ev);
+    }
+
+    builder.end_event();
+
+    // ev.print_tracks(ev.candidateTracks_, true);
+
+    return time;
+  }
+
+  //==============================================================================
+  // runBuildTestPlex Combinatorial: CloneEngine TBB
+  //==============================================================================
+
+  double runBuildingTestPlexCloneEngine(Event &ev, const EventOfHits &eoh, MkBuilder &builder) {
+    const IterationConfig &itconf = Config::ItrInfo[0];  // this test always uses the first iteration's config
+    // Runs findTracksCloneEngine() on the event's seeds and returns the seconds spent in that call alone.
+    const bool validation_on = (Config::sim_val || Config::quality_val);
+
+    if (validation_on) {
+      TrackVec seeds1;
+
+      unsigned int algorithms[] = {4};  //only initialStep
+
+      for (auto const &s : ev.seedTracks_) {
+        //keep seeds from the first iteration for processing
+        if (std::find(algorithms, algorithms + 1, s.algoint()) != algorithms + 1)
+          seeds1.push_back(s);
+      }
+      ev.seedTracks_.swap(seeds1);  //necessary for the validation - PrepareSeeds
+      ev.relabel_bad_seedtracks();  //necessary for the validation - PrepareSeeds
+    }
+
+    IterationMaskIfc mask_ifc;
+
+    // To disable hit-masks, pass nullptr in place of &mask_ifc to MkJob ctor
+    // and optionally comment out ev.fill_hitmask_bool_vectors() call.
+
+    ev.fill_hitmask_bool_vectors(itconf.m_track_algorithm, mask_ifc.m_mask_vector);
+
+    MkJob job({Config::TrkInfo, itconf, eoh, &mask_ifc});
+
+    builder.begin_event(&job, &ev, __func__);
+
+    // CCCC builder.PrepareSeeds();
+
+    builder.find_tracks_load_seeds(ev.seedTracks_);
+
+#ifdef USE_VTUNE_PAUSE
+    __SSC_MARK(0x111);  // use this to resume Intel SDE at the same point
+    __itt_resume();
+#endif
+
+    double time = dtime();
+
+    builder.findTracksCloneEngine();
+
+    time = dtime() - time;  // elapsed seconds of the call above (dtime() is based on gettimeofday())
+
+#ifdef USE_VTUNE_PAUSE
+    __itt_pause();
+    __SSC_MARK(0x222);  // use this to pause Intel SDE at the same point
+#endif
+
+    check_nan_n_silly_candidates(ev);
+
+    // first store candidate tracks - needed for BH backward fit and root_validation
+    builder.export_best_comb_cands(ev.candidateTracks_);
+
+    // now do backwards fit... do we want to time this section?
+    if (Config::backwardFit) {
+      // a) TrackVec version:
+      builder.select_best_comb_cands();
+      builder.backwardFitBH();
+      ev.fitTracks_ = builder.ref_tracks();
+
+      // b) Version that runs on CombCand / TrackCand
+      // builder.backwardFit();
+      // builder.quality_store_tracks(ev.fitTracks_);
+
+      check_nan_n_silly_bkfit(ev);
+    }
+
+    StdSeq::handle_duplicates(&ev);
+
+    // validation section: quality_val wins; root_val() runs only if it is off and sim_val or cmssw_val is set
+    if (Config::quality_val) {
+      StdSeq::Quality qval;
+      qval.quality_val(&ev);
+    } else if (Config::sim_val || Config::cmssw_val) {
+      StdSeq::root_val(&ev);
+    }
+
+    builder.end_event();
+
+    // ev.print_tracks(ev.candidateTracks_, true);
+
+    return time;
+  }
+
+  //==============================================================================
+  // runBtpCe_MultiIter
+  //
+  // Prototype for running multiple iterations, sequentially, using the same builder.
+  // For cmssw seeds.
+  //
+  // There is, in general, a mess in how tracks are processed, marked, or copied out
+  // in various validation scenarios and export flags.
+  //
+  // In particular, MkBuilder::PrepareSeeds does a lot of things to the whole / complete
+  // event.seedTracks_ -- probably this would need to be split into a common and a
+  // per-iteration part.
+  // - MkBuilder::prep_*** functions also mostly do not belong there (prep_sim is called from
+  //   PrepareSeeds() for cmssw seeds).
+  //
+  // At this point we need to think about what should happen to Event before all the iterations and
+  // after all the iterations ... from the Validation perspective.
+  // And if we care about doing too much work for seeds that will never get processed.
+  //==============================================================================
+
+  std::vector<double> runBtpCe_MultiIter(Event &ev, const EventOfHits &eoh, MkBuilder &builder, int n) {
+    // Returns n + 1 timings: [it] = seconds spent in findTracksCloneEngine() for iteration it, [n] = their sum.
+    std::vector<double> timevec;
+    if (n <= 0)
+      return timevec;  // nothing requested: empty result
+    timevec.resize(n + 1, 0.0);
+    const bool validation_on = (Config::sim_val || Config::quality_val);
+
+    TrackVec seeds_used;  // seeds handed to track finding, accumulated over the iterations (validation only)
+    TrackVec seeds1;
+    // NOTE(review): n above the number of entries below makes the std::find read past the array.
+    unsigned int algorithms[] = {4, 22, 23, 5, 24, 7, 8, 9, 10, 6};  //10 algorithm ids; the first n are kept
+
+    if (validation_on) {
+      for (auto const &s : ev.seedTracks_) {
+        //keep seeds from the first n iterations for processing
+        if (std::find(algorithms, algorithms + n, s.algoint()) != algorithms + n)
+          seeds1.push_back(s);
+      }
+      ev.seedTracks_.swap(seeds1);  //necessary for the validation - PrepareSeeds
+      ev.relabel_bad_seedtracks();  //necessary for the validation - PrepareSeeds
+    }
+
+    IterationMaskIfc mask_ifc;
+    TrackVec seeds;
+    TrackVec tmp_tvec;
+
+    for (int it = 0; it <= n - 1; ++it) {
+      const IterationConfig &itconf = Config::ItrInfo[it];
+
+      // To disable hit-masks, pass nullptr in place of &mask_ifc to MkJob ctor
+      // and optionally comment out ev.fill_hitmask_bool_vectors() call.
+      ev.fill_hitmask_bool_vectors(itconf.m_track_algorithm, mask_ifc.m_mask_vector);
+
+      MkJob job({Config::TrkInfo, itconf, eoh, &mask_ifc});
+
+      builder.begin_event(&job, &ev, __func__);
+
+      {  // We could partition seeds once, store beg, end for each iteration in a map or vector.
+        seeds.clear();
+        int nc = 0;
+        for (auto &s : ev.seedTracks_) {
+          if (s.algoint() == itconf.m_track_algorithm) {
+            if (itconf.m_requires_seed_hit_sorting) {
+              s.sortHitsByLayer();
+            }
+            seeds.push_back(s);
+            ++nc;
+          } else if (nc > 0)
+            break;  // stop after the first contiguous run of matching seeds
+        }
+      }
+
+      if (itconf.m_requires_dupclean_tight)
+        StdSeq::clean_cms_seedtracks_iter(&seeds, itconf, eoh.refBeamSpot());
+      builder.seed_post_cleaning(seeds);
+
+      // Add protection in case no seeds are found for iteration
+      if (seeds.empty()) {
+        builder.end_event();  // keep begin_event/end_event paired; job is local to this loop body
+        continue;
+      }
+
+      builder.find_tracks_load_seeds(seeds);
+
+      double time = dtime();
+
+      builder.findTracksCloneEngine();
+
+      timevec[it] = dtime() - time;
+      timevec[n] += timevec[it];
+
+      // Print min and max size of hots vectors of CombCands.
+      // builder.find_min_max_hots_size();
+
+      if (validation_on)
+        seeds_used.insert(seeds_used.end(), seeds.begin(), seeds.end());  //cleaned seeds need to be stored somehow
+      // Quality filters applied before the backward fit; detachedTripletStep is handled in the backward-fit branch.
+      using Algo = TrackBase::TrackAlgorithm;
+      if (itconf.m_requires_quality_filter && Algo(itconf.m_track_algorithm) != Algo::detachedTripletStep) {
+        if (Algo(itconf.m_track_algorithm) == Algo::pixelPairStep) {
+          builder.filter_comb_cands([&](const TrackCand &t) { return StdSeq::qfilter_n_hits_pixseed(t, 3); });
+        } else if (Algo(itconf.m_track_algorithm) == Algo::pixelLessStep) {
+          builder.filter_comb_cands(
+              [&](const TrackCand &t) { return StdSeq::qfilter_pixelLessFwd(t, eoh.refBeamSpot(), Config::TrkInfo); });
+        } else {
+          builder.filter_comb_cands(
+              [&](const TrackCand &t) { return StdSeq::qfilter_n_hits(t, itconf.m_params.minHitsQF); });
+        }
+      }
+
+      builder.select_best_comb_cands();
+
+      {
+        builder.export_tracks(tmp_tvec);
+        StdSeq::find_and_remove_duplicates(tmp_tvec, itconf);
+        ev.candidateTracks_.reserve(ev.candidateTracks_.size() + tmp_tvec.size());
+        for (auto &&t : tmp_tvec)
+          ev.candidateTracks_.emplace_back(std::move(t));
+        tmp_tvec.clear();
+      }
+
+      // now do backwards fit... do we want to time this section?
+      if (Config::backwardFit) {
+        // a) TrackVec version:
+        // builder.backwardFitBH();
+
+        // b) Version that runs on CombCand / TrackCand
+        const bool do_backward_search = Config::backwardSearch && itconf.m_backward_search;
+
+        // We copy seed-hits into Candidates ... now we have to remove them so backward fit stops
+        // before reaching seeding region. Ideally, we wouldn't add them in the first place but
+        // if we want to export full tracks above we need to hold on to them (alternatively, we could
+        // have a pointer to seed track in CombCandidate and copy them from there).
+        if (do_backward_search) {
+          builder.compactifyHitStorageForBestCand(itconf.m_backward_drop_seed_hits, itconf.m_backward_fit_min_hits);
+        }
+
+        builder.backwardFit();
+
+        if (do_backward_search) {
+          builder.beginBkwSearch();
+          builder.findTracksCloneEngine(SteeringParams::IT_BkwSearch);
+          builder.endBkwSearch();
+        }
+
+        if (itconf.m_requires_quality_filter && (Algo(itconf.m_track_algorithm) == Algo::detachedTripletStep ||
+                                                 Algo(itconf.m_track_algorithm) == Algo::pixelLessStep)) {
+          if (Algo(itconf.m_track_algorithm) == Algo::detachedTripletStep) {
+            builder.filter_comb_cands(
+                [&](const TrackCand &t) { return StdSeq::qfilter_n_layers(t, eoh.refBeamSpot(), Config::TrkInfo); });
+          } else if (Algo(itconf.m_track_algorithm) == Algo::pixelLessStep) {
+            builder.filter_comb_cands([&](const TrackCand &t) {
+              return StdSeq::qfilter_pixelLessBkwd(t, eoh.refBeamSpot(), Config::TrkInfo);
+            });
+          }
+        }
+
+        builder.filter_comb_cands([&](const TrackCand &t) { return StdSeq::qfilter_nan_n_silly(t); });
+
+        builder.select_best_comb_cands(true);  // true -> clear m_tracks as they were already filled once above
+
+        StdSeq::find_and_remove_duplicates(builder.ref_tracks_nc(), itconf);
+        builder.export_tracks(ev.fitTracks_);
+      }
+
+      builder.end_event();
+    }
+
+    // MIMI - Fake back event pointer for final processing (that should be done elsewhere)
+    MkJob job({Config::TrkInfo, Config::ItrInfo[0], eoh});
+    builder.begin_event(&job, &ev, __func__);
+
+    if (validation_on) {
+      StdSeq::prep_simtracks(&ev);
+      //swap for the cleaned seeds
+      ev.seedTracks_.swap(seeds_used);
+    }
+
+    check_nan_n_silly_candidates(ev);
+
+    if (Config::backwardFit)
+      check_nan_n_silly_bkfit(ev);
+
+    // validation section
+    if (Config::quality_val) {
+      StdSeq::Quality qval;
+      qval.quality_val(&ev);
+    } else if (Config::sim_val || Config::cmssw_val) {
+      StdSeq::root_val(&ev);
+    }
+
+    // ev.print_tracks(ev.candidateTracks_, true);
+
+    // MIMI Unfake.
+    builder.end_event();
+
+    // In CMSSW runOneIter we now release memory for comb-cands:
+    builder.release_memory();
+
+    return timevec;
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCMS/standalone/buildtestMPlex.h b/RecoTracker/MkFitCMS/standalone/buildtestMPlex.h
new file mode 100644
index 0000000000000..feb761d2cacfd
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/buildtestMPlex.h
@@ -0,0 +1,32 @@
+#ifndef RecoTracker_MkFitCMS_standalone_buildtestMPlex_h
+#define RecoTracker_MkFitCMS_standalone_buildtestMPlex_h
+
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/standalone/Event.h"
+
+#include <sys/time.h>
+
+namespace mkfit {
+
+  class IterationConfig;
+  class MkBuilder;
+  // Validation-only pass: no track finding is run, so no timing is returned.
+  void runBuildingTestPlexDumbCMSSW(Event& ev, const EventOfHits& eoh, MkBuilder& builder);
+  // Single-iteration tests; each returns the seconds spent in its track-finding call.
+  double runBuildingTestPlexBestHit(Event& ev, const EventOfHits& eoh, MkBuilder& builder);
+  double runBuildingTestPlexStandard(Event& ev, const EventOfHits& eoh, MkBuilder& builder);
+  double runBuildingTestPlexCloneEngine(Event& ev, const EventOfHits& eoh, MkBuilder& builder);
+  // Runs the first n iterations; returns n + 1 timings (per iteration, then their sum), empty if n <= 0.
+  std::vector<double> runBtpCe_MultiIter(Event& ev, const EventOfHits& eoh, MkBuilder& builder, int n);
+
+  inline double dtime() {
+    // Current gettimeofday() reading in seconds; the microsecond field becomes the fraction.
+    struct timeval now;
+    gettimeofday(&now, (struct timezone*)nullptr);
+    const double frac_sec = now.tv_usec * 1.0e-6;
+    return static_cast<double>(now.tv_sec + frac_sec);
+  }
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCMS/standalone/deadmodules.h b/RecoTracker/MkFitCMS/standalone/deadmodules.h
new file mode 100644
index 0000000000000..c593bf10a3dee
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/deadmodules.h
@@ -0,0 +1,716 @@
+deadvectors[0].push_back({0.0175431, 0.538005, 6.80997, 13.29});
+deadvectors[1].push_back({1.80736, 2.04038, -6.59, -0.109983});
+deadvectors[2].push_back({0.281584, 0.428298, 6.80983, 13.2899});
+deadvectors[2].push_back({1.13833, 1.28505, -26.6901, -20.21});
+deadvectors[3].push_back({0.585942, 0.688389, -13.2901, -6.81001});
+deadvectors[3].push_back({1.76407, 1.86653, 0.109926, 6.59001});
+deadvectors[3].push_back({-2.55562, -2.45318, -13.2902, -6.81017});
+deadvectors[4].push_back({1.20401, 1.28617, -50.5788, -38.8903});
+deadvectors[4].push_back({1.52785, 1.57005, -9.5373, 2.15122});
+deadvectors[4].push_back({-2.17942, -1.92542, -9.53648, 2.15203});
+deadvectors[4].push_back({-2.17938, -1.92537, -30.1149, -18.4264});
+deadvectors[4].push_back({-2.17938, -1.9254, -50.5784, -38.8899});
+deadvectors[4].push_back({-1.93782, -1.68382, -9.53847, 2.15004});
+deadvectors[4].push_back({-1.93773, -1.68374, -30.1146, -18.4261});
+deadvectors[4].push_back({-1.9378, -1.68379, -50.5805, -38.892});
+deadvectors[4].push_back({-1.69608, -1.44206, -9.53747, 2.15104});
+deadvectors[4].push_back({-1.6961, -1.4421, -30.1178, -18.4293});
+deadvectors[4].push_back({-1.69603, -1.44202, -50.5781, -38.8896});
+deadvectors[4].push_back({-1.45436, -1.20036, -9.53943, 2.14908});
+deadvectors[4].push_back({-1.45445, -1.20047, -30.1143, -18.4257});
+deadvectors[4].push_back({-1.45444, -1.20044, -50.5798, -38.8913});
+deadvectors[4].push_back({-1.21283, -0.958833, -9.53724, 2.15127});
+deadvectors[4].push_back({-1.21276, -0.958768, -30.1144, -18.4259});
+deadvectors[4].push_back({-1.21281, -0.958794, -50.5801, -38.8916});
+deadvectors[4].push_back({-0.245951, -0.163794, -9.53809, 2.15041});
+deadvectors[4].push_back({0.624377, 0.846698, -21.864, -10.1755});
+deadvectors[4].push_back({0.624414, 0.846734, -45.3316, -33.643});
+deadvectors[4].push_back({0.62432, 0.846647, -66.3886, -54.7001});
+deadvectors[4].push_back({-2.57996, -2.50431, -45.3314, -33.6429});
+deadvectors[4].push_back({-2.02595, -1.95164, -66.3891, -54.7006});
+deadvectors[4].push_back({-0.84172, -0.619388, -45.3323, -33.6438});
+deadvectors[4].push_back({-0.422645, -0.350435, -66.3883, -54.6997});
+deadvectors[4].push_back({-0.0042878, 0.0363765, 31.5671, 43.2556});
+deadvectors[4].push_back({2.89565, 2.97781, 31.5691, 43.2576});
+deadvectors[4].push_back({-0.00389682, 0.218427, 1.73207, 13.4206});
+deadvectors[4].push_back({-0.00383973, 0.218474, 25.2446, 36.9332});
+deadvectors[4].push_back({-0.00390077, 0.218426, 48.6325, 60.321});
+deadvectors[4].push_back({2.50938, 2.73171, 48.6308, 60.3193});
+deadvectors[4].push_back({3.00061, 3.07491, 48.6308, 60.3193});
+deadvectors[4].push_back({3.1377, -2.92317, 48.631, 60.3195});
+deadvectors[4].push_back({-2.65427, -2.57997, 1.73323, 13.4217});
+deadvectors[4].push_back({-2.51722, -2.2949, 48.6315, 60.32});
+deadvectors[4].push_back({-1.88887, -1.66656, 1.73415, 13.4227});
+deadvectors[4].push_back({-1.88886, -1.66654, 25.2479, 36.9364});
+deadvectors[4].push_back({-1.88886, -1.66653, 48.6319, 60.3204});
+deadvectors[4].push_back({-1.26053, -1.03822, 25.2453, 36.9338});
+deadvectors[4].push_back({-1.26055, -1.03822, 48.6313, 60.3198});
+deadvectors[4].push_back({-0.866746, -0.828805, 1.73339, 13.4219});
+deadvectors[4].push_back({-0.0666964, -0.0289888, 1.7314, 13.4199});
+deadvectors[5].push_back({-2.37757, -2.33664, -30.3916, -18.1481});
+deadvectors[5].push_back({-2.19877, -1.90118, -9.8133, 2.43019});
+deadvectors[5].push_back({-2.1988, -1.9012, -30.392, -18.1485});
+deadvectors[5].push_back({-2.19881, -1.9012, -50.8572, -38.6137});
+deadvectors[5].push_back({-1.95715, -1.65955, -9.81426, 2.42924});
+deadvectors[5].push_back({-1.95711, -1.65948, -30.3922, -18.1487});
+deadvectors[5].push_back({-1.95714, -1.65954, -50.8576, -38.6141});
+deadvectors[5].push_back({-1.71541, -1.41782, -9.81634, 2.42716});
+deadvectors[5].push_back({-1.71547, -1.41788, -30.3922, -18.1487});
+deadvectors[5].push_back({-1.71548, -1.41789, -50.8554, -38.6119});
+deadvectors[5].push_back({-1.47382, -1.17621, -9.81436, 2.42913});
+deadvectors[5].push_back({-1.47384, -1.17622, -30.3934, -18.1499});
+deadvectors[5].push_back({-1.47381, -1.17619, -50.8579, -38.6144});
+deadvectors[5].push_back({-1.23217, -0.934583, -9.81439, 2.4291});
+deadvectors[5].push_back({-1.23215, -0.934562, -30.393, -18.1495});
+deadvectors[5].push_back({-1.23213, -0.934524, -50.8572, -38.6137});
+deadvectors[5].push_back({0.485893, 0.560436, -45.6073, -33.3638});
+deadvectors[5].push_back({0.602526, 0.867664, -22.1406, -9.89713});
+deadvectors[5].push_back({0.602486, 0.867627, -45.6085, -33.3651});
+deadvectors[5].push_back({0.602521, 0.867641, -66.6674, -54.424});
+deadvectors[5].push_back({2.44537, 2.52128, -22.1413, -9.89781});
+deadvectors[5].push_back({2.71731, 2.78973, -45.6071, -33.3637});
+deadvectors[5].push_back({-1.47149, -1.39908, -66.6664, -54.4229});
+deadvectors[5].push_back({-0.351884, -0.277343, -66.6676, -54.4241});
+deadvectors[5].push_back({2.41552, 2.49658, 10.7877, 23.0312});
+deadvectors[5].push_back({1.04178, 1.1142, 24.9675, 37.211});
+deadvectors[5].push_back({-1.95285, -1.87694, 24.9671, 37.2106});
+deadvectors[5].push_back({-1.91077, -1.64563, 1.45491, 13.6984});
+deadvectors[5].push_back({-1.91078, -1.64565, 24.9687, 37.2122});
+deadvectors[5].push_back({-1.91073, -1.64558, 48.3537, 60.5972});
+deadvectors[5].push_back({-1.28246, -1.0173, 48.354, 60.5974});
+deadvectors[5].push_back({-0.235247, 0.0298801, 24.9699, 37.2134});
+deadvectors[6].push_back({0.738277, 0.926419, -23.867, -12.1785});
+deadvectors[6].push_back({0.738337, 0.926488, -44.7714, -33.0829});
+deadvectors[6].push_back({2.64777, 2.67902, -23.8673, -12.1788});
+deadvectors[6].push_back({-0.678605, -0.615782, -65.609, -53.9204});
+deadvectors[6].push_back({-0.309017, -0.246195, -23.865, -12.1764});
+deadvectors[6].push_back({-0.246191, -0.182355, -44.7698, -33.0813});
+deadvectors[6].push_back({-0.309016, -0.246194, -65.6096, -53.921});
+deadvectors[6].push_back({-0.185553, -0.0613818, -23.8681, -12.1795});
+deadvectors[6].push_back({-0.000863405, 0.187278, -2.90962, 8.77889});
+deadvectors[6].push_back({-0.0008811, 0.18728, 18.0438, 29.7323});
+deadvectors[6].push_back({-0.000825668, 0.187319, 38.9301, 50.6186});
+deadvectors[6].push_back({0.183906, 0.372043, -2.90902, 8.77949});
+deadvectors[6].push_back({0.183948, 0.372088, 18.0447, 29.7332});
+deadvectors[6].push_back({0.183912, 0.372065, 38.9273, 50.6158});
+deadvectors[6].push_back({0.368719, 0.556859, -2.90921, 8.7793});
+deadvectors[6].push_back({0.368693, 0.556828, 18.0432, 29.7317});
+deadvectors[6].push_back({0.368725, 0.556875, 38.9301, 50.6186});
+deadvectors[6].push_back({0.553543, 0.741689, -2.9088, 8.77971});
+deadvectors[6].push_back({0.553579, 0.741731, 18.0414, 29.7299});
+deadvectors[6].push_back({0.55358, 0.741716, 38.9302, 50.6187});
+deadvectors[6].push_back({0.738334, 0.926474, -2.90617, 8.78234});
+deadvectors[6].push_back({0.738336, 0.926484, 18.0432, 29.7317});
+deadvectors[6].push_back({0.738286, 0.926426, 38.9305, 50.619});
+deadvectors[6].push_back({1.29273, 1.48088, -2.91022, 8.77828});
+deadvectors[6].push_back({1.35419, 1.38544, 38.93, 50.6185});
+deadvectors[6].push_back({1.41701, 1.48085, 38.93, 50.6185});
+deadvectors[6].push_back({1.6018, 1.66563, 38.9277, 50.6162});
+deadvectors[6].push_back({-0.924913, -0.736768, -2.90851, 8.77999});
+deadvectors[6].push_back({-0.924886, -0.73674, 38.9293, 50.6178});
+deadvectors[6].push_back({-0.185561, -0.124211, -2.90681, 8.7817});
+deadvectors[6].push_back({0.826062, 0.881659, 9.01813, 20.7082});
+deadvectors[6].push_back({2.97545, -3.13758, 32.1739, 43.8624});
+deadvectors[6].push_back({-1.98494, -1.81479, 54.7021, 66.3906});
+deadvectors[6].push_back({-1.65427, -1.48411, 9.0191, 20.7076});
+deadvectors[7].push_back({0.723475, 0.944641, -24.1426, -11.8991});
+deadvectors[7].push_back({0.723421, 0.944571, -45.0469, -32.8034});
+deadvectors[7].push_back({2.75624, 2.9774, -65.8861, -53.6427});
+deadvectors[7].push_back({-0.79985, -0.736784, -65.8865, -53.6431});
+deadvectors[7].push_back({-0.46144, -0.430247, -24.1455, -11.902});
+deadvectors[7].push_back({-0.552976, -0.492321, -45.049, -32.8056});
+deadvectors[7].push_back({-0.307521, -0.24544, -24.1443, -11.9008});
+deadvectors[7].push_back({-0.0606437, -0.0292041, -24.1426, -11.8991});
+deadvectors[7].push_back({-0.122733, -0.0606515, -45.0477, -32.8042});
+deadvectors[7].push_back({-0.183393, -0.122738, -65.885, -53.6416});
+deadvectors[7].push_back({0.478463, 0.681132, -60.7588, -48.5153});
+deadvectors[7].push_back({1.70733, 1.76419, -60.757, -48.5136});
+deadvectors[7].push_back({1.87269, 1.90099, -37.6589, -25.4155});
+deadvectors[7].push_back({-1.98597, -1.9303, -60.7592, -48.5157});
+deadvectors[7].push_back({-0.82853, -0.77286, -60.7585, -48.515});
+deadvectors[7].push_back({-0.0157383, 0.205418, -3.18391, 9.05957});
+deadvectors[7].push_back({-0.0157809, 0.205366, 17.7667, 30.0102});
+deadvectors[7].push_back({-0.0157674, 0.205386, 38.6506, 50.8941});
+deadvectors[7].push_back({0.169033, 0.39018, -3.18468, 9.0588});
+deadvectors[7].push_back({0.169067, 0.390222, 17.7657, 30.0092});
+deadvectors[7].push_back({0.168991, 0.390153, 38.6515, 50.8949});
+deadvectors[7].push_back({0.353855, 0.575001, -3.18698, 9.0565});
+deadvectors[7].push_back({0.353803, 0.574949, 17.7665, 30.01});
+deadvectors[7].push_back({0.353783, 0.574935, 38.6507, 50.8942});
+deadvectors[7].push_back({0.538683, 0.759825, -3.18522, 9.05826});
+deadvectors[7].push_back({0.538611, 0.759771, 17.767, 30.0104});
+deadvectors[7].push_back({0.538651, 0.759795, 38.6527, 50.8962});
+deadvectors[7].push_back({0.723458, 0.944618, -3.18395, 9.05953});
+deadvectors[7].push_back({0.723477, 0.944631, 17.7676, 30.011});
+deadvectors[7].push_back({0.723407, 0.944563, 38.6502, 50.8937});
+deadvectors[7].push_back({1.44919, 1.48082, 38.6509, 50.8943});
+deadvectors[7].push_back({-0.939742, -0.718583, 17.7649, 30.0084});
+deadvectors[7].push_back({-0.615048, -0.551982, 17.7664, 30.0098});
+deadvectors[7].push_back({1.80128, 2.00395, 31.8961, 44.1396});
+deadvectors[7].push_back({2.95871, -3.12182, 31.8964, 44.1399});
+deadvectors[7].push_back({-1.34036, -1.13769, 8.74203, 20.9855});
+deadvectors[7].push_back({-1.34039, -1.13772, 31.8963, 44.1397});
+deadvectors[7].push_back({-1.34038, -1.13772, 54.4231, 66.6666});
+deadvectors[7].push_back({-0.332501, -0.276832, 31.8941, 44.1375});
+deadvectors[8].push_back({0.139919, 0.214255, -51.4629, -39.7744});
+deadvectors[8].push_back({0.42545, 0.57602, -9.45308, 2.23543});
+deadvectors[8].push_back({0.425459, 0.576033, -30.4724, -18.7839});
+deadvectors[8].push_back({0.425458, 0.576024, -51.4627, -39.7742});
+deadvectors[8].push_back({0.568311, 0.642647, -30.4697, -18.7812});
+deadvectors[8].push_back({1.28231, 1.35664, -30.4708, -18.7823});
+deadvectors[8].push_back({1.64225, 1.71837, -51.4633, -39.7748});
+deadvectors[8].push_back({-2.71616, -2.56559, -30.4706, -18.7821});
+deadvectors[8].push_back({-1.85937, -1.70879, -9.45349, 2.23502});
+deadvectors[8].push_back({-1.85935, -1.70878, -30.4704, -18.7819});
+deadvectors[8].push_back({-1.85934, -1.70877, -51.4631, -39.7746});
+deadvectors[8].push_back({-1.64215, -1.60426, -9.45355, 2.23499});
+deadvectors[8].push_back({-1.43094, -1.28038, -9.45385, 2.23466});
+deadvectors[8].push_back({-1.43097, -1.2804, -30.4703, -18.7818});
+deadvectors[8].push_back({-1.43094, -1.28037, -51.4637, -39.7752});
+deadvectors[8].push_back({0.408998, 0.548588, -21.0835, -9.395});
+deadvectors[8].push_back({0.408967, 0.54856, -43.7564, -32.0679});
+deadvectors[8].push_back({0.408973, 0.548566, -66.3901, -54.7016});
+deadvectors[8].push_back({2.59442, 2.73401, -66.3923, -54.7037});
+deadvectors[8].push_back({-1.84401, -1.7735, -21.0845, -9.39599});
+deadvectors[8].push_back({-0.956922, -0.817325, -21.0853, -9.39678});
+deadvectors[8].push_back({-0.00288471, 0.0714499, 11.575, 23.2636});
+deadvectors[8].push_back({0.425415, 0.575988, 11.5763, 23.2648});
+deadvectors[8].push_back({0.425473, 0.576044, 32.5819, 44.2704});
+deadvectors[8].push_back({0.425442, 0.576016, 53.5494, 65.2379});
+deadvectors[8].push_back({1.56782, 1.7184, 11.5757, 23.2646});
+deadvectors[8].push_back({1.56783, 1.7184, 32.5811, 44.2697});
+deadvectors[8].push_back({1.7107, 1.78504, 11.575, 23.2637});
+deadvectors[8].push_back({1.71062, 1.8612, 32.5819, 44.2706});
+deadvectors[8].push_back({1.71063, 1.86121, 53.5503, 65.2388});
+deadvectors[8].push_back({1.85343, 2.004, 11.5727, 23.2612});
+deadvectors[8].push_back({1.92785, 2.00397, 32.5815, 44.2709});
+deadvectors[8].push_back({1.92785, 2.00397, 53.5485, 65.2371});
+deadvectors[8].push_back({0.135799, 0.275394, 1.61466, 13.3032});
+deadvectors[8].push_back({0.135808, 0.275406, 24.2995, 35.988});
+deadvectors[8].push_back({0.135795, 0.275393, 46.9461, 58.6346});
+deadvectors[8].push_back({0.409001, 0.548598, 1.61327, 13.3018});
+deadvectors[8].push_back({0.408963, 0.548562, 24.2998, 35.9883});
+deadvectors[8].push_back({0.408957, 0.548548, 46.9447, 58.6332});
+deadvectors[8].push_back({0.545592, 0.685186, 1.61354, 13.3021});
+deadvectors[8].push_back({0.545529, 0.685123, 24.2985, 35.9871});
+deadvectors[8].push_back({0.545611, 0.685206, 46.9467, 58.6352});
+deadvectors[8].push_back({0.682178, 0.821782, 1.61256, 13.3011});
+deadvectors[8].push_back({0.682126, 0.821719, 24.3009, 35.9894});
+deadvectors[8].push_back({0.682145, 0.821739, 46.9454, 58.634});
+deadvectors[8].push_back({0.818746, 0.958343, 1.61584, 13.3044});
+deadvectors[8].push_back({0.818755, 0.958351, 24.2999, 35.9884});
+deadvectors[8].push_back({0.818774, 0.958366, 46.947, 58.6355});
+deadvectors[8].push_back({1.63829, 1.77788, 24.2997, 35.9883});
+deadvectors[8].push_back({1.63828, 1.77789, 46.9458, 58.6349});
+deadvectors[8].push_back({1.84393, 1.91445, 1.61489, 13.3036});
+deadvectors[8].push_back({1.77488, 1.91448, 24.3002, 35.9888});
+deadvectors[8].push_back({1.77488, 1.91448, 46.9469, 58.6354});
+deadvectors[8].push_back({-1.36673, -1.22713, 1.61586, 13.3044});
+deadvectors[8].push_back({-1.3667, -1.22711, 24.3003, 35.9888});
+deadvectors[8].push_back({-1.36672, -1.22713, 46.947, 58.6355});
+deadvectors[8].push_back({-0.137412, 0.00218318, 1.61391, 13.3024});
+deadvectors[9].push_back({0.723133, 0.848939, -66.3881, -54.6996});
+deadvectors[9].push_back({0.843971, 0.96978, -23.6655, -11.977});
+deadvectors[9].push_back({0.843997, 0.969802, -45.0419, -33.3533});
+deadvectors[9].push_back({0.843922, 0.969726, -66.3877, -54.6992});
+deadvectors[9].push_back({1.32729, 1.45309, -23.6655, -11.977});
+deadvectors[9].push_back({1.32729, 1.4531, -45.0393, -33.3508});
+deadvectors[9].push_back({1.32729, 1.4531, -66.3899, -54.7014});
+deadvectors[9].push_back({2.41476, 2.54057, -23.6651, -11.9766});
+deadvectors[9].push_back({2.41476, 2.54056, -45.0425, -33.354});
+deadvectors[9].push_back({2.41476, 2.54057, -66.3891, -54.7006});
+deadvectors[9].push_back({-2.41845, -2.29265, -23.6645, -11.976});
+deadvectors[9].push_back({-2.41845, -2.29264, -45.0415, -33.353});
+deadvectors[9].push_back({-2.41846, -2.29265, -66.3886, -54.7001});
+deadvectors[9].push_back({-1.69346, -1.56765, -66.3887, -54.7002});
+deadvectors[9].push_back({-1.21018, -1.08438, -23.6643, -11.9757});
+deadvectors[9].push_back({-1.21016, -1.08435, -45.0436, -33.3551});
+deadvectors[9].push_back({-1.21014, -1.08433, -66.3918, -54.7033});
+deadvectors[9].push_back({-0.847627, -0.72182, -23.6661, -11.9776});
+deadvectors[9].push_back({-0.847658, -0.72185, -45.0419, -33.3534});
+deadvectors[9].push_back({-0.847663, -0.721859, -66.3893, -54.7008});
+deadvectors[9].push_back({-0.00242246, 0.115629, -13.4455, -1.757});
+deadvectors[9].push_back({-0.00243932, 0.115613, -36.2497, -24.5612});
+deadvectors[9].push_back({-0.00243738, 0.115608, -59.0251, -47.3366});
+deadvectors[9].push_back({0.334168, 0.452216, -13.447, -1.75846});
+deadvectors[9].push_back({0.334156, 0.452205, -36.2479, -24.5594});
+deadvectors[9].push_back({0.334135, 0.452185, -59.0251, -47.3366});
+deadvectors[9].push_back({-2.3586, -2.24055, -13.4445, -1.75601});
+deadvectors[9].push_back({-2.35863, -2.24058, -36.2474, -24.5589});
+deadvectors[9].push_back({-2.35863, -2.24058, -59.0247, -47.3362});
+deadvectors[9].push_back({-2.13423, -2.01618, -13.444, -1.75551});
+deadvectors[9].push_back({-2.13421, -2.01616, -36.2493, -24.5608});
+deadvectors[9].push_back({-2.1342, -2.01615, -59.0229, -47.3344});
+deadvectors[9].push_back({-1.57323, -1.45518, -13.4456, -1.75707});
+deadvectors[9].push_back({-1.57319, -1.45514, -36.2493, -24.5608});
+deadvectors[9].push_back({-1.57325, -1.4552, -59.0218, -47.3333});
+deadvectors[9].push_back({-1.12445, -1.0064, -13.4448, -1.75632});
+deadvectors[9].push_back({-1.12443, -1.00638, -36.2488, -24.5603});
+deadvectors[9].push_back({-1.12438, -1.00634, -59.0223, -47.3338});
+deadvectors[9].push_back({-1.01224, -0.894184, -13.4441, -1.75561});
+deadvectors[9].push_back({-1.01219, -0.894145, -36.2473, -24.5588});
+deadvectors[9].push_back({-1.01223, -0.894179, -59.0215, -47.333});
+deadvectors[9].push_back({-0.900026, -0.781973, -13.4473, -1.75876});
+deadvectors[9].push_back({-0.900045, -0.781995, -36.2485, -24.56});
+deadvectors[9].push_back({-0.900055, -0.782002, -59.024, -47.3355});
+deadvectors[9].push_back({-0.787811, -0.669759, -13.4454, -1.75692});
+deadvectors[9].push_back({-0.787824, -0.669771, -36.2491, -24.5606});
+deadvectors[9].push_back({-0.787827, -0.669781, -59.0219, -47.3334});
+deadvectors[9].push_back({-0.675619, -0.557572, -13.4445, -1.75599});
+deadvectors[9].push_back({-0.67566, -0.557606, -36.2494, -24.5608});
+deadvectors[9].push_back({-0.675627, -0.557577, -59.0232, -47.3347});
+deadvectors[9].push_back({-0.451216, -0.333166, -13.4459, -1.75737});
+deadvectors[9].push_back({-0.451284, -0.333234, -36.2479, -24.5594});
+deadvectors[9].push_back({-0.451236, -0.333187, -59.0249, -47.3364});
+deadvectors[9].push_back({-0.339028, -0.220978, -13.4449, -1.75637});
+deadvectors[9].push_back({-0.339049, -0.220997, -36.248, -24.5595});
+deadvectors[9].push_back({-0.339053, -0.221001, -59.0228, -47.3343});
+deadvectors[9].push_back({-0.22685, -0.108802, -13.4451, -1.75656});
+deadvectors[9].push_back({-0.22686, -0.108809, -36.2483, -24.5598});
+deadvectors[9].push_back({-0.226814, -0.108764, -59.023, -47.3345});
+deadvectors[9].push_back({2.53557, 2.66138, 40.268, 51.9566});
+deadvectors[9].push_back({1.56837, 1.68642, 9.14254, 20.8311});
+deadvectors[9].push_back({1.56837, 1.68643, 31.9364, 43.625});
+deadvectors[9].push_back({1.56836, 1.68642, 54.7007, 66.3893});
+deadvectors[9].push_back({1.68056, 1.79862, 9.14224, 20.831});
+deadvectors[9].push_back({1.68057, 1.79862, 31.9337, 43.6223});
+deadvectors[9].push_back({1.73904, 1.76869, 54.6987, 66.3926});
+deadvectors[9].push_back({1.85125, 1.8809, 9.14224, 20.8309});
+deadvectors[9].push_back({1.88093, 1.91081, 31.9357, 43.6243});
+deadvectors[9].push_back({1.79276, 1.91082, 54.7021, 66.3908});
+deadvectors[9].push_back({2.91476, 3.03282, 9.14351, 20.832});
+deadvectors[9].push_back({2.91476, 3.03281, 31.9368, 43.6253});
+deadvectors[9].push_back({2.91479, 3.03284, 54.7012, 66.3897});
+deadvectors[9].push_back({-2.86115, -2.80162, 31.9356, 43.6245});
+deadvectors[9].push_back({-1.96354, -1.90401, 54.7031, 66.3917});
+deadvectors[9].push_back({-0.11463, 0.00342012, 9.14321, 20.8317});
+deadvectors[9].push_back({-0.114597, 0.00345451, 31.9363, 43.6248});
+deadvectors[9].push_back({-0.114629, 0.00342217, 54.701, 66.3895});
+deadvectors[10].push_back({1.66324, 1.7391, -54.3722, -35.7588});
+deadvectors[10].push_back({-0.882682, -0.725526, -108.288, -89.6745});
+deadvectors[10].push_back({1.06614, 1.10337, -0.0751572, 18.5383});
+deadvectors[10].push_back({1.06616, 1.14071, 72.937, 91.5505});
+deadvectors[10].push_back({2.5621, 2.71126, -0.0746555, 18.5388});
+deadvectors[10].push_back({2.63669, 2.71124, 72.9365, 91.5499});
+deadvectors[10].push_back({-2.22507, -2.07592, -0.0740957, 18.5393});
+deadvectors[10].push_back({-2.2251, -2.07595, 36.6717, 55.2851});
+deadvectors[10].push_back({-2.22509, -2.07593, 72.9361, 91.5495});
+deadvectors[10].push_back({-2.07944, -2.0009, 72.9367, 91.5501});
+deadvectors[10].push_back({-1.32746, -1.17831, -0.075738, 18.5377});
+deadvectors[11].push_back({-0.897098, -0.711092, -108.709, -89.2535});
+deadvectors[11].push_back({0.467341, 0.542296, 53.6552, 73.1109});
+deadvectors[11].push_back({2.48709, 2.56468, 36.2487, 55.7044});
+deadvectors[11].push_back({2.69328, 2.8793, -0.496463, 18.9594});
+deadvectors[11].push_back({-2.2388, -2.06216, -0.497606, 18.9582});
+deadvectors[11].push_back({-2.2388, -2.06217, 36.2474, 55.7033});
+deadvectors[11].push_back({-2.23881, -2.06217, 72.5138, 91.9696});
+deadvectors[11].push_back({-1.34121, -1.16458, -0.496822, 18.959});
+deadvectors[12].push_back({1.01646, 1.15335, -18.5367, 0.0766945});
+deadvectors[12].push_back({1.01535, 1.15445, -36.6222, -18.0088});
+deadvectors[12].push_back({1.01641, 1.15331, -55.2818, -36.6684});
+deadvectors[12].push_back({1.01535, 1.15445, -72.6898, -54.0764});
+deadvectors[12].push_back({1.01645, 1.15335, -91.5492, -72.9358});
+deadvectors[12].push_back({1.01535, 1.15445, -108.288, -89.6742});
+deadvectors[12].push_back({-0.293644, -0.224103, -36.6209, -18.0074});
+deadvectors[12].push_back({0.229978, 0.299505, 71.1616, 89.7752});
+deadvectors[12].push_back({0.888604, 1.0194, 54.0774, 72.6908});
+deadvectors[12].push_back({0.887582, 1.02038, 71.1622, 89.7756});
+deadvectors[12].push_back({-2.51578, -2.38298, 71.1646, 89.7781});
+deadvectors[12].push_back({-0.224094, -0.154562, 71.1633, 89.7769});
+deadvectors[13].push_back({1.00277, 1.16706, -18.9584, 0.497439});
+deadvectors[13].push_back({1.00145, 1.1684, -37.0412, -17.5854});
+deadvectors[13].push_back({1.00273, 1.16702, -55.7049, -36.2491});
+deadvectors[13].push_back({1.00143, 1.16838, -73.1112, -53.6554});
+deadvectors[13].push_back({1.00277, 1.16706, -91.9704, -72.5146});
+deadvectors[13].push_back({1.00145, 1.1684, -108.709, -89.2529});
+deadvectors[13].push_back({1.34669, 1.41521, -55.7035, -36.2477});
+deadvectors[13].push_back({1.87029, 1.90515, -108.709, -89.2535});
+deadvectors[13].push_back({-0.957066, -0.800138, -91.9703, -72.5145});
+deadvectors[13].push_back({-0.554428, -0.48591, -91.9695, -72.5139});
+deadvectors[13].push_back({0.875524, 1.03245, 89.2522, 108.708});
+deadvectors[13].push_back({1.41215, 1.4776, 53.6555, 73.1111});
+deadvectors[13].push_back({2.96875, 3.1281, -0.498503, 18.9573});
+deadvectors[13].push_back({3.04839, 3.11485, 35.3365, 54.7923});
+deadvectors[13].push_back({-1.66398, -1.63123, 53.6557, 73.1115});
+deadvectors[14].push_back({0.227431, 0.257951, -36.623, -18.0096});
+deadvectors[14].push_back({0.517462, 0.635553, -18.5369, 0.0764742});
+deadvectors[14].push_back({0.517915, 0.635044, -36.6225, -18.0091});
+deadvectors[14].push_back({0.517466, 0.635553, -54.3732, -35.7598});
+deadvectors[14].push_back({0.517933, 0.63506, -72.6893, -54.0759});
+deadvectors[14].push_back({0.517454, 0.635543, -89.7769, -71.1635});
+deadvectors[14].push_back({0.517941, 0.635071, -108.288, -89.675});
+deadvectors[14].push_back({0.750187, 0.809216, -89.7765, -71.163});
+deadvectors[14].push_back({1.6815, 1.74005, -36.6218, -18.0082});
+deadvectors[14].push_back({1.91372, 2.0318, -18.5387, 0.0747423});
+deadvectors[14].push_back({1.91371, 2.0318, -54.3742, -35.7608});
+deadvectors[14].push_back({1.9137, 2.03179, -89.7771, -71.1637});
+deadvectors[14].push_back({2.02762, 2.15066, -18.5367, 0.0767221});
+deadvectors[14].push_back({2.02814, 2.15015, -36.6218, -18.0084});
+deadvectors[14].push_back({2.02757, 2.15062, -54.374, -35.7606});
+deadvectors[14].push_back({2.02814, 2.15014, -72.6896, -54.0762});
+deadvectors[14].push_back({2.0276, 2.15065, -89.7761, -71.1627});
+deadvectors[14].push_back({2.02811, 2.15011, -108.289, -89.6752});
+deadvectors[14].push_back({2.26084, 2.32182, -36.6214, -18.0079});
+deadvectors[14].push_back({2.37963, 2.43818, -36.6227, -18.0092});
+deadvectors[14].push_back({2.49303, 2.55454, -18.5374, 0.0760317});
+deadvectors[14].push_back({-2.68144, -2.62289, -72.6904, -54.077});
+deadvectors[14].push_back({-2.33238, -2.2714, -108.289, -89.6755});
+deadvectors[14].push_back({-2.27459, -2.15746, -36.6211, -18.0077});
+deadvectors[14].push_back({-2.2746, -2.15747, -72.6903, -54.0769});
+deadvectors[14].push_back({-2.27459, -2.15745, -108.288, -89.675});
+deadvectors[14].push_back({-1.46304, -1.40154, -54.3708, -35.7574});
+deadvectors[14].push_back({-2.50729, -2.44874, 36.6692, 55.2826});
+deadvectors[14].push_back({-1.92796, -1.80596, -0.073843, 18.5396});
+deadvectors[14].push_back({-1.92849, -1.80544, 18.0095, 36.6229});
+deadvectors[14].push_back({-1.92797, -1.80596, 36.6696, 55.283});
+deadvectors[14].push_back({-1.92847, -1.80543, 54.075, 72.6884});
+deadvectors[14].push_back({-1.92793, -1.80593, 72.9362, 91.5496});
+deadvectors[14].push_back({-1.92848, -1.80544, 89.6752, 108.289});
+deadvectors[15].push_back({0.181913, 0.237016, -72.6915, -54.0777});
+deadvectors[15].push_back({1.33578, 1.44209, -0.0776157, 18.5358});
+deadvectors[15].push_back({1.33616, 1.44169, 18.0086, 36.622});
+deadvectors[15].push_back({1.33578, 1.4421, 35.7592, 54.3726});
+deadvectors[15].push_back({1.33616, 1.44169, 54.0754, 72.6888});
+deadvectors[15].push_back({1.33579, 1.44211, 71.1617, 89.7751});
+deadvectors[15].push_back({1.33617, 1.4417, 89.6737, 108.287});
+deadvectors[15].push_back({1.4385, 1.54882, -0.0749645, 18.5384});
+deadvectors[15].push_back({1.43894, 1.54841, 18.0095, 36.6229});
+deadvectors[15].push_back({1.43848, 1.5488, 35.7612, 54.3746});
+deadvectors[15].push_back({1.43894, 1.54841, 54.0773, 72.6907});
+deadvectors[15].push_back({1.43852, 1.54883, 71.1641, 89.7775});
+deadvectors[15].push_back({1.43893, 1.5484, 89.6748, 108.288});
+deadvectors[15].push_back({1.54522, 1.65153, -0.0762568, 18.5371});
+deadvectors[15].push_back({1.54562, 1.65115, 18.0089, 36.6223});
+deadvectors[15].push_back({1.5452, 1.65151, 35.7577, 54.3711});
+deadvectors[15].push_back({1.54559, 1.65113, 54.0749, 72.6883});
+deadvectors[15].push_back({1.54522, 1.65153, 71.1609, 89.7743});
+deadvectors[15].push_back({1.54561, 1.65114, 89.6738, 108.287});
+deadvectors[15].push_back({1.64795, 1.75826, -0.0731449, 18.5403});
+deadvectors[15].push_back({1.64836, 1.75783, 18.0085, 36.6219});
+deadvectors[15].push_back({1.64793, 1.75825, 35.7581, 54.3715});
+deadvectors[15].push_back({1.64836, 1.75783, 54.0744, 72.6878});
+deadvectors[15].push_back({1.64794, 1.75825, 71.1624, 89.7758});
+deadvectors[15].push_back({1.64835, 1.75783, 89.6737, 108.287});
+deadvectors[15].push_back({1.75466, 1.86097, -0.0748606, 18.5385});
+deadvectors[15].push_back({1.75504, 1.86057, 18.0088, 36.6222});
+deadvectors[15].push_back({1.75465, 1.86096, 35.7602, 54.3736});
+deadvectors[15].push_back({1.75505, 1.86059, 54.0775, 72.6909});
+deadvectors[15].push_back({1.75466, 1.86097, 71.1642, 89.7776});
+deadvectors[15].push_back({1.75505, 1.86058, 89.6731, 108.286});
+deadvectors[15].push_back({1.85738, 1.96769, -0.0754919, 18.5379});
+deadvectors[15].push_back({1.8578, 1.96727, 18.0074, 36.6208});
+deadvectors[15].push_back({1.85737, 1.96768, 35.757, 54.3704});
+deadvectors[15].push_back({1.85778, 1.96725, 54.076, 72.6894});
+deadvectors[15].push_back({1.85738, 1.96769, 71.1632, 89.7766});
+deadvectors[15].push_back({1.85781, 1.96729, 89.6728, 108.286});
+deadvectors[15].push_back({1.9641, 2.07042, -0.077137, 18.5363});
+deadvectors[15].push_back({1.96447, 2.07, 18.0083, 36.6217});
+deadvectors[15].push_back({1.96408, 2.07039, 35.7589, 54.3723});
+deadvectors[15].push_back({1.96449, 2.07002, 54.0791, 72.6926});
+deadvectors[15].push_back({1.9641, 2.07041, 71.163, 89.7764});
+deadvectors[15].push_back({1.96449, 2.07002, 89.6747, 108.288});
+deadvectors[15].push_back({2.06681, 2.17712, -0.0743303, 18.5391});
+deadvectors[15].push_back({2.06724, 2.17672, 18.008, 36.6214});
+deadvectors[15].push_back({2.06681, 2.17713, 35.7604, 54.3738});
+deadvectors[15].push_back({2.06725, 2.17672, 54.075, 72.6884});
+deadvectors[15].push_back({2.06682, 2.17714, 71.1628, 89.7762});
+deadvectors[15].push_back({2.06726, 2.17673, 89.6744, 108.288});
+deadvectors[16].push_back({1.17864, 1.21037, -72.6916, -54.0781});
+deadvectors[16].push_back({2.95414, 2.9869, -108.289, -89.6751});
+deadvectors[16].push_back({-2.28057, -2.18479, -18.5383, 0.075161});
+deadvectors[16].push_back({-2.187, -2.08799, -18.5376, 0.0758381});
+deadvectors[16].push_back({-2.18666, -2.08832, -36.6211, -18.0077});
+deadvectors[16].push_back({-2.187, -2.08798, -54.3703, -35.7569});
+deadvectors[16].push_back({-2.18666, -2.08832, -72.689, -54.0756});
+deadvectors[16].push_back({-2.187, -2.08799, -89.7779, -71.1645});
+deadvectors[16].push_back({-2.18667, -2.08833, -108.287, -89.6732});
+deadvectors[16].push_back({-0.757069, -0.661923, -36.6222, -18.0088});
+deadvectors[16].push_back({-0.757071, -0.661927, -72.6903, -54.0769});
+deadvectors[16].push_back({-0.757061, -0.661915, -108.288, -89.6742});
+deadvectors[16].push_back({0.607329, 0.671169, 89.6723, 108.286});
+deadvectors[16].push_back({2.0018, 2.10082, 18.0092, 36.6226});
+deadvectors[16].push_back({2.2581, 2.29086, 72.9356, 91.549});
+deadvectors[16].push_back({3.05091, -3.13713, 36.6682, 55.2816});
+deadvectors[16].push_back({-0.821092, -0.788297, 72.9352, 91.5488});
+deadvectors[16].push_back({-0.757371, -0.661593, 89.675, 108.288});
+deadvectors[17].push_back({0.688589, 0.71803, -36.6215, -18.008});
+deadvectors[17].push_back({2.3576, 2.44535, -18.537, 0.0764084});
+deadvectors[17].push_back({2.3576, 2.44536, -55.2841, -36.6707});
+deadvectors[17].push_back({2.35759, 2.44534, -91.5472, -72.9338});
+deadvectors[17].push_back({-2.35337, -2.33874, -91.5497, -72.9363});
+deadvectors[17].push_back({-2.1415, -2.05579, -36.6222, -18.0088});
+deadvectors[17].push_back({-2.1415, -2.05579, -72.6885, -54.0751});
+deadvectors[17].push_back({-2.14125, -2.05605, -91.5489, -72.9355});
+deadvectors[17].push_back({-2.14151, -2.0558, -108.286, -89.6731});
+deadvectors[17].push_back({0.0386753, 0.0679107, 18.0107, 36.6245});
+deadvectors[17].push_back({2.07656, 2.10598, 35.7591, 54.3727});
+deadvectors[17].push_back({2.33085, 2.35941, 35.7609, 54.3745});
+deadvectors[17].push_back({2.75574, 2.78497, 89.6738, 108.288});
+deadvectors[18].push_back({0.453363, 0.780043, 4.671, 11.0697});
+deadvectors[18].push_back({-2.21852, -1.88976, 4.59921, 11.0288});
+deadvectors[18].push_back({-0.505149, -0.176426, 4.59985, 11.0291});
+deadvectors[18].push_back({-1.57763, -1.4181, 9.52426, 16.0838});
+deadvectors[19].push_back({2.35088, 2.67959, 4.60028, 11.0294});
+deadvectors[19].push_back({0.121514, 0.280987, 9.5225, 16.0849});
+deadvectors[19].push_back({0.824854, 0.984366, 9.52497, 16.0845});
+deadvectors[20].push_back({-2.40275, -2.0761, 4.67161, 11.0703});
+deadvectors[21].push_back({0.390613, 0.656739, 23.3363, 34.5215});
+deadvectors[21].push_back({0.914042, 1.18019, 23.3337, 34.519});
+deadvectors[21].push_back({1.43768, 1.7038, 23.3355, 34.5206});
+deadvectors[21].push_back({-1.18023, -1.09154, 32.5873, 41.4819});
+deadvectors[22].push_back({0.829728, 0.919569, 22.9876, 34.8754});
+deadvectors[22].push_back({0.654734, 0.741468, 22.9876, 34.8754});
+deadvectors[22].push_back({0.91655, 1.00327, 32.1235, 41.9466});
+deadvectors[22].push_back({-1.00287, -0.913038, 32.1231, 41.9462});
+deadvectors[23].push_back({-3.06496, -2.90418, 39.3348, 50.437});
+deadvectors[23].push_back({-2.7509, -2.5901, 39.3359, 50.4387});
+deadvectors[23].push_back({0.862098, 0.94248, 39.3359, 50.4378});
+deadvectors[24].push_back({0.108185, 0.412151, 22.9875, 34.8752});
+deadvectors[24].push_back({-0.938995, -0.635043, 22.989, 34.8767});
+deadvectors[24].push_back({-0.415399, -0.111442, 22.9882, 34.8759});
+deadvectors[24].push_back({2.21239, 2.50101, 32.1241, 41.9478});
+deadvectors[25].push_back({-0.394682, -0.128577, 32.5882, 41.4845});
+deadvectors[26].push_back({2.66232, 2.75216, 22.9878, 34.8757});
+deadvectors[27].push_back({-1.34772, -1.23288, 72.7945, 91.2337});
+deadvectors[27].push_back({-0.629566, -0.469945, 60.5178, 75.2913});
+deadvectors[27].push_back({-0.472505, -0.312887, 60.5182, 75.2915});
+deadvectors[27].push_back({-0.315396, -0.155779, 60.5201, 75.2936});
+deadvectors[27].push_back({-0.590001, -0.509565, 88.9007, 109.389});
+deadvectors[27].push_back({-0.511437, -0.431001, 88.9011, 109.389});
+deadvectors[27].push_back({-0.432919, -0.352485, 88.9027, 109.391});
+deadvectors[27].push_back({-0.354367, -0.273934, 88.9036, 109.392});
+deadvectors[27].push_back({-0.275852, -0.195412, 88.8985, 109.387});
+deadvectors[27].push_back({-0.086538, 0.00216758, 23.7177, 32.3097});
+deadvectors[28].push_back({-0.639305, -0.458593, 60.015, 75.7957});
+deadvectors[28].push_back({-0.482252, -0.301548, 60.0159, 75.7963});
+deadvectors[28].push_back({-0.325148, -0.14444, 60.0161, 75.7967});
+deadvectors[28].push_back({1.55748, 1.84618, 32.122, 41.949});
+deadvectors[28].push_back({1.40273, 1.58343, 60.0179, 75.7985});
+deadvectors[28].push_back({3.1306, -2.97188, 60.0197, 75.8003});
+deadvectors[29].push_back({0.549779, 0.589998, 88.8957, 109.384});
+deadvectors[29].push_back({1.29496, 1.33518, 88.8918, 109.38});
+deadvectors[29].push_back({-1.12327, -1.00839, 50.5036, 62.039});
+deadvectors[29].push_back({-1.45991, -1.4312, 72.7944, 91.2336});
+deadvectors[30].push_back({-2.61821, -2.57504, 32.1209, 41.9484});
+deadvectors[30].push_back({-0.872388, -0.785644, 23.371, 32.6555});
+deadvectors[30].push_back({-0.943595, -0.890086, 60.0148, 75.795});
+deadvectors[31].push_back({2.01827, 2.04699, 50.5025, 62.0375});
+deadvectors[31].push_back({0.0767547, 0.237525, 39.3377, 50.4399});
+deadvectors[31].push_back({0.67191, 0.786754, 72.7941, 91.2335});
+deadvectors[31].push_back({0.58809, 0.668528, 88.8989, 109.387});
+deadvectors[31].push_back({1.6269, 1.68431, 72.7971, 91.2369});
+deadvectors[31].push_back({1.60911, 1.68955, 88.9008, 109.389});
+deadvectors[31].push_back({2.94458, 2.9733, 50.5025, 62.0376});
+deadvectors[31].push_back({-3.06304, -3.02282, 88.8888, 109.377});
+deadvectors[31].push_back({-2.30009, -2.27138, 50.504, 62.0391});
+deadvectors[31].push_back({-1.40249, -1.34506, 50.5016, 62.0366});
+deadvectors[31].push_back({-1.51885, -1.49225, 60.5178, 75.2913});
+deadvectors[32].push_back({1.99028, 2.04266, 60.0149, 75.7949});
+deadvectors[32].push_back({-2.88311, -2.79326, 23.3724, 32.6567});
+deadvectors[32].push_back({-2.05259, -2.00786, 23.3728, 32.6575});
+deadvectors[32].push_back({2.92275, 2.96629, 32.1204, 41.9479});
+deadvectors[33].push_back({1.29496, 1.33518, 88.9013, 109.39});
+deadvectors[33].push_back({1.83039, 2.09648, 32.5881, 41.4833});
+deadvectors[33].push_back({-1.29029, -1.23286, 50.5025, 62.0376});
+deadvectors[33].push_back({3.14024, -3.02807, 50.5027, 62.0379});
+deadvectors[33].push_back({-2.53143, -2.44274, 32.5884, 41.483});
+deadvectors[34].push_back({-2.52427, -2.34357, 60.0177, 75.7982});
+deadvectors[35].push_back({1.25664, 1.29683, 39.3365, 50.4384});
+deadvectors[35].push_back({1.25537, 1.30858, 60.5163, 75.2896});
+deadvectors[35].push_back({1.9369, 1.9635, 60.517, 75.2905});
+deadvectors[35].push_back({1.9233, 2.00374, 88.8987, 109.387});
+deadvectors[35].push_back({2.61592, 2.88199, 32.5891, 41.4842});
+deadvectors[35].push_back({-2.62012, -2.53143, 32.5886, 41.4833});
+deadvectors[35].push_back({-2.58192, -2.52449, 50.5033, 62.0384});
+deadvectors[36].push_back({2.60458, 2.89331, 32.1206, 41.9484});
+deadvectors[36].push_back({-0.158201, -0.104693, 60.0158, 75.7969});
+deadvectors[37].push_back({-2.88188, -2.61579, 32.5869, 41.482});
+deadvectors[37].push_back({-0.158322, 0.00129489, 60.5199, 75.2934});
+deadvectors[37].push_back({-0.00129046, 0.158327, 60.5203, 75.2939});
+deadvectors[37].push_back({-0.19727, -0.11683, 88.8992, 109.387});
+deadvectors[37].push_back({-0.118758, -0.0383232, 88.901, 109.389});
+deadvectors[37].push_back({-0.0402126, 0.0402247, 88.8981, 109.386});
+deadvectors[37].push_back({0.0383002, 0.118741, 88.8984, 109.387});
+deadvectors[37].push_back({0.116841, 0.197281, 88.899, 109.387});
+deadvectors[37].push_back({0.698849, 0.787541, 32.5874, 41.4821});
+deadvectors[37].push_back({1.65736, 1.74605, 32.5873, 41.482});
+deadvectors[37].push_back({2.43474, 2.47496, 88.8983, 109.387});
+deadvectors[37].push_back({-0.942466, -0.902248, 88.8986, 109.387});
+deadvectors[38].push_back({0.248327, 0.537072, 32.1199, 41.9481});
+deadvectors[38].push_back({-0.168067, 0.0126386, 60.017, 75.7977});
+deadvectors[38].push_back({-0.0109786, 0.169736, 60.0145, 75.7952});
+deadvectors[39].push_back({0.46998, 0.523186, 60.5186, 75.292});
+deadvectors[39].push_back({1.01922, 1.17998, 39.3381, 50.4401});
+deadvectors[39].push_back({1.17626, 1.33702, 39.3361, 50.438});
+deadvectors[39].push_back({1.00845, 1.12332, 50.5021, 62.0373});
+deadvectors[39].push_back({1.12067, 1.23554, 50.5023, 62.0374});
+deadvectors[39].push_back({1.2329, 1.34778, 50.5013, 62.0366});
+deadvectors[39].push_back({1.00847, 1.12331, 72.7971, 91.2364});
+deadvectors[39].push_back({1.12069, 1.23552, 72.7978, 91.237});
+deadvectors[39].push_back({1.23289, 1.34773, 72.7958, 91.235});
+deadvectors[39].push_back({2.51202, 2.67163, 60.5207, 75.294});
+deadvectors[39].push_back({2.66909, 2.82872, 60.517, 75.2908});
+deadvectors[39].push_back({2.82616, 2.9858, 60.5168, 75.2909});
+deadvectors[39].push_back({-1.10081, -0.994403, 60.5191, 75.2926});
+deadvectors[39].push_back({-1.02101, -0.980794, 88.9028, 109.391});
+deadvectors[39].push_back({-0.314149, -0.294042, 88.9087, 109.397});
+deadvectors[39].push_back({0.784119, 0.943739, 60.518, 75.2915});
+deadvectors[39].push_back({-3.10328, -3.02284, 88.8992, 109.387});
+deadvectors[39].push_back({-3.02472, -2.94428, 88.8963, 109.384});
+deadvectors[40].push_back({2.50227, 2.68297, 60.0176, 75.7978});
+deadvectors[40].push_back({2.65935, 2.84004, 60.019, 75.7991});
+deadvectors[40].push_back({2.81643, 2.99713, 60.0173, 75.7974});
+deadvectors[41].push_back({0.335276, 0.363991, 50.5037, 62.0388});
+deadvectors[41].push_back({2.66914, 2.82876, 60.5195, 75.2934});
+deadvectors[41].push_back({-2.90782, -2.74706, 39.3386, 50.4405});
+deadvectors[41].push_back({-2.75066, -2.58988, 39.3387, 50.4411});
+deadvectors[41].push_back({-2.91851, -2.80364, 50.5002, 62.0353});
+deadvectors[41].push_back({-2.80636, -2.69149, 50.5046, 62.0399});
+deadvectors[41].push_back({-2.69412, -2.57926, 50.5049, 62.0399});
+deadvectors[41].push_back({-2.91847, -2.80363, 72.7956, 91.2351});
+deadvectors[41].push_back({-2.80631, -2.69147, 72.7964, 91.2356});
+deadvectors[41].push_back({-2.69412, -2.57929, 72.799, 91.2383});
+deadvectors[41].push_back({1.68167, 1.79654, 50.4968, 62.0318});
+deadvectors[41].push_back({1.34508, 1.4025, 72.7918, 91.231});
+deadvectors[41].push_back({2.43289, 2.51327, 39.336, 50.4391});
+deadvectors[41].push_back({3.06306, 3.08317, 88.8973, 109.386});
+deadvectors[41].push_back({-2.58192, -2.46705, 50.5029, 62.038});
+deadvectors[42].push_back({-1.46547, -1.41309, 60.0155, 75.7959});
+deadvectors[43].push_back({2.57923, 2.69411, 50.5014, 62.0367});
+deadvectors[43].push_back({2.69142, 2.80629, 50.5051, 62.0405});
+deadvectors[43].push_back({2.80367, 2.91854, 50.5036, 62.0386});
+deadvectors[43].push_back({2.5793, 2.69414, 72.7933, 91.2326});
+deadvectors[43].push_back({2.69152, 2.80636, 72.7956, 91.2351});
+deadvectors[43].push_back({2.80366, 2.91851, 72.7952, 91.2345});
+deadvectors[43].push_back({1.3735, 1.41372, 88.8915, 109.38});
+deadvectors[43].push_back({-2.5535, -2.47304, 88.9076, 109.397});
+deadvectors[43].push_back({-1.51886, -1.46565, 60.5154, 75.2887});
+deadvectors[44].push_back({-1.10067, -1.04716, 60.0172, 75.7972});
+deadvectors[45].push_back({-0.219485, 0.109311, 4.5988, 11.0279});
+deadvectors[46].push_back({1.74881, 1.90833, 9.5251, 16.0847});
+deadvectors[47].push_back({0.121579, 0.281062, 9.52217, 16.0845});
+deadvectors[48].push_back({3.00847, -3.00859, 23.3361, 34.5211});
+deadvectors[48].push_back({-2.75104, -2.48497, 23.34, 34.5247});
+deadvectors[48].push_back({-2.22757, -1.96141, 23.3328, 34.5181});
+deadvectors[48].push_back({0.128771, 0.394837, 32.5889, 41.4836});
+deadvectors[48].push_back({-0.82974, -0.741048, 32.587, 41.4818});
+deadvectors[49].push_back({-3.01396, -2.96885, 22.9885, 34.8762});
+deadvectors[49].push_back({2.99118, -2.98807, 22.9878, 34.8752});
+deadvectors[49].push_back({-2.76841, -2.46449, 22.9902, 34.8775});
+deadvectors[49].push_back({-2.24476, -1.94082, 22.9897, 34.8774});
+deadvectors[50].push_back({2.74675, 3.01283, 23.3388, 34.5235});
+deadvectors[50].push_back({-0.394841, -0.306142, 23.3362, 34.5209});
+deadvectors[50].push_back({2.48487, 2.75097, 23.3391, 34.5243});
+deadvectors[50].push_back({3.00847, -3.00858, 23.3346, 34.5196});
+deadvectors[50].push_back({0.128759, 0.39484, 32.5874, 41.4822});
+deadvectors[50].push_back({-0.918426, -0.652357, 32.5888, 41.4835});
+deadvectors[50].push_back({-0.394831, -0.128752, 32.5872, 41.4819});
+deadvectors[50].push_back({-1.4941, -1.33334, 39.3376, 50.4395});
+deadvectors[51].push_back({1.15864, 1.46261, 22.9876, 34.8755});
+deadvectors[51].push_back({1.68236, 1.9863, 22.993, 34.8816});
+deadvectors[51].push_back({2.20593, 2.50995, 22.9867, 34.8753});
+deadvectors[51].push_back({2.72933, 3.03328, 22.9857, 34.8727});
+deadvectors[51].push_back({-3.03015, -2.72612, 22.9835, 34.8715});
+deadvectors[51].push_back({2.46762, 2.77163, 22.9848, 34.8726});
+deadvectors[51].push_back({2.99123, -2.98797, 22.9862, 34.874});
+deadvectors[51].push_back({0.11693, 0.40554, 32.1242, 41.9474});
+deadvectors[51].push_back({-0.930265, -0.641644, 32.123, 41.9463});
+deadvectors[51].push_back({-0.406668, -0.118045, 32.123, 41.9463});
+deadvectors[51].push_back({2.47318, 2.76181, 32.1236, 41.9474});
+deadvectors[52].push_back({-0.0443429, 0.0443551, 23.3367, 34.5215});
+deadvectors[52].push_back({-0.668504, -0.628314, 39.3374, 50.4393});
+deadvectors[54].push_back({1.04512, 1.31125, 23.7184, 32.311});
+deadvectors[54].push_back({-2.71057, -2.69046, 88.8939, 109.382});
+deadvectors[54].push_back({-2.04698, -2.01826, 50.5016, 62.0367});
+deadvectors[54].push_back({-0.157074, -0.0766959, 39.3382, 50.4401});
+deadvectors[54].push_back({-1.68433, -1.65561, 50.5035, 62.0386});
+deadvectors[55].push_back({-1.91912, -1.82928, 23.3724, 32.6576});
+deadvectors[55].push_back({-1.85945, -1.83319, 60.0176, 75.7975});
+deadvectors[55].push_back({-1.22193, -1.13368, 32.1179, 41.9458});
+deadvectors[55].push_back({2.09466, 2.13784, 23.3725, 32.6569});
+deadvectors[56].push_back({0.31416, 0.354349, 39.3381, 50.44});
+deadvectors[56].push_back({0.223108, 0.337983, 50.5017, 62.037});
+deadvectors[56].push_back({1.26158, 1.29029, 50.5041, 62.0392});
+deadvectors[56].push_back({2.80367, 2.86109, 72.7925, 91.2319});
+deadvectors[56].push_back({2.65024, 2.67035, 88.8928, 109.381});
+deadvectors[56].push_back({-2.09648, -1.83033, 23.717, 32.3096});
+deadvectors[56].push_back({-2.09657, -1.83048, 32.5867, 41.4816});
+deadvectors[56].push_back({-1.96537, -1.80461, 39.3404, 50.4425});
+deadvectors[56].push_back({-2.12248, -1.9617, 39.3349, 50.4372});
+deadvectors[56].push_back({-1.90879, -1.79391, 50.5014, 62.0368});
+deadvectors[56].push_back({-2.021, -1.90613, 50.5032, 62.0386});
+deadvectors[56].push_back({-2.13321, -2.01833, 50.5026, 62.0381});
+deadvectors[56].push_back({-1.22245, -1.13378, 32.5951, 41.4897});
+deadvectors[56].push_back({-1.12332, -1.0659, 72.7904, 91.2296});
+deadvectors[56].push_back({-0.280502, -0.223085, 72.7966, 91.2361});
+deadvectors[56].push_back({3.02284, 3.06306, 88.8976, 109.386});
+deadvectors[56].push_back({-0.898915, -0.841499, 72.7988, 91.238});
+deadvectors[57].push_back({-2.11149, -1.81598, 23.3712, 32.6557});
+deadvectors[57].push_back({-2.10792, -1.81919, 32.1203, 41.948});
+deadvectors[57].push_back({0.175275, 0.265119, 23.373, 32.6583});
+deadvectors[57].push_back({0.782617, 0.872449, 32.1221, 41.9492});
+deadvectors[58].push_back({1.36177, 1.41498, 60.5166, 75.2901});
+deadvectors[58].push_back({-1.13374, -1.04503, 23.7167, 32.3089});
+deadvectors[58].push_back({-3.02471, -2.94427, 88.8997, 109.388});
+deadvectors[58].push_back({-3.10329, -3.02285, 88.8991, 109.387});
+deadvectors[59].push_back({-1.83233, -1.78915, 23.3727, 32.6578});
+deadvectors[60].push_back({1.04506, 1.13375, 32.5875, 41.4822});
+deadvectors[60].push_back({-0.562315, -0.504886, 50.505, 62.0401});
+deadvectors[60].push_back({-0.898918, -0.841495, 72.7894, 91.2291});
+deadvectors[61].push_back({2.13674, 2.18146, 32.1218, 41.9491});
+deadvectors[62].push_back({0.259671, 0.525741, 32.5886, 41.4833});
+deadvectors[62].push_back({-0.0584345, -0.0383255, 88.8979, 109.386});
+deadvectors[62].push_back({-2.9845, -2.94428, 88.8892, 109.378});
+deadvectors[62].push_back({-0.942469, -0.862086, 39.3357, 50.4376});
+deadvectors[62].push_back({-0.904159, -0.863939, 88.8936, 109.383});
+deadvectors[63].push_back({0.248258, 0.536982, 32.1221, 41.9502});
+deadvectors[63].push_back({-0.576543, -0.523605, 60.0161, 75.7966});
+deadvectors[64].push_back({-1.9635, -1.93689, 60.5152, 75.2887});
+deadvectors[64].push_back({-1.2183, -1.13786, 88.8978, 109.386});
+deadvectors[64].push_back({-0.157064, -0.0766817, 39.3358, 50.4377});
+deadvectors[64].push_back({0.783243, 1.04932, 32.5876, 41.4824});
+deadvectors[64].push_back({0.521462, 0.787531, 32.5883, 41.4829});
+deadvectors[64].push_back({2.09222, 2.35829, 32.5889, 41.4839});
+deadvectors[64].push_back({-1.45994, -1.34504, 50.4928, 62.028});
+deadvectors[64].push_back({-0.66853, -0.62831, 88.8938, 109.382});
+deadvectors[65].push_back({-1.32251, -1.03379, 32.1215, 41.9492});
+deadvectors[65].push_back({0.771843, 1.06056, 32.1211, 41.9485});
+deadvectors[65].push_back({0.510092, 0.798799, 32.1234, 41.9511});
+deadvectors[65].push_back({1.5582, 1.7389, 60.0172, 75.7976});
+deadvectors[66].push_back({0.509572, 0.59001, 88.8987, 109.387});
+deadvectors[66].push_back({0.431033, 0.511469, 88.9012, 109.389});
+deadvectors[66].push_back({0.35248, 0.432915, 88.9007, 109.389});
+deadvectors[66].push_back({0.273912, 0.354354, 88.8982, 109.387});
+deadvectors[66].push_back({0.195394, 0.275829, 88.9008, 109.389});
+deadvectors[66].push_back({2.82616, 2.98579, 60.5173, 75.2912});
+deadvectors[66].push_back({2.66908, 2.8287, 60.5188, 75.292});
+deadvectors[66].push_back({2.512, 2.67162, 60.5199, 75.2936});
+deadvectors[66].push_back({0.784136, 0.943759, 60.5165, 75.2899});
+deadvectors[66].push_back({2.13045, 2.24531, 72.7829, 91.2223});
+deadvectors[66].push_back({-3.02473, -2.98451, 88.8967, 109.385});
+deadvectors[66].push_back({-2.58193, -2.5245, 50.499, 62.0343});
+deadvectors[66].push_back({-1.65118, -1.49041, 39.3357, 50.4377});
+deadvectors[67].push_back({2.8148, 2.99553, 60.0112, 75.7921});
+deadvectors[67].push_back({2.65775, 2.83844, 60.017, 75.7971});
+deadvectors[67].push_back({2.50068, 2.68136, 60.0182, 75.7978});
+deadvectors[67].push_back({0.772769, 0.953478, 60.0154, 75.796});
+deadvectors[67].push_back({0.6157, 0.796406, 60.0158, 75.7963});
+deadvectors[68].push_back({2.80365, 2.91852, 50.5017, 62.0369});
+deadvectors[68].push_back({2.57926, 2.69409, 72.7975, 91.2368});
+deadvectors[68].push_back({-1.9901, -1.9635, 60.5141, 75.2876});
+deadvectors[68].push_back({-1.90872, -1.8513, 72.7952, 91.2346});
+deadvectors[68].push_back({-0.0013158, 0.113557, 50.4989, 62.034});
+deadvectors[68].push_back({-0.113513, 0.00132563, 72.7967, 91.2359});
+deadvectors[68].push_back({1.41373, 1.49412, 39.3281, 50.4303});
+deadvectors[68].push_back({1.45202, 1.53246, 88.8983, 109.386});
+deadvectors[68].push_back({-2.51328, -2.43289, 39.3326, 50.4345});
+deadvectors[69].push_back({-2.14735, -2.12095, 60.0189, 75.7993});
+deadvectors[69].push_back({-0.733617, -0.707218, 60.0173, 75.7973});
+deadvectors[70].push_back({1.00847, 1.12333, 50.5051, 62.0402});
+deadvectors[70].push_back({2.01827, 2.13313, 50.5085, 62.0438});
+deadvectors[70].push_back({1.90608, 2.02094, 50.5048, 62.0399});
+deadvectors[70].push_back({1.79387, 1.90874, 50.4994, 62.0346});
+deadvectors[70].push_back({1.79387, 1.90873, 72.787, 91.2269});
+deadvectors[70].push_back({-2.74887, -2.69144, 72.7778, 91.2158});
+deadvectors[70].push_back({2.43473, 2.47494, 88.9094, 109.398});
+deadvectors[70].push_back({-1.49226, -1.47215, 88.8906, 109.379});
diff --git a/RecoTracker/MkFitCMS/standalone/mkFit.cc b/RecoTracker/MkFitCMS/standalone/mkFit.cc
new file mode 100644
index 0000000000000..bc89dc59bfb85
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/mkFit.cc
@@ -0,0 +1,1027 @@
+#include "RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.h"
+
+// CCCC #include "fittestMPlex.h"
+#include "RecoTracker/MkFitCMS/standalone/buildtestMPlex.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/MkBuilder.h"
+#include "RecoTracker/MkFitCore/src/MkFitter.h"
+#include "RecoTracker/MkFitCMS/interface/MkStdSeqs.h"
+#include "RecoTracker/MkFitCMS/standalone/MkStandaloneSeqs.h"
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+
+#include "RecoTracker/MkFitCore/standalone/Event.h"
+
+#include "RecoTracker/MkFitCore/src/MaterialEffects.h"
+
+#ifndef NO_ROOT
+#include "RecoTracker/MkFitCore/standalone/Validation.h"
+#endif
+
+//#define DEBUG
+#include "RecoTracker/MkFitCore/src/Debug.h"
+
+#include "oneapi/tbb/task_arena.h"
+#include "oneapi/tbb/parallel_for.h"
+
+#if defined(USE_VTUNE_PAUSE)
+#include "ittnotify.h"
+#endif
+
+#include <cstdlib>
+#include <limits>
+#include <list>
+#include <sstream>
+#include <memory>
+
+using namespace mkfit;
+
+//==============================================================================
+
+void initGeom() {
+  std::cout << "Constructing SimpleGeometry Cylinder geometry" << std::endl;
+
+  // NB: we currently assume that each node is a layer, and that layers
+  // are added starting from the center
+  // NB: z is just a dummy variable, VUSolid is actually infinite in size.  *** Therefore, set it to the eta of simulation ***
+
+  /*
+  // Test CMSSW loading
+  IterationsInfo ii;
+  unsigned int algorithms[]={ 4,22,23,5,24,7,8,9,10,6 }; // 10 iterations
+  ii.resize(10);
+  for (int i = 0; i < 10; ++i) {
+      ii[i].m_track_algorithm = algorithms[i];
+  }
+  auto xx = configJson_Load_File(ii, "mkfit-phase1-initialStep.json");
+  printf("%d\n", xx->m_iteration_index);
+  */
+  // Run the geometry plugin on Config::TrkInfo / Config::ItrInfo, then handle the JSON dump/load/patch/save options.
+  mkfit::execTrackerInfoCreatorPlugin(Config::geomPlugin, Config::TrkInfo, Config::ItrInfo);
+
+  ConfigJson cj(Config::json_verbose);
+
+  if (Config::json_dump_before)
+    cj.dump(Config::ItrInfo);
+
+  if (!Config::json_load_filenames.empty()) {
+    ConfigJsonPatcher::PatchReport report;
+
+    for (auto& fn : Config::json_load_filenames) {
+      // This is for testing only ... we drop the loaded IterationConfig
+      // as further code will always use IterationsInfo[ iter_index ].
+      cj.patchLoad_File(Config::ItrInfo, fn, &report);
+    }
+
+    printf(
+        "mkFit.cc/%s--JSON-Load read %d JSON entities from %d files, replaced %d parameters.\n"
+        "   NOTE that these changes were NOT APPLIED to actual configuration that is going to be used.\n",
+        __func__,
+        report.n_json_entities,
+        report.n_files,
+        report.n_replacements);
+  }
+
+  if (!Config::json_patch_filenames.empty()) {
+    ConfigJsonPatcher::PatchReport report;
+
+    cj.patch_Files(Config::ItrInfo, Config::json_patch_filenames, &report);
+
+    printf("mkFit.cc/%s--JSON-Patch read %d JSON entities from %d files, replaced %d parameters.\n",
+           __func__,
+           report.n_json_entities,
+           report.n_files,
+           report.n_replacements);
+  }
+
+  if (Config::json_dump_after)
+    cj.dump(Config::ItrInfo);
+
+  if (!Config::json_save_iters_fname_fmt.empty()) {
+    cj.save_Iterations(
+        Config::ItrInfo, Config::json_save_iters_fname_fmt, Config::json_save_iters_include_iter_info_preamble);
+  }
+
+  // Test functions for ConfigJsonPatcher
+  // cj.test_Direct (Config::ItrInfo[0]);
+  // cj.test_Patcher(Config::ItrInfo[0]);
+}
+
+namespace {
+  int g_start_event = 1;
+
+  bool g_run_fit_std = false;
+
+  bool g_run_build_all = true;
+  bool g_run_build_cmssw = false;
+  bool g_run_build_bh = false;
+  bool g_run_build_std = false;
+  bool g_run_build_ce = false;
+  bool g_run_build_mimi = false;
+
+  std::string g_operation = "simulate_and_process";
+  // Input / output file names; both start out as empty strings.
+  std::string g_input_file = "";
+  std::string g_output_file = "";
+
+  seedOptsMap g_seed_opts;
+  void init_seed_opts() {
+    g_seed_opts["sim"] = {simSeeds, "Use simtracks for seeds"};
+    g_seed_opts["cmssw"] = {cmsswSeeds, "Use external CMSSW seeds"};
+    g_seed_opts["find"] = {findSeeds, "Use mplex seed finder for seeds"};
+  }
+
+  cleanOptsMap g_clean_opts;
+  void init_clean_opts() {
+    g_clean_opts["none"] = {noCleaning, "No cleaning applied to external CMSSW seeds"};
+    g_clean_opts["n2"] = {cleanSeedsN2, "Apply N^2 cleaning by Mario to external CMSSW seeds"};
+    g_clean_opts["pure"] = {
+        cleanSeedsPure,
+        "Only use external CMSSW seeds that have produced a CMSSW track \n    must enable: --read-cmssw-tracks"};
+    g_clean_opts["badlabel"] = {cleanSeedsBadLabel, "Remove seeds with label()<0 in external CMSSW seeds"};
+  }
+
+  matchOptsMap g_match_opts;
+  void init_match_opts() {
+    g_match_opts["trkparam"] = {trkParamBased,
+                                "Use track parameter-based matching for validating against CMSSW tracks"};
+    g_match_opts["hits"] = {hitBased, "Use hit-based matching for validating against CMSSW tracks"};
+    g_match_opts["label"] = {labelBased, "Only allowed with pure seeds: stricter hit-based matching"};
+  }
+
+  const char* b2a(bool b) { return b ? "true" : "false"; }
+}  // namespace
+
+//==============================================================================
+
+// Getters and setters of enum configs (from command line using anon. namespace above)
+
+template <typename T, typename U>
+std::string getOpt(const T& c_opt, const U& g_opt_map) {
+  // Reverse lookup: find the key whose mapped value (first member of the pair) equals c_opt.
+  for (auto it = g_opt_map.begin(); it != g_opt_map.end(); ++it) {
+    if (it->second.first == c_opt)
+      return it->first;
+  }
+  // No key maps to c_opt: report it on stderr and return an empty string.
+  std::cerr << "No match for option " << c_opt << std::endl;
+  return std::string("");
+}
+
+template <typename T, typename U>
+void setOpt(const std::string& cmd_ln_str, T& c_opt, const U& g_opt_map, const std::string& ex_txt) {
+  const auto match = g_opt_map.find(cmd_ln_str);
+  if (match == g_opt_map.end()) {  // unknown keyword: report it and terminate
+    std::cerr << cmd_ln_str << " is not a valid " << ex_txt << " option!! Exiting..." << std::endl;
+    exit(1);
+  }
+  c_opt = match->second.first;
+}
+
+template <typename U>
+void listOpts(const U& g_opt_map) {
+  // One output line per map entry: "  <key> : <second member of the mapped pair>".
+  for (auto it = g_opt_map.begin(); it != g_opt_map.end(); ++it)
+    std::cout << "  " << it->first.c_str() << " : " << it->second.second.c_str() << std::endl;
+}
+
+//==============================================================================
+
+void test_standard() {
+  printf("Running test_standard(), operation=\"%s\"\n", g_operation.c_str());
+  printf("  vusize=%d, num_th_sim=%d, num_th_finder=%d\n",
+         MPT_SIZE,
+         Config::numThreadsSimulation,
+         Config::numThreadsFinder);
+  printf(
+      "  sizeof(Track)=%zu, sizeof(Hit)=%zu, sizeof(SVector3)=%zu, sizeof(SMatrixSym33)=%zu, sizeof(MCHitInfo)=%zu\n",
+      sizeof(Track),
+      sizeof(Hit),
+      sizeof(SVector3),
+      sizeof(SMatrixSym33),
+      sizeof(MCHitInfo));
+  // Overview: init geometry, open the input when reading, run the parallel event loop, print timing totals.
+  if (Config::seedInput == cmsswSeeds)
+    printf("- reading seeds from file\n");
+
+  initGeom();
+
+  DataFile data_file;
+  if (g_operation == "read") {
+    int evs_in_file = data_file.openRead(g_input_file, Config::TrkInfo.n_layers());
+    int evs_available = evs_in_file - g_start_event + 1;
+    if (Config::nEvents == -1) {
+      Config::nEvents = evs_available;
+    } else if (Config::nEvents > evs_available and not Config::loopOverFile) {
+      printf("Requested number of events %d, only %d available.\n", Config::nEvents, evs_available);
+      Config::nEvents = evs_available;
+    }
+
+    if (g_start_event > 1) {
+      data_file.skipNEvents(g_start_event - 1);
+    }
+  }
+
+  constexpr int NT = 5;
+  double t_sum[NT] = {0};
+  double t_skip[NT] = {0};
+  std::vector<double> t_sum_iter(Config::nItersCMSSW, 0.0);
+  std::vector<double> t_skip_iter(Config::nItersCMSSW, 0.0);
+  double time = dtime();
+
+  std::atomic<int> nevt{g_start_event};
+  std::atomic<int> seedstot{0}, simtrackstot{0}, candstot{0};
+  std::atomic<int> maxHits_all{0}, maxLayer_all{0};
+
+  MkBuilder::populate();
+  // Per-event-thread working objects, indexed by the event-thread number used in the parallel_for below.
+  std::vector<std::unique_ptr<Event>> evs(Config::numThreadsEvents);
+  std::vector<std::unique_ptr<Validation>> vals(Config::numThreadsEvents);
+  std::vector<std::unique_ptr<MkBuilder>> mkbs(Config::numThreadsEvents);
+  std::vector<std::shared_ptr<EventOfHits>> eohs(Config::numThreadsEvents);
+  std::vector<std::shared_ptr<FILE>> fps;
+  fps.reserve(Config::numThreadsEvents);
+
+  const std::string valfile("valtree");
+
+  for (int i = 0; i < Config::numThreadsEvents; ++i) {
+    std::ostringstream serial;
+    if (Config::numThreadsEvents > 1) {
+      serial << "_" << i;
+    }
+    vals[i].reset(Validation::make_validation(valfile + serial.str() + ".root", &Config::TrkInfo));
+    mkbs[i] = MkBuilder::make_builder(Config::silent);
+    eohs[i].reset(new EventOfHits(Config::TrkInfo));
+    evs[i].reset(new Event(*vals[i], 0, Config::TrkInfo.n_layers()));
+    if (g_operation == "read") {
+      fps.emplace_back(fopen(g_input_file.c_str(), "r"), [](FILE* fp) {
+        if (fp)
+          fclose(fp);
+      });
+    }
+  }
+
+  tbb::task_arena arena(Config::numThreadsFinder);
+
+  dprint("parallel_for step size " << (Config::nEvents + Config::numThreadsEvents - 1) / Config::numThreadsEvents);
+
+  time = dtime();
+  // Ceiling division: events are split into contiguous chunks, one chunk per event thread.
+  int events_per_thread = (Config::nEvents + Config::numThreadsEvents - 1) / Config::numThreadsEvents;
+
+  arena.execute([&]() {
+    tbb::parallel_for(
+        tbb::blocked_range<int>(0, Config::numThreadsEvents, 1),
+        [&](const tbb::blocked_range<int>& threads) {
+          int thisthread = threads.begin();
+
+          assert(threads.begin() == threads.end() - 1 && thisthread < Config::numThreadsEvents);
+
+          // std::vector<Track> plex_tracks;
+          auto& ev = *evs[thisthread].get();
+          auto& mkb = *mkbs[thisthread].get();
+          auto& eoh = *eohs[thisthread].get();
+          FILE* fp = fps.empty() ? nullptr : fps[thisthread].get();  // fps is filled only for "read"
+
+          int evstart = thisthread * events_per_thread;
+          int evend = std::min(Config::nEvents, evstart + events_per_thread);
+
+          dprint("thisthread " << thisthread << " events " << Config::nEvents << " events/thread " << events_per_thread
+                               << " range " << evstart << ":" << evend);
+
+          for (int evt = evstart; evt < evend; ++evt) {
+            ev.reset(nevt++);
+
+            if (!Config::silent) {
+              std::lock_guard<std::mutex> printlock(Event::printmutex);
+              printf("\n");
+              printf("Processing event %d\n", ev.evtID());
+            }
+
+            ev.read_in(data_file, fp);
+
+            // skip events with zero seed tracks!
+            if (ev.seedTracks_.empty())
+              continue;
+
+            // plex_tracks.resize(ev.simTracks_.size());
+
+            StdSeq::loadHitsAndBeamSpot(ev, eoh);
+
+            std::vector<DeadVec> deadvectors(ev.layerHits_.size());
+#include "RecoTracker/MkFitCMS/standalone/deadmodules.h"
+            if (Config::useDeadModules) {
+              StdSeq::loadDeads(eoh, deadvectors);
+            }
+            // t_cur: timings of the current repetition; t_best: per-slot minimum over the repetitions.
+            double t_best[NT] = {0}, t_cur[NT] = {0};
+            std::vector<double> t_cur_iter;
+            simtrackstot += ev.simTracks_.size();
+            seedstot += ev.seedTracks_.size();
+
+            int ncands_thisthread = 0;
+            int maxHits_thisthread = 0;
+            int maxLayer_thisthread = 0;
+            for (int b = 0; b < Config::finderReportBestOutOfN; ++b) {
+              t_cur[0] = 0;  // t_cur[0] = (g_run_fit_std) ? runFittingTestPlex(ev, plex_tracks) : 0;
+              t_cur[1] = (g_run_build_all || g_run_build_bh) ? runBuildingTestPlexBestHit(ev, eoh, mkb) : 0;
+              t_cur[3] = (g_run_build_all || g_run_build_ce) ? runBuildingTestPlexCloneEngine(ev, eoh, mkb) : 0;
+              if (g_run_build_all || g_run_build_mimi)
+                t_cur_iter = runBtpCe_MultiIter(ev, eoh, mkb, Config::nItersCMSSW);
+              t_cur[4] = (g_run_build_all || g_run_build_mimi) ? t_cur_iter[Config::nItersCMSSW] : 0;
+              if (g_run_build_all || g_run_build_cmssw)
+                runBuildingTestPlexDumbCMSSW(ev, eoh, mkb);
+              t_cur[2] = (g_run_build_all || g_run_build_std) ? runBuildingTestPlexStandard(ev, eoh, mkb) : 0;
+              if (g_run_build_ce || g_run_build_mimi) {
+                ncands_thisthread = mkb.total_cands();
+                auto const& ln = mkb.max_hits_layer(eoh);
+                maxHits_thisthread = ln.first;
+                maxLayer_thisthread = ln.second;
+              }
+              for (int i = 0; i < NT; ++i)
+                t_best[i] = (b == 0) ? t_cur[i] : std::min(t_cur[i], t_best[i]);
+
+              if (!Config::silent) {
+                std::lock_guard<std::mutex> printlock(Event::printmutex);
+                if (Config::finderReportBestOutOfN > 1) {
+                  printf("----------------------------------------------------------------\n");
+                  printf("Best-of-times:");
+                  for (int i = 0; i < NT; ++i)
+                    printf("  %.5f/%.5f", t_cur[i], t_best[i]);
+                  printf("\n");
+                }
+                printf("----------------------------------------------------------------\n");
+              }
+            }
+
+            candstot += ncands_thisthread;
+            if (maxHits_thisthread > maxHits_all) {
+              maxHits_all = maxHits_thisthread;
+              maxLayer_all = maxLayer_thisthread;
+            }
+            if (!Config::silent) {
+              std::lock_guard<std::mutex> printlock(Event::printmutex);
+              printf("Matriplex fit = %.5f  --- Build  BHMX = %.5f  STDMX = %.5f  CEMX = %.5f  MIMI = %.5f\n",
+                     t_best[0],
+                     t_best[1],
+                     t_best[2],
+                     t_best[3],
+                     t_best[4]);
+            }
+
+            {
+              static std::mutex sum_up_lock;
+              std::lock_guard<std::mutex> locker(sum_up_lock);
+              // Totals over all events; the "skip" totals leave out evt == 0.
+              for (int i = 0; i < NT; ++i)
+                t_sum[i] += t_best[i];
+              if (evt > 0)
+                for (int i = 0; i < NT; ++i)
+                  t_skip[i] += t_best[i];
+              if (g_run_build_all || g_run_build_mimi) {
+                for (int i = 0; i < Config::nItersCMSSW; ++i)
+                  t_sum_iter[i] += t_cur_iter[i];
+                if (evt > 0)
+                  for (int i = 0; i < Config::nItersCMSSW; ++i)
+                    t_skip_iter[i] += t_cur_iter[i];
+              }
+            }
+          }
+        },
+        tbb::simple_partitioner());
+  });
+
+  time = dtime() - time;
+
+  printf("\n");
+  printf("================================================================\n");
+  printf("=== TOTAL for %d events\n", Config::nEvents);
+  printf("================================================================\n");
+
+  printf("Total Matriplex fit = %.5f  --- Build  BHMX = %.5f  STDMX = %.5f  CEMX = %.5f  MIMI = %.5f\n",
+         t_sum[0],
+         t_sum[1],
+         t_sum[2],
+         t_sum[3],
+         t_sum[4]);
+  printf("Total event > 1 fit = %.5f  --- Build  BHMX = %.5f  STDMX = %.5f  CEMX = %.5f  MIMI = %.5f\n",
+         t_skip[0],
+         t_skip[1],
+         t_skip[2],
+         t_skip[3],
+         t_skip[4]);
+  printf("Total event loop time %.5f simtracks %d seedtracks %d builtcands %d maxhits %d on lay %d\n",
+         time,
+         simtrackstot.load(),
+         seedstot.load(),
+         candstot.load(),
+         maxHits_all.load(),
+         maxLayer_all.load());
+  //fflush(stdout);
+  if (g_run_build_all || g_run_build_mimi) {
+    printf("================================================================\n");
+    for (int i = 0; i < Config::nItersCMSSW; ++i)
+      std::cout << " Iteration " << i << " build time = " << t_sum_iter[i] << " \n";
+    printf("================================================================\n");
+    for (int i = 0; i < Config::nItersCMSSW; ++i)
+      std::cout << " Iteration " << i << " build time (event > 1) = " << t_skip_iter[i] << " \n";
+    printf("================================================================\n");
+  }
+  if (g_operation == "read") {
+    data_file.close();
+  }
+
+  for (auto& val : vals) {
+    val->fillConfigTree();
+    val->saveTTrees();
+  }
+}
+
+//==============================================================================
+// Command line argument parsing
+//==============================================================================
+
+typedef std::list<std::string> lStr_t;  // list of command-line tokens (main() fills it from argv[1..])
+typedef lStr_t::iterator lStr_i;        // cursor into such a token list
+
+bool has_suffix(const std::string& str, const std::string& suffix) {  // true iff str ends with suffix
+  return suffix.size() <= str.size() && str.substr(str.size() - suffix.size()) == suffix;
+}
+
+// Step i to the argument of option *i. A missing token, a token ending in ".C", or a token starting
+// with '-' (a lone "-" is accepted when allow_single_minus is set) is an error: report it and exit(1).
+void next_arg_or_die(lStr_t& args, lStr_i& i, bool allow_single_minus = false) {
+  const lStr_i opt = i;  // keep the option itself for the error message
+  if (++i != args.end() && !has_suffix(*i, ".C") && ((*i)[0] != '-' || (allow_single_minus && *i == "-")))
+    return;  // *i is a usable argument
+  std::cerr << "Error: option " << *opt << " requires an argument.\n", exit(1);
+}
+
+//==============================================================================
+// main -- parse command-line options into Config / g_* settings, run test_standard()
+//==============================================================================
+
+#include <fenv.h>
+
+int main(int argc, const char* argv[]) {
+#ifdef _GNU_SOURCE
+  if (Const::nan_etc_sigs_enable) {
+    feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);  //FE_ALL_EXCEPT);
+  }
+#endif
+
+#ifdef USE_VTUNE_PAUSE
+  __itt_pause();
+#endif
+
+  assert(sizeof(Track::Status) == 4 &&
+         "To make sure this is true for icc and gcc<6 when mixing bools/ints in bitfields.");
+
+  // init enum maps
+  init_seed_opts();
+  init_clean_opts();
+  init_match_opts();
+
+  lStr_t mArgs;
+  for (int i = 1; i < argc; ++i) {
+    mArgs.push_back(argv[i]);
+  }
+
+  lStr_i i = mArgs.begin();
+  while (i != mArgs.end()) {
+    lStr_i start = i;  // first token of the current option; [start, i] is erased at the end of the loop body
+
+    if (*i == "-h" || *i == "-help" || *i == "--help") {
+      printf(
+          "\n"
+          "Usage: %s [options]\n"
+          "Options: defaults defined as (def: DEFAULT VALUE)\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "Generic options\n\n"
+          "  --geom           <str>   geometry plugin to use (def: %s)\n"
+          "  --silent                 suppress printouts inside event loop (def: %s)\n"
+          "  --best-out-of    <int>   run test num times, report best time (def: %d)\n"
+          "  --input-file             file name for reading (def: %s)\n"
+          "  --output-file            file name for writing (def: %s)\n"
+          "  --read-cmssw-tracks      read external cmssw reco tracks if available (def: %s)\n"
+          "  --read-simtrack-states   read in simTrackStates for pulls in validation (def: %s)\n"
+          "  --num-events     <int>   number of events to run over or simulate (def: %d)\n"
+          "                             if using --input-file, must be enabled AFTER on command line\n"
+          "  --start-event    <int>   event number to start at when reading from a file (def: %d)\n"
+          "  --loop-over-file         after reaching the end of the file, start over from the beginning until "
+          "<num-events> events have been processed\n"
+          "\n"
+          "If no --input-file is specified, will trigger simulation\n"
+          "  --num-tracks     <int>   number of tracks to generate for each event (def: %d)\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "Threading related options\n\n"
+          "  --num-thr-sim    <int>   number of threads for simulation (def: %d)\n"
+          "  --num-thr        <int>   number of threads for track finding (def: %d)\n"
+          "  --num-thr-ev     <int>   number of threads to run the event loop (def: %d)\n"
+          "  --seeds-per-task <int>   number of seeds to process in a tbb task (def: %d)\n"
+          "  --hits-per-task  <int>   number of layer1 hits per task when using find seeds (def: %d)\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "FittingTestMPlex options\n\n"
+          "  --fit-std                run standard fitting test (def: %s)\n"
+          "  --fit-std-only           run only standard fitting test (def: %s)\n"
+          "  --cf-fitting             enable conformal fit before fitting tracks to get initial estimate of track "
+          "parameters and errors (def: %s)\n"
+          "  --fit-val                enable ROOT based validation for fittingMPlex  (def: %s)\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "BuildingTestMPlex options\n\n"
+          " **Specify which building routine you would like to run\n"
+          "  --build-cmssw            run dummy validation of CMSSW tracks with MkBuilder stuff (def: %s)\n"
+          "  --build-bh               run best-hit building test (def: %s)\n"
+          "  --build-std              run standard combinatorial building test (def: %s)\n"
+          "  --build-ce               run clone engine combinatorial building test (def: %s)\n"
+          "  --build-mimi             run clone engine on multiple-iteration test (def: %s)\n"
+          "  --num-iters-cmssw <int>  number of mimi iterations to run (def: set to 3 when --build-mimi is in effect, "
+          "0 otherwise)\n"
+          "\n"
+          " **Seeding options\n"
+          "  --seed-input     <str>   which seed collection used for building (def: %s)\n"
+          "  --seed-cleaning  <str>   which seed cleaning to apply if using cmssw seeds (def: %s)\n"
+          "  --cf-seeding             enable conformal fit over seeds (def: %s)\n"
+          "\n"
+          " **Duplicate removal options\n"
+          "  --remove-dup            run duplicate removal after building, using both hit and kinematic criteria (def: "
+          "%s)\n"
+          "  --remove-dup-no-hit     run duplicate removal after building, using kinematic criteria only (def: %s)\n"
+          "\n"
+          " **Dead module (strip) option\n"
+          "  --use-dead-modules       enable the use of dead (strip) module information "
+          "(def: %s)\n"
+          "\n"
+          " **Additional options for building\n"
+          "  --use-phiq-arr           use phi-Q arrays in select hit indices (def: %s)\n"
+          "  --kludge-cms-hit-errors  make sure err(xy) > 15 mum, err(z) > 30 mum (def: %s)\n"
+          "  --backward-fit           perform backward fit during building (def: %s)\n"
+          "  --no-backward-search     do not do backward search after backward fit\n"
+          "                           (def: do search if backward-fit is enabled and available in given iteration)\n"
+          "  --include-pca            do the backward fit to point of closest approach, does not imply "
+          "'--backward-fit' (def: %s)\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "Validation options\n\n"
+          " **Text file based options\n"
+          "  --quality-val            enable printout validation for MkBuilder (def: %s)\n"
+          "                             must enable: --dump-for-plots\n"
+          "  --dump-for-plots         make shell printouts for plots (def: %s)\n"
+          "  --mtv-like-val           configure validation to emulate CMSSW MultiTrackValidator (MTV) (def: %s)\n"
+          "  --mtv-require-seeds      configure validation to emulate MTV but require sim tracks to be matched to "
+          "seeds (def: %s)\n"
+          "\n"
+          " **ROOT based options\n"
+          "  --sim-val-for-cmssw      enable ROOT based validation for CMSSW tracks with simtracks as reference [eff, "
+          "FR, DR] (def: %s)\n"
+          "  --sim-val                enable ROOT based validation for seeding, building, and fitting with simtracks "
+          "as reference [eff, FR, DR] (def: %s)\n"
+          "  --cmssw-val              enable ROOT based validation for building and fitting with CMSSW tracks as "
+          "reference [eff, FR, DR] (def: %s)\n"
+          "                             must enable: --geom CMS-2017 --read-cmssw-tracks\n"
+          "  --cmssw-match-fw  <str>  which cmssw track matching routine to use if validating against CMSSW tracks, "
+          "forward built tracks only (def: %s)\n"
+          "                             must enable: --geom CMS-2017 --cmssw-val --read-cmssw-tracks\n"
+          "  --cmssw-match-bk  <str>  which cmssw track matching routine to use if validating against CMSSW tracks, "
+          "backward fit tracks only (def: %s)\n"
+          "                             must enable: --geom CMS-2017 --cmssw-val --read-cmssw-tracks --backward-fit "
+          "--backward-fit-pca\n"
+          "  --inc-shorts             include short reco tracks into FR (def: %s)\n"
+          "  --keep-hit-info          keep vectors of hit idxs and branches in trees (def: %s)\n"
+          "  --try-to-save-sim-info   two options for this flag [related to validation with simtracks as reference "
+          "collection] (def: %s)\n"
+          "                              a) IF (--read-simtrack-states is enabled)\n"
+          "                                    If a sim track is associated to a reco track, but it does not contain "
+          "the last found on the reco track\n"
+          "                                    still save sim track kinematic info from generator position\n"
+          "                              b) ELSE (--read-simtrack-states is NOT enabled)\n"
+          "                                    Save sim track kinematic info from generator position if matched to "
+          "reco track\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "Combo spaghetti, that's with cole slaw:\n\n"
+          " **Building and fitting combo options\n"
+          "  --backward-fit-pca       perform backward fit to point of closest approach during building\n"
+          "                             == --backward-fit --include-pca\n"
+          " **Seed combo options\n"
+          "  --cmssw-simseeds         use CMS geom with simtracks for seeds\n"
+          "                             == --geom CMS-2017 --seed-input %s\n"
+          "  --cmssw-stdseeds         use CMS geom with CMSSW seeds uncleaned\n"
+          "                             == --geom CMS-2017 --seed-input %s --seed-cleaning %s\n"
+          "  --cmssw-n2seeds          use CMS geom with CMSSW seeds cleaned with N^2 routine\n"
+          "                             == --geom CMS-2017 --seed-input %s --seed-cleaning %s\n"
+          "  --cmssw-pureseeds        use CMS geom with pure CMSSW seeds (seeds which produced CMSSW reco tracks), "
+          "enable read of CMSSW tracks\n"
+          "                             == --geom CMS-2017 --seed-input %s --seed-cleaning %s --read-cmssw-tracks\n"
+          "  --cmssw-goodlabelseeds   use CMS geom with CMSSW seeds with label() >= 0\n"
+          "                             == --geom CMS-2017 --seed-input %s --seed-cleaning %s\n"
+          "\n"
+          " **CMSSW validation combo options\n"
+          "  --cmssw-val-fhit-bhit    use CMSSW validation with hit based matching (50 percent after seed) for forward "
+          "built tracks\n"
+          "                           use CMSSW validation with hit based matching (50 percent after seed) for "
+          "backward fit tracks\n"
+          "                             == --cmssw-val --read-cmssw-tracks --cmssw-match-fw %s --cmssw-match-bk %s\n"
+          "                             must enable: --backward-fit-pca\n"
+          "  --cmssw-val-fhit-bprm    use CMSSW validation with hit based matching (50 percent after seed) for forward "
+          "built tracks\n"
+          "                           use CMSSW validation with track parameter based matching for backward fit "
+          "tracks\n"
+          "                             == --cmssw-val --read-cmssw-tracks --cmssw-match-fw %s --cmssw-match-bk %s\n"
+          "                             must enable: --backward-fit-pca\n"
+          "  --cmssw-val-fprm-bhit    use CMSSW validation with track parameter based matching for forward built "
+          "tracks\n"
+          "                           use CMSSW validation with hit based matching (50 percent after seed) for "
+          "backward fit tracks\n"
+          "                             == --cmssw-val --read-cmssw-tracks --cmssw-match-fw %s --cmssw-match-bk %s\n"
+          "                             must enable: --backward-fit-pca\n"
+          "  --cmssw-val-fprm-bprm    use CMSSW validation with track parameter based matching for forward built "
+          "tracks\n"
+          "                           use CMSSW validation with track parameter based matching for backward fit "
+          "tracks\n"
+          "                             == --cmssw-val --read-cmssw-tracks --cmssw-match-fw %s --cmssw-match-bk %s\n"
+          "                             must enable: --backward-fit-pca\n"
+          "  --cmssw-val-label        use CMSSW validation with stricter hit based matching for both forward built and "
+          "backward fit tracks, enable read of CMSSW tracks\n"
+          "                             == --cmssw-val --read-cmssw-tracks --cmssw-match-fw %s --cmssw-match-bk %s\n"
+          "                             must enable: --cmssw-pureseeds --backward-fit-pca\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n"
+          "JSON config patcher options:\n\n"
+          "  --json-load  <filename>  load single IterationConfig from given JSON file (def: do not load)\n"
+          "                           can be specified multiple times for several files\n"
+          "  --json-patch <filename>  patch IterationsInfo from given JSON file (def: do not patch)\n"
+          "                           can be specified multiple times for several files\n"
+          "  --json-save-iterations <fname-fmt> save per iteration json files\n"
+          "                           %%d in fname-fmt gets replaced with iteration index\n"
+          "                           %%s in fname-fmt gets replaced with iteration algorithm name\n"
+          "                           exactly one of %%d and %%s must be specified\n"
+          "  --json-save-iterations-include-iter-info-preamble (def: %s)\n"
+          "  --json-verbose     print each patch assignment as it is being made (def: %s)\n"
+          "  --json-dump-before print iteration config before patching (def: %s)\n"
+          "  --json-dump-after  print iteration config after  patching (def: %s)\n"
+          "\n----------------------------------------------------------------------------------------------------------"
+          "\n\n",
+          argv[0],
+
+          Config::geomPlugin.c_str(),
+          b2a(Config::silent),
+          Config::finderReportBestOutOfN,
+          g_input_file.c_str(),
+          g_output_file.c_str(),
+          b2a(Config::readCmsswTracks),
+          b2a(Config::readSimTrackStates),
+          Config::nEvents,
+          g_start_event,
+          Config::nTracks,
+
+          Config::numThreadsSimulation,
+          Config::numThreadsFinder,
+          Config::numThreadsEvents,
+          Config::numSeedsPerTask,
+          Config::numHitsPerTask,
+
+          b2a(g_run_fit_std),
+          b2a(g_run_fit_std && !(g_run_build_all || g_run_build_cmssw || g_run_build_bh || g_run_build_std ||
+                                 g_run_build_ce || g_run_build_mimi)),
+          b2a(Config::cf_fitting),
+          b2a(Config::fit_val),
+
+          b2a(g_run_build_all || g_run_build_cmssw),
+          b2a(g_run_build_all || g_run_build_bh),
+          b2a(g_run_build_all || g_run_build_std),
+          b2a(g_run_build_all || g_run_build_ce),
+          b2a(g_run_build_all || g_run_build_mimi),
+
+          getOpt(Config::seedInput, g_seed_opts).c_str(),
+          getOpt(Config::seedCleaning, g_clean_opts).c_str(),
+          b2a(Config::cf_seeding),
+
+          b2a(Config::removeDuplicates && Config::useHitsForDuplicates),
+          b2a(Config::removeDuplicates && !Config::useHitsForDuplicates),
+
+          b2a(Config::useDeadModules),
+
+          b2a(Config::usePhiQArrays),
+          b2a(Config::kludgeCmsHitErrors),
+          b2a(Config::backwardFit),
+          b2a(Config::includePCA),
+
+          b2a(Config::quality_val),
+          b2a(Config::dumpForPlots),
+          b2a(Config::mtvLikeValidation),
+          b2a(Config::mtvRequireSeeds),
+
+          b2a(Config::sim_val_for_cmssw),
+          b2a(Config::sim_val),
+          b2a(Config::cmssw_val),
+          getOpt(Config::cmsswMatchingFW, g_match_opts).c_str(),
+          getOpt(Config::cmsswMatchingBK, g_match_opts).c_str(),
+          b2a(Config::inclusiveShorts),
+          b2a(Config::keepHitInfo),
+          b2a(Config::tryToSaveSimInfo),
+
+          getOpt(simSeeds, g_seed_opts).c_str(),
+          getOpt(cmsswSeeds, g_seed_opts).c_str(),
+          getOpt(noCleaning, g_clean_opts).c_str(),
+          getOpt(cmsswSeeds, g_seed_opts).c_str(),
+          getOpt(cleanSeedsN2, g_clean_opts).c_str(),
+          getOpt(cmsswSeeds, g_seed_opts).c_str(),
+          getOpt(cleanSeedsPure, g_clean_opts).c_str(),
+          getOpt(cmsswSeeds, g_seed_opts).c_str(),
+          getOpt(cleanSeedsBadLabel, g_clean_opts).c_str(),
+
+          getOpt(hitBased, g_match_opts).c_str(),
+          getOpt(hitBased, g_match_opts).c_str(),
+          getOpt(hitBased, g_match_opts).c_str(),
+          getOpt(trkParamBased, g_match_opts).c_str(),
+          getOpt(trkParamBased, g_match_opts).c_str(),
+          getOpt(hitBased, g_match_opts).c_str(),
+          getOpt(trkParamBased, g_match_opts).c_str(),
+          getOpt(trkParamBased, g_match_opts).c_str(),
+          getOpt(labelBased, g_match_opts).c_str(),
+          getOpt(labelBased, g_match_opts).c_str(),
+
+          b2a(Config::json_save_iters_include_iter_info_preamble),  // order must follow the format string above
+          b2a(Config::json_verbose),
+          b2a(Config::json_dump_before),
+          b2a(Config::json_dump_after));
+
+      printf("List of options for string based inputs \n");
+      printf(
+          "--geom \n"
+          "  CMS-2017 \n"
+          "  CylCowWLids \n"
+          "\n");
+
+      printf("--seed-input \n");
+      listOpts(g_seed_opts);
+      printf("\n");
+
+      printf("--seed-cleaning \n");
+      listOpts(g_clean_opts);
+      printf("\n");
+
+      printf("--cmssw-match-fw, --cmssw-match-bk \n");
+      listOpts(g_match_opts);
+      printf("\n");
+
+      exit(0);
+    }  // end of "help" block
+
+    else if (*i == "--geom") {
+      next_arg_or_die(mArgs, i);
+      Config::geomPlugin = *i;
+    } else if (*i == "--silent") {
+      Config::silent = true;
+    } else if (*i == "--best-out-of") {
+      next_arg_or_die(mArgs, i);
+      Config::finderReportBestOutOfN = atoi(i->c_str());
+    } else if (*i == "--input-file") {
+      next_arg_or_die(mArgs, i);
+      g_input_file = *i;
+      g_operation = "read";
+      Config::nEvents = -1;  // reset here, which is why --num-events must come AFTER --input-file (see help)
+    } else if (*i == "--output-file") {
+      next_arg_or_die(mArgs, i);
+      g_output_file = *i;
+      g_operation = "write";
+    } else if (*i == "--read-cmssw-tracks") {
+      Config::readCmsswTracks = true;
+    } else if (*i == "--read-simtrack-states") {
+      Config::readSimTrackStates = true;
+    } else if (*i == "--num-events") {
+      next_arg_or_die(mArgs, i);
+      Config::nEvents = atoi(i->c_str());
+    } else if (*i == "--start-event") {
+      next_arg_or_die(mArgs, i);
+      g_start_event = atoi(i->c_str());
+    } else if (*i == "--loop-over-file") {
+      Config::loopOverFile = true;
+    } else if (*i == "--num-tracks") {
+      next_arg_or_die(mArgs, i);
+      Config::nTracks = atoi(i->c_str());
+    } else if (*i == "--num-thr-sim") {
+      next_arg_or_die(mArgs, i);
+      Config::numThreadsSimulation = atoi(i->c_str());
+    } else if (*i == "--num-thr") {
+      next_arg_or_die(mArgs, i);
+      Config::numThreadsFinder = atoi(i->c_str());
+    } else if (*i == "--num-thr-ev") {
+      next_arg_or_die(mArgs, i);
+      Config::numThreadsEvents = atoi(i->c_str());
+    } else if (*i == "--seeds-per-task") {
+      next_arg_or_die(mArgs, i);
+      Config::numSeedsPerTask = atoi(i->c_str());
+    } else if (*i == "--hits-per-task") {
+      next_arg_or_die(mArgs, i);
+      Config::numHitsPerTask = atoi(i->c_str());
+    } else if (*i == "--fit-std") {
+      g_run_fit_std = true;
+    } else if (*i == "--fit-std-only") {
+      g_run_fit_std = true;
+      // "only" means no building test at all, so switch off every build flag (cmssw and mimi included).
+      g_run_build_all = g_run_build_cmssw = false;
+      g_run_build_bh = g_run_build_std = false;
+      g_run_build_ce = g_run_build_mimi = false;
+    } else if (*i == "--cf-fitting") {
+      Config::cf_fitting = true;
+    } else if (*i == "--fit-val") {
+      Config::fit_val = true;
+    } else if (*i == "--build-cmssw") {
+      g_run_build_all = false;  // NOTE(review): --build-cmssw/bh/std/ce leave g_run_build_mimi untouched -- confirm
+      g_run_build_cmssw = true;
+      g_run_build_bh = false;
+      g_run_build_std = false;
+      g_run_build_ce = false;
+    } else if (*i == "--build-bh") {
+      g_run_build_all = false;
+      g_run_build_cmssw = false;
+      g_run_build_bh = true;
+      g_run_build_std = false;
+      g_run_build_ce = false;
+    } else if (*i == "--build-std") {
+      g_run_build_all = false;
+      g_run_build_cmssw = false;
+      g_run_build_bh = false;
+      g_run_build_std = true;
+      g_run_build_ce = false;
+    } else if (*i == "--build-ce") {
+      g_run_build_all = false;
+      g_run_build_cmssw = false;
+      g_run_build_bh = false;
+      g_run_build_std = false;
+      g_run_build_ce = true;
+    } else if (*i == "--build-mimi") {
+      g_run_build_all = false;
+      g_run_build_cmssw = false;
+      g_run_build_bh = false;
+      g_run_build_std = false;
+      g_run_build_ce = false;
+      g_run_build_mimi = true;
+      if (Config::nItersCMSSW == 0)
+        Config::nItersCMSSW = 3;
+    } else if (*i == "--num-iters-cmssw") {
+      next_arg_or_die(mArgs, i);
+      Config::nItersCMSSW = atoi(i->c_str());
+    } else if (*i == "--seed-input") {
+      next_arg_or_die(mArgs, i);
+      setOpt(*i, Config::seedInput, g_seed_opts, "seed input collection");
+    } else if (*i == "--seed-cleaning") {
+      next_arg_or_die(mArgs, i);
+      setOpt(*i, Config::seedCleaning, g_clean_opts, "seed cleaning");
+    } else if (*i == "--cf-seeding") {
+      Config::cf_seeding = true;
+    } else if (*i == "--use-phiq-arr") {
+#ifdef CONFIG_PhiQArrays
+      Config::usePhiQArrays = true;
+#else
+      printf("--use-phiq-arr has no effect: recompile with CONFIG_PhiQArrays\n");
+#endif
+    } else if (*i == "--remove-dup") {
+      Config::removeDuplicates = true;
+      Config::useHitsForDuplicates = true;
+    } else if (*i == "--remove-dup-no-hit") {
+      Config::removeDuplicates = true;
+      Config::useHitsForDuplicates = false;
+    } else if (*i == "--use-dead-modules") {
+      Config::useDeadModules = true;
+    } else if (*i == "--kludge-cms-hit-errors") {
+      Config::kludgeCmsHitErrors = true;
+    } else if (*i == "--backward-fit") {
+      Config::backwardFit = true;
+    } else if (*i == "--no-backward-search") {
+      Config::backwardSearch = false;
+    } else if (*i == "--include-pca") {
+      Config::includePCA = true;
+    } else if (*i == "--quality-val") {
+      Config::quality_val = true;
+    } else if (*i == "--dump-for-plots") {
+      Config::dumpForPlots = true;
+    } else if (*i == "--mtv-like-val") {
+      Config::mtvLikeValidation = true;
+      Config::cmsSelMinLayers = 0;
+      Config::nMinFoundHits = 0;
+    } else if (*i == "--mtv-require-seeds") {
+      Config::mtvLikeValidation = true;
+      Config::cmsSelMinLayers = 0;
+      Config::nMinFoundHits = 0;
+      Config::mtvRequireSeeds = true;
+    } else if (*i == "--sim-val-for-cmssw") {
+      Config::sim_val_for_cmssw = true;
+    } else if (*i == "--sim-val") {
+      Config::sim_val = true;
+    } else if (*i == "--cmssw-val") {
+      Config::cmssw_val = true;
+    } else if (*i == "--cmssw-match-fw") {
+      next_arg_or_die(mArgs, i);
+      setOpt(*i, Config::cmsswMatchingFW, g_match_opts, "CMSSW validation track matching for forward built tracks");
+    } else if (*i == "--cmssw-match-bk") {
+      next_arg_or_die(mArgs, i);
+      setOpt(*i, Config::cmsswMatchingBK, g_match_opts, "CMSSW validation track matching for backward fit tracks");
+    } else if (*i == "--inc-shorts") {
+      Config::inclusiveShorts = true;
+    } else if (*i == "--keep-hit-info") {
+      Config::keepHitInfo = true;
+    } else if (*i == "--try-to-save-sim-info") {
+      Config::tryToSaveSimInfo = true;
+    } else if (*i == "--backward-fit-pca") {
+      Config::backwardFit = true;
+      Config::includePCA = true;
+    } else if (*i == "--cmssw-simseeds") {
+      Config::geomPlugin = "CMS-2017";
+      Config::seedInput = simSeeds;
+    } else if (*i == "--cmssw-stdseeds") {
+      Config::geomPlugin = "CMS-2017";
+      Config::seedInput = cmsswSeeds;
+      Config::seedCleaning = noCleaning;
+    } else if (*i == "--cmssw-n2seeds") {
+      Config::geomPlugin = "CMS-2017";
+      Config::seedInput = cmsswSeeds;
+      Config::seedCleaning = cleanSeedsN2;
+    } else if (*i == "--cmssw-pureseeds") {
+      Config::geomPlugin = "CMS-2017";
+      Config::seedInput = cmsswSeeds;
+      Config::seedCleaning = cleanSeedsPure;
+      Config::readCmsswTracks = true;
+    } else if (*i == "--cmssw-goodlabelseeds") {
+      Config::geomPlugin = "CMS-2017";
+      Config::seedInput = cmsswSeeds;
+      Config::seedCleaning = cleanSeedsBadLabel;
+    } else if (*i == "--cmssw-val-fhit-bhit") {
+      Config::cmssw_val = true;
+      Config::readCmsswTracks = true;
+      Config::cmsswMatchingFW = hitBased;
+      Config::cmsswMatchingBK = hitBased;
+    } else if (*i == "--cmssw-val-fhit-bprm") {
+      Config::cmssw_val = true;
+      Config::readCmsswTracks = true;
+      Config::cmsswMatchingFW = hitBased;
+      Config::cmsswMatchingBK = trkParamBased;
+    } else if (*i == "--cmssw-val-fprm-bhit") {
+      Config::cmssw_val = true;
+      Config::readCmsswTracks = true;
+      Config::cmsswMatchingFW = trkParamBased;
+      Config::cmsswMatchingBK = hitBased;
+    } else if (*i == "--cmssw-val-fprm-bprm") {
+      Config::cmssw_val = true;
+      Config::readCmsswTracks = true;
+      Config::cmsswMatchingFW = trkParamBased;
+      Config::cmsswMatchingBK = trkParamBased;
+    } else if (*i == "--cmssw-val-label") {
+      Config::cmssw_val = true;
+      Config::readCmsswTracks = true;
+      Config::cmsswMatchingFW = labelBased;
+      Config::cmsswMatchingBK = labelBased;
+    } else if (*i == "--json-load") {
+      next_arg_or_die(mArgs, i);
+      Config::json_load_filenames.push_back(*i);
+    } else if (*i == "--json-patch") {
+      next_arg_or_die(mArgs, i);
+      Config::json_patch_filenames.push_back(*i);
+    } else if (*i == "--json-save-iterations") {
+      next_arg_or_die(mArgs, i);
+      Config::json_save_iters_fname_fmt = *i;
+    } else if (*i == "--json-save-iterations-include-iter-info-preamble") {
+      Config::json_save_iters_include_iter_info_preamble = true;
+    } else if (*i == "--json-verbose") {
+      Config::json_verbose = true;
+    } else if (*i == "--json-dump-before") {
+      Config::json_dump_before = true;
+    } else if (*i == "--json-dump-after") {
+      Config::json_dump_after = true;
+    } else {
+      fprintf(stderr, "Error: Unknown option/argument '%s'.\n", i->c_str());
+      exit(1);
+    }
+
+    mArgs.erase(start, ++i);  // drop the option and any argument it consumed; i now points at the next option
+  }
+
+  // Do some checking of options before going...
+  if (Config::seedCleaning != cleanSeedsPure &&
+      (Config::cmsswMatchingFW == labelBased || Config::cmsswMatchingBK == labelBased)) {
+    std::cerr << "What have you done?!? Can't mix cmssw label matching without pure seeds! Exiting..." << std::endl;
+    exit(1);
+  } else if (Config::mtvLikeValidation && Config::inclusiveShorts) {
+    std::cerr
+        << "What have you done?!? Short reco tracks are already accounted for in the MTV-Like Validation! Inclusive "
+           "shorts is only an option for the standard simval, and will break the MTV-Like simval! Exiting..."
+        << std::endl;
+    exit(1);
+  }
+
+  Config::recalculateDependentConstants();
+
+  printf("Running with n_threads=%d, best_out_of=%d\n", Config::numThreadsFinder, Config::finderReportBestOutOfN);
+
+  test_standard();
+
+  return 0;
+}
diff --git a/RecoTracker/MkFitCMS/standalone/tkNtuple/DictsLinkDef.h b/RecoTracker/MkFitCMS/standalone/tkNtuple/DictsLinkDef.h
new file mode 100644
index 0000000000000..9c68de8b00dc6
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/tkNtuple/DictsLinkDef.h
@@ -0,0 +1,21 @@
+#include "Rtypes.h"
+#include "vector"
+
+#ifdef __CINT__  // the link directives below are only visible when __CINT__ is defined
+#pragma link C++ class vector < vector < int>> + ;
+#pragma link C++ class vector < vector < unsigned int>> + ;
+#pragma link C++ class vector < vector < float>> + ;
+#ifdef G__VECTOR_HAS_CLASS_ITERATOR  // iterator operators for the same three nested-vector types
+#pragma link C++ operators vector < vector < int>> ::iterator;
+#pragma link C++ operators vector < vector < int>> ::const_iterator;
+#pragma link C++ operators vector < vector < int>> ::reverse_iterator;
+
+#pragma link C++ operators vector < vector < unsigned int>> ::iterator;
+#pragma link C++ operators vector < vector < unsigned int>> ::const_iterator;
+#pragma link C++ operators vector < vector < unsigned int>> ::reverse_iterator;
+
+#pragma link C++ operators vector < vector < float>> ::iterator;
+#pragma link C++ operators vector < vector < float>> ::const_iterator;
+#pragma link C++ operators vector < vector < float>> ::reverse_iterator;
+#endif  // G__VECTOR_HAS_CLASS_ITERATOR
+#endif  // __CINT__
diff --git a/RecoTracker/MkFitCMS/standalone/tkNtuple/Makefile b/RecoTracker/MkFitCMS/standalone/tkNtuple/Makefile
new file mode 100644
index 0000000000000..c391026a73c98
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/tkNtuple/Makefile
@@ -0,0 +1,57 @@
+ifndef ROOTSYS
+$(error ROOTSYS is not set. Please set ROOT environment properly)
+endif
+# WITH_ROOT is set before the include below; presumably it is read by ../Makefile.config.
+WITH_ROOT:=1
+
+include ../Makefile.config
+
+.PHONY: all default clean distclean echo
+
+all: default
+
+CPPEXTRA := -I.. ${USER_CPPFLAGS} ${DEFS}
+LDEXTRA  := ${USER_LDFLAGS}
+
+CPPFLAGS := -I${ROOTSYS}/include ${CPPEXTRA} ${CPPFLAGS}
+CXXFLAGS += ${USER_CXXFLAGS}
+LDFLAGS  += -L${ROOTSYS}/lib -L../lib -lCore -lRIO -lTree -lMathCore ${LDEXTRA}
+
+# Executables built by the default target and removed by clean.
+EXES     := writeMemoryFile
+
+default: ${EXES}
+
+clean:
+	rm -f ${EXES} *.d *.o *.om *.so *Dict.* *.pcm
+	rm -rf writeMemoryFile.dSYM
+
+distclean: clean
+	rm -f *.optrpt
+	rm -f ${EXES} *.ah
+
+echo:
+	@echo "CXX      = ${CXX}"
+	@echo "CPPFLAGS = ${CPPFLAGS}"
+	@echo "CXXFLAGS = ${CXXFLAGS}"
+	@echo "LDFLAGS  = ${LDFLAGS}"
+	@echo "EXES     = ${EXES}"
+
+
+################################################################
+
+SRCS := WriteMemoryFile.cc
+
+OBJS := $(SRCS:.cc=.o)
+
+#libDicts.so: DictsDict.o
+#	${CXX} ${CXXFLAGS} ${LDFLAGS} $< -shared -o $@ ${LDFLAGS_HOST}
+# Generate the dictionary source from the linkdef header.
+DictsDict.cc: DictsLinkDef.h
+	rootcint -v3 -f $@ -c -p $<
+# Objects are listed before the libraries so that --as-needed / static linking can resolve their symbols.
+writeMemoryFile: ${OBJS} DictsDict.o
+	${CXX} ${CXXFLAGS} $^ ${LDFLAGS} -L.. -lMicCore -Wl,-rpath=..,-rpath=.,-rpath=../lib -o $@
+
+%.o: %.cc
+	${CXX} ${CPPFLAGS} ${CXXFLAGS} -c -o $@ $<
diff --git a/RecoTracker/MkFitCMS/standalone/tkNtuple/WriteMemoryFile.cc b/RecoTracker/MkFitCMS/standalone/tkNtuple/WriteMemoryFile.cc
new file mode 100644
index 0000000000000..0eacff336ed9d
--- /dev/null
+++ b/RecoTracker/MkFitCMS/standalone/tkNtuple/WriteMemoryFile.cc
@@ -0,0 +1,1257 @@
+#include "TFile.h"
+#include "TTree.h"
+
+#include <iostream>
+#include <list>
+#include <unordered_map>
+#include "RecoTracker/MkFitCore/standalone/Event.h"
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCMS/interface/LayerNumberConverter.h"
+
+using namespace mkfit;
+
+using TrackAlgorithm = TrackBase::TrackAlgorithm;
+// When false, glued seed hits are decomposed into their mono and stereo strip hits (see the seed loop in main).
+constexpr bool useMatched = false;
+// Used only with --clean-sim-tracks: minimum sim_nValid and minimum number of rec hits that have a sim-track match.
+constexpr int cleanSimTrack_minSimHits = 3;
+constexpr int cleanSimTrack_minRecHits = 2;
+
+// Hit type codes as read from the *_hitType ntuple branches. TODO: check if this is the same as in the release.
+enum class HitType { Pixel = 0, Strip = 1, Glued = 2, Invalid = 3, Phase2OT = 4, Unknown = 99 };
+// Command-line arguments held as an editable list, plus its iterator type; used by the option parsing in main.
+typedef std::list<std::string> lStr_t;
+typedef lStr_t::iterator lStr_i;
+void next_arg_or_die(lStr_t& args, lStr_i& i) {  // move i onto the mandatory value of option *i, else exit(1)
+  lStr_i j = i;  // a leading '-' starts the next option unless a digit follows, so "--maxevt -1" is a value
+  if (++j == args.end() || ((*j)[0] == '-' && !((*j)[1] >= '0' && (*j)[1] <= '9'))) {
+    std::cerr << "Error: option " << *i << " requires an argument.\n";
+    exit(1);
+  }
+  i = j;
+}
+
+bool next_arg_option(lStr_t& args, lStr_i& i) {  // step i onto an optional value; false (i untouched) if absent
+  lStr_i candidate = i;
+  ++candidate;
+  const bool hasValue = candidate != args.end() && (*candidate)[0] != '-';
+  if (hasValue)
+    i = candidate;
+  return hasValue;
+}
+
+void printHelp(const char* av0) {  // print the usage text to stdout; av0 is the program name shown in it
+  printf("Usage: %s [options]\n"
+         "Options:\n"
+         "  --input          <str>    input file\n"
+         "  --output         <str>    output file\n"
+         "  --verbosity      <num>    print details (0 quiet, 1 print counts, 2 print all; def: 0)\n"
+         "  --maxevt         <num>    maxevt events to write (-1 for everything in the file def: -1)\n"
+         "  --clean-sim-tracks        apply sim track cleaning (def: no cleaning)\n"
+         "  --write-all-events        write all events (def: skip events with 0 simtracks or seeds)\n"
+         "  --write-rec-tracks        write rec tracks (def: not written)\n"
+         "  --write-hit-iter-masks    write hit iteration masks (def: not written)\n"
+         "  --apply-ccc      [<num>]  apply cluster charge cut to strip hits (def: false; cut value def: 1620)\n"
+         "  --all-seeds               merge all seeds from the input file (def: false)\n",
+         av0);
+}
+
+int main(int argc, char* argv[]) {
+  bool haveInput = false;
+  std::string inputFileName;
+  bool haveOutput = false;
+  std::string outputFileName;
+
+  bool cleanSimTracks = false;
+  bool writeAllEvents = false;
+  bool writeRecTracks = false;
+  bool writeHitIterMasks = false;
+  bool applyCCC = false;
+  bool allSeeds = false;
+
+  int verbosity = 0;
+  long long maxevt = -1;
+
+  int cutValueCCC = 1620;  //Nominal value (from first iteration of CMSSW) is 1620
+
+  lStr_t mArgs;
+  for (int i = 1; i < argc; ++i) {
+    mArgs.push_back(argv[i]);
+  }
+
+  lStr_i i = mArgs.begin();
+  while (i != mArgs.end()) {
+    lStr_i start = i;
+
+    if (*i == "-h" || *i == "-help" || *i == "--help") {
+      printHelp(argv[0]);
+    } else if (*i == "--input") {
+      next_arg_or_die(mArgs, i);
+      inputFileName = *i;
+      haveInput = true;
+    } else if (*i == "--output") {
+      next_arg_or_die(mArgs, i);
+      outputFileName = *i;
+      haveOutput = true;
+    } else if (*i == "--verbosity") {
+      next_arg_or_die(mArgs, i);
+      verbosity = std::atoi(i->c_str());
+    } else if (*i == "--maxevt") {
+      next_arg_or_die(mArgs, i);
+      maxevt = std::atoi(i->c_str());
+    } else if (*i == "--clean-sim-tracks") {
+      cleanSimTracks = true;
+    } else if (*i == "--write-all-events") {
+      writeAllEvents = true;
+    } else if (*i == "--write-rec-tracks") {
+      writeRecTracks = true;
+    } else if (*i == "--write-hit-iter-masks") {
+      writeHitIterMasks = true;
+    } else if (*i == "--apply-ccc") {
+      applyCCC = true;
+      if (next_arg_option(mArgs, i)) {
+        cutValueCCC = std::atoi(i->c_str());
+      }
+    } else if (*i == "--all-seeds") {
+      allSeeds = true;
+    } else {
+      fprintf(stderr, "Error: Unknown option/argument '%s'.\n", i->c_str());
+      printHelp(argv[0]);
+      exit(1);
+    }
+    mArgs.erase(start, ++i);
+  }  //while arguments
+
+  if (not haveOutput or not haveInput) {
+    fprintf(stderr, "Error: both input and output are required\n");
+    printHelp(argv[0]);
+    exit(1);
+  }
+
+  using namespace std;
+
+  LayerNumberConverter lnc(TkLayout::phase1);
+  const unsigned int nTotalLayers = lnc.nLayers();
+
+  vector<unordered_map<unsigned int, unsigned int>> module_shortId_hash(nTotalLayers);
+
+  int nstot = 0;
+  std::vector<int> nhitstot(nTotalLayers, 0);
+
+  TFile* f = TFile::Open(inputFileName.c_str());
+  if (f == 0) {
+    fprintf(stderr, "Failed opening input root file '%s'\n", inputFileName.c_str());
+    exit(1);
+  }
+
+  TTree* t = (TTree*)f->Get("trackingNtuple/tree");
+
+  unsigned long long event;
+  t->SetBranchAddress("event", &event);
+
+  //sim tracks
+  std::vector<float>* sim_eta = 0;
+  std::vector<float>* sim_px = 0;
+  std::vector<float>* sim_py = 0;
+  std::vector<float>* sim_pz = 0;
+  std::vector<int>* sim_parentVtxIdx = 0;
+  std::vector<int>* sim_q = 0;
+  std::vector<int>* sim_event = 0;
+  std::vector<int>* sim_bunchCrossing = 0;
+  std::vector<int>* sim_nValid = 0;  //simHit count, actually
+  t->SetBranchAddress("sim_eta", &sim_eta);
+  t->SetBranchAddress("sim_px", &sim_px);
+  t->SetBranchAddress("sim_py", &sim_py);
+  t->SetBranchAddress("sim_pz", &sim_pz);
+  t->SetBranchAddress("sim_parentVtxIdx", &sim_parentVtxIdx);
+  t->SetBranchAddress("sim_q", &sim_q);
+  t->SetBranchAddress("sim_event", &sim_event);
+  t->SetBranchAddress("sim_bunchCrossing", &sim_bunchCrossing);
+  t->SetBranchAddress("sim_nValid", &sim_nValid);
+
+  std::vector<vector<int>>* sim_trkIdx = 0;
+  t->SetBranchAddress("sim_trkIdx", &sim_trkIdx);
+
+  //simvtx
+  std::vector<float>* simvtx_x = 0;
+  std::vector<float>* simvtx_y = 0;
+  std::vector<float>* simvtx_z = 0;
+  t->SetBranchAddress("simvtx_x", &simvtx_x);
+  t->SetBranchAddress("simvtx_y", &simvtx_y);
+  t->SetBranchAddress("simvtx_z", &simvtx_z);
+
+  //simhit
+  std::vector<short>* simhit_process = 0;
+  std::vector<int>* simhit_particle = 0;
+  std::vector<int>* simhit_simTrkIdx = 0;
+  std::vector<float>* simhit_x = 0;
+  std::vector<float>* simhit_y = 0;
+  std::vector<float>* simhit_z = 0;
+  std::vector<float>* simhit_px = 0;
+  std::vector<float>* simhit_py = 0;
+  std::vector<float>* simhit_pz = 0;
+  t->SetBranchAddress("simhit_process", &simhit_process);
+  t->SetBranchAddress("simhit_particle", &simhit_particle);
+  t->SetBranchAddress("simhit_simTrkIdx", &simhit_simTrkIdx);
+  t->SetBranchAddress("simhit_x", &simhit_x);
+  t->SetBranchAddress("simhit_y", &simhit_y);
+  t->SetBranchAddress("simhit_z", &simhit_z);
+  t->SetBranchAddress("simhit_px", &simhit_px);
+  t->SetBranchAddress("simhit_py", &simhit_py);
+  t->SetBranchAddress("simhit_pz", &simhit_pz);
+
+  std::vector<std::vector<int>>* simhit_hitIdx = 0;
+  t->SetBranchAddress("simhit_hitIdx", &simhit_hitIdx);
+  std::vector<std::vector<int>>* simhit_hitType = 0;
+  t->SetBranchAddress("simhit_hitType", &simhit_hitType);
+
+  //rec tracks
+  std::vector<int>* trk_q = 0;
+  std::vector<unsigned int>* trk_nValid = 0;
+  std::vector<int>* trk_seedIdx = 0;
+  std::vector<unsigned long long>* trk_algoMask = 0;
+  std::vector<unsigned int>* trk_algo = 0;
+  std::vector<unsigned int>* trk_originalAlgo = 0;
+  std::vector<float>* trk_nChi2 = 0;
+  std::vector<float>* trk_px = 0;
+  std::vector<float>* trk_py = 0;
+  std::vector<float>* trk_pz = 0;
+  std::vector<float>* trk_pt = 0;
+  std::vector<float>* trk_phi = 0;
+  std::vector<float>* trk_lambda = 0;
+  std::vector<float>* trk_refpoint_x = 0;
+  std::vector<float>* trk_refpoint_y = 0;
+  std::vector<float>* trk_refpoint_z = 0;
+  std::vector<float>* trk_dxyErr = 0;
+  std::vector<float>* trk_dzErr = 0;
+  std::vector<float>* trk_ptErr = 0;
+  std::vector<float>* trk_phiErr = 0;
+  std::vector<float>* trk_lambdaErr = 0;
+  t->SetBranchAddress("trk_q", &trk_q);
+  t->SetBranchAddress("trk_nValid", &trk_nValid);
+  t->SetBranchAddress("trk_seedIdx", &trk_seedIdx);
+  t->SetBranchAddress("trk_algoMask", &trk_algoMask);
+  t->SetBranchAddress("trk_algo", &trk_algo);
+  t->SetBranchAddress("trk_originalAlgo", &trk_originalAlgo);
+  t->SetBranchAddress("trk_nChi2", &trk_nChi2);
+  t->SetBranchAddress("trk_px", &trk_px);
+  t->SetBranchAddress("trk_py", &trk_py);
+  t->SetBranchAddress("trk_pz", &trk_pz);
+  t->SetBranchAddress("trk_pt", &trk_pt);
+  t->SetBranchAddress("trk_phi", &trk_phi);
+  t->SetBranchAddress("trk_lambda", &trk_lambda);
+  t->SetBranchAddress("trk_refpoint_x", &trk_refpoint_x);
+  t->SetBranchAddress("trk_refpoint_y", &trk_refpoint_y);
+  t->SetBranchAddress("trk_refpoint_z", &trk_refpoint_z);
+  t->SetBranchAddress("trk_dxyErr", &trk_dxyErr);
+  t->SetBranchAddress("trk_dzErr", &trk_dzErr);
+  t->SetBranchAddress("trk_ptErr", &trk_ptErr);
+  t->SetBranchAddress("trk_phiErr", &trk_phiErr);
+  t->SetBranchAddress("trk_lambdaErr", &trk_lambdaErr);
+
+  std::vector<std::vector<int>>* trk_hitIdx = 0;
+  t->SetBranchAddress("trk_hitIdx", &trk_hitIdx);
+  std::vector<std::vector<int>>* trk_hitType = 0;
+  t->SetBranchAddress("trk_hitType", &trk_hitType);
+
+  //seeds
+  std::vector<float>* see_stateTrajGlbX = 0;
+  std::vector<float>* see_stateTrajGlbY = 0;
+  std::vector<float>* see_stateTrajGlbZ = 0;
+  std::vector<float>* see_stateTrajGlbPx = 0;
+  std::vector<float>* see_stateTrajGlbPy = 0;
+  std::vector<float>* see_stateTrajGlbPz = 0;
+  std::vector<float>* see_eta = 0;  //PCA parameters
+  std::vector<float>* see_pt = 0;   //PCA parameters
+  std::vector<float>* see_stateCcov00 = 0;
+  std::vector<float>* see_stateCcov01 = 0;
+  std::vector<float>* see_stateCcov02 = 0;
+  std::vector<float>* see_stateCcov03 = 0;
+  std::vector<float>* see_stateCcov04 = 0;
+  std::vector<float>* see_stateCcov05 = 0;
+  std::vector<float>* see_stateCcov11 = 0;
+  std::vector<float>* see_stateCcov12 = 0;
+  std::vector<float>* see_stateCcov13 = 0;
+  std::vector<float>* see_stateCcov14 = 0;
+  std::vector<float>* see_stateCcov15 = 0;
+  std::vector<float>* see_stateCcov22 = 0;
+  std::vector<float>* see_stateCcov23 = 0;
+  std::vector<float>* see_stateCcov24 = 0;
+  std::vector<float>* see_stateCcov25 = 0;
+  std::vector<float>* see_stateCcov33 = 0;
+  std::vector<float>* see_stateCcov34 = 0;
+  std::vector<float>* see_stateCcov35 = 0;
+  std::vector<float>* see_stateCcov44 = 0;
+  std::vector<float>* see_stateCcov45 = 0;
+  std::vector<float>* see_stateCcov55 = 0;
+  std::vector<std::vector<float>>* see_stateCurvCov = 0;
+  std::vector<int>* see_q = 0;
+  std::vector<unsigned int>* see_algo = 0;
+  t->SetBranchAddress("see_stateTrajGlbX", &see_stateTrajGlbX);
+  t->SetBranchAddress("see_stateTrajGlbY", &see_stateTrajGlbY);
+  t->SetBranchAddress("see_stateTrajGlbZ", &see_stateTrajGlbZ);
+  t->SetBranchAddress("see_stateTrajGlbPx", &see_stateTrajGlbPx);
+  t->SetBranchAddress("see_stateTrajGlbPy", &see_stateTrajGlbPy);
+  t->SetBranchAddress("see_stateTrajGlbPz", &see_stateTrajGlbPz);
+  t->SetBranchAddress("see_eta", &see_eta);
+  t->SetBranchAddress("see_pt", &see_pt);
+
+  bool hasCartCov = t->GetBranch("see_stateCcov00") != nullptr;
+  if (hasCartCov) {
+    t->SetBranchAddress("see_stateCcov00", &see_stateCcov00);
+    t->SetBranchAddress("see_stateCcov01", &see_stateCcov01);
+    t->SetBranchAddress("see_stateCcov02", &see_stateCcov02);
+    t->SetBranchAddress("see_stateCcov03", &see_stateCcov03);
+    t->SetBranchAddress("see_stateCcov04", &see_stateCcov04);
+    t->SetBranchAddress("see_stateCcov05", &see_stateCcov05);
+    t->SetBranchAddress("see_stateCcov11", &see_stateCcov11);
+    t->SetBranchAddress("see_stateCcov12", &see_stateCcov12);
+    t->SetBranchAddress("see_stateCcov13", &see_stateCcov13);
+    t->SetBranchAddress("see_stateCcov14", &see_stateCcov14);
+    t->SetBranchAddress("see_stateCcov15", &see_stateCcov15);
+    t->SetBranchAddress("see_stateCcov22", &see_stateCcov22);
+    t->SetBranchAddress("see_stateCcov23", &see_stateCcov23);
+    t->SetBranchAddress("see_stateCcov24", &see_stateCcov24);
+    t->SetBranchAddress("see_stateCcov25", &see_stateCcov25);
+    t->SetBranchAddress("see_stateCcov33", &see_stateCcov33);
+    t->SetBranchAddress("see_stateCcov34", &see_stateCcov34);
+    t->SetBranchAddress("see_stateCcov35", &see_stateCcov35);
+    t->SetBranchAddress("see_stateCcov44", &see_stateCcov44);
+    t->SetBranchAddress("see_stateCcov45", &see_stateCcov45);
+    t->SetBranchAddress("see_stateCcov55", &see_stateCcov55);
+  } else {
+    t->SetBranchAddress("see_stateCurvCov", &see_stateCurvCov);
+  }
+  t->SetBranchAddress("see_q", &see_q);
+  t->SetBranchAddress("see_algo", &see_algo);
+
+  std::vector<std::vector<int>>* see_hitIdx = 0;
+  t->SetBranchAddress("see_hitIdx", &see_hitIdx);
+  std::vector<std::vector<int>>* see_hitType = 0;
+  t->SetBranchAddress("see_hitType", &see_hitType);
+
+  //pixel hits
+  vector<unsigned short>* pix_det = 0;
+  vector<unsigned short>* pix_lay = 0;
+  vector<unsigned int>* pix_detId = 0;
+  vector<float>* pix_x = 0;
+  vector<float>* pix_y = 0;
+  vector<float>* pix_z = 0;
+  vector<float>* pix_xx = 0;
+  vector<float>* pix_xy = 0;
+  vector<float>* pix_yy = 0;
+  vector<float>* pix_yz = 0;
+  vector<float>* pix_zz = 0;
+  vector<float>* pix_zx = 0;
+  vector<int>* pix_csize_col = 0;
+  vector<int>* pix_csize_row = 0;
+  vector<uint64_t>* pix_usedMask = 0;
+  //these were renamed in CMSSW_9_1_0: auto-detect
+  bool has910_det_lay = t->GetBranch("pix_det") == nullptr;
+  if (has910_det_lay) {
+    t->SetBranchAddress("pix_subdet", &pix_det);
+    t->SetBranchAddress("pix_layer", &pix_lay);
+  } else {
+    t->SetBranchAddress("pix_det", &pix_det);
+    t->SetBranchAddress("pix_lay", &pix_lay);
+  }
+  t->SetBranchAddress("pix_detId", &pix_detId);
+  t->SetBranchAddress("pix_x", &pix_x);
+  t->SetBranchAddress("pix_y", &pix_y);
+  t->SetBranchAddress("pix_z", &pix_z);
+  t->SetBranchAddress("pix_xx", &pix_xx);
+  t->SetBranchAddress("pix_xy", &pix_xy);
+  t->SetBranchAddress("pix_yy", &pix_yy);
+  t->SetBranchAddress("pix_yz", &pix_yz);
+  t->SetBranchAddress("pix_zz", &pix_zz);
+  t->SetBranchAddress("pix_zx", &pix_zx);
+  t->SetBranchAddress("pix_clustSizeCol", &pix_csize_col);
+  t->SetBranchAddress("pix_clustSizeRow", &pix_csize_row);
+  if (writeHitIterMasks) {
+    t->SetBranchAddress("pix_usedMask", &pix_usedMask);
+  }
+
+  vector<vector<int>>* pix_simHitIdx = 0;
+  t->SetBranchAddress("pix_simHitIdx", &pix_simHitIdx);
+  vector<vector<float>>* pix_chargeFraction = 0;
+  t->SetBranchAddress("pix_chargeFraction", &pix_chargeFraction);
+
+  //strip hits
+  vector<short>* glu_isBarrel = 0;
+  vector<unsigned int>* glu_det = 0;
+  vector<unsigned int>* glu_lay = 0;
+  vector<unsigned int>* glu_detId = 0;
+  vector<int>* glu_monoIdx = 0;
+  vector<int>* glu_stereoIdx = 0;
+  vector<float>* glu_x = 0;
+  vector<float>* glu_y = 0;
+  vector<float>* glu_z = 0;
+  vector<float>* glu_xx = 0;
+  vector<float>* glu_xy = 0;
+  vector<float>* glu_yy = 0;
+  vector<float>* glu_yz = 0;
+  vector<float>* glu_zz = 0;
+  vector<float>* glu_zx = 0;
+  t->SetBranchAddress("glu_isBarrel", &glu_isBarrel);
+  if (has910_det_lay) {
+    t->SetBranchAddress("glu_subdet", &glu_det);
+    t->SetBranchAddress("glu_layer", &glu_lay);
+  } else {
+    t->SetBranchAddress("glu_det", &glu_det);
+    t->SetBranchAddress("glu_lay", &glu_lay);
+  }
+  t->SetBranchAddress("glu_detId", &glu_detId);
+  t->SetBranchAddress("glu_monoIdx", &glu_monoIdx);
+  t->SetBranchAddress("glu_stereoIdx", &glu_stereoIdx);
+  t->SetBranchAddress("glu_x", &glu_x);
+  t->SetBranchAddress("glu_y", &glu_y);
+  t->SetBranchAddress("glu_z", &glu_z);
+  t->SetBranchAddress("glu_xx", &glu_xx);
+  t->SetBranchAddress("glu_xy", &glu_xy);
+  t->SetBranchAddress("glu_yy", &glu_yy);
+  t->SetBranchAddress("glu_yz", &glu_yz);
+  t->SetBranchAddress("glu_zz", &glu_zz);
+  t->SetBranchAddress("glu_zx", &glu_zx);
+
+  vector<short>* str_isBarrel = 0;
+  vector<short>* str_isStereo = 0;
+  vector<unsigned int>* str_det = 0;
+  vector<unsigned int>* str_lay = 0;
+  vector<unsigned int>* str_detId = 0;
+  vector<unsigned int>* str_simType = 0;
+  vector<float>* str_x = 0;
+  vector<float>* str_y = 0;
+  vector<float>* str_z = 0;
+  vector<float>* str_xx = 0;
+  vector<float>* str_xy = 0;
+  vector<float>* str_yy = 0;
+  vector<float>* str_yz = 0;
+  vector<float>* str_zz = 0;
+  vector<float>* str_zx = 0;
+  vector<float>* str_chargePerCM = 0;
+  vector<int>* str_csize = 0;
+  vector<uint64_t>* str_usedMask = 0;
+  t->SetBranchAddress("str_isBarrel", &str_isBarrel);
+  t->SetBranchAddress("str_isStereo", &str_isStereo);
+  if (has910_det_lay) {
+    t->SetBranchAddress("str_subdet", &str_det);
+    t->SetBranchAddress("str_layer", &str_lay);
+  } else {
+    t->SetBranchAddress("str_det", &str_det);
+    t->SetBranchAddress("str_lay", &str_lay);
+  }
+  t->SetBranchAddress("str_detId", &str_detId);
+  t->SetBranchAddress("str_simType", &str_simType);
+  t->SetBranchAddress("str_x", &str_x);
+  t->SetBranchAddress("str_y", &str_y);
+  t->SetBranchAddress("str_z", &str_z);
+  t->SetBranchAddress("str_xx", &str_xx);
+  t->SetBranchAddress("str_xy", &str_xy);
+  t->SetBranchAddress("str_yy", &str_yy);
+  t->SetBranchAddress("str_yz", &str_yz);
+  t->SetBranchAddress("str_zz", &str_zz);
+  t->SetBranchAddress("str_zx", &str_zx);
+  t->SetBranchAddress("str_chargePerCM", &str_chargePerCM);
+  t->SetBranchAddress("str_clustSize", &str_csize);
+  if (writeHitIterMasks) {
+    t->SetBranchAddress("str_usedMask", &str_usedMask);
+  }
+
+  vector<vector<int>>* str_simHitIdx = 0;
+  t->SetBranchAddress("str_simHitIdx", &str_simHitIdx);
+  vector<vector<float>>* str_chargeFraction = 0;
+  t->SetBranchAddress("str_chargeFraction", &str_chargeFraction);
+
+  // beam spot
+  float bsp_x;
+  float bsp_y;
+  float bsp_z;
+  float bsp_sigmax;
+  float bsp_sigmay;
+  float bsp_sigmaz;
+  t->SetBranchAddress("bsp_x", &bsp_x);
+  t->SetBranchAddress("bsp_y", &bsp_y);
+  t->SetBranchAddress("bsp_z", &bsp_z);
+  t->SetBranchAddress("bsp_sigmax", &bsp_sigmax);
+  t->SetBranchAddress("bsp_sigmay", &bsp_sigmay);
+  t->SetBranchAddress("bsp_sigmaz", &bsp_sigmaz);
+
+  long long totentries = t->GetEntries();
+  long long savedEvents = 0;
+
+  DataFile data_file;
+  int outOptions = DataFile::ES_Seeds;
+  if (writeRecTracks)
+    outOptions |= DataFile::ES_CmsswTracks;
+  if (writeHitIterMasks)
+    outOptions |= DataFile::ES_HitIterMasks;
+  outOptions |= DataFile::ES_BeamSpot;
+
+  if (maxevt < 0)
+    maxevt = totentries;
+  data_file.openWrite(outputFileName, static_cast<int>(nTotalLayers), std::min(maxevt, totentries), outOptions);
+
+  Event EE(0, static_cast<int>(nTotalLayers));
+
+  int numFailCCC = 0;
+  int numTotalStr = 0;
+  // gDebug = 8;
+
+  for (long long i = 0; savedEvents < maxevt && i < totentries && i < maxevt; ++i) {
+    EE.reset(i);
+
+    cout << "process entry i=" << i << " out of " << totentries << ", saved so far " << savedEvents
+         << ", with max=" << maxevt << endl;
+
+    t->GetEntry(i);
+
+    cout << "edm event=" << event << endl;
+
+    auto& bs = EE.beamSpot_;
+    bs.x = bsp_x;
+    bs.y = bsp_y;
+    bs.z = bsp_z;
+    bs.sigmaZ = bsp_sigmaz;
+    bs.beamWidthX = bsp_sigmax;
+    bs.beamWidthY = bsp_sigmay;
+    //dxdz and dydz are not in the trackingNtuple at the moment
+
+    for (unsigned int istr = 0; istr < str_lay->size(); ++istr) {
+      if (str_chargePerCM->at(istr) < cutValueCCC)
+        numFailCCC++;
+      numTotalStr++;
+    }
+
+    auto nSims = sim_q->size();
+    if (nSims == 0) {
+      cout << "branches not loaded" << endl;
+      exit(1);
+    }
+    if (verbosity > 0)
+      std::cout << __FILE__ << " " << __LINE__ << " nSims " << nSims << " nSeeds " << see_q->size() << " nRecT "
+                << trk_q->size() << std::endl;
+
+    //find best matching tkIdx from a list of simhits indices
+    auto bestTkIdx = [&](std::vector<int> const& shs, std::vector<float> const& shfs, int rhIdx, HitType rhType) {
+      //assume that all simhits are associated
+      int ibest = -1;
+      int shbest = -1;
+      float hpbest = -1;
+      float tpbest = -1;
+      float hfbest = -1;
+
+      float maxfrac = -1;
+      int ish = -1;
+      int nshs = shs.size();
+      for (auto const sh : shs) {
+        ish++;
+        auto tkidx = simhit_simTrkIdx->at(sh);
+        //use only sh with available TP
+        if (tkidx < 0)
+          continue;
+
+        auto hpx = simhit_px->at(sh);
+        auto hpy = simhit_py->at(sh);
+        auto hpz = simhit_pz->at(sh);
+        auto hp = sqrt(hpx * hpx + hpy * hpy + hpz * hpz);
+
+        //look only at hits with p> 50 MeV
+        if (hp < 0.05f)
+          continue;
+
+        auto tpx = sim_px->at(tkidx);
+        auto tpy = sim_py->at(tkidx);
+        auto tpz = sim_pz->at(tkidx);
+        auto tp = sqrt(tpx * tpx + tpy * tpy + tpz * tpz);
+
+        //take only hits with hp> 0.5*tp
+        if (hp < 0.5 * tp)
+          continue;
+
+        //pick tkidx corresponding to max hp/tp; .. this is probably redundant
+        if (maxfrac < hp / tp) {
+          maxfrac = hp / tp;
+          ibest = tkidx;
+          shbest = sh;
+          hpbest = hp;
+          tpbest = tp;
+          hfbest = shfs[ish];
+        }
+      }
+
+      //arbitration: a rechit with one matching sim is matched to sim if it's the first
+      //FIXME: SOME BETTER SELECTION CAN BE DONE (it will require some more correlated knowledge)
+      if (nshs == 1 && ibest >= 0) {
+        auto const& srhIdxV = simhit_hitIdx->at(shbest);
+        auto const& srhTypeV = simhit_hitType->at(shbest);
+        int ih = -1;
+        for (auto itype : srhTypeV) {
+          ih++;
+          if (HitType(itype) == rhType && srhIdxV[ih] != rhIdx) {
+            ibest = -1;
+            break;
+          }
+        }
+      }
+
+      if (ibest >= 0 && false) {
+        std::cout << " best tkIdx " << ibest << " rh " << rhIdx << " for sh " << shbest << " out of " << shs.size()
+                  << " hp " << hpbest << " chF " << hfbest << " tp " << tpbest << " process "
+                  << simhit_process->at(shbest) << " particle " << simhit_particle->at(shbest) << std::endl;
+        if (rhType == HitType::Strip) {
+          std::cout << "    sh " << simhit_x->at(shbest) << ", " << simhit_y->at(shbest) << ", " << simhit_z->at(shbest)
+                    << "  rh " << str_x->at(rhIdx) << ", " << str_y->at(rhIdx) << ", " << str_z->at(rhIdx) << std::endl;
+        }
+      }
+      return ibest;
+    };
+
+    vector<Track>& simTracks_ = EE.simTracks_;
+    vector<int> simTrackIdx_(sim_q->size(), -1);  //keep track of original index in ntuple
+    vector<int> seedSimIdx(see_q->size(), -1);
+    for (unsigned int isim = 0; isim < sim_q->size(); ++isim) {
+      //load sim production vertex data
+      auto iVtx = sim_parentVtxIdx->at(isim);
+      constexpr float largeValF = 9999.f;
+      float sim_prodx = iVtx >= 0 ? simvtx_x->at(iVtx) : largeValF;
+      float sim_prody = iVtx >= 0 ? simvtx_y->at(iVtx) : largeValF;
+      float sim_prodz = iVtx >= 0 ? simvtx_z->at(iVtx) : largeValF;
+      //if (fabs(sim_eta->at(isim))>0.8) continue;
+
+      vector<int> const& trkIdxV = sim_trkIdx->at(isim);
+
+      //if (trkIdx<0) continue;
+      //FIXME: CHECK IF THE LOOP AND BEST SELECTION IS NEEDED.
+      //Pick the first
+      const int trkIdx = trkIdxV.empty() ? -1 : trkIdxV[0];
+
+      int nlay = 0;
+      if (trkIdx >= 0) {
+        std::vector<int> hitlay(nTotalLayers, 0);
+        auto const& hits = trk_hitIdx->at(trkIdx);
+        auto const& hitTypes = trk_hitType->at(trkIdx);
+        auto nHits = hits.size();
+        for (auto ihit = 0U; ihit < nHits; ++ihit) {
+          auto ihIdx = hits[ihit];
+          auto const ihType = HitType(hitTypes[ihit]);
+
+          switch (ihType) {
+            case HitType::Pixel: {
+              int ipix = ihIdx;
+              if (ipix < 0)
+                continue;
+              int cmsswlay =
+                  lnc.convertLayerNumber(pix_det->at(ipix), pix_lay->at(ipix), useMatched, -1, pix_z->at(ipix) > 0);
+              if (cmsswlay >= 0 && cmsswlay < static_cast<int>(nTotalLayers))
+                hitlay[cmsswlay]++;
+              break;
+            }
+            case HitType::Strip: {
+              int istr = ihIdx;
+              if (istr < 0)
+                continue;
+              int cmsswlay = lnc.convertLayerNumber(
+                  str_det->at(istr), str_lay->at(istr), useMatched, str_isStereo->at(istr), str_z->at(istr) > 0);
+              if (cmsswlay >= 0 && cmsswlay < static_cast<int>(nTotalLayers))
+                hitlay[cmsswlay]++;
+              break;
+            }
+            case HitType::Glued: {
+              if (useMatched) {
+                int iglu = ihIdx;
+                if (iglu < 0)
+                  continue;
+                int cmsswlay =
+                    lnc.convertLayerNumber(glu_det->at(iglu), glu_lay->at(iglu), useMatched, -1, glu_z->at(iglu) > 0);
+                if (cmsswlay >= 0 && cmsswlay < static_cast<int>(nTotalLayers))
+                  hitlay[cmsswlay]++;
+              }
+              break;
+            }
+            case HitType::Invalid:
+              break;  //FIXME. Skip, really?
+            default:
+              throw std::logic_error("Track type can not be handled");
+          }  //hit type
+        }    //hits on track
+        for (unsigned int i = 0; i < nTotalLayers; i++)
+          if (hitlay[i] > 0)
+            nlay++;
+      }  //count nlay layers on matching reco track
+
+      //cout << Form("track q=%2i p=(%6.3f, %6.3f, %6.3f) x=(%6.3f, %6.3f, %6.3f) nlay=%i",sim_q->at(isim),sim_px->at(isim),sim_py->at(isim),sim_pz->at(isim),sim_prodx,sim_prody,sim_prodz,nlay) << endl;
+
+      SVector3 pos(sim_prodx, sim_prody, sim_prodz);
+      SVector3 mom(sim_px->at(isim), sim_py->at(isim), sim_pz->at(isim));
+      SMatrixSym66 err;
+      err.At(0, 0) = sim_prodx * sim_prodx;
+      err.At(1, 1) = sim_prody * sim_prody;
+      err.At(2, 2) = sim_prodz * sim_prodz;
+      err.At(3, 3) = sim_px->at(isim) * sim_px->at(isim);
+      err.At(4, 4) = sim_py->at(isim) * sim_py->at(isim);
+      err.At(5, 5) = sim_pz->at(isim) * sim_pz->at(isim);
+      TrackState state(sim_q->at(isim), pos, mom, err);
+      state.convertFromCartesianToCCS();
+      //create track: store number of reco hits in place of track chi2; fill hits later
+      //              set label to be its own index in the output file
+      Track track(state, float(nlay), simTracks_.size(), 0, nullptr);
+      if (sim_bunchCrossing->at(isim) == 0) {  //in time
+        if (sim_event->at(isim) == 0)
+          track.setProdType(Track::ProdType::Signal);
+        else
+          track.setProdType(Track::ProdType::InTimePU);
+      } else {
+        track.setProdType(Track::ProdType::OutOfTimePU);
+      }
+      if (trkIdx >= 0) {
+        int seedIdx = trk_seedIdx->at(trkIdx);
+        // Unused: auto const& shTypes = see_hitType->at(seedIdx);
+        seedSimIdx[seedIdx] = simTracks_.size();
+      }
+      if (cleanSimTracks) {
+        if (sim_nValid->at(isim) < cleanSimTrack_minSimHits)
+          continue;
+        if (cleanSimTrack_minRecHits > 0) {
+          int nRecToSimHit = 0;
+          for (unsigned int ipix = 0; ipix < pix_lay->size() && nRecToSimHit < cleanSimTrack_minRecHits; ++ipix) {
+            int ilay = -1;
+            ilay = lnc.convertLayerNumber(pix_det->at(ipix), pix_lay->at(ipix), useMatched, -1, pix_z->at(ipix) > 0);
+            if (ilay < 0)
+              continue;
+            int simTkIdxNt = bestTkIdx(pix_simHitIdx->at(ipix), pix_chargeFraction->at(ipix), ipix, HitType::Pixel);
+            if (simTkIdxNt >= 0)
+              nRecToSimHit++;
+          }
+          if (useMatched) {
+            for (unsigned int iglu = 0; iglu < glu_lay->size() && nRecToSimHit < cleanSimTrack_minRecHits; ++iglu) {
+              if (glu_isBarrel->at(iglu) == 0)
+                continue;
+              int igluMono = glu_monoIdx->at(iglu);
+              int simTkIdxNt =
+                  bestTkIdx(str_simHitIdx->at(igluMono), str_chargeFraction->at(igluMono), igluMono, HitType::Strip);
+              if (simTkIdxNt >= 0)
+                nRecToSimHit++;
+            }
+          }
+          for (unsigned int istr = 0; istr < str_lay->size() && nRecToSimHit < cleanSimTrack_minRecHits; ++istr) {
+            int ilay = -1;
+            ilay = lnc.convertLayerNumber(
+                str_det->at(istr), str_lay->at(istr), useMatched, str_isStereo->at(istr), str_z->at(istr) > 0);
+            if (useMatched && str_isBarrel->at(istr) == 1 && str_isStereo->at(istr))
+              continue;
+            if (ilay == -1)
+              continue;
+            int simTkIdxNt = bestTkIdx(str_simHitIdx->at(istr), str_chargeFraction->at(istr), istr, HitType::Strip);
+            if (simTkIdxNt >= 0)
+              nRecToSimHit++;
+          }
+          if (nRecToSimHit < cleanSimTrack_minRecHits)
+            continue;
+        }  //count rec-to-sim hits
+      }    //cleanSimTracks
+
+      simTrackIdx_[isim] = simTracks_.size();
+      simTracks_.push_back(track);
+    }
+
+    if (simTracks_.empty() and not writeAllEvents)
+      continue;
+
+    vector<Track>& seedTracks_ = EE.seedTracks_;
+    vector<vector<int>> pixHitSeedIdx(pix_lay->size());
+    vector<vector<int>> strHitSeedIdx(str_lay->size());
+    vector<vector<int>> gluHitSeedIdx(glu_lay->size());
+    for (unsigned int is = 0; is < see_q->size(); ++is) {
+      auto isAlgo = TrackAlgorithm(see_algo->at(is));
+      if (not allSeeds)
+        if (isAlgo != TrackAlgorithm::initialStep && isAlgo != TrackAlgorithm::hltIter0)
+          continue;  //select seed in acceptance
+      //if (see_pt->at(is)<0.5 || fabs(see_eta->at(is))>0.8) continue;//select seed in acceptance
+      SVector3 pos = SVector3(see_stateTrajGlbX->at(is), see_stateTrajGlbY->at(is), see_stateTrajGlbZ->at(is));
+      SVector3 mom = SVector3(see_stateTrajGlbPx->at(is), see_stateTrajGlbPy->at(is), see_stateTrajGlbPz->at(is));
+      SMatrixSym66 err;
+      if (hasCartCov) {
+        err.At(0, 0) = see_stateCcov00->at(is);
+        err.At(0, 1) = see_stateCcov01->at(is);
+        err.At(0, 2) = see_stateCcov02->at(is);
+        err.At(0, 3) = see_stateCcov03->at(is);
+        err.At(0, 4) = see_stateCcov04->at(is);
+        err.At(0, 5) = see_stateCcov05->at(is);
+        err.At(1, 1) = see_stateCcov11->at(is);
+        err.At(1, 2) = see_stateCcov12->at(is);
+        err.At(1, 3) = see_stateCcov13->at(is);
+        err.At(1, 4) = see_stateCcov14->at(is);
+        err.At(1, 5) = see_stateCcov15->at(is);
+        err.At(2, 2) = see_stateCcov22->at(is);
+        err.At(2, 3) = see_stateCcov23->at(is);
+        err.At(2, 4) = see_stateCcov24->at(is);
+        err.At(2, 5) = see_stateCcov25->at(is);
+        err.At(3, 3) = see_stateCcov33->at(is);
+        err.At(3, 4) = see_stateCcov34->at(is);
+        err.At(3, 5) = see_stateCcov35->at(is);
+        err.At(4, 4) = see_stateCcov44->at(is);
+        err.At(4, 5) = see_stateCcov45->at(is);
+        err.At(5, 5) = see_stateCcov55->at(is);
+      } else {
+        auto const& vCov = see_stateCurvCov->at(is);
+        assert(vCov.size() == 15);
+        auto vCovP = vCov.begin();
+        for (int i = 0; i < 5; ++i)
+          for (int j = 0; j <= i; ++j)
+            err.At(i, j) = *(vCovP++);
+      }
+      TrackState state(see_q->at(is), pos, mom, err);
+      if (hasCartCov)
+        state.convertFromCartesianToCCS();
+      else
+        state.convertFromGlbCurvilinearToCCS();
+      Track track(state, 0, seedSimIdx[is], 0, nullptr);
+      track.setAlgorithm(isAlgo);
+      auto const& shTypes = see_hitType->at(is);
+      auto const& shIdxs = see_hitIdx->at(is);
+      if (not allSeeds)
+        if (!((isAlgo == TrackAlgorithm::initialStep || isAlgo == TrackAlgorithm::hltIter0) &&
+              std::count(shTypes.begin(), shTypes.end(), int(HitType::Pixel)) >= 3))
+          continue;  //check algo and nhits
+      for (unsigned int ip = 0; ip < shTypes.size(); ip++) {
+        unsigned int hidx = shIdxs[ip];
+        switch (HitType(shTypes[ip])) {
+          case HitType::Pixel: {
+            pixHitSeedIdx[hidx].push_back(seedTracks_.size());
+            break;
+          }
+          case HitType::Strip: {
+            strHitSeedIdx[hidx].push_back(seedTracks_.size());
+            break;
+          }
+          case HitType::Glued: {
+            if (not useMatched) {
+              //decompose
+              int uidx = glu_monoIdx->at(hidx);
+              strHitSeedIdx[uidx].push_back(seedTracks_.size());
+              uidx = glu_stereoIdx->at(hidx);
+              strHitSeedIdx[uidx].push_back(seedTracks_.size());
+            } else {
+              gluHitSeedIdx[hidx].push_back(seedTracks_.size());
+            }
+            break;
+          }
+          case HitType::Invalid:
+            break;  //FIXME. Skip, really?
+          default:
+            throw std::logic_error("Track hit type can not be handled");
+        }  //switch( HitType
+      }
+      seedTracks_.push_back(track);
+    }
+
+    if (seedTracks_.empty() and not writeAllEvents)
+      continue;
+
+    vector<Track>& cmsswTracks_ = EE.cmsswTracks_;
+    vector<vector<int>> pixHitRecIdx(pix_lay->size());
+    vector<vector<int>> strHitRecIdx(str_lay->size());
+    vector<vector<int>> gluHitRecIdx(glu_lay->size());
+    for (unsigned int ir = 0; ir < trk_q->size(); ++ir) {
+      //check the origin; redundant for initialStep ntuples
+      if (not allSeeds)
+        if ((trk_algoMask->at(ir) & ((1 << int(TrackAlgorithm::initialStep)) | (1 << int(TrackAlgorithm::hltIter0)))) ==
+            0) {
+          if (verbosity > 1) {
+            std::cout << "track " << ir << " failed algo selection for " << int(TrackAlgorithm::initialStep)
+                      << ": mask " << trk_algoMask->at(ir) << " origAlgo " << trk_originalAlgo->at(ir) << " algo "
+                      << trk_algo->at(ir) << std::endl;
+          }
+          continue;
+        }
+      //fill the state in CCS upfront
+      SMatrixSym66 err;
+      /*	
+	vx = -dxy*sin(phi) - pt*cos(phi)/p*pz/p*dz;
+	vy =  dxy*cos(phi) - pt*sin(phi)/p*pz/p*dz;
+	vz = dz*pt*pt/p/p;
+	//partial: ignores cross-terms
+	c(vx,vx) = c(dxy,dxy)*sin(phi)*sin(phi) + c(dz,dz)*pow(pt*cos(phi)/p*pz/p ,2);
+	c(vx,vy) = -c(dxy,dxy)*cos(phi)*sin(phi) + c(dz,dz)*cos(phi)*sin(phi)*pow(pt/p*pz/p, 2);
+	c(vy,vy) = c(dxy,dxy)*cos(phi)*cos(phi) + c(dz,dz)*pow(pt*sin(phi)/p*pz/p ,2);
+	c(vx,vz) = -c(dz,dz)*pt*pt/p/p*pt/p*pz/p*cos(phi);
+	c(vy,vz) = -c(dz,dz)*pt*pt/p/p*pt/p*pz/p*sin(phi);
+	c(vz,vz) = c(dz,dz)*pow(pt*pt/p/p, 2);
+      */
+      float pt = trk_pt->at(ir);
+      float pz = trk_pz->at(ir);
+      float p2 = pt * pt + pz * pz;
+      float phi = trk_phi->at(ir);
+      float sP = sin(phi);
+      float cP = cos(phi);
+      float dxyErr2 = trk_dxyErr->at(ir);
+      dxyErr2 *= dxyErr2;
+      float dzErr2 = trk_dzErr->at(ir);
+      dzErr2 *= dzErr2;
+      float dzErrF2 = trk_dzErr->at(ir) * (pt * pz / p2);
+      dzErr2 *= dzErr2;
+      err.At(0, 0) = dxyErr2 * sP * sP + dzErrF2 * cP * cP;
+      err.At(0, 1) = -dxyErr2 * cP * sP + dzErrF2 * cP * sP;
+      err.At(1, 1) = dxyErr2 * cP * cP + dzErrF2 * sP * sP;
+      err.At(0, 2) = -dzErrF2 * cP * pt / pz;
+      err.At(1, 2) = -dzErrF2 * sP * pt / pz;
+      err.At(2, 2) = dzErr2 * std::pow((pt * pt / p2), 2);
+      err.At(3, 3) = std::pow(trk_ptErr->at(ir) / pt / pt, 2);
+      err.At(4, 4) = std::pow(trk_phiErr->at(ir), 2);
+      err.At(5, 5) = std::pow(trk_lambdaErr->at(ir), 2);
+      SVector3 pos = SVector3(trk_refpoint_x->at(ir), trk_refpoint_y->at(ir), trk_refpoint_z->at(ir));
+      SVector3 mom = SVector3(1.f / pt, phi, M_PI_2 - trk_lambda->at(ir));
+      TrackState state(trk_q->at(ir), pos, mom, err);
+      Track track(state, trk_nChi2->at(ir), trk_seedIdx->at(ir), 0, nullptr);  //hits are filled later
+      track.setAlgorithm(TrackAlgorithm(trk_originalAlgo->at(ir)));
+      auto const& hTypes = trk_hitType->at(ir);
+      auto const& hIdxs = trk_hitIdx->at(ir);
+      for (unsigned int ip = 0; ip < hTypes.size(); ip++) {
+        unsigned int hidx = hIdxs[ip];
+        switch (HitType(hTypes[ip])) {
+          case HitType::Pixel: {
+            //cout << "pix=" << hidx << " track=" << cmsswTracks_.size() << endl;
+            pixHitRecIdx[hidx].push_back(cmsswTracks_.size());
+            break;
+          }
+          case HitType::Strip: {
+            //cout << "pix=" << hidx << " track=" << cmsswTracks_.size() << endl;
+            strHitRecIdx[hidx].push_back(cmsswTracks_.size());
+            break;
+          }
+          case HitType::Glued: {
+            if (not useMatched)
+              throw std::logic_error("Tracks have glued hits, but matchedHit load is not configured");
+            //cout << "pix=" << hidx << " track=" << cmsswTracks_.size() << endl;
+            gluHitRecIdx[hidx].push_back(cmsswTracks_.size());
+            break;
+          }
+          case HitType::Invalid:
+            break;  //FIXME. Skip, really?
+          default:
+            throw std::logic_error("Track hit type can not be handled");
+        }  //switch( HitType
+      }
+      cmsswTracks_.push_back(track);
+    }
+
+    vector<vector<Hit>>& layerHits_ = EE.layerHits_;
+    vector<vector<uint64_t>>& layerHitMasks_ = EE.layerHitMasks_;
+    vector<MCHitInfo>& simHitsInfo_ = EE.simHitsInfo_;
+    int totHits = 0;
+    layerHits_.resize(nTotalLayers);
+    layerHitMasks_.resize(nTotalLayers);
+    for (unsigned int ipix = 0; ipix < pix_lay->size(); ++ipix) {
+      int ilay = -1;
+      ilay = lnc.convertLayerNumber(pix_det->at(ipix), pix_lay->at(ipix), useMatched, -1, pix_z->at(ipix) > 0);
+      if (ilay < 0)
+        continue;
+
+      unsigned int imoduleid;
+      {
+        auto ii =
+            module_shortId_hash[ilay].emplace(pix_detId->at(ipix), (unsigned int)module_shortId_hash[ilay].size());
+        imoduleid = ii.first->second;
+      }
+
+      int simTkIdxNt = bestTkIdx(pix_simHitIdx->at(ipix), pix_chargeFraction->at(ipix), ipix, HitType::Pixel);
+      int simTkIdx = simTkIdxNt >= 0 ? simTrackIdx_[simTkIdxNt] : -1;  //switch to index in simTracks_
+
+      //cout << Form("pix lay=%i det=%i x=(%6.3f, %6.3f, %6.3f)",ilay+1,pix_det->at(ipix),pix_x->at(ipix),pix_y->at(ipix),pix_z->at(ipix)) << endl;
+      SVector3 pos(pix_x->at(ipix), pix_y->at(ipix), pix_z->at(ipix));
+      SMatrixSym33 err;
+      err.At(0, 0) = pix_xx->at(ipix);
+      err.At(1, 1) = pix_yy->at(ipix);
+      err.At(2, 2) = pix_zz->at(ipix);
+      err.At(0, 1) = pix_xy->at(ipix);
+      err.At(0, 2) = pix_zx->at(ipix);
+      err.At(1, 2) = pix_yz->at(ipix);
+      if (simTkIdx >= 0) {
+        simTracks_[simTkIdx].addHitIdx(layerHits_[ilay].size(), ilay, 0);
+      }
+      for (unsigned int is = 0; is < pixHitSeedIdx[ipix].size(); is++) {
+        //cout << "xxx ipix=" << ipix << " seed=" << pixHitSeedIdx[ipix][is] << endl;
+        seedTracks_[pixHitSeedIdx[ipix][is]].addHitIdx(layerHits_[ilay].size(), ilay, 0);  //per-hit chi2 is not known
+      }
+      for (unsigned int ir = 0; ir < pixHitRecIdx[ipix].size(); ir++) {
+        //cout << "xxx ipix=" << ipix << " recTrack=" << pixHitRecIdx[ipix][ir] << endl;
+        cmsswTracks_[pixHitRecIdx[ipix][ir]].addHitIdx(layerHits_[ilay].size(), ilay, 0);  //per-hit chi2 is not known
+      }
+      Hit hit(pos, err, totHits);
+      hit.setupAsPixel(imoduleid, pix_csize_row->at(ipix), pix_csize_col->at(ipix));
+      layerHits_[ilay].push_back(hit);
+      if (writeHitIterMasks)
+        layerHitMasks_[ilay].push_back(pix_usedMask->at(ipix));
+      MCHitInfo hitInfo(simTkIdx, ilay, layerHits_[ilay].size() - 1, totHits);
+      simHitsInfo_.push_back(hitInfo);
+      totHits++;
+    }
+
+    if (useMatched) {
+      for (unsigned int iglu = 0; iglu < glu_lay->size(); ++iglu) {
+        if (glu_isBarrel->at(iglu) == 0)
+          continue;
+        int igluMono = glu_monoIdx->at(iglu);
+        int simTkIdxNt =
+            bestTkIdx(str_simHitIdx->at(igluMono), str_chargeFraction->at(igluMono), igluMono, HitType::Strip);
+        int simTkIdx = simTkIdxNt >= 0 ? simTrackIdx_[simTkIdxNt] : -1;  //switch to index in simTracks_
+
+        int ilay = lnc.convertLayerNumber(glu_det->at(iglu), glu_lay->at(iglu), useMatched, -1, glu_z->at(iglu) > 0);
+        // cout << Form("glu lay=%i det=%i bar=%i x=(%6.3f, %6.3f, %6.3f)",ilay+1,glu_det->at(iglu),glu_isBarrel->at(iglu),glu_x->at(iglu),glu_y->at(iglu),glu_z->at(iglu)) << endl;
+        SVector3 pos(glu_x->at(iglu), glu_y->at(iglu), glu_z->at(iglu));
+        SMatrixSym33 err;
+        err.At(0, 0) = glu_xx->at(iglu);
+        err.At(1, 1) = glu_yy->at(iglu);
+        err.At(2, 2) = glu_zz->at(iglu);
+        err.At(0, 1) = glu_xy->at(iglu);
+        err.At(0, 2) = glu_zx->at(iglu);
+        err.At(1, 2) = glu_yz->at(iglu);
+        if (simTkIdx >= 0) {
+          simTracks_[simTkIdx].addHitIdx(layerHits_[ilay].size(), ilay, 0);
+        }
+        for (unsigned int ir = 0; ir < gluHitSeedIdx[iglu].size(); ir++) {
+          //cout << "xxx iglu=" << iglu << " seed=" << gluHitSeedIdx[iglu][ir] << endl;
+          seedTracks_[gluHitSeedIdx[iglu][ir]].addHitIdx(layerHits_[ilay].size(), ilay, 0);  //per-hit chi2 is not known
+        }
+        for (unsigned int ir = 0; ir < gluHitRecIdx[iglu].size(); ir++) {
+          //cout << "xxx iglu=" << iglu << " recTrack=" << gluHitRecIdx[iglu][ir] << endl;
+          cmsswTracks_[gluHitRecIdx[iglu][ir]].addHitIdx(layerHits_[ilay].size(), ilay, 0);  //per-hit chi2 is not known
+        }
+
+        // QQQQ module-id-in-layer, adc and phi/theta spans are not done for matched hits.
+        // Will we ever use / need this?
+        assert(false && "Implement module-ids, cluster adc and spans for matched hits!");
+
+        Hit hit(pos, err, totHits);
+        layerHits_[ilay].push_back(hit);
+        MCHitInfo hitInfo(simTkIdx, ilay, layerHits_[ilay].size() - 1, totHits);
+        simHitsInfo_.push_back(hitInfo);
+        totHits++;
+      }
+    }
+
+    vector<int> strIdx;
+    strIdx.resize(str_lay->size());
+    for (unsigned int istr = 0; istr < str_lay->size(); ++istr) {
+      int ilay = -1;
+      ilay = lnc.convertLayerNumber(
+          str_det->at(istr), str_lay->at(istr), useMatched, str_isStereo->at(istr), str_z->at(istr) > 0);
+      if (useMatched && str_isBarrel->at(istr) == 1 && str_isStereo->at(istr))
+        continue;
+      if (ilay == -1)
+        continue;
+
+      unsigned int imoduleid;
+      {
+        auto ii =
+            module_shortId_hash[ilay].emplace(str_detId->at(istr), (unsigned int)module_shortId_hash[ilay].size());
+        imoduleid = ii.first->second;
+      }
+
+      int simTkIdxNt = bestTkIdx(str_simHitIdx->at(istr), str_chargeFraction->at(istr), istr, HitType::Strip);
+      int simTkIdx = simTkIdxNt >= 0 ? simTrackIdx_[simTkIdxNt] : -1;  //switch to index in simTracks_
+
+      bool passCCC = applyCCC ? (str_chargePerCM->at(istr) > cutValueCCC) : true;
+
+      //if (str_onTrack->at(istr)==0) continue;//do not consider hits that are not on track!
+      SVector3 pos(str_x->at(istr), str_y->at(istr), str_z->at(istr));
+      SMatrixSym33 err;
+      err.At(0, 0) = str_xx->at(istr);
+      err.At(1, 1) = str_yy->at(istr);
+      err.At(2, 2) = str_zz->at(istr);
+      err.At(0, 1) = str_xy->at(istr);
+      err.At(0, 2) = str_zx->at(istr);
+      err.At(1, 2) = str_yz->at(istr);
+      if (simTkIdx >= 0) {
+        if (passCCC)
+          simTracks_[simTkIdx].addHitIdx(layerHits_[ilay].size(), ilay, 0);
+        else
+          simTracks_[simTkIdx].addHitIdx(-9, ilay, 0);
+      }
+      for (unsigned int ir = 0; ir < strHitSeedIdx[istr].size(); ir++) {
+        //cout << "xxx istr=" << istr << " seed=" << strHitSeedIdx[istr][ir] << endl;
+        if (passCCC)
+          seedTracks_[strHitSeedIdx[istr][ir]].addHitIdx(layerHits_[ilay].size(), ilay, 0);  //per-hit chi2 is not known
+        else
+          seedTracks_[strHitSeedIdx[istr][ir]].addHitIdx(-9, ilay, 0);
+      }
+      for (unsigned int ir = 0; ir < strHitRecIdx[istr].size(); ir++) {
+        //cout << "xxx istr=" << istr << " recTrack=" << strHitRecIdx[istr][ir] << endl;
+        if (passCCC)
+          cmsswTracks_[strHitRecIdx[istr][ir]].addHitIdx(layerHits_[ilay].size(), ilay, 0);  //per-hit chi2 is not known
+        else
+          cmsswTracks_[strHitRecIdx[istr][ir]].addHitIdx(-9, ilay, 0);
+      }
+      if (passCCC) {
+        Hit hit(pos, err, totHits);
+        hit.setupAsStrip(imoduleid, str_chargePerCM->at(istr), str_csize->at(istr));
+        layerHits_[ilay].push_back(hit);
+        if (writeHitIterMasks)
+          layerHitMasks_[ilay].push_back(str_usedMask->at(istr));
+        MCHitInfo hitInfo(simTkIdx, ilay, layerHits_[ilay].size() - 1, totHits);
+        simHitsInfo_.push_back(hitInfo);
+        totHits++;
+      }
+    }
+
+    // Seed % hit statistics
+    nstot += seedTracks_.size();
+    for (unsigned int il = 0; il < layerHits_.size(); ++il) {
+      int nh = layerHits_[il].size();
+      nhitstot[il] += nh;
+    }
+
+    if (verbosity > 0) {
+      int nt = simTracks_.size();
+
+      int nl = layerHits_.size();
+
+      int nm = simHitsInfo_.size();
+
+      int ns = seedTracks_.size();
+
+      int nr = cmsswTracks_.size();
+
+      printf("number of simTracks %i\n", nt);
+      printf("number of layerHits %i\n", nl);
+      printf("number of simHitsInfo %i\n", nm);
+      printf("number of seedTracks %i\n", ns);
+      printf("number of recTracks %i\n", nr);
+
+      if (verbosity > 1) {
+        printf("\n");
+        for (int il = 0; il < nl; ++il) {
+          int nh = layerHits_[il].size();
+          for (int ih = 0; ih < nh; ++ih) {
+            printf("lay=%i idx=%i mcid=%i x=(%6.3f, %6.3f, %6.3f) r=%6.3f mask=0x%lx\n",
+                   il + 1,
+                   ih,
+                   layerHits_[il][ih].mcHitID(),
+                   layerHits_[il][ih].x(),
+                   layerHits_[il][ih].y(),
+                   layerHits_[il][ih].z(),
+                   sqrt(pow(layerHits_[il][ih].x(), 2) + pow(layerHits_[il][ih].y(), 2)),
+                   writeHitIterMasks ? layerHitMasks_[il][ih] : 0);
+          }
+        }
+
+        for (int i = 0; i < nt; ++i) {
+          float spt = sqrt(pow(simTracks_[i].px(), 2) + pow(simTracks_[i].py(), 2));
+          printf(
+              "sim track id=%i q=%2i p=(%6.3f, %6.3f, %6.3f) x=(%6.3f, %6.3f, %6.3f) pT=%7.4f nTotal=%i nFound=%i \n",
+              i,
+              simTracks_[i].charge(),
+              simTracks_[i].px(),
+              simTracks_[i].py(),
+              simTracks_[i].pz(),
+              simTracks_[i].x(),
+              simTracks_[i].y(),
+              simTracks_[i].z(),
+              spt,
+              simTracks_[i].nTotalHits(),
+              simTracks_[i].nFoundHits());
+          int nh = simTracks_[i].nTotalHits();
+          for (int ih = 0; ih < nh; ++ih) {
+            int hidx = simTracks_[i].getHitIdx(ih);
+            int hlay = simTracks_[i].getHitLyr(ih);
+            float hx = layerHits_[hlay][hidx].x();
+            float hy = layerHits_[hlay][hidx].y();
+            float hz = layerHits_[hlay][hidx].z();
+            printf("track #%4i hit #%2i idx=%4i lay=%2i x=(% 8.3f, % 8.3f, % 8.3f) r=%8.3f\n",
+                   i,
+                   ih,
+                   hidx,
+                   hlay,
+                   hx,
+                   hy,
+                   hz,
+                   sqrt(hx * hx + hy * hy));
+          }
+        }
+
+        for (int i = 0; i < ns; ++i) {
+          printf("seed id=%i label=%i algo=%i q=%2i pT=%6.3f p=(%6.3f, %6.3f, %6.3f) x=(%6.3f, %6.3f, %6.3f)\n",
+                 i,
+                 seedTracks_[i].label(),
+                 (int)seedTracks_[i].algorithm(),
+                 seedTracks_[i].charge(),
+                 seedTracks_[i].pT(),
+                 seedTracks_[i].px(),
+                 seedTracks_[i].py(),
+                 seedTracks_[i].pz(),
+                 seedTracks_[i].x(),
+                 seedTracks_[i].y(),
+                 seedTracks_[i].z());
+          int nh = seedTracks_[i].nTotalHits();
+          for (int ih = 0; ih < nh; ++ih)
+            printf("seed #%i hit #%i idx=%i\n", i, ih, seedTracks_[i].getHitIdx(ih));
+        }
+
+        if (writeRecTracks) {
+          for (int i = 0; i < nr; ++i) {
+            float spt = sqrt(pow(cmsswTracks_[i].px(), 2) + pow(cmsswTracks_[i].py(), 2));
+            printf(
+                "rec track id=%i label=%i algo=%i chi2=%6.3f q=%2i p=(%6.3f, %6.3f, %6.3f) x=(%6.3f, %6.3f, %6.3f) "
+                "pT=%7.4f nTotal=%i nFound=%i \n",
+                i,
+                cmsswTracks_[i].label(),
+                (int)cmsswTracks_[i].algorithm(),
+                cmsswTracks_[i].chi2(),
+                cmsswTracks_[i].charge(),
+                cmsswTracks_[i].px(),
+                cmsswTracks_[i].py(),
+                cmsswTracks_[i].pz(),
+                cmsswTracks_[i].x(),
+                cmsswTracks_[i].y(),
+                cmsswTracks_[i].z(),
+                spt,
+                cmsswTracks_[i].nTotalHits(),
+                cmsswTracks_[i].nFoundHits());
+            int nh = cmsswTracks_[i].nTotalHits();
+            for (int ih = 0; ih < nh; ++ih) {
+              int hidx = cmsswTracks_[i].getHitIdx(ih);
+              int hlay = cmsswTracks_[i].getHitLyr(ih);
+              float hx = layerHits_[hlay][hidx].x();
+              float hy = layerHits_[hlay][hidx].y();
+              float hz = layerHits_[hlay][hidx].z();
+              printf("track #%4i hit #%2i idx=%4i lay=%2i x=(% 8.3f, % 8.3f, % 8.3f) r=%8.3f\n",
+                     i,
+                     ih,
+                     hidx,
+                     hlay,
+                     hx,
+                     hy,
+                     hz,
+                     sqrt(hx * hx + hy * hy));
+            }
+          }
+        }  //if (writeRecTracks){
+
+      }  //verbosity>1
+    }    //verbosity>0
+    EE.write_out(data_file);
+
+    savedEvents++;
+    printf("end of event %lli\n", savedEvents);
+  }
+
+  data_file.CloseWrite(savedEvents);
+  printf("\nSaved %lli events\n\n", savedEvents);
+
+  printf("Average number of seeds per event %f\n", float(nstot) / float(savedEvents));
+  for (unsigned int il = 0; il < nhitstot.size(); ++il)
+    printf("Average number of hits in layer %3i = %7.2f\n",
+           il,
+           float(nhitstot[il]) / float(savedEvents));  //Includes those that failed the cluster charge cut
+
+  printf("Out of %i hits, %i failed the cut", numTotalStr, numFailCCC);
+
+  //========================================================================
+
+  printf("\n\n================================================================\n");
+  printf("=== Max module id for %u layers\n", nTotalLayers);
+  printf("================================================================\n");
+  for (unsigned int ii = 0; ii < nTotalLayers; ++ii) {
+    printf("Layer%2d : %d\n", ii, (int)module_shortId_hash[ii].size());
+  }
+}
diff --git a/RecoTracker/MkFitCore/BuildFile.xml b/RecoTracker/MkFitCore/BuildFile.xml
new file mode 100644
index 0000000000000..e3ad879ac23c2
--- /dev/null
+++ b/RecoTracker/MkFitCore/BuildFile.xml
@@ -0,0 +1,8 @@
+<use name="rootsmatrix"/>
+<use name="json"/>
+<use name="tbb"/>
+<flags CXXFLAGS="-fopenmp-simd -Wno-error=strict-aliasing"/>
+<flags ADD_SUBDIR="1"/>
+<export>
+  <lib name="RecoTrackerMkFitCore"/>
+</export>
diff --git a/RecoTracker/MkFitCore/interface/Config.h b/RecoTracker/MkFitCore/interface/Config.h
new file mode 100644
index 0000000000000..f754055add89b
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/Config.h
@@ -0,0 +1,168 @@
+#ifndef RecoTracker_MkFitCore_interface_Config_h
+#define RecoTracker_MkFitCore_interface_Config_h
+
+namespace mkfit {
+
+  enum PropagationFlagsEnum { PF_none = 0, PF_use_param_b_field = 1 << 0, PF_apply_material = 1 << 1 };  // one bit each
+
+  struct PropagationFlags {
+    bool use_param_b_field : 1;
+    bool apply_material : 1;
+    // Could add: bool use_trig_approx       -- now Config::useTrigApprox = true
+    // Could add: int  n_prop_to_r_iters : 8 -- now Config::Niter = 5
+    // Default: both flags cleared, i.e. the same result as decoding PF_none.
+    PropagationFlags() : PropagationFlags(PF_none) {}
+    // Decodes an OR-ed combination of PropagationFlagsEnum bits; non-explicit, so an int converts implicitly.
+    PropagationFlags(int pfe)
+        : use_param_b_field((pfe & PF_use_param_b_field) != 0), apply_material((pfe & PF_apply_material) != 0) {}
+  };
+
+  class PropagationConfig {  // bundle of propagation settings; all data members are public, none has an initializer here
+  public:
+    bool backward_fit_to_pca;
+    bool finding_requires_propagation_to_hit_pos;
+    PropagationFlags finding_inter_layer_pflags;  // each *_pflags member is a PropagationFlags (both bits start cleared)
+    PropagationFlags finding_intra_layer_pflags;
+    PropagationFlags backward_fit_pflags;
+    PropagationFlags forward_fit_pflags;
+    PropagationFlags seed_fit_pflags;
+    PropagationFlags pca_prop_pflags;
+    // Defined out of line. NOTE(review): presumably installs this object as s_default ('force' to replace) -- confirm.
+    void set_as_default(bool force = false);
+    // Dereferences s_default without a null check. NOTE(review): presumably needs a prior set_as_default() -- confirm.
+    static const PropagationConfig& get_default() { return *s_default; }
+
+  private:
+    static const PropagationConfig* s_default;  // declaration only; the definition and its initial value live elsewhere
+  };
+
+  //------------------------------------------------------------------------------
+
+  namespace Const {  // single-precision math / physics constants and compile-time debugging switches
+    constexpr float PI = 3.14159265358979323846;     // double literal, rounded to float
+    constexpr float TwoPI = 6.28318530717958647692;  // double literal, rounded to float
+    constexpr float PIOver2 = Const::PI / 2.0f;
+    constexpr float PIOver4 = Const::PI / 4.0f;
+    constexpr float PI3Over4 = 3.0f * Const::PI / 4.0f;
+    constexpr float InvPI = 1.0f / Const::PI;
+    constexpr float sol = 0.299792458;  // speed of light in m/ns (i.e. 29.9792458 cm/ns)
+
+    // Compile-time switches for NaN / "silly" track-parameter checks; the checks themselves are implemented elsewhere.
+    constexpr bool nan_etc_sigs_enable = false;
+
+    constexpr bool nan_n_silly_check_seeds = true;
+    constexpr bool nan_n_silly_print_bad_seeds = false;
+    constexpr bool nan_n_silly_fixup_bad_seeds = false;
+    constexpr bool nan_n_silly_remove_bad_seeds = true;
+
+    constexpr bool nan_n_silly_check_cands_every_layer = false;
+    constexpr bool nan_n_silly_print_bad_cands_every_layer = false;
+    constexpr bool nan_n_silly_fixup_bad_cands_every_layer = false;
+
+    constexpr bool nan_n_silly_check_cands_pre_bkfit = true;
+    constexpr bool nan_n_silly_check_cands_post_bkfit = true;
+    constexpr bool nan_n_silly_print_bad_cands_bkfit = false;
+  }  // namespace Const
+
+  inline float cdist(float a) { return a > Const::PI ? Const::TwoPI - a : a; }  // a above pi is reflected to 2*pi - a
+
+  //------------------------------------------------------------------------------
+
+  namespace Config {  // compile-time constants, plus values declared extern here and defined elsewhere
+    // config for fitting
+    constexpr int nLayers = 10;  // default/toy: 10; cms-like: 18 (barrel), 27 (endcap)
+
+    // Layer constants for common barrel / endcap.
+    // TrackerInfo more or less has all this information.
+    constexpr int nMaxTrkHits = 64;  // Used for array sizes in MkFitter/Finder, max hits in toy MC
+    constexpr int nAvgSimHits = 32;  // Used for reserve() calls for sim hits/states
+
+    // This will become layer dependent (in bits). To be consistent with min_dphi.
+    constexpr int m_nphi = 256;
+
+    // Config for propagation - could/should enter into PropagationFlags?!
+    constexpr int Niter = 5;
+    constexpr bool useTrigApprox = true;
+
+    // Config for Bfield. Note: for now the same for CMS-2017 and CylCowWLids.
+    constexpr float Bfield = 3.8112;
+    constexpr float mag_c1 = 3.8114;
+    constexpr float mag_b0 = -3.94991e-06;
+    constexpr float mag_b1 = 7.53701e-06;
+    constexpr float mag_a = 2.43878e-11;
+
+    // Config for SelectHitIndices
+    // Use extra arrays to store phi and q of hits.
+    // MT: This would in principle allow fast selection of good hits, if
+    // we had good error estimates and reasonable *minimal* phi and q windows.
+    // Speed-wise, those arrays (filling AND access, about half each) cost 1.5%
+    // and could help us reduce the number of hits we need to process with bigger
+    // potential gains.
+#ifdef CONFIG_PhiQArrays
+    extern bool usePhiQArrays;
+#else
+    constexpr bool usePhiQArrays = true;
+#endif
+
+    // sorting config (bonus,penalty)
+    constexpr float validHitBonus_ = 4;
+    constexpr float validHitSlope_ = 0.2;
+    constexpr float overlapHitBonus_ = 0;  // set to negative for penalty
+    constexpr float missingHitPenalty_ = 8;
+    constexpr float tailMissingHitPenalty_ = 3;
+
+    // Threading: run-time variables in standalone builds, fixed constants otherwise.
+#if defined(MKFIT_STANDALONE)
+    extern int numThreadsFinder;
+    extern int numThreadsEvents;
+    extern int numSeedsPerTask;
+#else
+    constexpr int numThreadsFinder = 1;
+    constexpr int numThreadsEvents = 1;
+    constexpr int numSeedsPerTask = 32;
+#endif
+
+    // config on seed cleaning
+    constexpr float track1GeVradius = 87.6;  // = 1/(c*B)
+    constexpr float c_etamax_brl = 0.9;
+    constexpr float c_dpt_common = 0.25;
+    constexpr float c_dzmax_brl = 0.005;
+    constexpr float c_drmax_brl = 0.010;
+    constexpr float c_ptmin_hpt = 2.0;
+    constexpr float c_dzmax_hpt = 0.010;
+    constexpr float c_drmax_hpt = 0.010;
+    constexpr float c_dzmax_els = 0.015;
+    constexpr float c_drmax_els = 0.015;
+
+    // config on duplicate removal (removeDuplicates is declared only in standalone builds)
+#if defined(MKFIT_STANDALONE)
+    extern bool useHitsForDuplicates;
+    extern bool removeDuplicates;
+#else
+    constexpr bool useHitsForDuplicates = true;
+#endif
+    extern const float maxdPhi;
+    extern const float maxdPt;
+    extern const float maxdEta;
+    extern const float minFracHitsShared;
+    extern const float maxdR;
+
+    // duplicate removal: tighter version
+    extern const float maxd1pt;
+    extern const float maxdphi;
+    extern const float maxdcth;
+    extern const float maxcth_ob;
+    extern const float maxcth_fw;
+
+    // ================================================================
+    // Parameterized field: quadratic in z (mag_b0, mag_b1, mag_c1) scaled by (1 + mag_a * r^2).
+    inline float bFieldFromZR(const float z, const float r) {
+      return (Config::mag_b0 * z * z + Config::mag_b1 * z + Config::mag_c1) * (Config::mag_a * r * r + 1.f);
+    }
+
+  }  // namespace Config
+
+  //------------------------------------------------------------------------------
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/interface/ConfigWrapper.h b/RecoTracker/MkFitCore/interface/ConfigWrapper.h
new file mode 100644
index 0000000000000..745685c13f562
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/ConfigWrapper.h
@@ -0,0 +1,16 @@
+#ifndef RecoTracker_MkFitCore_interface_ConfigWrapper_h
+#define RecoTracker_MkFitCore_interface_ConfigWrapper_h
+
+namespace mkfit {
+  /**
+   * The purpose of this namespace is to hide the Config.h header
+   * from CMSSW. That header contains uses of build-time
+   * configuration macros, which should remain internal details of
+   * the MkFit package.
+   */
+  namespace ConfigWrapper {
+    void initializeForCMSSW();
+  }  // namespace ConfigWrapper
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/Hit.h b/RecoTracker/MkFitCore/interface/Hit.h
new file mode 100644
index 0000000000000..e2b2e33d3a4eb
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/Hit.h
@@ -0,0 +1,279 @@
+#ifndef RecoTracker_MkFitCore_interface_Hit_h
+#define RecoTracker_MkFitCore_interface_Hit_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/MatrixSTypes.h"
+
+#include <cmath>
+#include <string_view>
+
+namespace mkfit {
+
+  template <typename T>
+  constexpr T sqr(T x) {  // x * x; constexpr (implicitly inline) so it also works in constant expressions
+    return x * x;
+  }
+  template <typename T>
+  constexpr T cube(T x) {  // x * x * x; constexpr (implicitly inline) so it also works in constant expressions
+    return x * x * x;
+  }
+
+  inline float squashPhiGeneral(float phi) {  // subtracts a floor-computed whole number of TwoPI turns from phi
+    return phi - std::floor(0.5 * Const::InvPI * (phi + Const::PI)) * Const::TwoPI;
+  }
+
+  inline float squashPhiMinimal(float phi) {  // applies at most one +-TwoPI shift; other values pass through
+    return phi < -Const::PI ? phi + Const::TwoPI : (phi >= Const::PI ? phi - Const::TwoPI : phi);
+  }
+
+  inline float getRad2(float x, float y) { return x * x + y * y; }  // x^2 + y^2
+
+  inline float getInvRad2(float x, float y) { return 1.0f / (x * x + y * y); }  // 1 / (x^2 + y^2), no zero guard
+
+  inline float getPhi(float x, float y) { return std::atan2(y, x); }  // angle of (x, y) as given by atan2(y, x)
+
+  inline float getTheta(float r, float z) { return std::atan2(r, z); }  // atan2(r, z): zero when r == 0 and z > 0
+
+  inline float getEta(float r, float z) { return -1.0f * std::log(std::tan(getTheta(r, z) / 2.0f)); }  // from (r, z)
+
+  inline float getEta(float theta) { return -1.0f * std::log(std::tan(theta / 2.0f)); }  // -ln(tan(theta / 2))
+
+  inline float getEta(float x, float y, float z) {  // -ln(tan(theta / 2)) with theta = atan2(sqrt(x^2 + y^2), z)
+    const float theta = std::atan2(std::sqrt(x * x + y * y), z);
+    return -1.0f * std::log(std::tan(theta / 2.0f));
+  }
+
+  inline float getHypot(float x, float y) { return std::sqrt(x * x + y * y); }  // sqrt(x^2 + y^2), plain formula
+
+  inline float getRadErr2(float x, float y, float exx, float eyy, float exy) {  // squared error; divides by x^2+y^2
+    return (x * x * exx + y * y * eyy + 2.0f * x * y * exy) / getRad2(x, y);
+  }
+
+  inline float getInvRadErr2(float x, float y, float exx, float eyy, float exy) {  // same numerator, over (x^2+y^2)^3
+    return (x * x * exx + y * y * eyy + 2.0f * x * y * exy) / cube(getRad2(x, y));
+  }
+
+  inline float getPhiErr2(float x, float y, float exx, float eyy, float exy) {  // squared error; over (x^2+y^2)^2
+    const float rad2 = getRad2(x, y);
+    return (y * y * exx + x * x * eyy - 2.0f * x * y * exy) / (rad2 * rad2);
+  }
+
+  inline float getThetaErr2(
+      float x, float y, float z, float exx, float eyy, float ezz, float exy, float exz, float eyz) {
+    const float rad2 = getRad2(x, y);
+    const float rad = std::sqrt(rad2);  // NOTE: rad == 0 makes the two divisions by (rad * hypot2) ill-defined
+    const float hypot2 = rad2 + z * z;
+    const float dthetadx = x * z / (rad * hypot2);  // partial derivatives of theta = atan2(rad, z)
+    const float dthetady = y * z / (rad * hypot2);
+    const float dthetadz = -rad / hypot2;
+    return dthetadx * dthetadx * exx + dthetady * dthetady * eyy + dthetadz * dthetadz * ezz +
+           2.0f * dthetadx * dthetady * exy + 2.0f * dthetadx * dthetadz * exz + 2.0f * dthetady * dthetadz * eyz;
+  }
+
+  inline float getEtaErr2(float x, float y, float z, float exx, float eyy, float ezz, float exy, float exz, float eyz) {
+    const float rad2 = getRad2(x, y);  // NOTE: both rad2 == 0 and z == 0 lead to divisions by zero below
+    const float detadx = -x / (rad2 * std::sqrt(1 + rad2 / (z * z)));
+    const float detady = -y / (rad2 * std::sqrt(1 + rad2 / (z * z)));
+    const float detadz = 1.0f / (z * std::sqrt(1 + rad2 / (z * z)));
+    return detadx * detadx * exx + detady * detady * eyy + detadz * detadz * ezz + 2.0f * detadx * detady * exy +
+           2.0f * detadx * detadz * exz + 2.0f * detady * detadz * eyz;
+  }
+
+  inline float getPxPxErr2(float ipt, float phi, float vipt, float vphi) {  // ipt = 1/pT, v = variance
+    const float iipt2 = 1.0f / (ipt * ipt);                                 // iipt2 = 1/ipt^2 = pT^2
+    const float cosP = std::cos(phi);
+    const float sinP = std::sin(phi);
+    return iipt2 * (iipt2 * cosP * cosP * vipt + sinP * sinP * vphi);
+  }
+
+  inline float getPyPyErr2(float ipt, float phi, float vipt, float vphi) {  // ipt = 1/pT, v = variance
+    const float iipt2 = 1.0f / (ipt * ipt);                                 // iipt2 = 1/ipt^2 = pT^2
+    const float cosP = std::cos(phi);
+    const float sinP = std::sin(phi);
+    return iipt2 * (iipt2 * sinP * sinP * vipt + cosP * cosP * vphi);
+  }
+
+  inline float getPzPzErr2(float ipt, float theta, float vipt, float vtheta) {  // ipt = 1/pT, v = variance
+    const float iipt2 = 1.0f / (ipt * ipt);                                     // iipt2 = 1/ipt^2 = pT^2
+    const float cotT = 1.0f / std::tan(theta);
+    const float cscT = 1.0f / std::sin(theta);
+    return iipt2 * (iipt2 * cotT * cotT * vipt + cscT * cscT * cscT * cscT * vtheta);
+  }
+
+  struct MCHitInfo {
+    MCHitInfo() {}  // NOTE: leaves all four members uninitialized
+    MCHitInfo(int track, int layer, int ithlayerhit, int mcHitID)
+        : mcTrackID_(track), layer_(layer), ithLayerHit_(ithlayerhit), mcHitID_(mcHitID) {}
+
+    int mcTrackID_;
+    int layer_;
+    int ithLayerHit_;  // no accessor is provided for this field
+    int mcHitID_;
+
+    int mcTrackID() const { return mcTrackID_; }
+    int layer() const { return layer_; }
+    int mcHitID() const { return mcHitID_; }
+    static void reset();  // declared only; no definition in this header
+  };
+  typedef std::vector<MCHitInfo> MCHitInfoVec;
+
+  struct MeasurementState {  // a position vector plus its error kept as 6 packed values
+  public:
+    MeasurementState() {}
+    MeasurementState(const SVector3& p, const SVector6& e) : pos_(p), err_(e) {}
+    MeasurementState(const SVector3& p, const SMatrixSym33& e) : pos_(p) {
+      for (int i = 0; i < 6; ++i)  // copy the first 6 entries of e.Array() into the packed vector
+        err_[i] = e.Array()[i];
+    }
+    const SVector3& parameters() const { return pos_; }
+    SMatrixSym33 errors() const {  // builds a new matrix from the 6 stored entries on every call
+      SMatrixSym33 result;
+      for (int i = 0; i < 6; ++i)
+        result.Array()[i] = err_[i];
+      return result;
+    }
+    SVector3 pos_;
+    SVector6 err_;
+  };
+
+  class Hit {  // measurement state + MC hit id + packed per-hit detector data
+  public:
+    Hit() : mcHitID_(-1) {}
+
+    Hit(const SVector3& position, const SMatrixSym33& error, int mcHitID = -1)
+        : state_(position, error), mcHitID_(mcHitID) {}
+
+    const SVector3& position() const { return state_.parameters(); }
+    const SVector3& parameters() const { return state_.parameters(); }
+    const SMatrixSym33 error() const { return state_.errors(); }  // rebuilt by value on each call
+
+    const float* posArray() const { return state_.pos_.Array(); }
+    const float* errArray() const { return state_.err_.Array(); }
+
+    // Non-const versions needed for CopyOut of Matriplex.
+    SVector3& parameters_nc() { return state_.pos_; }
+    SVector6& error_nc() { return state_.err_; }
+
+    float r() const {  // std::sqrt (not C-only sqrtf): <cmath> guarantees the std:: overload for float
+      return std::sqrt(state_.parameters().At(0) * state_.parameters().At(0) +
+                       state_.parameters().At(1) * state_.parameters().At(1));
+    }
+    float x() const { return state_.parameters().At(0); }
+    float y() const { return state_.parameters().At(1); }
+    float z() const { return state_.parameters().At(2); }
+    float exx() const { return state_.errors().At(0, 0); }
+    float eyy() const { return state_.errors().At(1, 1); }
+    float ezz() const { return state_.errors().At(2, 2); }
+    float phi() const { return getPhi(state_.parameters().At(0), state_.parameters().At(1)); }
+    float eta() const {
+      return getEta(state_.parameters().At(0), state_.parameters().At(1), state_.parameters().At(2));
+    }
+    float ephi() const { return getPhiErr2(x(), y(), exx(), eyy(), state_.errors().At(0, 1)); }  // a squared error
+    float eeta() const {  // forwards to getEtaErr2(), so this is a squared error as well
+      return getEtaErr2(x(),
+                        y(),
+                        z(),
+                        exx(),
+                        eyy(),
+                        ezz(),
+                        state_.errors().At(0, 1),
+                        state_.errors().At(0, 2),
+                        state_.errors().At(1, 2));
+    }
+
+    const MeasurementState& measurementState() const { return state_; }
+
+    int mcHitID() const { return mcHitID_; }
+    // The two lookups below index the vector with mcHitID_, which defaults to -1: set it before calling.
+    int layer(const MCHitInfoVec& globalMCHitInfo) const { return globalMCHitInfo[mcHitID_].layer(); }
+    int mcTrackID(const MCHitInfoVec& globalMCHitInfo) const { return globalMCHitInfo[mcHitID_].mcTrackID(); }
+
+    static constexpr int kMinChargePerCM = 1620;
+
+    struct PackedData {
+      unsigned int detid_in_layer : 12;
+      unsigned int charge_pcm : 8;  // MIMI see set/get funcs; applicable for phase-0/1
+      unsigned int span_rows : 3;   // stored as (span - 1), see spanRows()
+      unsigned int span_cols : 3;   // stored as (span - 1), see spanCols()
+
+      PackedData() : detid_in_layer(0), charge_pcm(0), span_rows(0), span_cols(0) {}
+
+      void set_charge_pcm(int cpcm) {  // 0 below kMinChargePerCM, else 1 + excess / 8, saturating at 0xff
+        if (cpcm < kMinChargePerCM)
+          charge_pcm = 0;
+        else
+          charge_pcm = std::min(0xff, ((cpcm - kMinChargePerCM) >> 3) + 1);
+      }
+      unsigned int get_charge_pcm() const {  // inverse of the encoding above, in steps of 8
+        if (charge_pcm == 0)
+          return 0;
+        else
+          return ((charge_pcm - 1) << 3) + kMinChargePerCM;
+      }
+    };
+
+    unsigned int detIDinLayer() const { return pdata_.detid_in_layer; }
+    unsigned int chargePerCM() const { return pdata_.get_charge_pcm(); }
+    unsigned int spanRows() const { return pdata_.span_rows + 1; }
+    unsigned int spanCols() const { return pdata_.span_cols + 1; }
+
+    static unsigned int minChargePerCM() { return kMinChargePerCM; }
+    static unsigned int maxChargePerCM() { return kMinChargePerCM + (0xfe << 3); }
+    static unsigned int maxSpan() { return 8; }
+
+    void setupAsPixel(unsigned int id, int rows, int cols) {  // charge is set to the saturated code 0xff
+      pdata_.detid_in_layer = id;
+      pdata_.charge_pcm = 0xff;
+      pdata_.span_rows = std::min(0x7, rows - 1);
+      pdata_.span_cols = std::min(0x7, cols - 1);
+    }
+
+    void setupAsStrip(unsigned int id, int cpcm, int rows) {  // span_cols is left untouched here
+      pdata_.detid_in_layer = id;
+      pdata_.set_charge_pcm(cpcm);
+      pdata_.span_rows = std::min(0x7, rows - 1);
+    }
+
+  private:
+    MeasurementState state_;
+    int mcHitID_;
+    PackedData pdata_;
+  };
+
+  typedef std::vector<Hit> HitVec;
+
+  struct HitOnTrack {  // hit index (24 bits) and layer (8 bits) packed into signed bit-fields
+    int index : 24;
+    int layer : 8;
+
+    HitOnTrack() : index(-1), layer(-1) {}
+    HitOnTrack(int i, int l) : index(i), layer(l) {}
+
+    bool operator<(const HitOnTrack o) const {
+      if (layer == o.layer)
+        return index < o.index;  // same layer: order by hit index
+      return layer < o.layer;
+    }
+  };
+
+  typedef std::vector<HitOnTrack> HoTVec;
+
+  void print(std::string_view label, const MeasurementState& s);
+
+  struct DeadRegion {  // two phi values and two q values; presumably the bounds of a dead area - TODO confirm
+    float phi1, phi2, q1, q2;
+    DeadRegion(float a1, float a2, float b1, float b2) : phi1(a1), phi2(a2), q1(b1), q2(b2) {}
+  };
+  typedef std::vector<DeadRegion> DeadVec;
+
+  struct BeamSpot {  // plain value holder; units are not stated in this header
+    float x = 0, y = 0, z = 0;
+    float sigmaZ = 5;
+    float beamWidthX = 5e-4, beamWidthY = 5e-4;
+    float dxdz = 0, dydz = 0;
+
+    BeamSpot() = default;  // keeps the in-class defaults above
+    BeamSpot(float ix, float iy, float iz, float is, float ibx, float iby, float idxdz, float idydz)
+        : x(ix), y(iy), z(iz), sigmaZ(is), beamWidthX(ibx), beamWidthY(iby), dxdz(idxdz), dydz(idydz) {}
+  };
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/interface/HitStructures.h b/RecoTracker/MkFitCore/interface/HitStructures.h
new file mode 100644
index 0000000000000..5ae65c1c889bd
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/HitStructures.h
@@ -0,0 +1,882 @@
+#ifndef RecoTracker_MkFitCore_interface_HitStructures_h
+#define RecoTracker_MkFitCore_interface_HitStructures_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+#include <algorithm>
+#include <array>
+
+namespace mkfit {
+
+  class IterationParams;
+
+  typedef std::pair<uint16_t, uint16_t> PhiBinInfo_t;
+
+  typedef std::array<PhiBinInfo_t, Config::m_nphi> vecPhiBinInfo_t;
+
+  typedef std::vector<vecPhiBinInfo_t> vecvecPhiBinInfo_t;
+
+  typedef std::array<bool, Config::m_nphi> vecPhiBinDead_t;
+
+  typedef std::vector<vecPhiBinDead_t> vecvecPhiBinDead_t;
+
+  //==============================================================================
+
+  inline bool sortHitsByPhiMT(const Hit& h1, const Hit& h2) {  // strict ordering by atan2(y, x) of the positions
+    return h1.phi() < h2.phi();
+  }
+
+  inline bool sortTrksByPhiMT(const Track& t1, const Track& t2) { return t1.momPhi() < t2.momPhi(); }  // by momPhi()
+
+  //==============================================================================
+  //==============================================================================
+
+  // Note: the same code is used for barrel and endcap. In barrel the longitudinal
+  // bins are in Z and in endcap they are in R -- here this coordinate is called Q
+
+  // When not defined, hits are accessed from the original hit vector and
+  // only sort ranks are kept for proper access.
+  //
+  //#define COPY_SORTED_HITS
+
+  class LayerOfHits {  // per-layer hit container with (q, phi) bin lookup tables
+  public:
+    LayerOfHits() = default;
+
+    ~LayerOfHits();
+
+    // Setup and filling
+    //-------------------
+    void setupLayer(const LayerInfo& li);
+
+    void reset() {}
+
+    // Get in all hits from given hit-vec
+    void suckInHits(const HitVec& hitv);
+
+    // Get in all dead regions from given dead-vec
+    void suckInDeads(const DeadVec& deadv);
+
+    // Use external hit-vec and only use hits that are passed to me.
+    void beginRegistrationOfHits(const HitVec& hitv);
+    void registerHit(int idx);
+    void endRegistrationOfHits(bool build_original_to_internal_map);
+
+    int nHits() const { return m_n_hits; }
+
+    // Bin access / queries
+    //----------------------
+    int qBin(float q) const { return (q - m_qmin) * m_fq; }  // unchecked; float result is truncated to int
+
+    int qBinChecked(float q) const { return std::clamp(qBin(q), 0, m_nq - 1); }
+
+    // if you don't pass phi in (-pi, +pi), mask away the upper bits using m_phi_mask or use the Checked version.
+    int phiBinFine(float phi) const { return std::floor(m_fphi_fine * (phi + Const::PI)); }
+    int phiBin(float phi) const { return phiBinFine(phi) >> m_phi_bits_shift; }
+
+    int phiBinChecked(float phi) const { return phiBin(phi) & m_phi_mask; }
+
+    int phiMaskApply(int in) const { return in & m_phi_mask; }
+
+    const vecPhiBinInfo_t& vecPhiBinInfo(float q) const { return m_phi_bin_infos[qBin(q)]; }  // uses unchecked qBin
+
+    const vecvecPhiBinInfo_t& phi_bin_infos() const { return m_phi_bin_infos; }
+    const vecvecPhiBinDead_t& phi_bin_deads() const { return m_phi_bin_deads; }
+    PhiBinInfo_t phi_bin_info(int qi, int pi) const { return m_phi_bin_infos[qi][pi]; }
+    bool phi_bin_dead(int qi, int pi) const { return m_phi_bin_deads[qi][pi]; }
+
+    float hit_q(int i) const { return m_hit_qs[i]; }
+    float hit_phi(int i) const { return m_hit_phis[i]; }
+
+    // Use this to map original indices to sorted internal ones. m_ext_idcs needs to be initialized.
+    int getHitIndexFromOriginal(int i) const { return m_ext_idcs[i - m_min_ext_idx]; }
+    // Use this to remap internal hit index to external one.
+    int getOriginalHitIndex(int i) const { return m_hit_ranks[i]; }
+
+#ifdef COPY_SORTED_HITS
+    const Hit& refHit(int i) const { return m_hits[i]; }
+    const Hit* hitArray() const { return m_hits; }
+#else
+    const Hit& refHit(int i) const { return (*m_ext_hits)[i]; }
+    const Hit* hitArray() const { return m_ext_hits->data(); }
+#endif
+
+    // Left to document and demonstrate access to bin-info structures.
+    // void  selectHitIndices(float q, float phi, float dq, float dphi, std::vector<int>& idcs, bool isForSeeding=false, bool dump=false);
+
+    void printBins();
+
+    // Geometry / LayerInfo accessors
+    //--------------------------------
+
+    const LayerInfo* layer_info() const { return m_layer_info; }
+    int layer_id() const { return m_layer_info->layer_id(); }
+
+    bool is_barrel() const { return m_is_barrel; }
+    bool is_endcap() const { return !m_is_barrel; }
+
+    bool is_within_z_limits(float z) const { return m_layer_info->is_within_z_limits(z); }
+    bool is_within_r_limits(float r) const { return m_layer_info->is_within_r_limits(r); }
+
+    WSR_Result is_within_z_sensitive_region(float z, float dz) const {
+      return m_layer_info->is_within_z_sensitive_region(z, dz);
+    }
+
+    WSR_Result is_within_r_sensitive_region(float r, float dr) const {
+      return m_layer_info->is_within_r_sensitive_region(r, dr);
+    }
+
+    bool is_stereo() const { return m_layer_info->is_stereo(); }
+
+    // Sub-detector type
+    bool is_pixb_lyr() const { return m_layer_info->is_pixb_lyr(); }
+    bool is_pixe_lyr() const { return m_layer_info->is_pixe_lyr(); }
+    bool is_pix_lyr() const { return m_layer_info->is_pix_lyr(); }
+    bool is_tib_lyr() const { return m_layer_info->is_tib_lyr(); }
+    bool is_tob_lyr() const { return m_layer_info->is_tob_lyr(); }
+    bool is_tid_lyr() const { return m_layer_info->is_tid_lyr(); }
+    bool is_tec_lyr() const { return m_layer_info->is_tec_lyr(); }
+
+  private:
+    // Constants for phi-bin access / index manipulation.
+    static constexpr float m_fphi = Config::m_nphi / Const::TwoPI;
+    static constexpr int m_phi_mask = 0xff;
+    static constexpr int m_phi_bits = 8;
+    static constexpr float m_fphi_fine = 1024 / Const::TwoPI;
+    static constexpr int m_phi_mask_fine = 0x3ff;
+    static constexpr int m_phi_bits_fine = 10;  //can't be more than 16
+    static constexpr int m_phi_bits_shift = m_phi_bits_fine - m_phi_bits;
+    static constexpr int m_phi_fine_xmask = ~((1 << m_phi_bits_shift) - 1);
+
+    void setup_bins(float qmin, float qmax, float dq);
+
+    void empty_phi_bins(int q_bin, int phi_bin_1, int phi_bin_2, uint16_t hit_count) {  // range is [bin_1, bin_2)
+      for (int pb = phi_bin_1; pb < phi_bin_2; ++pb) {
+        m_phi_bin_infos[q_bin][pb] = {hit_count, hit_count};
+      }
+    }
+
+    void empty_q_bins(int q_bin_1, int q_bin_2, uint16_t hit_count) {  // all phi bins of q bins [bin_1, bin_2)
+      for (int qb = q_bin_1; qb < q_bin_2; ++qb) {
+        empty_phi_bins(qb, 0, Config::m_nphi, hit_count);
+      }
+    }
+
+    void empty_phi_bins_dead(int q_bin, int phi_bin_1, int phi_bin_2) {  // clears the dead flags in [bin_1, bin_2)
+      for (int pb = phi_bin_1; pb < phi_bin_2; ++pb) {
+        m_phi_bin_deads[q_bin][pb] = false;
+      }
+    }
+
+    void empty_q_bins_dead(int q_bin_1, int q_bin_2) {
+      for (int qb = q_bin_1; qb < q_bin_2; ++qb) {
+        empty_phi_bins_dead(qb, 0, Config::m_nphi);
+      }
+    }
+
+#ifdef COPY_SORTED_HITS
+    void alloc_hits(int size);
+    void free_hits();
+
+    Hit* m_hits = nullptr;
+    int m_capacity = 0;
+#else
+    const HitVec* m_ext_hits = nullptr;  // external hit vector; null until one has been assigned
+#endif
+    unsigned int* m_hit_ranks = nullptr;  // allocated by IceSort via new []
+    std::vector<int> m_ext_idcs;
+    int m_min_ext_idx, m_max_ext_idx;
+    int m_n_hits = 0;
+
+    // Bin information for hits and dead regions
+    vecvecPhiBinInfo_t m_phi_bin_infos;
+    vecvecPhiBinDead_t m_phi_bin_deads;
+
+    // Cached hit phi and q values to minimize Hit memory access
+    std::vector<float> m_hit_phis;
+    std::vector<float> m_hit_qs;
+
+    // Geometry / q-binning constants - initialized in setupLayer()
+    const LayerInfo* m_layer_info = nullptr;
+    float m_qmin, m_qmax, m_fq;
+    int m_nq = 0;
+    bool m_is_barrel;
+
+    // Data needed during setup
+    struct HitInfo {
+      float phi;
+      float q;
+    };
+    std::vector<HitInfo> m_hit_infos;
+    std::vector<uint32_t> m_qphifines;
+  };
+
+  //==============================================================================
+
+  class EventOfHits {  // owns one LayerOfHits per layer plus the beam spot
+  public:
+    EventOfHits(const TrackerInfo& trk_inf);
+
+    void reset() {
+      for (LayerOfHits& loh : m_layers_of_hits) {
+        loh.reset();
+      }
+    }
+
+    void suckInHits(int layer, const HitVec& hitv) { (*this)[layer].suckInHits(hitv); }
+
+    void suckInDeads(int layer, const DeadVec& deadv) { (*this)[layer].suckInDeads(deadv); }
+
+    const BeamSpot& refBeamSpot() const { return m_beam_spot; }
+    void setBeamSpot(const BeamSpot& bs) { m_beam_spot = bs; }
+
+    int nLayers() const { return m_n_layers; }
+
+    LayerOfHits& operator[](int i) { return m_layers_of_hits[i]; }  // unchecked access by layer index
+    const LayerOfHits& operator[](int i) const { return m_layers_of_hits[i]; }
+
+  private:
+    std::vector<LayerOfHits> m_layers_of_hits;
+    int m_n_layers;
+    BeamSpot m_beam_spot;
+  };
+
+  //==============================================================================
+  // TrackCand, CombinedCandidate and EventOfCombinedCandidates
+  //==============================================================================
+
+  struct HoTNode {  // entry of CombCandidate's hit storage, filled by addHit(hot, chi2, prev_idx)
+    HitOnTrack m_hot;
+    float m_chi2;
+    int m_prev_idx;  // presumably the index of the preceding node of the same candidate - TODO confirm
+  };
+
+  struct HitMatch {
+    int m_hit_idx = -1;
+    int m_module_id = -1;
+    float m_chi2 = 1e9;
+
+    void reset() {
+      // Back to the "no match" defaults declared above.
+      m_chi2 = 1e9;
+      m_module_id = m_hit_idx = -1;
+    }
+  };
+
+  struct HitMatchPair {  // keeps up to two HitMatch entries, each tied to a module id
+    HitMatch M[2];
+
+    void reset() {
+      M[0].reset();
+      M[1].reset();
+    }
+
+    void consider_hit_for_overlap(int hit_idx, int module_id, float chi2) {
+      if (module_id == M[0].m_module_id) {  // module already held in slot 0: keep the lower chi2
+        if (chi2 < M[0].m_chi2) {
+          M[0].m_chi2 = chi2;
+          M[0].m_hit_idx = hit_idx;
+        }
+      } else if (module_id == M[1].m_module_id) {  // module already held in slot 1: keep the lower chi2
+        if (chi2 < M[1].m_chi2) {
+          M[1].m_chi2 = chi2;
+          M[1].m_hit_idx = hit_idx;
+        }
+      } else {  // new module: may replace whichever slot currently has the larger chi2
+        if (M[0].m_chi2 > M[1].m_chi2) {
+          if (chi2 < M[0].m_chi2) {
+            M[0] = {hit_idx, module_id, chi2};
+          }
+        } else {
+          if (chi2 < M[1].m_chi2) {
+            M[1] = {hit_idx, module_id, chi2};
+          }
+        }
+      }
+    }
+
+    HitMatch* find_overlap(int hit_idx, int module_id) {  // NOTE: hit_idx is not used in the body
+      if (module_id == M[0].m_module_id) {  // given module sits in slot 0: offer slot 1 if it holds a hit
+        if (M[1].m_hit_idx >= 0)
+          return &M[1];
+      } else if (module_id == M[1].m_module_id) {  // given module sits in slot 1: offer slot 0 if it holds a hit
+        if (M[0].m_hit_idx >= 0)
+          return &M[0];
+      } else {  // module in neither slot: offer the slot with the lower chi2, if it holds a hit
+        if (M[0].m_chi2 <= M[1].m_chi2) {
+          if (M[0].m_hit_idx >= 0)
+            return &M[0];
+        } else {
+          if (M[1].m_hit_idx >= 0)
+            return &M[1];
+        }
+      }
+
+      return nullptr;  // nothing suitable found
+    }
+  };
+
+  // CcPool - CombCandidate Pool and Allocator
+
+  template <class T>
+  class CcPool {  // bump allocator over a std::vector<T>; storage is only reclaimed by reset() / release()
+  public:
+    void reset(std::size_t size) {  // makes `size` slots available; m_mem only ever grows here
+      if (size > m_mem.size())
+        m_mem.resize(size);
+      m_pos = 0;
+      m_size = size;
+    }
+
+    void release() {  // swap with an empty vector so the memory is actually given back
+      std::vector<T> tmp;
+      m_mem.swap(tmp);
+      m_pos = 0;
+      m_size = 0;
+    }
+
+    CcPool(std::size_t size = 0) {
+      if (size)
+        reset(size);
+    }
+
+    T* allocate(std::size_t n) {  // hands out the next n slots; throws std::bad_alloc when they do not fit
+      if (n > m_size - m_pos)  // m_pos <= m_size always holds, so the subtraction cannot wrap around
+        throw std::bad_alloc();
+      T* ret = m_mem.data() + m_pos;  // data() + offset is valid even when m_pos == m_mem.size()
+      m_pos += n;
+      return ret;
+    }
+
+    void deallocate(T* p, std::size_t n) noexcept {
+      // we do not care, implied deallocation of the whole pool on reset().
+    }
+
+  private:
+    std::vector<T> m_mem;
+    std::size_t m_pos = 0;
+    std::size_t m_size = 0;
+  };
+
+  template <class T>
+  class CcAlloc {  // allocator-style adaptor that forwards every request to a CcPool<T>
+  public:
+    typedef T value_type;
+
+    CcAlloc(CcPool<T>* p) : m_pool(p) {}  // the pool is not owned; only the pointer is stored
+
+    const void* pool_id() const { return m_pool; }  // pool address, used as identity by operator==
+
+    T* allocate(std::size_t n) { return m_pool->allocate(n); }
+
+    void deallocate(T* p, std::size_t n) noexcept { m_pool->deallocate(p, n); }
+
+  private:
+    CcPool<T>* m_pool;
+  };
+
+  template <class T, class U>
+  bool operator==(const CcAlloc<T>& a, const CcAlloc<U>& b) {  // equal when both refer to the same pool address
+    return a.pool_id() == b.pool_id();
+  }
+
+  //------------------------------------------------------------------------------
+
+  class CombCandidate;
+
+  class TrackCand : public TrackBase {
+  public:
+    TrackCand() = default;
+
+    explicit TrackCand(const TrackBase& base, CombCandidate* ccand) : TrackBase(base), m_comb_candidate(ccand) {
+      // Start with zeroed hit counters -- filling in the hits is left to the caller.
+      nFoundHits_ = 0;
+      lastHitIdx_ = -1;
+    }
+
+    // A CombCandidate serves as the hit container for all TrackCands that come from
+    // one seed; track building functions go through this pointer to add hits to that
+    // holder class.
+    // Access is guaranteed to be thread safe as seed ranges pointing into CombCandidate
+    // vector is assigned to threads doing track-finding and final processing is only done
+    // when all worker threads have finished.
+    CombCandidate* combCandidate() const { return m_comb_candidate; }
+    void setCombCandidate(CombCandidate* cc) { m_comb_candidate = cc; }
+
+    int lastCcIndex() const { return lastHitIdx_; }
+    int nFoundHits() const { return nFoundHits_; }
+    int nMissingHits() const { return nMissingHits_; }
+    int nOverlapHits() const { return nOverlapHits_; }
+    int nTotalHits() const { return nMissingHits_ + nFoundHits_; }
+
+    void setLastCcIndex(int i) { lastHitIdx_ = i; }
+    void setNFoundHits(int n) { nFoundHits_ = n; }
+    void setNMissingHits(int n) { nMissingHits_ = n; }
+    void setNOverlapHits(int n) { nOverlapHits_ = n; }
+
+    int nInsideMinusOneHits() const { return nInsideMinusOneHits_; }
+    int nTailMinusOneHits() const { return nTailMinusOneHits_; }
+
+    void setNInsideMinusOneHits(int n) { nInsideMinusOneHits_ = n; }
+    void setNTailMinusOneHits(int n) { nTailMinusOneHits_ = n; }
+
+    int originIndex() const { return m_origin_index; }
+    void setOriginIndex(int oi) { m_origin_index = oi; }
+
+    void resetOverlaps() { m_overlap_hits.reset(); }
+    void considerHitForOverlap(int hit_idx, int module_id, float chi2) {
+      m_overlap_hits.consider_hit_for_overlap(hit_idx, module_id, chi2);
+    }
+    HitMatch* findOverlap(int hit_idx, int module_id) { return m_overlap_hits.find_overlap(hit_idx, module_id); }
+
+    // The inline definitions follow the definition of CombCandidate
+
+    HitOnTrack getLastHitOnTrack() const;
+    int getLastHitIdx() const;
+    int getLastHitLyr() const;
+
+    // Used by the additional filter
+    int getLastFoundPixelHitLyr() const;
+    int getLastFoundHitLyr() const;
+    int nUniqueLayers() const;
+
+    int nLayersByTypeEncoded(const TrackerInfo& trk_inf) const;
+    int nHitsByTypeEncoded(const TrackerInfo& trk_inf) const;
+
+    int nPixelDecoded(const int& encoded) const { return encoded % 100; }
+    int nStereoDecoded(const int& encoded) const { return (encoded / 100) % 100; }
+    int nMonoDecoded(const int& encoded) const { return (encoded / 10000) % 100; }
+    int nMatchedDecoded(const int& encoded) const { return encoded / 1000000; }
+    int nTotMatchDecoded(const int& encoded) const {
+      return nPixelDecoded(encoded) + nStereoDecoded(encoded) + nMonoDecoded(encoded) - nMatchedDecoded(encoded);
+    }
+
+    void addHitIdx(int hitIdx, int hitLyr, float chi2);
+
+    HoTNode& refLastHoTNode();              // used when filling in overlap info
+    const HoTNode& refLastHoTNode() const;  // used when traversing for a dump
+
+    void incOverlapCount() { nOverlapHits_++; }
+
+    Track exportTrack(bool remove_missing_hits = false) const;
+
+    void resetShortTrack() {
+      m_comb_candidate = nullptr;
+      score_ = getScoreWorstPossible();
+    }
+
+  private:
+    CombCandidate* m_comb_candidate = nullptr;
+    HitMatchPair m_overlap_hits;
+
+    // TrackBase::lastHitIdx_ is reused as the index into CombCandidate's hit-on-track-node vector
+    short int nMissingHits_ = 0;
+    short int nOverlapHits_ = 0;
+
+    short int nInsideMinusOneHits_ = 0;
+    short int nTailMinusOneHits_ = 0;
+
+    short int m_origin_index = -1;  // index of origin candidate (used for overlaps in Standard)
+  };
+
+  inline bool sortByScoreTrackCand(const TrackCand& cand1, const TrackCand& cand2) {  // higher score sorts first
+    return cand2.score() < cand1.score();
+  }
+
+  inline float getScoreCand(const TrackCand& cand1, bool penalizeTailMissHits = false, bool inFindCandidates = false) {
+    // Tail misses only count when the caller asks for them to be penalized.
+    const int nTail = penalizeTailMissHits ? cand1.nTailMinusOneHits() : 0;
+    const float pt = cand1.pT();
+
+    // A chi2 below zero is not allowed in the score calculation; it is replaced by zero.
+    const float rawChi2 = cand1.chi2();
+    const float chi2 = rawChi2 < 0 ? 0.f : rawChi2;
+
+    return getScoreCalc(
+        cand1.nFoundHits(), nTail, cand1.nOverlapHits(), cand1.nInsideMinusOneHits(), chi2, pt, inFindCandidates);
+  }
+
+  // CombCandidate -- a set of candidates from a given seed.
+
+  class CombCandidate {
+  public:
+    using trk_cand_vec_type = std::vector<TrackCand, CcAlloc<TrackCand>>;
+    using allocator_type = CcAlloc<TrackCand>;
+
+    enum SeedState_e { Dormant = 0, Finding, Finished };
+
+    CombCandidate(const allocator_type& alloc) : m_trk_cands(alloc), m_state(Dormant), m_pickup_layer(-1) {}
+
+    // Required by std::uninitialized_fill_n when declaring vector<CombCandidate> in EventOfCombCandidates
+    CombCandidate(const CombCandidate& o)
+        : m_trk_cands(o.m_trk_cands),
+          m_state(o.m_state),
+          m_pickup_layer(o.m_pickup_layer),
+          m_lastHitIdx_before_bkwsearch(o.m_lastHitIdx_before_bkwsearch),
+          m_nInsideMinusOneHits_before_bkwsearch(o.m_nInsideMinusOneHits_before_bkwsearch),
+          m_nTailMinusOneHits_before_bkwsearch(o.m_nTailMinusOneHits_before_bkwsearch),
+#ifdef DUMPHITWINDOW
+          m_seed_algo(o.m_seed_algo),
+          m_seed_label(o.m_seed_label),
+#endif
+          m_hots_size(o.m_hots_size),
+          m_hots(o.m_hots) {
+    }
+
+    // Required for std::swap().
+    CombCandidate(CombCandidate&& o)
+        : m_trk_cands(std::move(o.m_trk_cands)),
+          m_best_short_cand(std::move(o.m_best_short_cand)),
+          m_state(o.m_state),
+          m_pickup_layer(o.m_pickup_layer),
+          m_lastHitIdx_before_bkwsearch(o.m_lastHitIdx_before_bkwsearch),
+          m_nInsideMinusOneHits_before_bkwsearch(o.m_nInsideMinusOneHits_before_bkwsearch),
+          m_nTailMinusOneHits_before_bkwsearch(o.m_nTailMinusOneHits_before_bkwsearch),
+#ifdef DUMPHITWINDOW
+          m_seed_algo(o.m_seed_algo),
+          m_seed_label(o.m_seed_label),
+#endif
+          m_hots_size(o.m_hots_size),
+          m_hots(std::move(o.m_hots)) {
+      // This is not needed as we do EOCC::reset() after EOCCS::resize which
+      // calls Reset here and all CombCands get cleared.
+      // However, if at some point we start using this for other purposes this needs
+      // to be called as well.
+      // for (auto &tc : *this) tc.setCombCandidate(this);
+    }
+
+    // Required for std::swap when filtering EventOfCombinedCandidates::m_candidates.
+    // We do not call clear() on vectors as this will be done via EoCCs reset.
+    // Probably would be better (clearer) if there was a special function that does
+    // the swap in here or in EoCCs.
+    CombCandidate& operator=(CombCandidate&& o) {
+      m_trk_cands = (std::move(o.m_trk_cands));
+      m_best_short_cand = std::move(o.m_best_short_cand);
+      m_state = o.m_state;
+      m_pickup_layer = o.m_pickup_layer;
+      m_lastHitIdx_before_bkwsearch = o.m_lastHitIdx_before_bkwsearch;
+      m_nInsideMinusOneHits_before_bkwsearch = o.m_nInsideMinusOneHits_before_bkwsearch;
+      m_nTailMinusOneHits_before_bkwsearch = o.m_nTailMinusOneHits_before_bkwsearch;
+#ifdef DUMPHITWINDOW
+      m_seed_algo = o.m_seed_algo;
+      m_seed_label = o.m_seed_label;
+#endif
+      m_hots_size = o.m_hots_size;
+      m_hots = std::move(o.m_hots);
+
+      for (auto& tc : m_trk_cands)
+        tc.setCombCandidate(this);
+
+      return *this;
+    }
+
+    // std::vector-like interface to access m_trk_cands
+    bool empty() const { return m_trk_cands.empty(); }
+    trk_cand_vec_type::size_type size() const { return m_trk_cands.size(); }
+    void resize(trk_cand_vec_type::size_type count) { m_trk_cands.resize(count); }
+    TrackCand& operator[](int i) { return m_trk_cands[i]; }
+    const TrackCand& operator[](int i) const { return m_trk_cands[i]; }
+    TrackCand& front() { return m_trk_cands.front(); }
+    const TrackCand& front() const { return m_trk_cands.front(); }
+    trk_cand_vec_type::reference emplace_back(TrackCand& tc) { return m_trk_cands.emplace_back(tc); }
+    void clear() { m_trk_cands.clear(); }
+
+    void reset(int max_cands_per_seed, int expected_num_hots) {
+      // Exchange m_trk_cands with a newly built, empty vector using the same allocator;
+      // the previous contents end up in `old_cands`, which is destroyed on return.
+      decltype(m_trk_cands) old_cands(m_trk_cands.get_allocator());
+      old_cands.swap(m_trk_cands);
+      m_trk_cands.reserve(max_cands_per_seed);  // we *must* never exceed this
+
+      // Note: m_state and m_pickup_layer are left alone here, importSeed() sets them.
+      m_best_short_cand.setScore(getScoreWorstPossible());
+
+      // expected_num_hots depends on the steering: for CloneEngine it is about
+      // N_layer * N_cands ~~ 20 * 6 = 120; for Std it is unknown, maybe double that.
+      // (All candidates get instantiated before they are purged.)
+      m_hots.reserve(expected_num_hots);
+      m_hots.clear();
+      m_hots_size = 0;
+    }
+
+    void importSeed(const Track& seed, int region);
+
+    int addHit(const HitOnTrack& hot, float chi2, int prev_idx) {
+      m_hots.push_back(HoTNode{hot, chi2, prev_idx});  // new node remembers its predecessor
+      return m_hots_size++;  // post-increment: returns the value of the counter before this call
+    }
+
+    void mergeCandsAndBestShortOne(const IterationParams& params, bool update_score, bool sort_cands);
+
+    void compactifyHitStorageForBestCand(bool remove_seed_hits, int backward_fit_min_hits);
+    void beginBkwSearch();
+    void endBkwSearch();
+
+    // Accessors
+    //-----------
+    int hotsSize() const { return m_hots_size; }  // value of the m_hots_size counter
+    const HoTNode& hot_node(int i) const { return m_hots[i]; }  // no range check on i
+    HoTNode& hot_node_nc(int i) { return m_hots[i]; }  // non-const variant of hot_node()
+    HitOnTrack hot(int i) const { return m_hots[i].m_hot; }  // returns the HitOnTrack by value
+    // Direct access into array for vectorized code in MkFinder
+    const HoTNode* hotsData() const { return m_hots.data(); }
+
+    const TrackCand& refBestShortCand() const { return m_best_short_cand; }
+    void setBestShortCand(const TrackCand& tc) { m_best_short_cand = tc; }  // stores a copy of tc
+
+    SeedState_e state() const { return m_state; }
+    void setState(SeedState_e ss) { m_state = ss; }
+
+    int pickupLayer() const { return m_pickup_layer; }
+
+#ifdef DUMPHITWINDOW
+    int seed_algo() const { return m_seed_algo; }  // only present in DUMPHITWINDOW builds
+    int seed_label() const { return m_seed_label; }
+#endif
+
+  private:
+    trk_cand_vec_type m_trk_cands;  // the track candidates; capacity is reserved in reset()
+    TrackCand m_best_short_cand;    // its score is set to getScoreWorstPossible() in reset()
+    SeedState_e m_state : 8;        // bit-field, no initializer; per the note in reset() set in importSeed()
+    int m_pickup_layer : 16;        // bit-field, no initializer; per the note in reset() set in importSeed()
+    short int m_lastHitIdx_before_bkwsearch = -1;
+    short int m_nInsideMinusOneHits_before_bkwsearch = -1;
+    short int m_nTailMinusOneHits_before_bkwsearch = -1;
+
+#ifdef DUMPHITWINDOW
+    int m_seed_algo = 0;
+    int m_seed_label = 0;
+#endif
+    int m_hots_size = 0;          // incremented by addHit(), zeroed by reset()
+    std::vector<HoTNode> m_hots;  // hit nodes; addHit() stores the predecessor index in each node
+  };
+
+  //==============================================================================
+  // Accessors for the hit stored at lastHitIdx_ in the owning CombCandidate: whole hit, its index, its layer.
+  inline HitOnTrack TrackCand::getLastHitOnTrack() const { return m_comb_candidate->hot(lastHitIdx_); }
+
+  inline int TrackCand::getLastHitIdx() const { return m_comb_candidate->hot(lastHitIdx_).index; }
+
+  inline int TrackCand::getLastHitLyr() const { return m_comb_candidate->hot(lastHitIdx_).layer; }
+
+  // Walks the hit chain backwards from lastHitIdx_ (following m_prev_idx) for at most
+  // nTotalHits() steps and returns the layer of the first node whose hit index is
+  // non-negative. Returns -1 if no such node is encountered.
+  inline int TrackCand::getLastFoundHitLyr() const {
+    int node_idx = lastHitIdx_;
+    for (int remaining = nTotalHits(); remaining > 0; --remaining) {
+      const HoTNode& node = m_comb_candidate->hot_node(node_idx);
+      if (node.m_hot.index >= 0)
+        return node.m_hot.layer;
+      // Negative hit index: skip this node and continue with its predecessor.
+      node_idx = node.m_prev_idx;
+    }
+
+    return -1;
+  }
+
+  // Walks the hit chain backwards from lastHitIdx_ for at most nTotalHits() steps. A node
+  // qualifies when its hit index is non-negative AND its layer lies in one of the hard-coded
+  // ranges 0-3, 18-20 or 45-47 (presumably the pixel layers of the CMS-2017 geometry --
+  // TODO confirm). Returns the layer of the first qualifying node, or -1 if there is none.
+  inline int TrackCand::getLastFoundPixelHitLyr() const {
+    int node_idx = lastHitIdx_;
+    for (int remaining = nTotalHits(); remaining > 0; --remaining) {
+      const HoTNode& node = m_comb_candidate->hot_node(node_idx);
+      const int lyr = node.m_hot.layer;
+      const bool in_range = (0 <= lyr && lyr <= 3) || (18 <= lyr && lyr <= 20) || (45 <= lyr && lyr <= 47);
+      if (node.m_hot.index >= 0 && in_range)
+        return lyr;
+      node_idx = node.m_prev_idx;
+    }
+    return -1;
+  }
+
+  // Counts nodes along the hit chain whose layer is >= 0, whose hit index is >= 0 or -9, and
+  // whose layer differs from that of the previously counted node.
+  inline int TrackCand::nUniqueLayers() const {
+    int n_layers = 0;
+    int last_counted_lyr = -1;
+    int node_idx = lastHitIdx_;
+    for (int remaining = nTotalHits(); remaining > 0; --remaining) {
+      const HoTNode& node = m_comb_candidate->hot_node(node_idx);
+      node_idx = node.m_prev_idx;  // advance first; `node` is still used for the checks below
+      const int lyr = node.m_hot.layer;
+      if (lyr < 0 || (node.m_hot.index < 0 && node.m_hot.index != -9) || lyr == last_counted_lyr)
+        continue;
+      ++n_layers;
+      last_counted_lyr = lyr;
+    }
+    return n_layers;
+  }
+
+  // Encodes hit counts as pix + 100 * stereo + 10000 * mono + 1000000 * matched. Only nodes with
+  // layer >= 0 and hit index >= 0 or == -9 are counted. A mono hit is "matched" when the hit
+  // counted just before it (walking backwards) was a stereo hit on the layer one above.
+  inline int TrackCand::nHitsByTypeEncoded(const TrackerInfo& trk_inf) const {
+    int prevL = -1, doubleStereo = -1;
+    bool prevStereo = false;
+    int ch = lastHitIdx_;
+    int pix = 0, stereo = 0, mono = 0, matched = 0;
+    for (int nh = nTotalHits(); nh > 0; --nh) {
+      const HoTNode& hot_node = m_comb_candidate->hot_node(ch);
+      int thisL = hot_node.m_hot.layer;
+      if (thisL >= 0 && (hot_node.m_hot.index >= 0 || hot_node.m_hot.index == -9)) {
+        if (trk_inf.is_pix_lyr(thisL))
+          ++pix;
+        else if (trk_inf.is_stereo(thisL)) {
+          ++stereo;
+          if (thisL == prevL)
+            doubleStereo = thisL;
+        } else {
+          ++mono;  // mono if not pixel, nor stereo - can be matched to stereo
+          if (prevStereo && thisL == prevL - 1)
+            ++matched;
+          else if (thisL == prevL && thisL == doubleStereo - 1)
+            ++matched;  //doubleMatch, the first is counted early on
+        }
+        prevL = thisL;
+        prevStereo = !trk_inf.is_pix_lyr(thisL) && trk_inf.is_stereo(thisL);  // this hit only, not the count
+      }
+      ch = hot_node.m_prev_idx;
+    }
+    return pix + 100 * stereo + 10000 * mono + 1000000 * matched;
+  }
+
+  // Encodes layer counts as pix + 100 * stereo + 10000 * mono + 1000000 * matched. A node is only
+  // counted when its layer is >= 0, its hit index is >= 0 or -9, and its layer differs from prevL.
+  inline int TrackCand::nLayersByTypeEncoded(const TrackerInfo& trk_inf) const {
+    int prevL = -1;
+    bool prevStereo = false;
+    int ch = lastHitIdx_;
+    int pix = 0, stereo = 0, mono = 0, matched = 0;
+    for (int nh = nTotalHits(); nh > 0; --nh) {
+      const HoTNode& hot_node = m_comb_candidate->hot_node(ch);
+      int thisL = hot_node.m_hot.layer;
+      if (thisL >= 0 && (hot_node.m_hot.index >= 0 || hot_node.m_hot.index == -9) && thisL != prevL) {
+        if (trk_inf.is_pix_lyr(thisL))
+          ++pix;
+        else if (trk_inf.is_stereo(thisL))
+          ++stereo;
+        else {
+          ++mono;  // mono if not pixel, nor stereo - can be matched to stereo
+          if (prevStereo && thisL == prevL - 1)
+            ++matched;
+        }
+        prevL = thisL;
+        prevStereo = !trk_inf.is_pix_lyr(thisL) && trk_inf.is_stereo(thisL);  // this layer only, not the count
+      }
+      ch = hot_node.m_prev_idx;
+    }
+    return pix + 100 * stereo + 10000 * mono + 1000000 * matched;
+  }
+  // Reference to the node at lastHitIdx_ in the owning CombCandidate (mutable and const overloads).
+  inline HoTNode& TrackCand::refLastHoTNode() { return m_comb_candidate->hot_node_nc(lastHitIdx_); }
+
+  inline const HoTNode& TrackCand::refLastHoTNode() const { return m_comb_candidate->hot_node(lastHitIdx_); }
+
+  //------------------------------------------------------------------------------
+
+  // Appends {hitIdx, hitLyr} to the owning CombCandidate's hit storage and updates the hit counters.
+  inline void TrackCand::addHitIdx(int hitIdx, int hitLyr, float chi2) {
+    lastHitIdx_ = m_comb_candidate->addHit({hitIdx, hitLyr}, chi2, lastHitIdx_);
+    const bool is_found = hitIdx >= 0 || hitIdx == -9;
+    if (!is_found) {
+      // Original note: a -7 hit (track passing an inactive module) is not counted against the track when scoring.
+      ++nMissingHits_;
+      if (hitIdx == -1)
+        ++nTailMinusOneHits_;
+      return;
+    }
+    ++nFoundHits_;
+    chi2_ += chi2;
+    nInsideMinusOneHits_ += nTailMinusOneHits_;  // the -1 hits counted at the tail are now inside
+    nTailMinusOneHits_ = 0;
+  }
+
+  //==============================================================================
+
+  class EventOfCombCandidates {
+  public:
+    EventOfCombCandidates(int size = 0) : m_cc_pool(), m_candidates(), m_capacity(0), m_size(0) {}  // size is unused
+
+    void releaseMemory() {  // drops all candidates first, then releases the pool
+      {  // Get all the destructors called before nuking CcPool.
+        std::vector<CombCandidate> tmp;
+        m_candidates.swap(tmp);
+      }
+      m_capacity = 0;
+      m_size = 0;
+      m_cc_pool.release();
+    }
+
+    void reset(int new_capacity, int max_cands_per_seed, int expected_num_hots = 128) {
+      m_cc_pool.reset(new_capacity * max_cands_per_seed);  // pool is reset before any candidate is touched
+      if (new_capacity > m_capacity) {  // grow only; a smaller request keeps the existing vector
+        CcAlloc<TrackCand> alloc(&m_cc_pool);
+        std::vector<CombCandidate> tmp(new_capacity, alloc);  // new_capacity elements, each built from alloc
+        m_candidates.swap(tmp);
+        m_capacity = new_capacity;
+      }
+      for (int s = 0; s < new_capacity; ++s) {
+        m_candidates[s].reset(max_cands_per_seed, expected_num_hots);
+      }
+      for (int s = new_capacity; s < m_capacity; ++s) {  // slots beyond the request get zero reservations
+        m_candidates[s].reset(0, 0);
+      }
+
+      m_size = 0;
+    }
+
+    void resizeAfterFiltering(int n_removed) {  // only lowers m_size; no element is destroyed here
+      assert(n_removed <= m_size);
+      m_size -= n_removed;
+    }
+
+    void insertSeed(const Track& seed, int region) {  // fills the next free slot, m_candidates[m_size]
+      assert(m_size < m_capacity);
+
+      m_candidates[m_size].importSeed(seed, region);
+
+      ++m_size;
+    }
+
+    void compactifyHitStorageForBestCand(bool remove_seed_hits, int backward_fit_min_hits) {
+      for (int i = 0; i < m_size; ++i)  // forwarded to the first m_size candidates only
+        m_candidates[i].compactifyHitStorageForBestCand(remove_seed_hits, backward_fit_min_hits);
+    }
+
+    void beginBkwSearch() {
+      for (int i = 0; i < m_size; ++i)
+        m_candidates[i].beginBkwSearch();
+    }
+    void endBkwSearch() {
+      for (int i = 0; i < m_size; ++i)
+        m_candidates[i].endBkwSearch();
+    }
+
+    // Accessors
+    int size() const { return m_size; }  // number of inserted seeds (m_size), not m_capacity
+
+    const CombCandidate& operator[](int i) const { return m_candidates[i]; }  // no range check
+    CombCandidate& operator[](int i) { return m_candidates[i]; }
+    CombCandidate& cand(int i) { return m_candidates[i]; }
+
+    // Direct access for vectorized functions in MkBuilder / MkFinder
+    const std::vector<CombCandidate>& refCandidates() const { return m_candidates; }
+    std::vector<CombCandidate>& refCandidates_nc() { return m_candidates; }
+
+  private:
+    CcPool<TrackCand> m_cc_pool;  // declared before m_candidates, whose allocator points at it (see reset())
+
+    std::vector<CombCandidate> m_candidates;
+
+    int m_capacity;  // size m_candidates was last grown to in reset()
+    int m_size;      // number of slots filled by insertSeed() since the last reset()
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/interface/IterationConfig.h b/RecoTracker/MkFitCore/interface/IterationConfig.h
new file mode 100644
index 0000000000000..93eb5f627f497
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/IterationConfig.h
@@ -0,0 +1,373 @@
+#ifndef RecoTracker_MkFitCore_interface_IterationConfig_h
+#define RecoTracker_MkFitCore_interface_IterationConfig_h
+
+#include "RecoTracker/MkFitCore/interface/SteeringParams.h"
+
+#include "nlohmann/json_fwd.hpp"
+
+#include <functional>
+
+namespace mkfit {
+
+  class EventOfHits;
+  class TrackerInfo;
+  class Track;
+
+  typedef std::vector<Track> TrackVec;
+
+  //==============================================================================
+  // Hit masks / IterationMaskIfc
+  //==============================================================================
+
+  struct IterationMaskIfcBase {
+    virtual ~IterationMaskIfcBase() {}
+    // Default implementation: returns nullptr for every layer, i.e. no mask is provided.
+    virtual const std::vector<bool> *get_mask_for_layer(int layer) const { return nullptr; }
+  };
+
+  struct IterationMaskIfc : public IterationMaskIfcBase {
+    std::vector<std::vector<bool>> m_mask_vector;  // indexed by layer
+    // Returns the address of the mask stored for `layer`; the index is not range-checked.
+    const std::vector<bool> *get_mask_for_layer(int layer) const override { return &m_mask_vector[layer]; }
+  };
+
+  //==============================================================================
+  // IterationLayerConfig
+  //==============================================================================
+
+  class IterationConfig;
+
+  class IterationLayerConfig {
+  public:
+    // Selection limits; zero until set_selection_limits() is called (previously left uninitialized).
+    float m_select_min_dphi = 0.0;
+    float m_select_max_dphi = 0.0;
+    float m_select_min_dq = 0.0;
+    float m_select_max_dq = 0.0;
+
+    void set_selection_limits(float p1, float p2, float q1, float q2) {  // p = dphi, q = dq; (min, max) each
+      m_select_min_dphi = p1;
+      m_select_max_dphi = p2;
+      m_select_min_dq = q1;
+      m_select_max_dq = q2;
+    }
+
+    //----------------------------------------------------------------------------
+
+    float min_dphi() const { return m_select_min_dphi; }
+    float max_dphi() const { return m_select_max_dphi; }
+    float min_dq() const { return m_select_min_dq; }
+    float max_dq() const { return m_select_max_dq; }
+
+    //Hit selection windows: 2D fit/layer (72 in phase-1 CMS geometry)
+    //cut = [0]*1/pT + [1]*std::fabs(theta-pi/2) + [2]
+    float c_dp_sf = 1.1;
+    float c_dp_0 = 0.0;
+    float c_dp_1 = 0.0;
+    float c_dp_2 = 0.0;
+    // c_dq_*: presumably the same parametrization for the dq window -- TODO confirm
+    float c_dq_sf = 1.1;
+    float c_dq_0 = 0.0;
+    float c_dq_1 = 0.0;
+    float c_dq_2 = 0.0;
+    // c_c2_*: presumably the same parametrization for a chi2 cut -- TODO confirm
+    float c_c2_sf = 1.1;
+    float c_c2_0 = 0.0;
+    float c_c2_1 = 0.0;
+    float c_c2_2 = 0.0;
+
+    //----------------------------------------------------------------------------
+
+    IterationLayerConfig() {}
+  };
+
+  //==============================================================================
+  // IterationParams
+  //==============================================================================
+
+  class IterationParams {
+  public:
+    int nlayers_per_seed = 3;
+    int maxCandsPerSeed = 5;  // read by MkJob::max_max_cands()
+    int maxHolesPerCand = 4;
+    int maxConsecHoles = 1;
+    float chi2Cut_min = 15.0;
+    float chi2CutOverlap = 3.5;
+    float pTCutOverlap = 1.0;
+
+    //seed cleaning params, all written by IterationConfig::set_seed_cleaning_params()
+    float c_ptthr_hpt = 2.0;
+    //initial values; suffixes presumably mean b/e = barrel/endcap, h/l = high/low pT -- TODO confirm
+    float c_drmax_bh = 0.010;
+    float c_dzmax_bh = 0.005;
+    float c_drmax_eh = 0.020;
+    float c_dzmax_eh = 0.020;
+    float c_drmax_bl = 0.010;
+    float c_dzmax_bl = 0.005;
+    float c_drmax_el = 0.030;
+    float c_dzmax_el = 0.030;
+
+    int minHitsQF = 4;            // written by IterationConfig::set_qf_params()
+    float fracSharedHits = 0.19;  // written by both set_qf_params() and set_dupl_params()
+    float drth_central = 0.001;   // drth_*: written by IterationConfig::set_dupl_params()
+    float drth_obarrel = 0.001;
+    float drth_forward = 0.001;
+  };
+
+  //==============================================================================
+  // IterationSeedPartition
+  //==============================================================================
+
+  class IterationSeedPartition {
+  public:
+    std::vector<int> m_region;        // presumably one region number per seed -- TODO confirm
+    std::vector<float> m_sort_score;  // presumably one sort key per seed -- TODO confirm
+    // Both vectors are created with `size` value-initialized (zero) elements.
+    IterationSeedPartition(int size) : m_region(size), m_sort_score(size) {}
+  };
+
+  //==============================================================================
+  // IterationConfig
+  //==============================================================================
+
+  class IterationConfig {
+  public:
+    using partition_seeds_foo = void(const TrackerInfo &,
+                                     const TrackVec &,
+                                     const EventOfHits &,
+                                     IterationSeedPartition &);  // call signature held by m_partition_seeds
+
+    int m_iteration_index = -1;  // -1 until set_iteration_index_and_track_algorithm() is called
+    int m_track_algorithm = -1;
+
+    bool m_requires_seed_hit_sorting = false;  // enabled by set_qf_flags()
+    bool m_requires_quality_filter = false;    // enabled by set_qf_flags()
+    bool m_requires_dupclean_tight = false;    // enabled by set_dupclean_flag()
+
+    bool m_backward_search = false;
+    bool m_backward_drop_seed_hits = false;
+
+    int m_backward_fit_min_hits = -1;  // Min number of hits to keep when m_backward_drop_seed_hits is true
+
+    // Iteration parameters (could be a ptr)
+    IterationParams m_params;
+    IterationParams m_backward_params;
+
+    int m_n_regions = -1;  // the four members below are sized by set_num_regions_layers()
+    std::vector<int> m_region_order;
+    std::vector<SteeringParams> m_steering_params;
+    std::vector<IterationLayerConfig> m_layer_configs;
+
+    std::function<partition_seeds_foo> m_partition_seeds;
+
+    //----------------------------------------------------------------------------
+
+    IterationConfig() {}
+
+    // -------- Getter functions
+
+    IterationLayerConfig &layer(int i) { return m_layer_configs[i]; }  // no range check
+    SteeringParams &steering_params(int region) { return m_steering_params[region]; }  // no range check
+
+    bool merge_seed_hits_during_cleaning() const { return m_backward_search && m_backward_drop_seed_hits; }
+
+    // -------- Setup function
+
+    void cloneLayerSteerCore(const IterationConfig &o) {
+      // Clone common settings for an iteration.
+      // m_iteration_index, m_track_algorithm, cleaning and bkw-search flags,
+      // and IterationParams are not copied.
+
+      m_n_regions = o.m_n_regions;
+      m_region_order = o.m_region_order;
+      m_steering_params = o.m_steering_params;
+      m_layer_configs = o.m_layer_configs;
+
+      m_partition_seeds = o.m_partition_seeds;
+    }
+
+    void set_iteration_index_and_track_algorithm(int idx, int trk_alg) {
+      m_iteration_index = idx;
+      m_track_algorithm = trk_alg;
+    }
+
+    void set_qf_flags() {
+      m_requires_seed_hit_sorting = true;
+      m_requires_quality_filter = true;
+    }
+
+    void set_qf_params(int minHits, float sharedFrac) {  // affects m_params only, not m_backward_params
+      m_params.minHitsQF = minHits;
+      m_params.fracSharedHits = sharedFrac;
+    }
+
+    void set_dupclean_flag() { m_requires_dupclean_tight = true; }
+
+    void set_dupl_params(float sharedFrac, float drthCentral, float drthObarrel, float drthForward) {
+      m_params.fracSharedHits = sharedFrac;  // NOTE: the same field that set_qf_params() writes
+      m_params.drth_central = drthCentral;
+      m_params.drth_obarrel = drthObarrel;
+      m_params.drth_forward = drthForward;
+    }
+
+    void set_seed_cleaning_params(float pt_thr,
+                                  float dzmax_bh,
+                                  float drmax_bh,
+                                  float dzmax_bl,
+                                  float drmax_bl,
+                                  float dzmax_eh,
+                                  float drmax_eh,
+                                  float dzmax_el,
+                                  float drmax_el) {  // each argument goes to the like-named m_params.c_* field
+      m_params.c_ptthr_hpt = pt_thr;
+      m_params.c_drmax_bh = drmax_bh;
+      m_params.c_dzmax_bh = dzmax_bh;
+      m_params.c_drmax_eh = drmax_eh;
+      m_params.c_dzmax_eh = dzmax_eh;
+      m_params.c_drmax_bl = drmax_bl;
+      m_params.c_dzmax_bl = dzmax_bl;
+      m_params.c_drmax_el = drmax_el;
+      m_params.c_dzmax_el = dzmax_el;
+    }
+
+    void set_num_regions_layers(int nreg, int nlay) {
+      m_n_regions = nreg;
+      m_region_order.resize(nreg);
+      m_steering_params.resize(nreg);
+      for (int i = 0; i < nreg; ++i)
+        m_steering_params[i].m_region = i;  // every SteeringParams records its own region index
+      m_layer_configs.resize(nlay);
+    }
+  };
+
+  //==============================================================================
+  // IterationsInfo
+  //==============================================================================
+
+  class IterationsInfo {
+  public:
+    std::vector<IterationConfig> m_iterations;
+
+    IterationsInfo() {}
+
+    // Size management; size() converts the vector's size_type to int.
+    void resize(int ni) { m_iterations.resize(ni); }
+    int size() const { return static_cast<int>(m_iterations.size()); }
+    // Unchecked element access, mutable and const.
+    IterationConfig &operator[](int i) { return m_iterations[i]; }
+    const IterationConfig &operator[](int i) const { return m_iterations[i]; }
+  };
+
+  //==============================================================================
+
+  // IterationConfig instances are created in Geoms/CMS-2017.cc, Create_CMS_2017(),
+  // filling the IterationsInfo object passed in by reference.
+
+  //==============================================================================
+  // JSON config interface
+  //==============================================================================
+
+  class ConfigJsonPatcher {
+  public:
+    struct PatchReport {  // three running counters, accumulated with inc_counts()
+      int n_files = 0;
+      int n_json_entities = 0;
+      int n_replacements = 0;
+
+      void inc_counts(int f, int e, int r) {  // adds f / e / r to files / entities / replacements
+        n_files += f;
+        n_json_entities += e;
+        n_replacements += r;
+      }
+      void inc_counts(const PatchReport &pr) {  // adds the counters of another report
+        n_files += pr.n_files;
+        n_json_entities += pr.n_json_entities;
+        n_replacements += pr.n_replacements;
+      }
+      void reset() { n_files = n_json_entities = n_replacements = 0; }
+    };
+
+  private:
+    std::unique_ptr<nlohmann::json> m_json;  // only forward-declared here (json_fwd.hpp)
+    nlohmann::json *m_current = nullptr;     // presumably the node cd() has navigated to -- TODO confirm
+
+    // add stack and cd_up() ? also, name stack for exceptions and printouts
+    std::vector<nlohmann::json *> m_json_stack;
+    std::vector<std::string> m_path_stack;
+
+    bool m_verbose = false;
+
+    std::string get_abs_path() const;
+    std::string exc_hdr(const char *func = nullptr) const;
+
+  public:
+    ConfigJsonPatcher(bool verbose = false);
+    ~ConfigJsonPatcher();  // declared here, defined elsewhere
+
+    template <class T>
+    void load(const T &o);
+    template <class T>
+    void save(T &o);
+
+    void cd(const std::string &path);
+    void cd_up(const std::string &path = "");
+    void cd_top(const std::string &path = "");
+
+    template <typename T>
+    void replace(const std::string &path, T val);
+
+    template <typename T>
+    void replace(int first, int last, const std::string &path, T val);
+
+    nlohmann::json &get(const std::string &path);
+
+    int replace(const nlohmann::json &j);  // NOTE(review): int result is presumably a replacement count -- confirm
+
+    std::string dump(int indent = 2);
+  };
+
+  class ConfigJson {
+  public:
+    ConfigJson(bool verbose = false) : m_verbose(verbose) {}
+
+    // Patch IterationsInfo from a vector of files.
+    // Assumes patch files include iteration-info preambles, i.e., they
+    // were saved with include_iter_info_preamble=true.
+    // If report is non-null, counts are added to the existing object.
+    void patch_Files(IterationsInfo &its_info,
+                     const std::vector<std::string> &fnames,
+                     ConfigJsonPatcher::PatchReport *report = nullptr);
+
+    // Load a single iteration from JSON file.
+    // Searches for a match between m_track_algorithm in its_info and in JSON file to decide
+    // which IterationConfig it will clone and patch-load the JSON file over.
+    // The IterationConfig in question *must* match in structure to what is on file,
+    // in particular, arrays must be of same lengths.
+    // Assumes JSON file has been saved WITHOUT iteration-info preamble.
+    // Returns a unique_ptr to the cloned IterationConfig.
+    // If report is non-null, counts are added to the existing object.
+    std::unique_ptr<IterationConfig> patchLoad_File(const IterationsInfo &its_info,
+                                                    const std::string &fname,
+                                                    ConfigJsonPatcher::PatchReport *report = nullptr);
+
+    // Load a single iteration from JSON file.
+    // This leaves IterationConfig data-members that are not registered
+    // in JSON schema at their default values.
+    // The only such member is std::function m_partition_seeds.
+    // Assumes JSON file has been saved WITHOUT iteration-info preamble.
+    // Returns a unique_ptr to the newly created IterationConfig (nothing is cloned here).
+    std::unique_ptr<IterationConfig> load_File(const std::string &fname);
+
+    void save_Iterations(IterationsInfo &its_info, const std::string &fname_fmt, bool include_iter_info_preamble);
+
+    void dump(IterationsInfo &its_info);
+
+    void test_Direct(IterationConfig &it_cfg);
+    void test_Patcher(IterationConfig &it_cfg);
+
+  private:
+    bool m_verbose = false;  // overwritten by the constructor argument
+  };
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/MatrixSTypes.h b/RecoTracker/MkFitCore/interface/MatrixSTypes.h
new file mode 100644
index 0000000000000..7bad2a43570c9
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/MatrixSTypes.h
@@ -0,0 +1,48 @@
+#ifndef RecoTracker_MkFitCore_interface_MatrixSTypes_h
+#define RecoTracker_MkFitCore_interface_MatrixSTypes_h
+
+#include "Math/SMatrix.h"
+
+namespace mkfit {
+
+  typedef ROOT::Math::SMatrix<float, 6, 6, ROOT::Math::MatRepSym<float, 6> > SMatrixSym66;
+  typedef ROOT::Math::SMatrix<float, 6> SMatrix66;
+  typedef ROOT::Math::SVector<float, 6> SVector6;
+
+  typedef ROOT::Math::SMatrix<float, 3> SMatrix33;
+  typedef ROOT::Math::SMatrix<float, 3, 3, ROOT::Math::MatRepSym<float, 3> > SMatrixSym33;
+  typedef ROOT::Math::SVector<float, 3> SVector3;
+
+  typedef ROOT::Math::SMatrix<float, 2> SMatrix22;
+  typedef ROOT::Math::SMatrix<float, 2, 2, ROOT::Math::MatRepSym<float, 2> > SMatrixSym22;
+  typedef ROOT::Math::SVector<float, 2> SVector2;
+
+  typedef ROOT::Math::SMatrix<float, 3, 6> SMatrix36;
+  typedef ROOT::Math::SMatrix<float, 6, 3> SMatrix63;
+
+  typedef ROOT::Math::SMatrix<float, 2, 6> SMatrix26;
+  typedef ROOT::Math::SMatrix<float, 6, 2> SMatrix62;
+
+  // Sets every off-diagonal element of m to zero, in place; diagonal entries are not touched.
+  template <typename Matrix>
+  inline void diagonalOnly(Matrix& m) {
+    for (int row = 0; row < m.kRows; ++row)
+      for (int col = 0; col < m.kCols; ++col) {
+        if (row != col)
+          m[row][col] = 0.f;
+      }
+  }
+
+  // Debug helper: prints m to std::cout row by row, each element in a 12-character wide field.
+  template <typename Matrix>
+  void dumpMatrix(const Matrix& m) {  // by const reference, the matrix is only read
+    for (int r = 0; r < m.kRows; ++r) {
+      for (int c = 0; c < m.kCols; ++c)
+        std::cout << std::setw(12) << m.At(r, c) << " ";
+      std::cout << std::endl;
+    }
+  }
+
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/MkBuilder.h b/RecoTracker/MkFitCore/interface/MkBuilder.h
new file mode 100644
index 0000000000000..6f97874749fe0
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/MkBuilder.h
@@ -0,0 +1,186 @@
+#ifndef RecoTracker_MkFitCore_interface_MkBuilder_h
+#define RecoTracker_MkFitCore_interface_MkBuilder_h
+
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+
+#include <atomic>
+#include <functional>
+#include <map>
+#include <vector>
+
+namespace mkfit {
+
+  class CandCloner;
+  class LayerInfo;
+  class MkFinder;
+  class MkFitter;
+  class TrackerInfo;
+
+  class Event;
+
+  //==============================================================================
+  // MkJob
+  //==============================================================================
+
+  class MkJob {
+  public:
+    const TrackerInfo &m_trk_info;
+    // Config &config; // If we want to get rid of namespace / global config
+    const IterationConfig &m_iter_config;
+    const EventOfHits &m_event_of_hits;
+    // Optional hit-mask provider; when null, get_mask_for_layer() returns nullptr.
+    const IterationMaskIfcBase *m_iter_mask_ifc = nullptr;
+    // Read-only shortcuts into m_iter_config. All of them are const member functions.
+    int num_regions() const { return m_iter_config.m_n_regions; }
+    const auto regions_begin() const { return m_iter_config.m_region_order.begin(); }
+    const auto regions_end() const { return m_iter_config.m_region_order.end(); }
+
+    const auto &steering_params(int i) const { return m_iter_config.m_steering_params[i]; }
+
+    const auto &params() const { return m_iter_config.m_params; }
+    const auto &params_bks() const { return m_iter_config.m_backward_params; }
+    // Larger of the forward and backward maxCandsPerSeed settings.
+    int max_max_cands() const { return std::max(params().maxCandsPerSeed, params_bks().maxCandsPerSeed); }
+
+    const std::vector<bool> *get_mask_for_layer(int layer) const {
+      return m_iter_mask_ifc ? m_iter_mask_ifc->get_mask_for_layer(layer) : nullptr;
+    }
+  };
+
+  //==============================================================================
+  // MkBuilder
+  //==============================================================================
+
+  class MkBuilder {
+  public:
+    using insert_seed_foo = void(const Track &, int);        // callback type taken by import_seeds()
+    using filter_track_cand_foo = bool(const TrackCand &);   // predicate type taken by filter_comb_cands()
+
+    typedef std::vector<std::pair<int, int>> CandIdx_t;
+
+    MkBuilder(bool silent = true) : m_silent(silent) {}
+    ~MkBuilder() = default;
+
+    // --------
+
+    static std::unique_ptr<MkBuilder> make_builder(bool silent = true);
+    static void populate();
+
+    int total_cands() const {  // sum of CombCandidate::size() over all inserted seeds
+      int res = 0;
+      for (int i = 0; i < m_event_of_comb_cands.size(); ++i)
+        res += m_event_of_comb_cands[i].size();
+      return res;
+    }
+
+    std::pair<int, int> max_hits_layer(const EventOfHits &eoh) const {  // returns {max nHits, layer_id of that layer}
+      int maxN = 0;
+      int maxL = 0;
+      for (int l = 0; l < eoh.nLayers(); ++l) {
+        int lsize = eoh[l].nHits();
+        if (lsize > maxN) {  // strict comparison: on ties the first layer encountered is kept
+          maxN = lsize;
+          maxL = eoh[l].layer_id();
+        }
+      }
+      return {maxN, maxL};
+    }
+
+    void begin_event(MkJob *job, Event *ev, const char *build_type);
+    void end_event();
+    void release_memory();
+
+    void import_seeds(const TrackVec &in_seeds, std::function<insert_seed_foo> insert_seed);
+
+    // filter for rearranging cands that will / will not do backward search.
+    int filter_comb_cands(std::function<filter_track_cand_foo> filter);
+
+    void find_min_max_hots_size();
+
+    void select_best_comb_cands(bool clear_m_tracks = false, bool remove_missing_hits = false);
+    void export_best_comb_cands(TrackVec &out_vec, bool remove_missing_hits = false);
+    void export_tracks(TrackVec &out_vec);
+
+    void compactifyHitStorageForBestCand(bool remove_seed_hits, int backward_fit_min_hits) {
+      m_event_of_comb_cands.compactifyHitStorageForBestCand(remove_seed_hits, backward_fit_min_hits);
+    }
+
+    void beginBkwSearch() { m_event_of_comb_cands.beginBkwSearch(); }  // forwarded to every CombCandidate
+    void endBkwSearch() { m_event_of_comb_cands.endBkwSearch(); }
+
+    // MIMI hack to export tracks for BH
+    const TrackVec &ref_tracks() const { return m_tracks; }
+    TrackVec &ref_tracks_nc() { return m_tracks; }
+
+    // --------
+
+    void find_tracks_load_seeds_BH(const TrackVec &in_seeds);  // for FindTracksBestHit
+    void find_tracks_load_seeds(const TrackVec &in_seeds);
+
+    int find_tracks_unroll_candidates(std::vector<std::pair<int, int>> &seed_cand_vec,
+                                      int start_seed,
+                                      int end_seed,
+                                      int layer,
+                                      int prev_layer,
+                                      bool pickup_only,
+                                      SteeringParams::IterationType_e iteration_dir);
+
+    void find_tracks_handle_missed_layers(MkFinder *mkfndr,
+                                          const LayerInfo &layer_info,
+                                          std::vector<std::vector<TrackCand>> &tmp_cands,
+                                          const std::vector<std::pair<int, int>> &seed_cand_idx,
+                                          const int region,
+                                          const int start_seed,
+                                          const int itrack,
+                                          const int end);
+
+    void find_tracks_in_layers(CandCloner &cloner,
+                               MkFinder *mkfndr,
+                               SteeringParams::IterationType_e iteration_dir,
+                               const int start_seed,
+                               const int end_seed,
+                               const int region);
+
+    // --------
+
+    void seed_post_cleaning(TrackVec &tv);
+
+    void findTracksBestHit(SteeringParams::IterationType_e iteration_dir = SteeringParams::IT_FwdSearch);
+    void findTracksStandard(SteeringParams::IterationType_e iteration_dir = SteeringParams::IT_FwdSearch);
+    void findTracksCloneEngine(SteeringParams::IterationType_e iteration_dir = SteeringParams::IT_FwdSearch);
+
+    void backwardFitBH();
+    void fit_cands_BH(MkFinder *mkfndr, int start_cand, int end_cand, int region);
+
+    void backwardFit();
+    void fit_cands(MkFinder *mkfndr, int start_cand, int end_cand, int region);
+
+  private:
+    void fit_one_seed_set(TrackVec &simtracks, int itrack, int end, MkFitter *mkfttr, const bool is_brl[]);
+
+    MkJob *m_job = nullptr;
+
+    // MIMI -- Used by seed processing / validation.
+    Event *m_event = nullptr;
+
+    // State for BestHit
+    TrackVec m_tracks;
+
+    // State for Std / CloneEngine
+    EventOfCombCandidates m_event_of_comb_cands;
+
+    // Per-region seed information
+    std::vector<int> m_seedEtaSeparators;
+    std::vector<int> m_seedMinLastLayer;
+    std::vector<int> m_seedMaxLastLayer;
+
+    std::atomic<int> m_nan_n_silly_per_layer_count{0};  // explicit 0: default-constructed atomic is uninitialized pre-C++20
+
+    bool m_silent;
+  };
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/MkBuilderWrapper.h b/RecoTracker/MkFitCore/interface/MkBuilderWrapper.h
new file mode 100644
index 0000000000000..63e16b3c0e759
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/MkBuilderWrapper.h
@@ -0,0 +1,29 @@
+#ifndef RecoTracker_MkFitCore_interface_MkBuilderWrapper_h
+#define RecoTracker_MkFitCore_interface_MkBuilderWrapper_h
+
+#include <memory>
+
+namespace mkfit {
+  class MkBuilder;
+
+  /**
+   * Owning wrapper that keeps MkBuilder.h out of CMSSW translation units:
+   * MkBuilder is only forward-declared here and held through a unique_ptr.
+   * The headers included by MkBuilder.h use build-time configuration
+   * macros that should remain internal details of the MkFit package.
+   */
+  class MkBuilderWrapper {
+  public:
+    MkBuilderWrapper(bool silent);
+    ~MkBuilderWrapper();  // only declared here; MkBuilder is an incomplete type in this header
+
+    MkBuilder& get() { return *builder_; }  // dereferences builder_ (assumes the ctor set it -- TODO confirm)
+
+    static void populate();
+
+  private:
+    std::unique_ptr<MkBuilder> builder_;  // the wrapped builder
+  };
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/SteeringParams.h b/RecoTracker/MkFitCore/interface/SteeringParams.h
new file mode 100644
index 0000000000000..799ef4aecad7c
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/SteeringParams.h
@@ -0,0 +1,149 @@
+#ifndef RecoTracker_MkFitCore_interface_SteeringParams_h
+#define RecoTracker_MkFitCore_interface_SteeringParams_h
+
+#include <vector>
+#include <stdexcept>
+
+namespace mkfit {
+
+  //==============================================================================
+  // LayerControl
+  //==============================================================================
+
+  struct LayerControl {
+    int m_layer = -1;  // layer number of this plan entry; -1 when default-constructed
+
+    // Design note (not implemented): per-layer jump targets on hit / miss would need a
+    // parallel per-candidate structure, or per-layer containers of enabled track indices.
+    // int  m_on_miss_jump_to = -999;
+    // int  m_on_hit_jump_to  = -999;
+
+    // The former pickup-only / bk-fit-only bools were moved to SteeringParams, where they are
+    // expressed as layer-plan indices at which pickup / bkfit / bksrch start or end.
+
+    //----------------------------------------------------------------------------
+
+    LayerControl() = default;
+    LayerControl(int lay) : m_layer{lay} {}
+  };
+
+  //==============================================================================
+  // SteeringParams -- layer plan, region id and the plan indices bounding each iteration type
+  //==============================================================================
+
+  class SteeringParams {
+  public:
+    enum IterationType_e { IT_FwdSearch, IT_BkwFit, IT_BkwSearch };
+    // Cursor over m_layer_plan: walks forward for IT_FwdSearch and backward for the two Bkw types.
+    class iterator {
+      friend class SteeringParams;
+
+      const SteeringParams& m_steering_params;
+      IterationType_e m_type;
+      int m_cur_index = -1;  // current m_layer_plan index; -1 means invalid / exhausted
+      int m_end_index = -1;  // index at which stepping stops (exclusive)
+
+      iterator(const SteeringParams& sp, IterationType_e t) : m_steering_params(sp), m_type(t) {}
+
+    public:
+      const LayerControl& layer_control() const { return m_steering_params.m_layer_plan[m_cur_index]; }
+      int layer() const { return layer_control().m_layer; }
+      int index() const { return m_cur_index; }
+      int region() const { return m_steering_params.m_region; }
+
+      bool is_valid() const { return m_cur_index != -1; }
+      // NOTE(review): returns a reference, not a pointer, so `it->member` does not compile; use layer_control().
+      const LayerControl& operator->() const { return layer_control(); }
+      // True when positioned on the pickup index of a search iteration; throws std::runtime_error for IT_BkwFit.
+      bool is_pickup_only() const {
+        if (m_type == IT_FwdSearch)
+          return m_cur_index == m_steering_params.m_fwd_search_pickup;
+        else if (m_type == IT_BkwSearch)
+          return m_cur_index == m_steering_params.m_bkw_search_pickup;
+        else
+          throw std::runtime_error("invalid iteration type");
+      }
+      // Steps once in the direction of travel; on reaching m_end_index the iterator becomes invalid (returns false).
+      bool operator++() {
+        if (!is_valid())
+          return false;
+        if (m_type == IT_FwdSearch) {
+          if (++m_cur_index == m_end_index)
+            m_cur_index = -1;
+        } else {
+          if (--m_cur_index == m_end_index)
+            m_cur_index = -1;
+        }
+        return is_valid();
+      }
+
+      // Functions for debug printouts. No bounds checks: next_layer() must not be called on the final step.
+      int end_index() const { return m_end_index; }
+      int next_layer() const {
+        if (m_type == IT_FwdSearch)
+          return m_steering_params.m_layer_plan[m_cur_index + 1].m_layer;
+        else
+          return m_steering_params.m_layer_plan[m_cur_index - 1].m_layer;
+      }
+      int last_layer() const {
+        if (m_type == IT_FwdSearch)
+          return m_steering_params.m_layer_plan[m_end_index - 1].m_layer;
+        else
+          return m_steering_params.m_layer_plan[m_end_index + 1].m_layer;
+      }
+    };
+
+    std::vector<LayerControl> m_layer_plan;
+
+    int m_region = -1;  // -1 until assigned; was left uninitialized by the default constructor
+
+    int m_fwd_search_pickup = 0;   // plan index where the forward search starts
+    int m_bkw_fit_last = 0;        // last plan index visited by the backward fit
+    int m_bkw_search_pickup = -1;  // plan index where the backward search starts; -1 = no such plan
+
+    //----------------------------------------------------------------------------
+
+    SteeringParams() {}
+
+    void reserve_plan(int n) { m_layer_plan.reserve(n); }
+
+    void append_plan(int layer) { m_layer_plan.emplace_back(layer); }
+    // Appends the layers first..last, both inclusive.
+    void fill_plan(int first, int last) {
+      for (int i = first; i <= last; ++i)
+        append_plan(i);
+    }
+
+    void set_iterator_limits(int fwd_search_pu, int bkw_fit_last, int bkw_search_pu = -1) {
+      m_fwd_search_pickup = fwd_search_pu;
+      m_bkw_fit_last = bkw_fit_last;
+      m_bkw_search_pickup = bkw_search_pu;
+    }
+
+    bool has_bksearch_plan() const { return m_bkw_search_pickup != -1; }
+    // Throws std::invalid_argument for an unknown type and std::runtime_error when the start index is -1.
+    iterator make_iterator(IterationType_e type) const {
+      iterator it(*this, type);
+      const int plan_size = static_cast<int>(m_layer_plan.size());
+      if (type == IT_FwdSearch) {
+        it.m_cur_index = m_fwd_search_pickup;
+        it.m_end_index = plan_size;
+      } else if (type == IT_BkwFit) {
+        it.m_cur_index = plan_size - 1;
+        it.m_end_index = m_bkw_fit_last - 1;
+      } else if (type == IT_BkwSearch) {
+        it.m_cur_index = m_bkw_search_pickup;
+        it.m_end_index = -1;
+      } else
+        throw std::invalid_argument("unknown iteration type");
+
+      if (!it.is_valid())
+        throw std::runtime_error("invalid iterator constructed");
+
+      return it;
+    }
+  };
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/Track.h b/RecoTracker/MkFitCore/interface/Track.h
new file mode 100644
index 0000000000000..dcb24a480eec4
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/Track.h
@@ -0,0 +1,683 @@
+#ifndef RecoTracker_MkFitCore_interface_Track_h
+#define RecoTracker_MkFitCore_interface_Track_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/MatrixSTypes.h"
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+#include <vector>
+#include <map>
+
+namespace mkfit {
+
+  typedef std::pair<int, int> SimTkIDInfo;
+  typedef std::vector<int> HitIdxVec;
+  typedef std::map<int, std::vector<int> > HitLayerMap;
+  // Returns +1 or -1 from a comparison of two products of x/y differences between the three hits.
+  inline int calculateCharge(const Hit& hit0, const Hit& hit1, const Hit& hit2) {
+    return ((hit2.y() - hit1.y()) * (hit2.x() - hit0.x()) < (hit2.y() - hit0.y()) * (hit2.x() - hit1.x()) ? 1 : -1);
+  }
+
+  inline int calculateCharge(const float hit0_x, const float hit0_y,
+                             const float hit1_x, const float hit1_y,
+                             const float hit2_x, const float hit2_y) {
+    // Coordinate-based variant: +1 when the first product is strictly larger, -1 otherwise.
+    const float lhs = (hit2_y - hit0_y) * (hit2_x - hit1_x);
+    const float rhs = (hit2_y - hit1_y) * (hit2_x - hit0_x);
+    return lhs > rhs ? 1 : -1;
+  }
+
+  struct IdxChi2List {
+    // Plain per-candidate record (all members public); fields tagged [sort] are used for sorting.
+    int trkIdx;           // index of the candidate
+    int hitIdx;           // index of the hit
+    unsigned int module;  // id of the module
+    int nhits;            // [sort] number of hits
+    int ntailholes;       // [sort] number of holes at the end of the track
+    int noverlaps;        // [sort] number of overlaps
+    int nholes;           // [sort] number of holes
+    float pt;             // [sort] pt
+    float chi2;           // [sort] total chi2
+    float chi2_hit;       // chi2 contribution of the added hit
+    float score;          // ranking score of the candidate
+  };
+
+  //==============================================================================
+  // TrackState
+  //==============================================================================
+
+  struct TrackState  // 6 track parameters, 6x6 symmetric errors, charge and a validity flag
+  {
+  public:
+    TrackState() : charge(0), valid(true) {}  // charge is zeroed so it is never read uninitialized
+    TrackState(int charge, const SVector3& pos, const SVector3& mom, const SMatrixSym66& err)
+        : parameters(SVector6(pos.At(0), pos.At(1), pos.At(2), mom.At(0), mom.At(1), mom.At(2))),
+          errors(err),
+          charge(charge),
+          valid(true) {}
+    SVector3 position() const { return SVector3(parameters[0], parameters[1], parameters[2]); }
+    SVector6 parameters;
+    SMatrixSym66 errors;
+    short charge;
+    bool valid;
+
+    // track state position
+    float x() const { return parameters.At(0); }
+    float y() const { return parameters.At(1); }
+    float z() const { return parameters.At(2); }
+    float posR() const { return getHypot(x(), y()); }
+    float posRsq() const { return x() * x() + y() * y(); }
+    float posPhi() const { return getPhi(x(), y()); }
+    float posEta() const { return getEta(posR(), z()); }
+
+    // track state position errors: sqrt of covariance elements (NOTE(review): off-diagonal ones may be < 0 -> NaN)
+    float exx() const { return std::sqrt(errors.At(0, 0)); }
+    float eyy() const { return std::sqrt(errors.At(1, 1)); }
+    float ezz() const { return std::sqrt(errors.At(2, 2)); }
+    float exy() const { return std::sqrt(errors.At(0, 1)); }
+    float exz() const { return std::sqrt(errors.At(0, 2)); }
+    float eyz() const { return std::sqrt(errors.At(1, 2)); }
+
+    float eposR() const { return std::sqrt(getRadErr2(x(), y(), errors.At(0, 0), errors.At(1, 1), errors.At(0, 1))); }
+    float eposPhi() const { return std::sqrt(getPhiErr2(x(), y(), errors.At(0, 0), errors.At(1, 1), errors.At(0, 1))); }
+    float eposEta() const {
+      return std::sqrt(getEtaErr2(x(), y(), z(),
+                                  errors.At(0, 0), errors.At(1, 1), errors.At(2, 2),
+                                  errors.At(0, 1), errors.At(0, 2), errors.At(1, 2)));
+    }
+
+    // track state momentum: these accessors read parameters[3..5] as (1/pT, phi, theta)
+    float invpT() const { return parameters.At(3); }
+    float momPhi() const { return parameters.At(4); }
+    float theta() const { return parameters.At(5); }
+    float pT() const { return std::abs(1.f / parameters.At(3)); }
+    float px() const { return pT() * std::cos(parameters.At(4)); }
+    float py() const { return pT() * std::sin(parameters.At(4)); }
+    float pz() const { return pT() / std::tan(parameters.At(5)); }
+    float momEta() const { return getEta(theta()); }
+    float p() const { return pT() / std::sin(parameters.At(5)); }
+
+    float einvpT() const { return std::sqrt(errors.At(3, 3)); }
+    float emomPhi() const { return std::sqrt(errors.At(4, 4)); }
+    float etheta() const { return std::sqrt(errors.At(5, 5)); }
+    float epT() const { return std::sqrt(errors.At(3, 3)) / (parameters.At(3) * parameters.At(3)); }
+    float emomEta() const { return std::sqrt(errors.At(5, 5)) / std::sin(parameters.At(5)); }
+    float epxpx() const { return std::sqrt(getPxPxErr2(invpT(), momPhi(), errors.At(3, 3), errors.At(4, 4))); }
+    float epypy() const { return std::sqrt(getPyPyErr2(invpT(), momPhi(), errors.At(3, 3), errors.At(5, 5) * 0.f + errors.At(4, 4))); }
+    // Error of pz = pT / tan(theta), propagated from the 1/pT and theta variances (their correlation is ignored).
+    float epzpz() const {
+      const float pt2 = pT() * pT();
+      const float cotT = 1.f / std::tan(theta());
+      const float cscT2 = 1.f / (std::sin(theta()) * std::sin(theta()));
+      return std::sqrt(pt2 * (pt2 * cotT * cotT * errors.At(3, 3) + cscT2 * cscT2 * errors.At(5, 5)));
+    }
+
+    void convertFromCartesianToCCS();
+    void convertFromCCSToCartesian();
+    SMatrix66 jacobianCCSToCartesian(float invpt, float phi, float theta) const;
+    SMatrix66 jacobianCartesianToCCS(float px, float py, float pz) const;
+
+    void convertFromGlbCurvilinearToCCS();
+    void convertFromCCSToGlbCurvilinear();
+    //last row/column are zeros
+    SMatrix66 jacobianCCSToCurvilinear(float invpt, float cosP, float sinP, float cosT, float sinT, short charge) const;
+    SMatrix66 jacobianCurvilinearToCCS(float px, float py, float pz, short charge) const;
+  };
+
+  //==============================================================================
+  // TrackBase
+  //==============================================================================
+
+  class TrackBase {  // TrackState plus chi2, score, label, status bits and hit counters; base class of Track
+  public:
+    TrackBase() {}
+
+    TrackBase(const TrackState& state, float chi2, int label) : state_(state), chi2_(chi2), label_(label) {}
+
+    TrackBase(int charge, const SVector3& position, const SVector3& momentum, const SMatrixSym66& errors, float chi2)
+        : state_(charge, position, momentum, errors), chi2_(chi2) {}
+
+    const TrackState& state() const { return state_; }
+    void setState(const TrackState& newState) { state_ = newState; }
+
+    const SVector6& parameters() const { return state_.parameters; }
+    const SMatrixSym66& errors() const { return state_.errors; }
+
+    const float* posArray() const { return state_.parameters.Array(); }
+    const float* errArray() const { return state_.errors.Array(); }
+
+    // Non-const versions needed for CopyOut of Matriplex.
+    SVector6& parameters_nc() { return state_.parameters; }
+    SMatrixSym66& errors_nc() { return state_.errors; }
+    TrackState& state_nc() { return state_; }
+    // Raw parameter triplets [0..2] and [3..5]; no coordinate conversion is applied here.
+    SVector3 position() const { return SVector3(state_.parameters[0], state_.parameters[1], state_.parameters[2]); }
+    SVector3 momentum() const { return SVector3(state_.parameters[3], state_.parameters[4], state_.parameters[5]); }
+
+    float x() const { return state_.parameters[0]; }
+    float y() const { return state_.parameters[1]; }
+    float z() const { return state_.parameters[2]; }
+    float posR() const { return getHypot(state_.parameters[0], state_.parameters[1]); }
+    float posRsq() const { return state_.posRsq(); }
+    float posPhi() const { return getPhi(state_.parameters[0], state_.parameters[1]); }
+    float posEta() const { return getEta(state_.parameters[0], state_.parameters[1], state_.parameters[2]); }
+    // Momentum accessors simply forward to the TrackState.
+    float px() const { return state_.px(); }
+    float py() const { return state_.py(); }
+    float pz() const { return state_.pz(); }
+    float pT() const { return state_.pT(); }
+    float invpT() const { return state_.invpT(); }
+    float p() const { return state_.p(); }
+    float momPhi() const { return state_.momPhi(); }
+    float momEta() const { return state_.momEta(); }
+    float theta() const { return state_.theta(); }
+
+    // track state momentum errors
+    float epT() const { return state_.epT(); }
+    float emomPhi() const { return state_.emomPhi(); }
+    float emomEta() const { return state_.emomEta(); }
+
+    // ------------------------------------------------------------------------
+
+    int charge() const { return state_.charge; }
+    float chi2() const { return chi2_; }
+    float score() const { return score_; }
+    int label() const { return label_; }
+
+    void setCharge(int chg) { state_.charge = chg; }
+    void setChi2(float chi2) { chi2_ = chi2; }
+    void setScore(float s) { score_ = s; }
+    void setLabel(int lbl) { label_ = lbl; }
+
+    bool hasSillyValues(bool dump, bool fix, const char* pref = "");
+
+    bool hasNanNSillyValues() const;
+
+    float d0BeamSpot(const float x_bs, const float y_bs, bool linearize = false) const;
+
+    // ------------------------------------------------------------------------
+    // Per-track flags and small counters packed into bit-fields; the static_assert below pins the size to one int.
+    struct Status {
+      static constexpr int kNSeedHitBits = 4;
+      static constexpr int kMaxSeedHits = (1 << kNSeedHitBits) - 1;
+
+      // Set to true for short, low-pt CMS tracks. They do not generate mc seeds and
+      // do not enter the efficiency denominator.
+      bool not_findable : 1;
+
+      // Set to true when number of holes would exceed an external limit, Config::maxHolesPerCand.
+      // XXXXMT Not used yet, -2 last hit idx is still used! Need to add it to MkFi**r classes.
+      // Problem is that I have to carry bits in/out of the MkFinder, too.
+      bool stopped : 1;
+
+      // Production type (most useful for sim tracks): 0, 1, 2, 3 for unset, signal, in-time PU, oot PU
+      unsigned int prod_type : 2;
+
+      unsigned int align_was_seed_type : 2;
+
+      // Whether or not the track matched to another track and had the lower cand score
+      bool duplicate : 1;
+
+      // Tracking iteration/algorithm
+      unsigned int algorithm : 6;
+
+      // Temporary store number of overlaps for Track here
+      int n_overlaps : 8;
+
+      // Number of seed hits at import time
+      unsigned int n_seed_hits : kNSeedHitBits;
+
+      // mkFit tracking region TrackerInfo::EtaRegion, determined by seed partition function
+      unsigned int eta_region : 3;
+
+      // The remaining bits.
+      unsigned int _free_bits_ : 4;
+
+      Status()
+          : not_findable(false),
+            stopped(false),
+            prod_type(0),
+            align_was_seed_type(0),
+            duplicate(false),
+            algorithm(0),
+            n_overlaps(0),
+            n_seed_hits(0),
+            eta_region(0),
+            _free_bits_(0) {}
+    };
+    static_assert(sizeof(Status) == sizeof(int));
+
+    Status getStatus() const { return status_; }
+    void setStatus(Status s) { status_ = s; }
+
+    bool isFindable() const { return !status_.not_findable; }
+    bool isNotFindable() const { return status_.not_findable; }
+    void setNotFindable() { status_.not_findable = true; }
+
+    void setDuplicateValue(bool d) { status_.duplicate = d; }
+    bool getDuplicateValue() const { return status_.duplicate; }
+    enum class ProdType { NotSet = 0, Signal = 1, InTimePU = 2, OutOfTimePU = 3 };
+    ProdType prodType() const { return ProdType(status_.prod_type); }
+    void setProdType(ProdType ptyp) { status_.prod_type = static_cast<unsigned int>(ptyp); }
+
+    int getNSeedHits() const { return status_.n_seed_hits; }
+    void setNSeedHits(int n) { status_.n_seed_hits = n; }  // stored in kNSeedHitBits bits; no range check here
+    int getEtaRegion() const { return status_.eta_region; }
+    void setEtaRegion(int r) { status_.eta_region = r; }  // stored in 3 bits; no range check here
+
+    // Those are defined in Track, TrackCand has separate member. To be consolidated but
+    // it's a binary format change.
+    // int  nOverlapHits()  const  { return status_.n_overlaps; }
+    // void setNOverlapHits(int n) { status_.n_overlaps = n; }
+
+    /// track algorithm; copy from TrackBase.h to keep in standalone builds
+    enum class TrackAlgorithm {
+      undefAlgorithm = 0,
+      ctf = 1,
+      duplicateMerge = 2,
+      cosmics = 3,
+      initialStep = 4,
+      lowPtTripletStep = 5,
+      pixelPairStep = 6,
+      detachedTripletStep = 7,
+      mixedTripletStep = 8,
+      pixelLessStep = 9,
+      tobTecStep = 10,
+      jetCoreRegionalStep = 11,
+      conversionStep = 12,
+      muonSeededStepInOut = 13,
+      muonSeededStepOutIn = 14,
+      outInEcalSeededConv = 15,
+      inOutEcalSeededConv = 16,
+      nuclInter = 17,
+      standAloneMuon = 18,
+      globalMuon = 19,
+      cosmicStandAloneMuon = 20,
+      cosmicGlobalMuon = 21,
+      // Phase1
+      highPtTripletStep = 22,
+      lowPtQuadStep = 23,
+      detachedQuadStep = 24,
+      reservedForUpgrades1 = 25,
+      reservedForUpgrades2 = 26,
+      bTagGhostTracks = 27,
+      beamhalo = 28,
+      gsf = 29,
+      // HLT algo name
+      hltPixel = 30,
+      // steps used by PF
+      hltIter0 = 31,
+      hltIter1 = 32,
+      hltIter2 = 33,
+      hltIter3 = 34,
+      hltIter4 = 35,
+      // steps used by all other objects @HLT
+      hltIterX = 36,
+      // steps used by HI muon regional iterative tracking
+      hiRegitMuInitialStep = 37,
+      hiRegitMuLowPtTripletStep = 38,
+      hiRegitMuPixelPairStep = 39,
+      hiRegitMuDetachedTripletStep = 40,
+      hiRegitMuMixedTripletStep = 41,
+      hiRegitMuPixelLessStep = 42,
+      hiRegitMuTobTecStep = 43,
+      hiRegitMuMuonSeededStepInOut = 44,
+      hiRegitMuMuonSeededStepOutIn = 45,
+      algoSize = 46
+    };
+
+    int algoint() const { return status_.algorithm; }
+    TrackAlgorithm algorithm() const { return TrackAlgorithm(status_.algorithm); }
+    void setAlgorithm(TrackAlgorithm algo) { status_.algorithm = static_cast<unsigned int>(algo); }
+    void setAlgoint(int algo) { status_.algorithm = algo; }  // stored in the 6-bit algorithm field
+    // To be used later
+    // bool isStopped() const { return status_.stopped; }
+    // void setStopped()      { status_.stopped = true; }
+
+    static const char* algoint_to_cstr(int algo);
+
+    // ------------------------------------------------------------------------
+
+  protected:
+    TrackState state_;
+    float chi2_ = 0.;
+    float score_ = 0.;
+    short int lastHitIdx_ = -1;  // -1 = no hit slots; not modified in this class, Track updates it
+    short int nFoundHits_ = 0;   // not modified in this class, Track updates it
+    Status status_;
+    int label_ = -1;
+  };
+
+  //==============================================================================
+  // TrackCand
+  //==============================================================================
+
+  // TrackCand depends on stuff in mkFit/HitStructures, CombCand in particular,
+  // so it is declared / implemented there.
+
+  // class TrackCand : public TrackBase { ... };
+
+  //==============================================================================
+  // Track
+  //==============================================================================
+
+  class Track : public TrackBase {  // TrackBase plus an ordered list of HitOnTrack slots (hitsOnTrk_)
+  public:
+    Track() {}
+
+    explicit Track(const TrackBase& base) : TrackBase(base) {
+      // Reset hit counters -- caller has to initialize hits.
+      lastHitIdx_ = -1;
+      nFoundHits_ = 0;
+    }
+
+    Track(const TrackState& state, float chi2, int label, int nHits, const HitOnTrack* hits)
+        : TrackBase(state, chi2, label) {
+      reserveHits(nHits);
+      for (int h = 0; h < nHits; ++h) {
+        addHitIdx(hits[h].index, hits[h].layer, 0.0f);
+      }
+    }
+
+    Track(int charge, const SVector3& position, const SVector3& momentum, const SMatrixSym66& errors, float chi2)
+        : TrackBase(charge, position, momentum, errors, chi2) {}
+
+    Track(const Track& t) : TrackBase(t), hitsOnTrk_(t.hitsOnTrk_) {}
+
+    // used for swimming cmssw rec tracks to mkFit position
+    float swimPhiToR(const float x, const float y) const;
+
+    bool canReachRadius(float R) const;
+    float maxReachRadius() const;
+    float zAtR(float R, float* r_reached = nullptr) const;
+    float rAtZ(float Z) const;
+
+    // Copies out the hits with index >= 0. Very inefficient, use only for debug and validation!
+    HitVec hitsVector(const std::vector<HitVec>& globalHitVec) const {
+      HitVec hitsVec;
+      const int nStored = static_cast<int>(hitsOnTrk_.size());  // may be below Config::nMaxTrkHits: do not overrun
+      for (int ihit = 0; ihit < Config::nMaxTrkHits && ihit < nStored; ++ihit) {
+        const HitOnTrack& hot = hitsOnTrk_[ihit];
+        if (hot.index >= 0)
+          hitsVec.push_back(globalHitVec[hot.layer][hot.index]);
+      }
+      return hitsVec;
+    }
+    // Appends one id per hit slot: the MC track id for a usable hit, otherwise the slot's raw index.
+    void mcHitIDsVec(const std::vector<HitVec>& globalHitVec,
+                     const MCHitInfoVec& globalMCHitInfo,
+                     std::vector<int>& mcHitIDs) const {
+      for (int ihit = 0; ihit <= lastHitIdx_; ++ihit) {
+        const HitOnTrack& hot = hitsOnTrk_[ihit];
+        if ((hot.index >= 0) && (static_cast<size_t>(hot.index) < globalHitVec[hot.layer].size())) {
+          mcHitIDs.push_back(globalHitVec[hot.layer][hot.index].mcTrackID(globalMCHitInfo));
+        } else {
+          mcHitIDs.push_back(hot.index);
+        }
+      }
+    }
+
+    // The following 2 (well, 3) funcs to be fixed once we move lastHitIdx_ and nFoundHits_
+    // out of TrackBase. If we do it.
+    void reserveHits(int nHits) { hitsOnTrk_.reserve(nHits); }
+
+    void resetHits() {
+      lastHitIdx_ = -1;
+      nFoundHits_ = 0;
+      hitsOnTrk_.clear();
+    }
+
+    // For MkFinder::copy_out and TrackCand::ExportTrack
+    void resizeHits(int nHits, int nFoundHits) {
+      hitsOnTrk_.resize(nHits);
+      lastHitIdx_ = nHits - 1;
+      nFoundHits_ = nFoundHits;
+    }
+    // Used by TrackCand::ExportTrack
+    void setHitIdxAtPos(int pos, const HitOnTrack& hot) { hitsOnTrk_[pos] = hot; }
+
+    void resizeHitsForInput();
+    // Appends a hit slot; the found-hit count and chi2 only change for index >= 0 or the special index -9.
+    void addHitIdx(int hitIdx, int hitLyr, float chi2) {
+      hitsOnTrk_.push_back({hitIdx, hitLyr});
+      ++lastHitIdx_;
+      if (hitIdx >= 0 || hitIdx == -9) {
+        ++nFoundHits_;
+        chi2_ += chi2;
+      }
+    }
+
+    void addHitIdx(const HitOnTrack& hot, float chi2) { addHitIdx(hot.index, hot.layer, chi2); }
+
+    HitOnTrack getHitOnTrack(int posHitIdx) const { return hitsOnTrk_[posHitIdx]; }
+
+    int getHitIdx(int posHitIdx) const { return hitsOnTrk_[posHitIdx].index; }
+    int getHitLyr(int posHitIdx) const { return hitsOnTrk_[posHitIdx].layer; }
+    // Unchecked: the three accessors below index with lastHitIdx_, so the track must hold at least one slot.
+    HitOnTrack getLastHitOnTrack() const { return hitsOnTrk_[lastHitIdx_]; }
+    int getLastHitIdx() const { return hitsOnTrk_[lastHitIdx_].index; }
+    int getLastHitLyr() const { return hitsOnTrk_[lastHitIdx_].layer; }
+    // Position of the last slot with index >= 0, or -1 if there is none.
+    int getLastFoundHitPos() const {
+      int hi = lastHitIdx_;
+      while (hi >= 0 && hitsOnTrk_[hi].index < 0)
+        --hi;
+      return hi;
+    }
+
+    HitOnTrack getLastFoundHitOnTrack() const {
+      int p = getLastFoundHitPos();
+      return p >= 0 ? hitsOnTrk_[p] : HitOnTrack(-1, -1);
+    }
+    int getLastFoundHitIdx() const {
+      int p = getLastFoundHitPos();
+      return p >= 0 ? hitsOnTrk_[p].index : -1;
+    }
+    int getLastFoundHitLyr() const {
+      int p = getLastFoundHitPos();
+      return p >= 0 ? hitsOnTrk_[p].layer : -1;
+    }
+    // MC hit id of the last found hit; -1 when the track has no found hit.
+    int getLastFoundMCHitID(const std::vector<HitVec>& globalHitVec) const {
+      const HitOnTrack hot = getLastFoundHitOnTrack();  // (-1, -1) when nothing was found
+      return hot.index >= 0 ? globalHitVec[hot.layer][hot.index].mcHitID() : -1;
+    }
+    // MC hit id of the first slot on the given layer that holds a valid hit (index >= 0); -1 if there is none.
+    int getMCHitIDFromLayer(const std::vector<HitVec>& globalHitVec, int layer) const {
+      int mcHitID = -1;
+      for (int ihit = 0; ihit <= lastHitIdx_; ++ihit) {
+        if (hitsOnTrk_[ihit].layer == layer && hitsOnTrk_[ihit].index >= 0) {
+          mcHitID = globalHitVec[hitsOnTrk_[ihit].layer][hitsOnTrk_[ihit].index].mcHitID();
+          break;
+        }
+      }
+      return mcHitID;
+    }
+
+    const HitOnTrack* getHitsOnTrackArray() const { return hitsOnTrk_.data(); }
+    const HitOnTrack* beginHitsOnTrack() const { return hitsOnTrk_.data(); }
+    const HitOnTrack* endHitsOnTrack() const { return hitsOnTrk_.data() + (lastHitIdx_ + 1); }
+
+    HitOnTrack* beginHitsOnTrack_nc() { return hitsOnTrk_.data(); }
+
+    void setHitIdx(int posHitIdx, int newIdx) { hitsOnTrk_[posHitIdx].index = newIdx; }
+
+    void setHitIdxLyr(int posHitIdx, int newIdx, int newLyr) { hitsOnTrk_[posHitIdx] = {newIdx, newLyr}; }
+    // Recounts nFoundHits_ from the stored slots, using the same rule as addHitIdx (index >= 0 or -9).
+    void countAndSetNFoundHits() {
+      nFoundHits_ = 0;
+      for (int i = 0; i <= lastHitIdx_; i++) {
+        if (hitsOnTrk_[i].index >= 0 || hitsOnTrk_[i].index == -9)
+          nFoundHits_++;
+      }
+    }
+
+    int nFoundHits() const { return nFoundHits_; }
+    int nTotalHits() const { return lastHitIdx_ + 1; }
+
+    int nOverlapHits() const { return status_.n_overlaps; }
+    void setNOverlapHits(int n) { status_.n_overlaps = n; }
+    // Number of index == -1 slots located before the last slot with index >= 0.
+    int nInsideMinusOneHits() const {
+      int n = 0;
+      bool insideValid = false;
+      for (int i = lastHitIdx_; i >= 0; --i) {
+        if (hitsOnTrk_[i].index >= 0)
+          insideValid = true;
+        if (insideValid && hitsOnTrk_[i].index == -1)
+          ++n;
+      }
+      return n;
+    }
+    // Number of index == -1 slots located after the last slot with index >= 0.
+    int nTailMinusOneHits() const {
+      int n = 0;
+      for (int i = lastHitIdx_; i >= 0; --i) {
+        if (hitsOnTrk_[i].index >= 0)
+          return n;
+        if (hitsOnTrk_[i].index == -1)
+          ++n;
+      }
+      return n;
+    }
+    // Number of distinct non-negative layers that carry a found hit (index >= 0 or -9).
+    int nUniqueLayers() const {
+      // make local copy in vector: sort it in place
+      std::vector<HitOnTrack> tmp_hitsOnTrk(hitsOnTrk_.begin(), hitsOnTrk_.end());
+      std::sort(tmp_hitsOnTrk.begin(), tmp_hitsOnTrk.end(), [](const auto& h1, const auto& h2) {
+        return h1.layer < h2.layer;
+      });
+
+      // local counters
+      auto lyr_cnt = 0;
+      auto prev_lyr = -1;
+
+      // loop over copy of hitsOnTrk
+      for (auto ihit = 0; ihit <= lastHitIdx_; ++ihit) {
+        const auto& hot = tmp_hitsOnTrk[ihit];
+        const auto lyr = hot.layer;
+        const auto idx = hot.index;
+        if (lyr >= 0 && (idx >= 0 || idx == -9) && lyr != prev_lyr) {
+          ++lyr_cnt;
+          prev_lyr = lyr;
+        }
+      }
+      return lyr_cnt;
+    }
+
+    // this method sorts the data member hitsOnTrk_ and is ONLY to be used by sim track seeding
+    void sortHitsByLayer();
+
+    // used by fittest only (NOT mplex)
+    std::vector<int> foundLayers() const {
+      std::vector<int> layers;
+      for (int ihit = 0; ihit <= lastHitIdx_; ++ihit) {
+        if (hitsOnTrk_[ihit].index >= 0 || hitsOnTrk_[ihit].index == -9) {
+          layers.push_back(hitsOnTrk_[ihit].layer);
+        }
+      }
+      return layers;
+    }
+
+  private:
+    std::vector<HitOnTrack> hitsOnTrk_;
+  };
+
+  typedef std::vector<Track> TrackVec;
+  typedef std::vector<TrackVec> TrackVecVec;
+
+  inline bool sortByHitsChi2(const Track& cand1, const Track& cand2) {
+    // More found hits rank first; equal hit counts are ordered by ascending chi2.
+    const int n1 = cand1.nFoundHits(), n2 = cand2.nFoundHits();
+    return n1 != n2 ? n1 > n2 : cand1.chi2() < cand2.chi2();
+  }
+  // Comparator for descending score(): true when cand1 scores strictly higher than cand2.
+  inline bool sortByScoreCand(const Track& cand1, const Track& cand2) { return cand2.score() < cand1.score(); }
+
+  inline bool sortByScoreStruct(const IdxChi2List& cand1, const IdxChi2List& cand2) {
+    return cand2.score < cand1.score;  // descending by score: the higher-scoring record sorts first
+  }
+
+  inline float getScoreWorstPossible() {  // somewhat arbitrary floor, used for the best short track during finding
+    return static_cast<float>(-1e16);
+  }
+
+  inline float getScoreCalc(const int nfoundhits,
+                            const int ntailholes,
+                            const int noverlaphits,
+                            const int nmisshits,
+                            const float chi2,
+                            const float pt,
+                            const bool inFindCandidates = false) {
+    // Score = hit bonus * found hits + overlap bonus * overlaps - miss / tail-hole penalties - chi2.
+    // A negative chi2 is NOT clamped here; callers that want that must do it before calling.
+    const float bonusCap = 8.0;
+    const float tailPenalty = Config::tailMissingHitPenalty_;
+    const float overlapBonus = Config::overlapHitBonus_;
+    float hitBonus = Config::validHitSlope_ * nfoundhits + Config::validHitBonus_;
+    float missPenalty = Config::missingHitPenalty_;
+    if (pt < 0.9) {
+      // Below pt 0.9: larger miss penalty, and a hit bonus that is capped (and scaled by 0.9 while finding).
+      const float bonusScale = inFindCandidates ? 0.9f : 1.0f;
+      missPenalty *= inFindCandidates ? 1.7f : 1.5f;
+      hitBonus = std::min(hitBonus * bonusScale, bonusCap);
+    }
+    return hitBonus * nfoundhits + overlapBonus * noverlaphits - missPenalty * nmisshits -
+           tailPenalty * ntailholes - chi2;
+  }
+
+  inline float getScoreCand(const Track& cand1, bool penalizeTailMissHits = false, bool inFindCandidates = false) {
+    // Collects the hit statistics of a Track and forwards them to getScoreCalc().
+    const int nfoundhits = cand1.nFoundHits();
+    const int noverlaphits = cand1.nOverlapHits();
+    const int nmisshits = cand1.nInsideMinusOneHits();
+    // Tail holes are a count: keep them integral (was a float that getScoreCalc converted back to int).
+    const int ntailmisshits = penalizeTailMissHits ? cand1.nTailMinusOneHits() : 0;
+    const float pt = cand1.pT();
+    // Do not allow for chi2<0 in score calculation
+    const float chi2 = cand1.chi2() < 0 ? 0.f : cand1.chi2();
+    return getScoreCalc(nfoundhits, ntailmisshits, noverlaphits, nmisshits, chi2, pt, inFindCandidates);
+  }
+
+  inline float getScoreStruct(const IdxChi2List& cand1) {
+    // Score of a candidate summary record; always evaluated with inFindCandidates = true.
+    // Do not allow for chi2<0 in score calculation
+    const float chi2 = cand1.chi2 < 0 ? 0.f : cand1.chi2;
+    return getScoreCalc(cand1.nhits,
+                        cand1.ntailholes,
+                        cand1.noverlaps,
+                        cand1.nholes,
+                        chi2,
+                        cand1.pt,
+                        true /*inFindCandidates*/);
+  }
+
+  template <typename Vector>
+  inline void squashPhiGeneral(Vector& v) {
+    const int phiSlot = v.kSize - 2;  // phi is stored in the second-to-last element
+    v[phiSlot] = squashPhiGeneral(v[phiSlot]);
+  }
+
+  //https://github.com/cms-sw/cmssw/blob/09c3fce6626f70fd04223e7dacebf0b485f73f54/SimTracker/TrackAssociatorProducers/plugins/getChi2.cc#L23
+  template <typename Vector, typename Matrix>
+  float computeHelixChi2(const Vector& simV, const Vector& recoV, const Matrix& recoM, const bool diagOnly = false) {
+    Vector residual = recoV - simV;
+    if (residual.kSize > 2)
+      squashPhiGeneral(residual);
+    // Work on a copy of the reco matrix, optionally reduced by diagonalOnly(), and invert it.
+    Matrix cov = recoM;
+    if (diagOnly)
+      diagonalOnly(cov);
+    int invFail(0);
+    const Matrix covInv = cov.InverseFast(invFail);
+    // NOTE(review): invFail is never inspected, so a failed inversion is not reported to the caller.
+    return ROOT::Math::Dot(residual * covInv, residual) / (residual.kSize - 1);
+  }
+
+  void print(const TrackState& s);
+  void print(std::string label, int itrack, const Track& trk, bool print_hits = false);
+  void print(std::string label, const TrackState& s);
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/interface/TrackerInfo.h b/RecoTracker/MkFitCore/interface/TrackerInfo.h
new file mode 100644
index 0000000000000..f897a3b4ee83a
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/TrackerInfo.h
@@ -0,0 +1,173 @@
+#ifndef RecoTracker_MkFitCore_interface_TrackerInfo_h
+#define RecoTracker_MkFitCore_interface_TrackerInfo_h
+
+#include <string>
+#include <vector>
+
+namespace mkfit {
+
+  class IterationsInfo;
+
+  //==============================================================================
+
+  enum WithinSensitiveRegion_e { WSR_Undef = -1, WSR_Inside = 0, WSR_Edge, WSR_Outside };
+  // Region classification (m_wsr) plus an "in gap" flag (m_in_gap), each declared as an 8-bit bit-field.
+  struct WSR_Result {
+    // Could also store XHitSize count equivalent here : 16;
+    WithinSensitiveRegion_e m_wsr : 8;
+    bool m_in_gap : 8;
+
+    WSR_Result() : m_wsr(WSR_Undef), m_in_gap(false) {}  // default state: undefined region, not in a gap
+
+    WSR_Result(WithinSensitiveRegion_e wsr, bool in_gap) : m_wsr(wsr), m_in_gap(in_gap) {}
+  };
+
+  //==============================================================================
+  // Per-layer geometry record: id, type, r/z limits, propagation target, q-binning and an optional r-hole.
+  class LayerInfo {
+  public:
+    enum LayerType_e { Undef = -1, Barrel = 0, EndCapPos = 1, EndCapNeg = 2 };
+    // Only id and type are set here; all float members start at 0 until the matching setter is called.
+    LayerInfo(int lid, LayerType_e type) : m_layer_id(lid), m_layer_type(type) {}
+
+    void set_layer_type(LayerType_e t) { m_layer_type = t; }
+    void set_limits(float r1, float r2, float z1, float z2);
+    void set_propagate_to(float pto) { m_propagate_to = pto; }
+    void set_r_hole_range(float rh1, float rh2);
+    void set_q_bin(float qb) { m_q_bin = qb; }
+    void set_is_stereo(bool s) { m_is_stereo = s; }
+
+    int layer_id() const { return m_layer_id; }
+    LayerType_e layer_type() const { return m_layer_type; }
+    float rin() const { return m_rin; }
+    float rout() const { return m_rout; }
+    float r_mean() const { return 0.5f * (m_rin + m_rout); }
+    float zmin() const { return m_zmin; }
+    float zmax() const { return m_zmax; }
+    float z_mean() const { return 0.5f * (m_zmin + m_zmax); }
+    float propagate_to() const { return m_propagate_to; }
+    float q_bin() const { return m_q_bin; }
+    bool is_stereo() const { return m_is_stereo; }
+
+    bool is_barrel() const { return m_layer_type == Barrel; }
+    // Exclusive range checks; the q variant tests z for barrel layers and r for all other layer types.
+    bool is_within_z_limits(float z) const { return z > m_zmin && z < m_zmax; }
+    bool is_within_r_limits(float r) const { return r > m_rin && r < m_rout; }
+    bool is_within_q_limits(float q) const { return is_barrel() ? is_within_z_limits(q) : is_within_r_limits(q); }
+
+    bool is_in_r_hole(float r) const { return m_has_r_range_hole ? is_in_r_hole_no_check(r) : false; }
+
+    bool is_pixb_lyr() const { return m_is_pixb_lyr; }
+    bool is_pixe_lyr() const { return m_is_pixe_lyr; }
+    bool is_pix_lyr() const { return (m_is_pixb_lyr || m_is_pixe_lyr); }
+    bool is_tib_lyr() const { return m_is_tib_lyr; }
+    bool is_tob_lyr() const { return m_is_tob_lyr; }
+    bool is_tid_lyr() const { return m_is_tid_lyr; }
+    bool is_tec_lyr() const { return m_is_tec_lyr; }
+    // Classify z against [m_zmin, m_zmax] with tolerance dz as Outside / Inside / Edge; gap flag is always false.
+    WSR_Result is_within_z_sensitive_region(float z, float dz) const {
+      if (z > m_zmax + dz || z < m_zmin - dz)
+        return WSR_Result(WSR_Outside, false);
+      if (z < m_zmax - dz && z > m_zmin + dz)
+        return WSR_Result(WSR_Inside, false);
+      return WSR_Result(WSR_Edge, false);
+    }
+    // Same for r with tolerance dr; with an r-hole defined, points in or near the hole get gap flag true.
+    WSR_Result is_within_r_sensitive_region(float r, float dr) const {
+      if (r > m_rout + dr || r < m_rin - dr)
+        return WSR_Result(WSR_Outside, false);
+      if (r < m_rout - dr && r > m_rin + dr) {
+        if (m_has_r_range_hole) {
+          if (r < m_hole_r_max - dr && r > m_hole_r_min + dr)
+            return WSR_Result(WSR_Outside, true);
+          if (r < m_hole_r_max + dr && r > m_hole_r_min - dr)
+            return WSR_Result(WSR_Edge, true);
+        }
+        return WSR_Result(WSR_Inside, false);
+      }
+      return WSR_Result(WSR_Edge, false);
+    }
+    // NOTE(review): printf is used below but this header does not include <cstdio> itself.
+    void print_layer() const {
+      printf("Layer %2d  r(%7.4f, %7.4f) z(% 9.4f, % 9.4f) is_brl=%d\n",
+             m_layer_id,
+             m_rin,
+             m_rout,
+             m_zmin,
+             m_zmax,
+             is_barrel());
+    }
+
+    // To be cleaned out with other geometry cleanup
+    bool m_is_pixb_lyr = false;
+    bool m_is_pixe_lyr = false;
+    bool m_is_tib_lyr = false;
+    bool m_is_tob_lyr = false;
+    bool m_is_tid_lyr = false;
+    bool m_is_tec_lyr = false;
+
+  private:
+    bool is_in_r_hole_no_check(float r) const { return r > m_hole_r_min && r < m_hole_r_max; }
+
+    int m_layer_id = -1;
+    LayerType_e m_layer_type = Undef;
+    // Zero-initialized so that getters and range checks are well-defined before the setters have run.
+    float m_rin = 0, m_rout = 0, m_zmin = 0, m_zmax = 0;
+    float m_propagate_to = 0;
+
+    float m_q_bin = 0;                         // > 0 - bin width, < 0 - number of bins
+    float m_hole_r_min = 0, m_hole_r_max = 0;  // This could be turned into std::function when needed.
+    bool m_has_r_range_hole = false;
+    bool m_is_stereo = false;
+  };
+
+  //==============================================================================
+  // Container of all LayerInfo objects plus index lists for barrel and positive / negative endcap layers.
+  class TrackerInfo {
+  public:
+    enum EtaRegion {
+      Reg_Begin = 0,
+      Reg_Endcap_Neg = 0,
+      Reg_Transition_Neg,
+      Reg_Barrel,
+      Reg_Transition_Pos,
+      Reg_Endcap_Pos,
+      Reg_End,
+      Reg_Count = Reg_End
+    };
+    // Layer creation; these are only declared here, the definitions are not part of this header.
+    void reserve_layers(int n_brl, int n_ec_pos, int n_ec_neg);
+    void create_layers(int n_brl, int n_ec_pos, int n_ec_neg);
+    LayerInfo& new_barrel_layer();
+    LayerInfo& new_ecap_pos_layer();
+    LayerInfo& new_ecap_neg_layer();
+
+    int n_layers() const { return static_cast<int>(m_layers.size()); }
+    const LayerInfo& layer(int l) const { return m_layers[l]; }
+    LayerInfo& layer_nc(int l) { return m_layers[l]; }
+    // Unchecked indexed access, same as layer().
+    const LayerInfo& operator[](int l) const { return layer(l); }
+    // Per-layer flag shortcuts, forwarded to the LayerInfo stored at index i.
+    bool is_stereo(int i) const { return layer(i).is_stereo(); }
+    bool is_pixb_lyr(int i) const { return layer(i).is_pixb_lyr(); }
+    bool is_pixe_lyr(int i) const { return layer(i).is_pixe_lyr(); }
+    bool is_pix_lyr(int i) const { return layer(i).is_pix_lyr(); }
+    bool is_tib_lyr(int i) const { return layer(i).is_tib_lyr(); }
+    bool is_tob_lyr(int i) const { return layer(i).is_tob_lyr(); }
+    bool is_tid_lyr(int i) const { return layer(i).is_tid_lyr(); }
+    bool is_tec_lyr(int i) const { return layer(i).is_tec_lyr(); }
+    // Layer whose index is the last entry of the barrel index list; the list must not be empty.
+    const LayerInfo& outer_barrel_layer() const { return layer(m_barrel.back()); }
+
+  private:
+    int new_layer(LayerInfo::LayerType_e type);
+
+    std::vector<LayerInfo> m_layers;
+    // m_barrel holds indices into m_layers (see outer_barrel_layer()); presumably the endcap lists do too.
+    std::vector<int> m_barrel;
+    std::vector<int> m_ecap_pos;
+    std::vector<int> m_ecap_neg;
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/interface/binnor.h b/RecoTracker/MkFitCore/interface/binnor.h
new file mode 100644
index 0000000000000..3563c1d116661
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/binnor.h
@@ -0,0 +1,246 @@
+#ifndef RecoTracker_MkFitCore_interface_binnor_h
+#define RecoTracker_MkFitCore_interface_binnor_h
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <numeric>
+#include <type_traits>
+#include <vector>
+
+namespace mkfit {
+
+  // For all axis types:
+  //--------------------
+  // R - real type
+  // I - bin index type
+  // M and N - number of bits for fine and normal binning
+
+  // axis_base
+  //----------
+  template <typename R, typename I, unsigned M, unsigned N>
+  struct axis_base {
+    static_assert(M >= N);
+    // Public aliases of the template parameters.
+    typedef R real_t;
+    typedef I index_t;
+
+    static constexpr unsigned c_M = M;
+    static constexpr unsigned c_N = N;
+    static constexpr unsigned c_M2N_shift = M - N;  // right shift applied by M_bin_to_N_bin()
+
+    const R m_R_min, m_R_max;
+    const R m_M_fac, m_N_fac;  // number of M / N bins divided by (max - min)
+    const I m_last_M_bin, m_last_N_bin;
+    // Pair of bin indices; Rminmax_to_N_bins() sets end to the bin of rmax plus one.
+    struct I_pair {
+      I begin;
+      I end;
+
+      I_pair() : begin(0), end(0) {}
+      I_pair(I b, I e) : begin(b), end(e) {}
+    };
+    // M_size / N_size are the bin counts of the fine (M) and normal (N) binning between min and max.
+    axis_base(R min, R max, unsigned M_size, unsigned N_size)
+        : m_R_min(min),
+          m_R_max(max),
+          m_M_fac(M_size / (max - min)),
+          m_N_fac(N_size / (max - min)),
+          m_last_M_bin(M_size - 1),
+          m_last_N_bin(N_size - 1) {}
+    // Unchecked conversions: no clamping, the scaled offset from m_R_min is converted to I as is.
+    I R_to_M_bin(R r) const { return (r - m_R_min) * m_M_fac; }
+    I R_to_N_bin(R r) const { return (r - m_R_min) * m_N_fac; }
+    // Clamped conversions: r <= m_R_min gives bin 0, r >= m_R_max gives the last bin.
+    I R_to_M_bin_safe(R r) const { return r <= m_R_min ? 0 : (r >= m_R_max ? m_last_M_bin : R_to_M_bin(r)); }
+    I R_to_N_bin_safe(R r) const { return r <= m_R_min ? 0 : (r >= m_R_max ? m_last_N_bin : R_to_N_bin(r)); }
+
+    I M_bin_to_N_bin(I m) const { return m >> c_M2N_shift; }
+
+    I_pair Rminmax_to_N_bins(R rmin, R rmax) const {
+      return I_pair(R_to_N_bin_safe(rmin), R_to_N_bin_safe(rmax) + I{1});
+    }
+
+    I_pair Rrdr_to_N_bins(R r, R dr) const { return Rminmax_to_N_bins(r - dr, r + dr); }
+    I next_N_bin(I bin) const { return bin + 1; }
+  };
+
+  // axis_pow2_base
+  //---------------
+  template <typename R, typename I, unsigned M, unsigned N>
+  struct axis_pow2_base : public axis_base<R, I, M, N> {
+    static constexpr unsigned c_M_end = 1 << M;
+    static constexpr unsigned c_N_end = 1 << N;
+    // Bin counts are compile-time powers of two: 1 << M fine bins and 1 << N normal bins.
+    axis_pow2_base(R min, R max) : axis_base<R, I, M, N>(min, max, c_M_end, c_N_end) {}
+
+    unsigned size_of_M() const { return c_M_end; }
+    unsigned size_of_N() const { return c_N_end; }
+  };
+
+  // axis_pow2_u1
+  //-------------
+  template <typename R, typename I, unsigned M, unsigned N>
+  struct axis_pow2_u1 : public axis_pow2_base<R, I, M, N> {
+    static constexpr I c_M_mask = (1 << M) - 1;
+    static constexpr I c_N_mask = (1 << N) - 1;
+
+    axis_pow2_u1(R min, R max) : axis_pow2_base<R, I, M, N>(min, max) {}
+    // These hide the axis_base versions: instead of clamping, the unchecked bin is masked to M / N bits.
+    I R_to_M_bin_safe(R r) const { return this->R_to_M_bin(r) & c_M_mask; }
+    I R_to_N_bin_safe(R r) const { return this->R_to_N_bin(r) & c_N_mask; }
+    // The end bin (unchecked bin of rmax, plus one) is masked as well, so it can be smaller than begin.
+    typename axis_base<R, I, M, N>::I_pair Rminmax_to_N_bins(R rmin, R rmax) const {
+      return typename axis_base<R, I, M, N>::I_pair(R_to_N_bin_safe(rmin), (this->R_to_N_bin(rmax) + I{1}) & c_N_mask);
+    }
+
+    typename axis_base<R, I, M, N>::I_pair Rrdr_to_N_bins(R r, R dr) const { return Rminmax_to_N_bins(r - dr, r + dr); }
+    I next_N_bin(I bin) const { return (bin + 1) & c_N_mask; }
+  };
+
+  // axis_pow2
+  //----------
+  template <typename R, typename I, unsigned M, unsigned N>
+  struct axis_pow2 : public axis_pow2_base<R, I, M, N> {
+    axis_pow2(R min, R max) : axis_pow2_base<R, I, M, N>(min, max) {}  // adds nothing to axis_pow2_base
+  };
+
+  // axis
+  //-----
+  // Axis with a run-time bin count: n_bins normal (N) bins, each split into 1 << (M - N) fine (M) bins.
+  template <typename R, typename I, unsigned M = 8 * sizeof(I), unsigned N = 8 * sizeof(I)>
+  struct axis : public axis_base<R, I, M, N> {
+    const unsigned m_num_M_bins, m_num_N_bins;
+    // Helpers for the bin-width constructor: resulting bin count and the padding added on each side.
+    static unsigned n_bins_for_width(R min, R max, R bin_width) { return std::ceil((max - min) / bin_width); }
+    static R pad_for_width(R min, R max, R w) { return (n_bins_for_width(min, max, w) * w - (max - min)) / 2; }
+
+    axis(R min, R max, unsigned n_bins)
+        : axis_base<R, I, M, N>(min, max, n_bins << this->c_M2N_shift, n_bins),
+          m_num_M_bins(n_bins << this->c_M2N_shift),
+          m_num_N_bins(n_bins) {}
+    // Bin-width form: ceil(extent / bin_width) bins, with [min, max] widened symmetrically to fit them exactly.
+    // Delegates to the ctor above; a temporary built in the body would leave the base and const members unset.
+    axis(R min, R max, R bin_width)
+        : axis(min - pad_for_width(min, max, bin_width),
+               max + pad_for_width(min, max, bin_width),
+               n_bins_for_width(min, max, bin_width)) {}
+    unsigned size_of_M() const { return m_num_M_bins; }
+    unsigned size_of_N() const { return m_num_N_bins; }
+  };
+
+  // binnor
+  //---------------
+  // C - bin content type
+  // A1, A2 - axis types
+  // NB_first, NB_count - number of bits for storage of { first, count } pairs
+  // Entries are registered as fine (M) bin pairs; finalize_registration() ranks them and fills per-N-bin ranges.
+  template <typename C, typename A1, typename A2, unsigned NB_first = 8 * sizeof(C), unsigned NB_count = 8 * sizeof(C)>
+  struct binnor {
+    static_assert(std::is_same<typename A1::real_t, typename A2::real_t>());
+
+    static constexpr unsigned c_A2_Mout_mask = ~(((1 << A2::c_M2N_shift) - 1) << A1::c_M);  // see B_pair::sort_key()
+
+    // Pair of axis bin indices.
+    struct B_pair {
+      typename A1::index_t bin1 : A1::c_M;
+      typename A2::index_t bin2 : A2::c_M;
+
+      B_pair() : bin1(0), bin2(0) {}
+      B_pair(typename A1::index_t i1, typename A2::index_t i2) : bin1(i1), bin2(i2) {}
+      // Sort key: N bin of axis 2 first, then M bin of axis 1. Assumes A1::c_M + A2::c_M fits into unsigned.
+      unsigned sort_key() const { return ((unsigned(bin2) << A1::c_M) | unsigned(bin1)) & c_A2_Mout_mask; }
+    };
+
+    // Bin content pair.
+    struct C_pair {
+      C first : NB_first;
+      C count : NB_count;
+
+      C_pair() : first(0), count(0) {}
+      C_pair(C f, C c) : first(f), count(c) {}
+
+      C end() const { return first + count; }
+    };
+
+    const A1 &m_a1;
+    const A2 &m_a2;
+    std::vector<B_pair> m_cons;  // registered M-bin pairs; released again by finalize_registration()
+    std::vector<C_pair> m_bins;  // one { first, count } per N-bin pair, indexed as bin2 * m_a1.size_of_N() + bin1
+    std::vector<C> m_ranks;      // registration indices in sorted order; C_pair::first indexes into this
+    // The axes are held by reference and therefore must outlive the binnor.
+    binnor(const A1 &a1, const A2 &a2) : m_a1(a1), m_a2(a2), m_bins(m_a1.size_of_N() * m_a2.size_of_N()) {}
+
+    // Access
+
+    B_pair m_bin_to_n_bin(B_pair m_bin) { return {m_a1.M_bin_to_N_bin(m_bin.bin1), m_a2.M_bin_to_N_bin(m_bin.bin2)}; }
+
+    B_pair get_n_bin(typename A1::index_t n1, typename A2::index_t n2) const { return {n1, n2}; }
+    B_pair get_n_bin(typename A1::real_t r1, typename A2::real_t r2) const {
+      return {m_a1.R_to_N_bin(r1), m_a2.R_to_N_bin(r2)};
+    }
+
+    C_pair &ref_content(B_pair n_bin) { return m_bins[n_bin.bin2 * m_a1.size_of_N() + n_bin.bin1]; }
+    C_pair get_content(B_pair n_bin) const { return m_bins[n_bin.bin2 * m_a1.size_of_N() + n_bin.bin1]; }
+
+    C_pair get_content(typename A1::index_t n1, typename A2::index_t n2) const {
+      return m_bins[n2 * m_a1.size_of_N() + n1];
+    }
+
+    C_pair get_content(typename A1::real_t r1, typename A2::real_t r2) const {
+      return get_content(m_a1.R_to_N_bin(r1), m_a2.R_to_N_bin(r2));
+    }
+
+    // Filling
+
+    void reset_contents() {
+      m_bins.assign(m_bins.size(), C_pair());
+      m_ranks.clear();
+      m_ranks.shrink_to_fit();
+    }
+
+    void begin_registration(C n_items) { m_cons.reserve(n_items); }
+
+    void register_entry(typename A1::real_t r1, typename A2::real_t r2) {
+      m_cons.push_back({m_a1.R_to_M_bin(r1), m_a2.R_to_M_bin(r2)});
+    }
+
+    void register_entry_safe(typename A1::real_t r1, typename A2::real_t r2) {
+      m_cons.push_back({m_a1.R_to_M_bin_safe(r1), m_a2.R_to_M_bin_safe(r2)});
+    }
+
+    // Do M-binning outside, potentially using R_to_M_bin_safe().
+    void register_m_bins(typename A1::index_t m1, typename A2::index_t m2) { m_cons.push_back({m1, m2}); }
+
+    // Rank the registered entries by B_pair::sort_key() and record, per N-bin pair, where its run of ranks
+    // starts (first) and how long it is (count). The registered M-bin pairs are released afterwards.
+    void finalize_registration() {
+      m_ranks.resize(m_cons.size());
+      std::iota(m_ranks.begin(), m_ranks.end(), 0);
+
+      std::sort(m_ranks.begin(), m_ranks.end(), [&](const auto &a, const auto &b) {
+        return m_cons[a].sort_key() < m_cons[b].sort_key();
+      });
+
+      for (C i = 0; i < m_ranks.size(); ++i) {
+        C j = m_ranks[i];
+        C_pair &c_bin = ref_content(m_bin_to_n_bin(m_cons[j]));
+        if (c_bin.count == 0)
+          c_bin.first = i;
+        ++c_bin.count;
+
+#ifdef DEBUG
+        B_pair n_pair = m_bin_to_n_bin(m_cons[j]);
+        printf("i=%4u j=%4u  %u %u %u %u\n", i, j, n_pair.bin1, n_pair.bin2, c_bin.first, c_bin.count);
+#endif
+      }
+
+      // Those could be kept to do preselection when determining search ranges.
+      // Especially since additional precision on Axis2 is screened out during sorting.
+      m_cons.clear();
+      m_cons.shrink_to_fit();
+    }
+  };
+
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/interface/cms_common_macros.h b/RecoTracker/MkFitCore/interface/cms_common_macros.h
new file mode 100644
index 0000000000000..6c78c862d8926
--- /dev/null
+++ b/RecoTracker/MkFitCore/interface/cms_common_macros.h
@@ -0,0 +1,10 @@
+#ifndef RecoTracker_MkFitCore_interface_cms_common_macros_h
+#define RecoTracker_MkFitCore_interface_cms_common_macros_h
+// Standalone builds define CMS_SA_ALLOW as empty; otherwise the framework thread-safety macro header is used.
+#ifdef MKFIT_STANDALONE
+#define CMS_SA_ALLOW
+#else
+#include "FWCore/Utilities/interface/thread_safety_macros.h"
+#endif
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/CCSErr.ah b/RecoTracker/MkFitCore/src/CCSErr.ah
new file mode 100644
index 0000000000000..4b50f58628323
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/CCSErr.ah
@@ -0,0 +1,208 @@
+#ifdef MPLEX_INTRINSICS
+// c[0..17] are copies of b entries; c[18..35] combine a[21,22,27,28,33,34,35] with b[6..20] (MUL then FMA).
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = b_0;
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = b_1;
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = b_3;
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = b_6;
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_4 = b_10;
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_5 = b_15;
+      // NOTE(review): index pattern looks like a 6x6 row-major 'a' times a packed symmetric 'b' -- confirm.
+
+
+
+
+
+
+      IntrVec_t c_6 = b_1;
+      IntrVec_t b_2 = LD(b, 2);
+      IntrVec_t c_7 = b_2;
+      IntrVec_t b_4 = LD(b, 4);
+      IntrVec_t c_8 = b_4;
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t b_7 = LD(b, 7);
+      IntrVec_t c_9 = b_7;
+      IntrVec_t b_11 = LD(b, 11);
+      IntrVec_t c_10 = b_11;
+      IntrVec_t b_16 = LD(b, 16);
+      IntrVec_t c_11 = b_16;
+
+
+
+
+
+
+
+      IntrVec_t c_12 = b_3;
+      IntrVec_t c_13 = b_4;
+      IntrVec_t b_5 = LD(b, 5);
+      IntrVec_t c_14 = b_5;
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      IntrVec_t b_8 = LD(b, 8);
+      IntrVec_t c_15 = b_8;
+      IntrVec_t b_12 = LD(b, 12);
+      IntrVec_t c_16 = b_12;
+      IntrVec_t b_17 = LD(b, 17);
+      IntrVec_t c_17 = b_17;
+
+
+
+
+
+
+
+      IntrVec_t a_21 = LD(a, 21);
+      IntrVec_t c_18 = MUL(a_21, b_6);
+      IntrVec_t c_19 = MUL(a_21, b_7);
+      IntrVec_t c_20 = MUL(a_21, b_8);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      IntrVec_t b_9 = LD(b, 9);
+      IntrVec_t c_21 = MUL(a_21, b_9);
+      IntrVec_t b_13 = LD(b, 13);
+      IntrVec_t c_22 = MUL(a_21, b_13);
+      IntrVec_t b_18 = LD(b, 18);
+      IntrVec_t c_23 = MUL(a_21, b_18);
+
+      IntrVec_t a_22 = LD(a, 22);
+      c_18 = FMA(a_22, b_10, c_18);
+      c_19 = FMA(a_22, b_11, c_19);
+      c_20 = FMA(a_22, b_12, c_20);
+      c_21 = FMA(a_22, b_13, c_21);
+      IntrVec_t b_14 = LD(b, 14);
+      c_22 = FMA(a_22, b_14, c_22);
+      IntrVec_t b_19 = LD(b, 19);
+      c_23 = FMA(a_22, b_19, c_23);
+
+
+
+
+
+      IntrVec_t a_27 = LD(a, 27);
+      IntrVec_t c_24 = MUL(a_27, b_6);
+      IntrVec_t c_25 = MUL(a_27, b_7);
+      IntrVec_t c_26 = MUL(a_27, b_8);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+      ST(c, 21, c_21);
+      ST(c, 22, c_22);
+      ST(c, 23, c_23);
+      IntrVec_t c_27 = MUL(a_27, b_9);
+      IntrVec_t c_28 = MUL(a_27, b_13);
+      IntrVec_t c_29 = MUL(a_27, b_18);
+
+      IntrVec_t a_28 = LD(a, 28);
+      c_24 = FMA(a_28, b_10, c_24);
+      c_25 = FMA(a_28, b_11, c_25);
+      c_26 = FMA(a_28, b_12, c_26);
+      c_27 = FMA(a_28, b_13, c_27);
+      c_28 = FMA(a_28, b_14, c_28);
+      c_29 = FMA(a_28, b_19, c_29);
+
+
+
+
+
+      IntrVec_t a_33 = LD(a, 33);
+      IntrVec_t c_30 = MUL(a_33, b_6);
+      IntrVec_t c_31 = MUL(a_33, b_7);
+      IntrVec_t c_32 = MUL(a_33, b_8);
+      ST(c, 24, c_24);
+      ST(c, 25, c_25);
+      ST(c, 26, c_26);
+      ST(c, 27, c_27);
+      ST(c, 28, c_28);
+      ST(c, 29, c_29);
+      IntrVec_t c_33 = MUL(a_33, b_9);
+      IntrVec_t c_34 = MUL(a_33, b_13);
+      IntrVec_t c_35 = MUL(a_33, b_18);
+
+      IntrVec_t a_34 = LD(a, 34);
+      c_30 = FMA(a_34, b_10, c_30);
+      c_31 = FMA(a_34, b_11, c_31);
+      c_32 = FMA(a_34, b_12, c_32);
+      c_33 = FMA(a_34, b_13, c_33);
+      c_34 = FMA(a_34, b_14, c_34);
+      c_35 = FMA(a_34, b_19, c_35);
+
+      IntrVec_t a_35 = LD(a, 35);
+      c_30 = FMA(a_35, b_15, c_30);
+      c_31 = FMA(a_35, b_16, c_31);
+      c_32 = FMA(a_35, b_17, c_32);
+      c_33 = FMA(a_35, b_18, c_33);
+      c_34 = FMA(a_35, b_19, c_34);
+      ST(c, 30, c_30);
+      ST(c, 31, c_31);
+      ST(c, 32, c_32);
+      ST(c, 33, c_33);
+      ST(c, 34, c_34);
+      IntrVec_t b_20 = LD(b, 20);
+      c_35 = FMA(a_35, b_20, c_35);
+      ST(c, 35, c_35);
+   }
+
+#else
+// Scalar fallback: the same 36 outputs written as N-strided array expressions, one n per pass.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = b[ 0*N+n];
+      c[ 1*N+n] = b[ 1*N+n];
+      c[ 2*N+n] = b[ 3*N+n];
+      c[ 3*N+n] = b[ 6*N+n];
+      c[ 4*N+n] = b[10*N+n];
+      c[ 5*N+n] = b[15*N+n];
+      c[ 6*N+n] = b[ 1*N+n];
+      c[ 7*N+n] = b[ 2*N+n];
+      c[ 8*N+n] = b[ 4*N+n];
+      c[ 9*N+n] = b[ 7*N+n];
+      c[10*N+n] = b[11*N+n];
+      c[11*N+n] = b[16*N+n];
+      c[12*N+n] = b[ 3*N+n];
+      c[13*N+n] = b[ 4*N+n];
+      c[14*N+n] = b[ 5*N+n];
+      c[15*N+n] = b[ 8*N+n];
+      c[16*N+n] = b[12*N+n];
+      c[17*N+n] = b[17*N+n];
+      c[18*N+n] = a[21*N+n]*b[ 6*N+n] + a[22*N+n]*b[10*N+n];
+      c[19*N+n] = a[21*N+n]*b[ 7*N+n] + a[22*N+n]*b[11*N+n];
+      c[20*N+n] = a[21*N+n]*b[ 8*N+n] + a[22*N+n]*b[12*N+n];
+      c[21*N+n] = a[21*N+n]*b[ 9*N+n] + a[22*N+n]*b[13*N+n];
+      c[22*N+n] = a[21*N+n]*b[13*N+n] + a[22*N+n]*b[14*N+n];
+      c[23*N+n] = a[21*N+n]*b[18*N+n] + a[22*N+n]*b[19*N+n];
+      c[24*N+n] = a[27*N+n]*b[ 6*N+n] + a[28*N+n]*b[10*N+n];
+      c[25*N+n] = a[27*N+n]*b[ 7*N+n] + a[28*N+n]*b[11*N+n];
+      c[26*N+n] = a[27*N+n]*b[ 8*N+n] + a[28*N+n]*b[12*N+n];
+      c[27*N+n] = a[27*N+n]*b[ 9*N+n] + a[28*N+n]*b[13*N+n];
+      c[28*N+n] = a[27*N+n]*b[13*N+n] + a[28*N+n]*b[14*N+n];
+      c[29*N+n] = a[27*N+n]*b[18*N+n] + a[28*N+n]*b[19*N+n];
+      c[30*N+n] = a[33*N+n]*b[ 6*N+n] + a[34*N+n]*b[10*N+n] + a[35*N+n]*b[15*N+n];
+      c[31*N+n] = a[33*N+n]*b[ 7*N+n] + a[34*N+n]*b[11*N+n] + a[35*N+n]*b[16*N+n];
+      c[32*N+n] = a[33*N+n]*b[ 8*N+n] + a[34*N+n]*b[12*N+n] + a[35*N+n]*b[17*N+n];
+      c[33*N+n] = a[33*N+n]*b[ 9*N+n] + a[34*N+n]*b[13*N+n] + a[35*N+n]*b[18*N+n];
+      c[34*N+n] = a[33*N+n]*b[13*N+n] + a[34*N+n]*b[14*N+n] + a[35*N+n]*b[19*N+n];
+      c[35*N+n] = a[33*N+n]*b[18*N+n] + a[34*N+n]*b[19*N+n] + a[35*N+n]*b[20*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/CCSErrTransp.ah b/RecoTracker/MkFitCore/src/CCSErrTransp.ah
new file mode 100644
index 0000000000000..c5864a3c03f2c
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/CCSErrTransp.ah
@@ -0,0 +1,147 @@
+#ifdef MPLEX_INTRINSICS
+// c[0..20] from selected b entries and a[21,22,27,28,33,34,35]: 15 outputs copy one b entry, 6 are MUL/FMA sums.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = b_0;
+
+
+
+
+
+
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_1 = b_6;
+
+      IntrVec_t b_7 = LD(b, 7);
+      IntrVec_t c_2 = b_7;
+      ST(c, 0, c_0);
+
+
+
+
+
+      IntrVec_t b_12 = LD(b, 12);
+      IntrVec_t c_3 = b_12;
+
+      IntrVec_t b_13 = LD(b, 13);
+      IntrVec_t c_4 = b_13;
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+
+      IntrVec_t b_14 = LD(b, 14);
+      IntrVec_t c_5 = b_14;
+
+
+
+
+      IntrVec_t b_18 = LD(b, 18);
+      IntrVec_t c_6 = b_18;
+
+      IntrVec_t b_19 = LD(b, 19);
+      IntrVec_t c_7 = b_19;
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+
+      IntrVec_t b_20 = LD(b, 20);
+      IntrVec_t c_8 = b_20;
+
+      IntrVec_t b_21 = LD(b, 21);
+      IntrVec_t a_21 = LD(a, 21);
+      IntrVec_t c_9 = MUL(b_21, a_21);
+
+      IntrVec_t b_22 = LD(b, 22);
+      IntrVec_t a_22 = LD(a, 22);
+      c_9 = FMA(b_22, a_22, c_9);
+
+
+      IntrVec_t b_24 = LD(b, 24);
+      IntrVec_t c_10 = b_24;
+
+      IntrVec_t b_25 = LD(b, 25);
+      IntrVec_t c_11 = b_25;
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+
+      IntrVec_t b_26 = LD(b, 26);
+      IntrVec_t c_12 = b_26;
+
+      IntrVec_t b_27 = LD(b, 27);
+      IntrVec_t c_13 = MUL(b_27, a_21);
+      IntrVec_t a_27 = LD(a, 27);
+      IntrVec_t c_14 = MUL(b_27, a_27);
+
+      IntrVec_t b_28 = LD(b, 28);
+      c_13 = FMA(b_28, a_22, c_13);
+      IntrVec_t a_28 = LD(a, 28);
+      c_14 = FMA(b_28, a_28, c_14);
+
+
+      IntrVec_t b_30 = LD(b, 30);
+      IntrVec_t c_15 = b_30;
+
+      IntrVec_t b_31 = LD(b, 31);
+      IntrVec_t c_16 = b_31;
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+
+      IntrVec_t b_32 = LD(b, 32);
+      IntrVec_t c_17 = b_32;
+
+      IntrVec_t b_33 = LD(b, 33);
+      IntrVec_t c_18 = MUL(b_33, a_21);
+      IntrVec_t c_19 = MUL(b_33, a_27);
+      IntrVec_t a_33 = LD(a, 33);
+      IntrVec_t c_20 = MUL(b_33, a_33);
+
+      IntrVec_t b_34 = LD(b, 34);
+      c_18 = FMA(b_34, a_22, c_18);
+      c_19 = FMA(b_34, a_28, c_19);
+      IntrVec_t a_34 = LD(a, 34);
+      c_20 = FMA(b_34, a_34, c_20);
+
+      IntrVec_t b_35 = LD(b, 35);
+      IntrVec_t a_35 = LD(a, 35);
+      c_20 = FMA(b_35, a_35, c_20);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+   }
+
+#else
+// Scalar fallback: the same 21 outputs written as N-strided array expressions, one n per pass.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = b[ 0*N+n];
+      c[ 1*N+n] = b[ 6*N+n];
+      c[ 2*N+n] = b[ 7*N+n];
+      c[ 3*N+n] = b[12*N+n];
+      c[ 4*N+n] = b[13*N+n];
+      c[ 5*N+n] = b[14*N+n];
+      c[ 6*N+n] = b[18*N+n];
+      c[ 7*N+n] = b[19*N+n];
+      c[ 8*N+n] = b[20*N+n];
+      c[ 9*N+n] = b[21*N+n]*a[21*N+n] + b[22*N+n]*a[22*N+n];
+      c[10*N+n] = b[24*N+n];
+      c[11*N+n] = b[25*N+n];
+      c[12*N+n] = b[26*N+n];
+      c[13*N+n] = b[27*N+n]*a[21*N+n] + b[28*N+n]*a[22*N+n];
+      c[14*N+n] = b[27*N+n]*a[27*N+n] + b[28*N+n]*a[28*N+n];
+      c[15*N+n] = b[30*N+n];
+      c[16*N+n] = b[31*N+n];
+      c[17*N+n] = b[32*N+n];
+      c[18*N+n] = b[33*N+n]*a[21*N+n] + b[34*N+n]*a[22*N+n];
+      c[19*N+n] = b[33*N+n]*a[27*N+n] + b[34*N+n]*a[28*N+n];
+      c[20*N+n] = b[33*N+n]*a[33*N+n] + b[34*N+n]*a[34*N+n] + b[35*N+n]*a[35*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/CandCloner.cc b/RecoTracker/MkFitCore/src/CandCloner.cc
new file mode 100644
index 0000000000000..3710bf87962f8
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/CandCloner.cc
@@ -0,0 +1,240 @@
+#include "CandCloner.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+namespace {  // file-local comparator, handed to std::sort in CandCloner::processSeedRange()
+  inline bool sortCandListByScore(const mkfit::IdxChi2List &cand1, const mkfit::IdxChi2List &cand2) {
+    return mkfit::sortByScoreStruct(cand1, cand2);
+  }
+}  // namespace
+
+namespace mkfit {
+
+  void CandCloner::setup(const IterationParams &ip) {
+    // Keep a pointer to the parameters (reset in release()) and pre-size every per-seed scratch vector.
+    mp_iteration_params = &ip;
+    for (int iseed = 0; iseed < s_max_seed_range; ++iseed)
+      t_cands_for_next_lay[iseed].reserve(ip.maxCandsPerSeed);
+  }
+
+  void CandCloner::release() { mp_iteration_params = nullptr; }  // drops the pointer stored by setup()
+
+  void CandCloner::begin_eta_bin(EventOfCombCandidates *e_o_ccs,
+                                 std::vector<std::pair<int, int>> *update_list,
+                                 std::vector<std::vector<TrackCand>> *extra_cands,
+                                 int start_seed,
+                                 int n_seeds) {
+    // Seed window handled in this eta bin; the container pointers are stored exactly as given.
+    m_start_seed = start_seed;
+    m_n_seeds = n_seeds;
+    mp_event_of_comb_candidates = e_o_ccs;
+    mp_kalman_update_list = update_list;
+    mp_extra_cands = extra_cands;
+    // One pending-hit list per seed, each with room for four entries up front.
+    m_hits_to_add.resize(n_seeds);
+    for (auto &pending : m_hits_to_add)
+      pending.reserve(4);
+#ifdef CC_TIME_ETA
+    printf("CandCloner::begin_eta_bin\n");
+    t_eta = dtime();
+#endif
+  }
+
+  void CandCloner::begin_layer(int lay) {
+    // Reset the per-layer bookkeeping and empty the Kalman update list.
+    m_idx_max_prev = 0;
+    m_idx_max = 0;
+    m_layer = lay;
+
+    mp_kalman_update_list->clear();
+
+#ifdef CC_TIME_LAYER
+    t_lay = dtime();
+#endif
+  }
+
+  void CandCloner::begin_iteration() {
+    // Intentionally empty: the "secondary" state variables are updated when work is completed/assigned.
+  }
+
+  void CandCloner::end_iteration() {
+    const int proc_n = m_idx_max - m_idx_max_prev;
+    dprintf("CandCloner::end_iteration process %d, max_prev=%d, max=%d\n", proc_n, m_idx_max_prev, m_idx_max);
+
+    // Work is handed out only once at least s_max_seed_range seeds have accumulated since the last call.
+    if (proc_n < s_max_seed_range)
+      return;
+
+    doWork(m_idx_max - m_idx_max % s_max_seed_range);  // round down to a multiple of s_max_seed_range
+  }
+
+  void CandCloner::end_layer() {
+    // Seeds from m_idx_max_prev onwards have not been handed to doWork() yet; process them now.
+    if (m_idx_max_prev < m_n_seeds)
+      doWork(m_n_seeds);
+
+    // Empty every per-seed hit list (clear() only, no shrink) before the next layer.
+    for (int iseed = 0; iseed < m_n_seeds; ++iseed)
+      m_hits_to_add[iseed].clear();
+
+#ifdef CC_TIME_LAYER
+    t_lay = dtime() - t_lay;
+    printf("CandCloner::end_layer %d -- t_lay=%8.6f\n", m_layer, t_lay);
+    printf("                      m_idx_max=%d, m_idx_max_prev=%d, issued work=%d\n",
+           m_idx_max,
+           m_idx_max_prev,
+           m_idx_max + 1 > m_idx_max_prev);
+#endif
+  }
+
+  void CandCloner::end_eta_bin() {  // body is active only when CC_TIME_ETA is defined (timing printout)
+#ifdef CC_TIME_ETA
+    t_eta = dtime() - t_eta;
+    printf("CandCloner::end_eta_bin t_eta=%8.6f\n", t_eta);
+#endif
+  }
+  //==============================================================================
+
+  void CandCloner::doWork(int idx) {
+    // Process seeds [m_idx_max_prev, idx) in chunks of at most s_max_seed_range, then move the
+    // "already processed" marker m_idx_max_prev up to idx.
+    dprintf("CandCloner::DoWork assigning work from seed %d to %d\n", m_idx_max_prev, idx);
+
+    const int the_end = idx;
+    int beg = m_idx_max_prev;
+
+    dprintf("CandCloner::DoWork working on beg=%d to the_end=%d\n", beg, the_end);
+
+    for (int end; beg != the_end; beg = end) {
+      end = std::min(beg + s_max_seed_range, the_end);
+      dprintf("CandCloner::DoWork processing %4d -> %4d\n", beg, end);
+
+      // processSeedRange() loops over the half-open seed range [beg, end).
+      processSeedRange(beg, end);
+    }
+
+    m_idx_max_prev = idx;
+  }
+
+  //==============================================================================
+
+  void CandCloner::processSeedRange(int is_beg, int is_end) {
+    // Process new hits for seeds [is_beg, is_end): per seed, sort the queued hits by score, build up to
+    // maxCandsPerSeed new candidates (interleaved with the seed's "extras") and copy them into its CombCandidate.
+    // bool debug = true;
+
+    dprintf("\nCandCloner::ProcessSeedRange is_beg=%d, is_end=%d\n", is_beg, is_end);
+
+    //1) sort the candidates
+    for (int is = is_beg; is < is_end; ++is) {
+      std::vector<IdxChi2List> &hitsForSeed = m_hits_to_add[is];
+
+      CombCandidate &ccand = mp_event_of_comb_candidates->cand(m_start_seed + is);
+      std::vector<TrackCand> &extras = (*mp_extra_cands)[is];
+      auto extra_i = extras.begin();
+      auto extra_e = extras.end();
+
+      // Extras are sorted by candScore.
+
+#ifdef DEBUG
+      dprint("  seed n " << is << " with input candidates=" << hitsForSeed.size());
+      for (int ih = 0; ih < (int)hitsForSeed.size(); ih++) {
+        dprint("trkIdx=" << hitsForSeed[ih].trkIdx << " hitIdx=" << hitsForSeed[ih].hitIdx
+                         << " chi2=" << hitsForSeed[ih].chi2 << std::endl
+                         << "    "
+                         << "original pt=" << ccand[hitsForSeed[ih].trkIdx].pT() << " "
+                         << "nTotalHits=" << ccand[hitsForSeed[ih].trkIdx].nTotalHits() << " "
+                         << "nFoundHits=" << ccand[hitsForSeed[ih].trkIdx].nFoundHits() << " "
+                         << "chi2=" << ccand[hitsForSeed[ih].trkIdx].chi2());
+      }
+#endif
+
+      if (!hitsForSeed.empty()) {
+        //sort the new hits
+        std::sort(hitsForSeed.begin(), hitsForSeed.end(), sortCandListByScore);
+
+        int num_hits = std::min((int)hitsForSeed.size(), mp_iteration_params->maxCandsPerSeed);
+
+        // This is from buffer, we know it was cleared after last usage.
+        std::vector<TrackCand> &cv = t_cands_for_next_lay[is - is_beg];
+
+        int n_pushed = 0;
+
+        for (int ih = 0; ih < num_hits; ih++) {
+          const IdxChi2List &h2a = hitsForSeed[ih];
+
+          TrackCand tc(ccand[h2a.trkIdx]);
+          tc.addHitIdx(h2a.hitIdx, m_layer, h2a.chi2_hit);
+          tc.setScore(h2a.score);
+          // hitIdx == -2: not stored as a candidate; it only competes for the seed's best short candidate.
+          if (h2a.hitIdx == -2) {
+            if (h2a.score > ccand.refBestShortCand().score()) {
+              ccand.setBestShortCand(tc);
+            }
+            continue;
+          }
+
+          // Could also skip storing of cands with last -3 hit.
+
+          // Squeeze in extra tracks that are better than current one.
+          while (extra_i != extra_e && sortByScoreTrackCand(*extra_i, tc) &&
+                 n_pushed < mp_iteration_params->maxCandsPerSeed) {
+            cv.emplace_back(*extra_i);
+            ++n_pushed;
+            ++extra_i;
+          }
+
+          if (n_pushed >= mp_iteration_params->maxCandsPerSeed)
+            break;
+
+          // set the overlap if we have a true hit and pT > pTCutOverlap
+          HitMatch *hm;
+          if (tc.pT() > mp_iteration_params->pTCutOverlap && h2a.hitIdx >= 0 &&
+              (hm = ccand[h2a.trkIdx].findOverlap(h2a.hitIdx, h2a.module))) {
+            tc.addHitIdx(hm->m_hit_idx, m_layer, hm->m_chi2);
+            tc.incOverlapCount();
+          }
+
+          cv.emplace_back(tc);
+          ++n_pushed;
+          // Only candidates that gained a real hit (hitIdx >= 0) are queued for a Kalman update, by index in cv.
+          if (h2a.hitIdx >= 0) {
+            mp_kalman_update_list->push_back(std::pair<int, int>(m_start_seed + is, n_pushed - 1));
+          }
+        }
+
+        // Add remaining extras as long as there is still room for them.
+        while (extra_i != extra_e && n_pushed < mp_iteration_params->maxCandsPerSeed) {
+          cv.emplace_back(*extra_i);
+          ++n_pushed;
+          ++extra_i;
+        }
+
+        // Can not use ccand.swap(cv) -- allocations for TrackCand vectors need to be
+        // in the same memory segment for gather operation to work in backward-fit.
+        ccand.resize(cv.size());
+        for (size_t ii = 0; ii < cv.size(); ++ii) {
+          ccand[ii] = cv[ii];
+        }
+        cv.clear();
+      } else  // hitsForSeed.empty()
+      {
+        if (ccand.state() == CombCandidate::Finding) {  // no queued hits: a seed still Finding keeps only its extras
+          ccand.clear();
+
+          while (extra_i != extra_e) {
+            ccand.emplace_back(*extra_i);
+            ++extra_i;
+          }
+        }
+      }
+
+      extras.clear();
+    }
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/CandCloner.h b/RecoTracker/MkFitCore/src/CandCloner.h
new file mode 100644
index 0000000000000..f1d7c4eef592d
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/CandCloner.h
@@ -0,0 +1,75 @@
+#ifndef RecoTracker_MkFitCore_src_CandCloner_h
+#define RecoTracker_MkFitCore_src_CandCloner_h
+
+#include "MkFinder.h"
+
+#include <vector>
+
+namespace mkfit {
+
+  class IterationParams;
+  class EventOfCombCandidates;
+
+  //#define CC_TIME_LAYER
+  //#define CC_TIME_ETA
+
+  class CandCloner {
+  public:
+    // Maximum number of seeds processed in one call to processSeedRange()
+    static const int s_max_seed_range = MPT_SIZE;
+
+    CandCloner() { t_cands_for_next_lay.resize(s_max_seed_range); }
+
+    void setup(const IterationParams &ip);
+    void release();
+
+    void begin_eta_bin(EventOfCombCandidates *e_o_ccs,
+                       std::vector<std::pair<int, int>> *update_list,
+                       std::vector<std::vector<TrackCand>> *extra_cands,
+                       int start_seed,
+                       int n_seeds);
+    void begin_layer(int lay);
+    void begin_iteration();
+    // Queues cand_info under slot idx and keeps m_idx_max at the largest idx seen so far.
+    void add_cand(int idx, const IdxChi2List &cand_info) {
+      m_hits_to_add[idx].push_back(cand_info);
+      m_idx_max = std::max(m_idx_max, idx);
+    }
+
+    // Number of entries currently queued under slot idx.
+    int num_cands(int idx) const { return m_hits_to_add[idx].size(); }
+
+    void end_iteration();
+    void end_layer();
+    void end_eta_bin();
+
+    void doWork(int idx);
+
+    void processSeedRange(int is_beg, int is_end);
+
+    // Accessor for MkFitter
+    CombCandidate &combCandWithOriginalIndex(int idx) { return mp_event_of_comb_candidates->cand(idx); }
+
+  private:
+    int m_idx_max = 0, m_idx_max_prev = 0;
+    std::vector<std::vector<IdxChi2List>> m_hits_to_add;
+    // Non-owning pointers, null until assigned (presumably in setup() / begin_eta_bin() -- TODO confirm).
+    const IterationParams *mp_iteration_params = nullptr;
+    EventOfCombCandidates *mp_event_of_comb_candidates = nullptr;
+    std::vector<std::pair<int, int>> *mp_kalman_update_list = nullptr;
+    std::vector<std::vector<TrackCand>> *mp_extra_cands = nullptr;
+
+#if defined(CC_TIME_ETA) or defined(CC_TIME_LAYER)
+    double t_eta = 0, t_lay = 0;
+#endif
+
+    int m_start_seed = 0, m_n_seeds = 0;
+    int m_layer = 0;
+
+    // Temporary in processSeedRange(), resized in constructor.
+    // Size of this one is s_max_seed_range
+    std::vector<std::vector<TrackCand>> t_cands_for_next_lay;
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/CartesianErr.ah b/RecoTracker/MkFitCore/src/CartesianErr.ah
new file mode 100644
index 0000000000000..4fecc89da1c6e
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/CartesianErr.ah
@@ -0,0 +1,201 @@
+#ifdef MPLEX_INTRINSICS
+
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = b_0;
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = b_1;
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = b_3;
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = b_6;
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_4 = b_10;
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_5 = b_15;
+
+
+
+
+
+
+
+      IntrVec_t c_6 = b_1;
+      IntrVec_t b_2 = LD(b, 2);
+      IntrVec_t c_7 = b_2;
+      IntrVec_t b_4 = LD(b, 4);
+      IntrVec_t c_8 = b_4;
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t b_7 = LD(b, 7);
+      IntrVec_t c_9 = b_7;
+      IntrVec_t b_11 = LD(b, 11);
+      IntrVec_t c_10 = b_11;
+      IntrVec_t b_16 = LD(b, 16);
+      IntrVec_t c_11 = b_16;
+
+
+
+
+
+
+
+      IntrVec_t c_12 = b_3;
+      IntrVec_t c_13 = b_4;
+      IntrVec_t b_5 = LD(b, 5);
+      IntrVec_t c_14 = b_5;
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      IntrVec_t b_8 = LD(b, 8);
+      IntrVec_t c_15 = b_8;
+      IntrVec_t b_12 = LD(b, 12);
+      IntrVec_t c_16 = b_12;
+      IntrVec_t b_17 = LD(b, 17);
+      IntrVec_t c_17 = b_17;
+
+
+
+
+
+
+
+      IntrVec_t a_21 = LD(a, 21);
+      IntrVec_t c_18 = MUL(a_21, b_6);
+      IntrVec_t c_19 = MUL(a_21, b_7);
+      IntrVec_t c_20 = MUL(a_21, b_8);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      IntrVec_t b_9 = LD(b, 9);
+      IntrVec_t c_21 = MUL(a_21, b_9);
+      IntrVec_t b_13 = LD(b, 13);
+      IntrVec_t c_22 = MUL(a_21, b_13);
+      IntrVec_t b_18 = LD(b, 18);
+      IntrVec_t c_23 = MUL(a_21, b_18);
+
+      IntrVec_t a_22 = LD(a, 22);
+      c_18 = FMA(a_22, b_10, c_18);
+      c_19 = FMA(a_22, b_11, c_19);
+      c_20 = FMA(a_22, b_12, c_20);
+      c_21 = FMA(a_22, b_13, c_21);
+      IntrVec_t b_14 = LD(b, 14);
+      c_22 = FMA(a_22, b_14, c_22);
+      IntrVec_t b_19 = LD(b, 19);
+      c_23 = FMA(a_22, b_19, c_23);
+
+
+
+
+
+      IntrVec_t a_27 = LD(a, 27);
+      IntrVec_t c_24 = MUL(a_27, b_6);
+      IntrVec_t c_25 = MUL(a_27, b_7);
+      IntrVec_t c_26 = MUL(a_27, b_8);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+      ST(c, 21, c_21);
+      ST(c, 22, c_22);
+      ST(c, 23, c_23);
+      IntrVec_t c_27 = MUL(a_27, b_9);
+      IntrVec_t c_28 = MUL(a_27, b_13);
+      IntrVec_t c_29 = MUL(a_27, b_18);
+
+      IntrVec_t a_28 = LD(a, 28);
+      c_24 = FMA(a_28, b_10, c_24);
+      c_25 = FMA(a_28, b_11, c_25);
+      c_26 = FMA(a_28, b_12, c_26);
+      c_27 = FMA(a_28, b_13, c_27);
+      c_28 = FMA(a_28, b_14, c_28);
+      c_29 = FMA(a_28, b_19, c_29);
+
+
+
+
+
+      IntrVec_t a_33 = LD(a, 33);
+      IntrVec_t c_30 = MUL(a_33, b_6);
+      IntrVec_t c_31 = MUL(a_33, b_7);
+      IntrVec_t c_32 = MUL(a_33, b_8);
+      ST(c, 24, c_24);
+      ST(c, 25, c_25);
+      ST(c, 26, c_26);
+      ST(c, 27, c_27);
+      ST(c, 28, c_28);
+      ST(c, 29, c_29);
+      IntrVec_t c_33 = MUL(a_33, b_9);
+      IntrVec_t c_34 = MUL(a_33, b_13);
+      IntrVec_t c_35 = MUL(a_33, b_18);
+
+
+      IntrVec_t a_35 = LD(a, 35);
+      c_30 = FMA(a_35, b_15, c_30);
+      c_31 = FMA(a_35, b_16, c_31);
+      c_32 = FMA(a_35, b_17, c_32);
+      c_33 = FMA(a_35, b_18, c_33);
+      c_34 = FMA(a_35, b_19, c_34);
+      ST(c, 30, c_30);
+      ST(c, 31, c_31);
+      ST(c, 32, c_32);
+      ST(c, 33, c_33);
+      ST(c, 34, c_34);
+      IntrVec_t b_20 = LD(b, 20);
+      c_35 = FMA(a_35, b_20, c_35);
+      ST(c, 35, c_35);
+   }
+
+#else
+
+#pragma omp simd
+   for (int n = 0; n < N; ++n)  // element k of item n is addressed as [k*N + n]
+   {
+      c[ 0*N+n] = b[ 0*N+n];  // c[0..17]: plain copies of b; b indices are consistent with packed
+      c[ 1*N+n] = b[ 1*N+n];  // lower-triangular symmetric 6x6 storage (rows 0, 1, 2 of the full matrix)
+      c[ 2*N+n] = b[ 3*N+n];
+      c[ 3*N+n] = b[ 6*N+n];
+      c[ 4*N+n] = b[10*N+n];
+      c[ 5*N+n] = b[15*N+n];
+      c[ 6*N+n] = b[ 1*N+n];
+      c[ 7*N+n] = b[ 2*N+n];
+      c[ 8*N+n] = b[ 4*N+n];
+      c[ 9*N+n] = b[ 7*N+n];
+      c[10*N+n] = b[11*N+n];
+      c[11*N+n] = b[16*N+n];
+      c[12*N+n] = b[ 3*N+n];
+      c[13*N+n] = b[ 4*N+n];
+      c[14*N+n] = b[ 5*N+n];
+      c[15*N+n] = b[ 8*N+n];
+      c[16*N+n] = b[12*N+n];
+      c[17*N+n] = b[17*N+n];
+      c[18*N+n] = a[21*N+n]*b[ 6*N+n] + a[22*N+n]*b[10*N+n];  // c[18..23]: a[21], a[22] weight two b rows
+      c[19*N+n] = a[21*N+n]*b[ 7*N+n] + a[22*N+n]*b[11*N+n];
+      c[20*N+n] = a[21*N+n]*b[ 8*N+n] + a[22*N+n]*b[12*N+n];
+      c[21*N+n] = a[21*N+n]*b[ 9*N+n] + a[22*N+n]*b[13*N+n];
+      c[22*N+n] = a[21*N+n]*b[13*N+n] + a[22*N+n]*b[14*N+n];
+      c[23*N+n] = a[21*N+n]*b[18*N+n] + a[22*N+n]*b[19*N+n];
+      c[24*N+n] = a[27*N+n]*b[ 6*N+n] + a[28*N+n]*b[10*N+n];  // c[24..29]: same b rows, weights a[27], a[28]
+      c[25*N+n] = a[27*N+n]*b[ 7*N+n] + a[28*N+n]*b[11*N+n];
+      c[26*N+n] = a[27*N+n]*b[ 8*N+n] + a[28*N+n]*b[12*N+n];
+      c[27*N+n] = a[27*N+n]*b[ 9*N+n] + a[28*N+n]*b[13*N+n];
+      c[28*N+n] = a[27*N+n]*b[13*N+n] + a[28*N+n]*b[14*N+n];
+      c[29*N+n] = a[27*N+n]*b[18*N+n] + a[28*N+n]*b[19*N+n];
+      c[30*N+n] = a[33*N+n]*b[ 6*N+n] + a[35*N+n]*b[15*N+n];  // c[30..35]: weights a[33], a[35]
+      c[31*N+n] = a[33*N+n]*b[ 7*N+n] + a[35*N+n]*b[16*N+n];
+      c[32*N+n] = a[33*N+n]*b[ 8*N+n] + a[35*N+n]*b[17*N+n];
+      c[33*N+n] = a[33*N+n]*b[ 9*N+n] + a[35*N+n]*b[18*N+n];
+      c[34*N+n] = a[33*N+n]*b[13*N+n] + a[35*N+n]*b[19*N+n];
+      c[35*N+n] = a[33*N+n]*b[18*N+n] + a[35*N+n]*b[20*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/CartesianErrTransp.ah b/RecoTracker/MkFitCore/src/CartesianErrTransp.ah
new file mode 100644
index 0000000000000..d57d9ebfbbfdd
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/CartesianErrTransp.ah
@@ -0,0 +1,145 @@
+#ifdef MPLEX_INTRINSICS
+
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = b_0;
+
+
+
+
+
+
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_1 = b_6;
+
+      IntrVec_t b_7 = LD(b, 7);
+      IntrVec_t c_2 = b_7;
+      ST(c, 0, c_0);
+
+
+
+
+
+      IntrVec_t b_12 = LD(b, 12);
+      IntrVec_t c_3 = b_12;
+
+      IntrVec_t b_13 = LD(b, 13);
+      IntrVec_t c_4 = b_13;
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+
+      IntrVec_t b_14 = LD(b, 14);
+      IntrVec_t c_5 = b_14;
+
+
+
+
+      IntrVec_t b_18 = LD(b, 18);
+      IntrVec_t c_6 = b_18;
+
+      IntrVec_t b_19 = LD(b, 19);
+      IntrVec_t c_7 = b_19;
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+
+      IntrVec_t b_20 = LD(b, 20);
+      IntrVec_t c_8 = b_20;
+
+      IntrVec_t b_21 = LD(b, 21);
+      IntrVec_t a_21 = LD(a, 21);
+      IntrVec_t c_9 = MUL(b_21, a_21);
+
+      IntrVec_t b_22 = LD(b, 22);
+      IntrVec_t a_22 = LD(a, 22);
+      c_9 = FMA(b_22, a_22, c_9);
+
+
+      IntrVec_t b_24 = LD(b, 24);
+      IntrVec_t c_10 = b_24;
+
+      IntrVec_t b_25 = LD(b, 25);
+      IntrVec_t c_11 = b_25;
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+
+      IntrVec_t b_26 = LD(b, 26);
+      IntrVec_t c_12 = b_26;
+
+      IntrVec_t b_27 = LD(b, 27);
+      IntrVec_t c_13 = MUL(b_27, a_21);
+      IntrVec_t a_27 = LD(a, 27);
+      IntrVec_t c_14 = MUL(b_27, a_27);
+
+      IntrVec_t b_28 = LD(b, 28);
+      c_13 = FMA(b_28, a_22, c_13);
+      IntrVec_t a_28 = LD(a, 28);
+      c_14 = FMA(b_28, a_28, c_14);
+
+
+      IntrVec_t b_30 = LD(b, 30);
+      IntrVec_t c_15 = b_30;
+
+      IntrVec_t b_31 = LD(b, 31);
+      IntrVec_t c_16 = b_31;
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+
+      IntrVec_t b_32 = LD(b, 32);
+      IntrVec_t c_17 = b_32;
+
+      IntrVec_t b_33 = LD(b, 33);
+      IntrVec_t c_18 = MUL(b_33, a_21);
+      IntrVec_t c_19 = MUL(b_33, a_27);
+      IntrVec_t a_33 = LD(a, 33);
+      IntrVec_t c_20 = MUL(b_33, a_33);
+
+      IntrVec_t b_34 = LD(b, 34);
+      c_18 = FMA(b_34, a_22, c_18);
+      c_19 = FMA(b_34, a_28, c_19);
+
+      IntrVec_t b_35 = LD(b, 35);
+      IntrVec_t a_35 = LD(a, 35);
+      c_20 = FMA(b_35, a_35, c_20);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+   }
+
+#else
+
+#pragma omp simd
+   for (int n = 0; n < N; ++n)  // element k of item n is addressed as [k*N + n]
+   {
+      c[ 0*N+n] = b[ 0*N+n];  // 21 outputs; the b indices read are consistent with taking the lower
+      c[ 1*N+n] = b[ 6*N+n];  // triangle of a row-major 6x6 b (NOTE(review): layout inferred from indices)
+      c[ 2*N+n] = b[ 7*N+n];
+      c[ 3*N+n] = b[12*N+n];
+      c[ 4*N+n] = b[13*N+n];
+      c[ 5*N+n] = b[14*N+n];
+      c[ 6*N+n] = b[18*N+n];
+      c[ 7*N+n] = b[19*N+n];
+      c[ 8*N+n] = b[20*N+n];
+      c[ 9*N+n] = b[21*N+n]*a[21*N+n] + b[22*N+n]*a[22*N+n];  // weighted by a[21], a[22]
+      c[10*N+n] = b[24*N+n];
+      c[11*N+n] = b[25*N+n];
+      c[12*N+n] = b[26*N+n];
+      c[13*N+n] = b[27*N+n]*a[21*N+n] + b[28*N+n]*a[22*N+n];
+      c[14*N+n] = b[27*N+n]*a[27*N+n] + b[28*N+n]*a[28*N+n];  // weighted by a[27], a[28]
+      c[15*N+n] = b[30*N+n];
+      c[16*N+n] = b[31*N+n];
+      c[17*N+n] = b[32*N+n];
+      c[18*N+n] = b[33*N+n]*a[21*N+n] + b[34*N+n]*a[22*N+n];
+      c[19*N+n] = b[33*N+n]*a[27*N+n] + b[34*N+n]*a[28*N+n];
+      c[20*N+n] = b[33*N+n]*a[33*N+n] + b[35*N+n]*a[35*N+n];  // weighted by a[33], a[35]
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/Config.cc b/RecoTracker/MkFitCore/src/Config.cc
new file mode 100644
index 0000000000000..3c97b5e6fa836
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Config.cc
@@ -0,0 +1,48 @@
+#include "RecoTracker/MkFitCore/interface/Config.h"
+
+namespace mkfit {
+
+  const PropagationConfig* PropagationConfig::s_default = nullptr;
+
+  void PropagationConfig::set_as_default(bool force) {
+    // Keep an already installed default unless replacement is forced.
+    if (s_default != nullptr && !force)
+      return;
+    // Copy first so s_default never dangles if the copy throws or *this is the current default.
+    const PropagationConfig* replacement = new PropagationConfig(*this);
+    delete s_default;
+    s_default = replacement;
+  }
+
+  //------------------------------------------------------------------------------
+
+  namespace Config {
+    // Multi threading configuration
+#if defined(MKFIT_STANDALONE)
+    int numThreadsFinder = 1;
+    int numThreadsEvents = 1;
+    int numSeedsPerTask = 32;
+#endif
+
+#if defined(MKFIT_STANDALONE)
+    bool removeDuplicates = false;
+    bool useHitsForDuplicates = true;
+#endif
+    const float maxdPt = 0.5;
+    const float maxdPhi = 0.25;
+    const float maxdEta = 0.05;
+    const float maxdR = 0.0025;
+    const float minFracHitsShared = 0.75;
+
+    const float maxd1pt = 1.8;     //windows for hit
+    const float maxdphi = 0.37;    //and/or dr
+    const float maxdcth = 0.37;    //comparisons
+    const float maxcth_ob = 1.99;  //eta 1.44
+    const float maxcth_fw = 6.05;  //eta 2.5
+
+#ifdef CONFIG_PhiQArrays
+    bool usePhiQArrays = true;
+#endif
+  }  // namespace Config
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/ConfigWrapper.cc b/RecoTracker/MkFitCore/src/ConfigWrapper.cc
new file mode 100644
index 0000000000000..f4a1fa63aa47c
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/ConfigWrapper.cc
@@ -0,0 +1,20 @@
+#include "RecoTracker/MkFitCore/interface/ConfigWrapper.h"
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+namespace mkfit {
+  namespace ConfigWrapper {
+    void initializeForCMSSW() {
+      // Flag combination shared by inter-layer finding and by the forward and backward fits.
+      const PropagationFlags field_and_material(PF_use_param_b_field | PF_apply_material);
+      PropagationConfig pconf;
+      pconf.backward_fit_to_pca = false;
+      pconf.finding_requires_propagation_to_hit_pos = true;
+      pconf.finding_inter_layer_pflags = field_and_material;
+      pconf.backward_fit_pflags = field_and_material;
+      pconf.forward_fit_pflags = field_and_material;
+      pconf.finding_intra_layer_pflags = pconf.seed_fit_pflags = pconf.pca_prop_pflags = PropagationFlags(PF_none);
+      pconf.set_as_default();
+    }
+  }  // namespace ConfigWrapper
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/Debug.h b/RecoTracker/MkFitCore/src/Debug.h
new file mode 100644
index 0000000000000..318e214bcc481
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Debug.h
@@ -0,0 +1,98 @@
+#ifndef RecoTracker_MkFitCore_src_Debug_h
+#ifdef DEBUG
+#define RecoTracker_MkFitCore_src_Debug_h
+
+#ifdef dprint
+
+#undef dprint
+#undef dprint_np
+#undef dcall
+#undef dprintf
+#undef dprintf_np
+
+#endif
+/*
+  Usage: DEBUG must be defined before this header file is included, typically
+
+  #define DEBUG
+  #include "Debug.h"
+
+  This defines macros dprint(), dcall() and dprintf();
+  dprint(x) is equivalent to std::cout << x << std::endl;
+    example: dprint("Hits in layer=" << ilayer);
+
+  dcall(x) simply calls x
+    example: dcall(pre_prop_print(ilay, mkfp));
+
+  dprintf(x) is equivalent to printf(x)
+    example: dprintf("Bad label for simtrack %d -- %d\n", itrack, track.label());
+
+  All printouts are also controlled by a bool variable "debug".
+  bool debug = false; is declared as a file global in an anonymous
+  namespace, and thus can be overridden within any interior scope
+  as needed, so one could leave the global set to false and only set
+  a local to true (or create a debug_guard) within certain scopes.
+
+  All are protected by a file scope mutex to avoid mixed printouts.
+  This mutex can also be acquired within a block via dmutex_guard:
+
+  if (debug) {
+    dmutex_guard;
+    [do complicated stuff]
+  }
+
+  The mutex is not reentrant, so avoid using dprint et al. within a scope
+  where the mutex has already been acquired, as doing so will deadlock.
+ */
+#include <mutex>
+// Macro argument n is parenthesized so that operator expressions passed as n keep their meaning.
+#define dmutex_guard std::lock_guard<std::mutex> dlock(debug_mutex)
+#define dprint(x)                \
+  if (debug) {                   \
+    dmutex_guard;                \
+    std::cout << x << std::endl; \
+  }
+#define dprint_np(n, x)                         \
+  if (debug && (n) < N_proc) {                  \
+    dmutex_guard;                               \
+    std::cout << (n) << ": " << x << std::endl; \
+  }
+#define dcall(x)  \
+  if (debug) {    \
+    dmutex_guard; \
+    x;            \
+  }
+#define dprintf(...)     \
+  if (debug) {           \
+    dmutex_guard;        \
+    printf(__VA_ARGS__); \
+  }
+#define dprintf_np(n, ...)     \
+  if (debug && (n) < N_proc) { \
+    dmutex_guard;              \
+    std::cout << (n) << ": ";  \
+    printf(__VA_ARGS__);       \
+  }
+
+namespace {
+  bool debug = false;  // default, can be overridden locally
+  std::mutex debug_mutex;  // locked by dmutex_guard inside every d* macro
+
+  struct debug_guard {  // sets the file-level debug flag for the lifetime of the guard object
+    bool m_prev_debug;  // value of debug at construction, written back by the destructor
+    debug_guard(bool state = true) : m_prev_debug(debug) { debug = state; }
+    ~debug_guard() { debug = m_prev_debug; }
+  };
+}  // namespace
+
+#else
+
+#define dprint(x) (void(0))
+#define dprint_np(n, x) (void(0))
+#define dcall(x) (void(0))
+#define dprintf(...) (void(0))
+#define dprintf_np(n, ...) (void(0))
+
+#endif
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/FindingFoos.cc b/RecoTracker/MkFitCore/src/FindingFoos.cc
new file mode 100644
index 0000000000000..fb6de0f66c51a
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/FindingFoos.cc
@@ -0,0 +1,20 @@
+#include "FindingFoos.h"
+#include "MkBase.h"
+#include "KalmanUtilsMPlex.h"
+
+namespace {  // file-local instances: the barrel set uses propagateTracksToR, the endcap set propagateTracksToZ
+  using namespace mkfit;
+  const FindingFoos s_fndfoos_brl(kalmanPropagateAndComputeChi2, kalmanPropagateAndUpdate, &MkBase::propagateTracksToR);
+  const FindingFoos s_fndfoos_ec(kalmanPropagateAndComputeChi2Endcap,
+                                 kalmanPropagateAndUpdateEndcap,
+                                 &MkBase::propagateTracksToZ);
+}  // namespace
+
+namespace mkfit {
+  // Accessors returning references to the two file-local instances defined in this file.
+  const FindingFoos& FindingFoos::get_barrel_finding_foos() { return s_fndfoos_brl; }
+  const FindingFoos& FindingFoos::get_endcap_finding_foos() { return s_fndfoos_ec; }
+  // Returns the barrel instance when is_barrel is true, the endcap instance otherwise.
+  const FindingFoos& FindingFoos::get_finding_foos(bool is_barrel) { return is_barrel ? s_fndfoos_brl : s_fndfoos_ec; }
+
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/FindingFoos.h b/RecoTracker/MkFitCore/src/FindingFoos.h
new file mode 100644
index 0000000000000..9cf6e156fab2a
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/FindingFoos.h
@@ -0,0 +1,38 @@
+#ifndef RecoTracker_MkFitCore_src_FindingFoos_h
+#define RecoTracker_MkFitCore_src_FindingFoos_h
+
+#include "Matrix.h"
+
+namespace mkfit {
+
+  class MkBase;
+
+#define COMPUTE_CHI2_ARGS                                                                                    \
+  const MPlexLS &, const MPlexLV &, const MPlexQI &, const MPlexHS &, const MPlexHV &, MPlexQF &, MPlexLV &, \
+      const int, const PropagationFlags, const bool
+
+#define UPDATE_PARAM_ARGS                                                                                         \
+  const MPlexLS &, const MPlexLV &, MPlexQI &, const MPlexHS &, const MPlexHV &, MPlexLS &, MPlexLV &, const int, \
+      const PropagationFlags, const bool
+
+  class FindingFoos {
+  public:
+    // Chi2, parameter-update and propagation entry points; null unless set by the three-argument constructor.
+    void (*m_compute_chi2_foo)(COMPUTE_CHI2_ARGS) = nullptr;
+    void (*m_update_param_foo)(UPDATE_PARAM_ARGS) = nullptr;
+    void (MkBase::*m_propagate_foo)(float, const int, const PropagationFlags) = nullptr;
+
+    FindingFoos() {}
+    FindingFoos(void (*cch2_f)(COMPUTE_CHI2_ARGS),
+                void (*updp_f)(UPDATE_PARAM_ARGS),
+                void (MkBase::*p_f)(float, const int, const PropagationFlags))
+        : m_compute_chi2_foo(cch2_f), m_update_param_foo(updp_f), m_propagate_foo(p_f) {}
+
+    static const FindingFoos &get_barrel_finding_foos();
+    static const FindingFoos &get_endcap_finding_foos();
+    static const FindingFoos &get_finding_foos(bool is_barrel);
+  };
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/Hit.cc b/RecoTracker/MkFitCore/src/Hit.cc
new file mode 100644
index 0000000000000..5a37fa7d7324e
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Hit.cc
@@ -0,0 +1,16 @@
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "Matrix.h"
+
+namespace mkfit {
+
+  void MCHitInfo::reset() {}
+
+  void print(std::string_view label, const MeasurementState& s) {
+    // Writes the label, the first three parameters tagged x/y/z and a dump of the error matrix to std::cout.
+    std::cout << label << std::endl << "x: " << s.parameters()[0] << " y: " << s.parameters()[1]
+              << " z: " << s.parameters()[2] << std::endl << "errors: " << std::endl;
+    dumpMatrix(s.errors());
+    std::cout << std::endl;
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/HitStructures.cc b/RecoTracker/MkFitCore/src/HitStructures.cc
new file mode 100644
index 0000000000000..08d9d6b91268d
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/HitStructures.cc
@@ -0,0 +1,624 @@
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+
+#include "Ice/IceRevisitedRadix.h"
+
+#include "Debug.h"
+
+namespace mkfit {
+
+  LayerOfHits::~LayerOfHits() {
+#ifdef COPY_SORTED_HITS
+    free_hits();
+#endif
+    operator delete[](m_hit_ranks);  // m_hit_ranks holds the array obtained from RadixSort::RelinquishRanks()
+  }
+
+#ifdef COPY_SORTED_HITS
+  void LayerOfHits::alloc_hits(int size) {
+    // std::aligned_alloc requires the byte count to be a multiple of the alignment, so round it up to 64.
+    m_hits = (Hit *)std::aligned_alloc(64, (sizeof(Hit) * size + 63) / 64 * 64);
+    m_capacity = size;
+    for (int ihit = 0; ihit < m_capacity; ihit++)
+      m_hits[ihit] = Hit();
+  }
+
+  void LayerOfHits::free_hits() { std::free(m_hits); }  // m_hits is obtained from std::aligned_alloc() in alloc_hits()
+#endif
+
+  void LayerOfHits::setup_bins(float qmin, float qmax, float dq) {
+    // Define layer with min/max and number of bins along q.
+    // dq < 0: -dq is the number of bins over [qmin, qmax]; otherwise dq is the bin width.
+    if (dq < 0) {
+      m_nq = (int)-dq;
+      m_qmin = qmin;
+      m_qmax = qmax;
+    } else {
+      float extent = qmax - qmin;
+      m_nq = std::ceil(extent / dq);
+      float extra = 0.5f * (m_nq * dq - extent);
+      m_qmin = qmin - extra;
+      m_qmax = qmax + extra;
+    }
+    // Use the stored (possibly widened) range so that the m_nq bins exactly cover [m_qmin, m_qmax].
+    m_fq = m_nq / (m_qmax - m_qmin);  // used in e.g. qbin = (q_hit - m_qmin) * m_fq;
+    m_phi_bin_infos.resize(m_nq);
+    m_phi_bin_deads.resize(m_nq);
+  }
+
+  void LayerOfHits::setupLayer(const LayerInfo &li) {
+    // One-time binding of this object to li, followed by binning along q:
+    // the z range is used for barrel layers, the r range otherwise.
+    // LayerInfo::q_bin(): > 0 is the bin width, < 0 is minus the number of bins.
+    assert(m_layer_info == nullptr && "setupLayer() already called.");
+
+    m_layer_info = &li;
+    m_is_barrel = li.is_barrel();
+
+    const float q_lo = m_is_barrel ? li.zmin() : li.rin();
+    const float q_hi = m_is_barrel ? li.zmax() : li.rout();
+
+    setup_bins(q_lo, q_hi, li.q_bin());
+  }
+
+  //==============================================================================
+
+  void LayerOfHits::suckInHits(const HitVec &hitv) {
+    assert(m_nq > 0 && "setupLayer() was not called.");
+    // Bins and sorts all hits of hitv; hitv itself is only referenced through m_ext_hits.
+    m_n_hits = hitv.size();
+    m_ext_hits = &hitv;
+
+#ifdef COPY_SORTED_HITS
+    if (m_capacity < m_n_hits) {
+      free_hits();
+      alloc_hits(m_n_hits);
+    }
+#endif
+
+    if (Config::usePhiQArrays) {
+      m_hit_phis.resize(m_n_hits);
+      m_hit_qs.resize(m_n_hits);
+      m_hit_infos.resize(m_n_hits);
+    }
+    m_qphifines.resize(m_n_hits);
+    // First pass: pack (q bin << 16) + fine phi bin into one sort key per hit.
+    for (int i = 0; i < m_n_hits; ++i) {
+      const Hit &h = hitv[i];
+
+      HitInfo hi = {h.phi(), m_is_barrel ? h.z() : h.r()};
+
+      m_qphifines[i] = phiBinFine(hi.phi) + (qBinChecked(hi.q) << 16);
+
+      if (Config::usePhiQArrays) {
+        m_hit_infos[i] = hi;
+      }
+    }
+    // Sort the keys; m_hit_ranks[i] is used below as the input index of the i-th hit in sorted order.
+    operator delete[](m_hit_ranks);
+    {
+      RadixSort sort;
+      sort.Sort(m_qphifines.data(), m_n_hits, RADIX_UNSIGNED);  // data() is valid for an empty vector too
+      m_hit_ranks = sort.RelinquishRanks();
+    }
+
+    int curr_qphi = -1;
+    empty_q_bins(0, m_nq, 0);
+    // Second pass: walk hits in sorted order and record the [first, second) index range of each bin.
+    for (int i = 0; i < m_n_hits; ++i) {
+      int j = m_hit_ranks[i];
+
+#ifdef COPY_SORTED_HITS
+      memcpy(&m_hits[i], &hitv[j], sizeof(Hit));
+#endif
+
+      if (Config::usePhiQArrays) {
+        m_hit_phis[i] = m_hit_infos[j].phi;
+        m_hit_qs[i] = m_hit_infos[j].q;
+      }
+
+      // Combined q-phi bin with fine part masked off
+      const int jqphi = m_qphifines[j] & m_phi_fine_xmask;
+
+      const int phi_bin = (jqphi & m_phi_mask_fine) >> m_phi_bits_shift;
+      const int q_bin = jqphi >> 16;
+
+      // Fill the bin info
+      if (jqphi != curr_qphi) {
+        m_phi_bin_infos[q_bin][phi_bin] = {i, i};
+        curr_qphi = jqphi;
+      }
+
+      m_phi_bin_infos[q_bin][phi_bin].second++;
+    }
+  }
+
+  //==============================================================================
+
+  void LayerOfHits::suckInDeads(const DeadVec &deadv) {
+    // Flags every (q bin, phi bin) touched by a region of deadv, after calling empty_q_bins_dead().
+    assert(m_nq > 0 && "setupLayer() was not called.");
+
+    empty_q_bins_dead(0, m_nq);
+
+    for (const auto &region : deadv) {
+      const int qb_begin = qBinChecked(region.q1);
+      const int qb_end = qBinChecked(region.q2) + 1;
+      const int pb_begin = phiBin(region.phi1);
+      const int pb_end = phiBin(region.phi2) + 1;
+
+      for (int qb = qb_begin; qb < qb_end; ++qb) {
+        if (pb_begin <= pb_end) {
+          for (int pb = pb_begin; pb < pb_end; ++pb)
+            m_phi_bin_deads[qb][pb] = true;
+        } else {
+          // Begin bin above end bin: flag [pb_begin, m_nphi) and then [0, pb_end).
+          for (int pb = pb_begin; pb < Config::m_nphi; ++pb)
+            m_phi_bin_deads[qb][pb] = true;
+          for (int pb = 0; pb < pb_end; ++pb)
+            m_phi_bin_deads[qb][pb] = true;
+        }
+      }
+    }
+  }
+
+  void LayerOfHits::beginRegistrationOfHits(const HitVec &hitv) {
+    // Starts a registration cycle: remembers hitv and resets everything registerHit() accumulates.
+    assert(m_nq > 0 && "setupLayer() was not called.");
+    m_ext_hits = &hitv;
+    m_n_hits = 0;
+    // Inverted limits, so the first registered index sets both the minimum and the maximum.
+    m_min_ext_idx = std::numeric_limits<int>::max();
+    m_max_ext_idx = std::numeric_limits<int>::min();
+    m_ext_idcs.clear();
+    m_qphifines.clear();
+    m_hit_infos.clear();
+  }
+
+  void LayerOfHits::registerHit(int idx) {
+    // Records external hit idx: its index, the min/max index seen and its packed (q bin, fine phi bin) key.
+    const Hit &hit = (*m_ext_hits)[idx];
+    const HitInfo info = {hit.phi(), m_is_barrel ? hit.z() : hit.r()};
+
+    m_ext_idcs.push_back(idx);
+    if (idx < m_min_ext_idx)
+      m_min_ext_idx = idx;
+    if (idx > m_max_ext_idx)
+      m_max_ext_idx = idx;
+
+    m_qphifines.push_back(phiBinFine(info.phi) + (qBinChecked(info.q) << 16));
+    if (Config::usePhiQArrays)
+      m_hit_infos.emplace_back(info);
+  }
+
+  void LayerOfHits::endRegistrationOfHits(bool build_original_to_internal_map) {
+    m_n_hits = m_ext_idcs.size();
+    if (m_n_hits == 0)
+      return;  // NOTE(review): empty_q_bins() is not reached on this path -- confirm bins cannot be stale
+    // Sorts the registered hits by packed (q bin, fine phi bin) key and fills the per-bin index ranges.
+    // radix
+    operator delete[](m_hit_ranks);
+    {
+      RadixSort sort;
+      sort.Sort(m_qphifines.data(), m_n_hits, RADIX_UNSIGNED);
+      m_hit_ranks = sort.RelinquishRanks();
+    }
+
+    // copy q/phi
+
+#ifdef COPY_SORTED_HITS
+    if (m_capacity < m_n_hits) {
+      free_hits();
+      alloc_hits(m_n_hits);
+    }
+#endif
+
+    if (Config::usePhiQArrays) {
+      m_hit_phis.resize(m_n_hits);
+      m_hit_qs.resize(m_n_hits);
+    }
+
+    int curr_qphi = -1;
+    empty_q_bins(0, m_nq, 0);
+
+    for (int i = 0; i < m_n_hits; ++i) {
+      int j = m_hit_ranks[i];  // index in intermediate
+      int k = m_ext_idcs[j];   // index in external hit_vec
+      // The external hit vector is reachable only through m_ext_hits here (there is no hitv parameter).
+#ifdef COPY_SORTED_HITS
+      memcpy(&m_hits[i], &(*m_ext_hits)[k], sizeof(Hit));
+#endif
+
+      if (Config::usePhiQArrays) {
+        m_hit_phis[i] = m_hit_infos[j].phi;
+        m_hit_qs[i] = m_hit_infos[j].q;
+      }
+
+      // Combined q-phi bin with fine part masked off
+      const int jqphi = m_qphifines[j] & m_phi_fine_xmask;
+
+      const int phi_bin = (jqphi & m_phi_mask_fine) >> m_phi_bits_shift;
+      const int q_bin = jqphi >> 16;
+
+      // Fill the bin info
+      if (jqphi != curr_qphi) {
+        m_phi_bin_infos[q_bin][phi_bin] = {i, i};
+        curr_qphi = jqphi;
+      }
+
+      m_phi_bin_infos[q_bin][phi_bin].second++;
+
+      // m_hit_ranks[i] will never be used again - use it to point to external/original index.
+      m_hit_ranks[i] = k;
+    }
+
+    if (build_original_to_internal_map) {
+      if (m_max_ext_idx - m_min_ext_idx + 1 > 8 * m_n_hits) {
+        // If this happens we might:
+        // a) Use external indices for everything. -- *** We are now. ***
+        // b) Build these maps for seeding layers only.
+        // c) Have a flag in hit-on-track that tells us if the hit index has been remapped,
+        //    essentially, if it is a seed hit. This might be smart anyway.
+        //    One could use index < -256 or something similar.
+
+        printf(
+            "LayerOfHits::endRegistrationOfHits() original_to_internal index map vector is largish: m_n_hits=%d, "
+            "map_vector_size=%d\n",
+            m_n_hits,
+            m_max_ext_idx - m_min_ext_idx + 1);
+      }
+
+      m_ext_idcs.resize(m_max_ext_idx - m_min_ext_idx + 1);
+      for (int i = 0; i < m_n_hits; ++i) {
+        m_ext_idcs[m_hit_ranks[i] - m_min_ext_idx] = i;
+      }
+    }
+
+    // We can release m_hit_infos and m_qphifines -- and realloc on next BeginInput.
+    // m_qphifines could still be used as pre-selection in selectHitIndices().
+  }
+
+  //==============================================================================
+
+  /*
+  // Example code for looping over a given (q, phi) 2D range.
+  // A significantly more complex implementation of this can be found in MkFinder::selectHitIndices().
+  void LayerOfHits::selectHitIndices(float q, float phi, float dq, float dphi, std::vector<int>& idcs, bool isForSeeding, bool dump)
+  {
+    // Sanitizes q, dq and dphi. phi is expected to be in -pi, pi.
+
+    // Make sure how phi bins work beyond -pi, +pi.
+    // for (float p = -8; p <= 8; p += 0.05)
+    // {
+    //   int pb = phiBin(p);
+    //   printf("%5.2f %4d %4d\n", p, pb, pb & m_phi_mask);
+    // }
+
+    if ( ! isForSeeding) // seeding has set cuts for dq and dphi
+    {
+      // XXXX MT: min search windows not enforced here.
+      dq   = std::min(std::abs(dq),   max_dq());
+      dphi = std::min(std::abs(dphi), max_dphi());
+    }
+
+    int qb1 = qBinChecked(q - dq);
+    int qb2 = qBinChecked(q + dq) + 1;
+    int pb1 = phiBin(phi - dphi);
+    int pb2 = phiBin(phi + dphi) + 1;
+
+    // int extra = 2;
+    // qb1 -= 2; if (qb < 0) qb = 0;
+    // qb2 += 2; if (qb >= m_nq) qb = m_nq;
+
+    if (dump)
+      printf("LayerOfHits::SelectHitIndices %6.3f %6.3f %6.4f %7.5f %3d %3d %4d %4d\n",
+            q, phi, dq, dphi, qb1, qb2, pb1, pb2);
+
+    // This should be input argument, well ... it will be Matriplex op, or sth. // KPM -- it is now! used for seeding
+    for (int qi = qb1; qi < qb2; ++qi)
+    {
+      for (int pi = pb1; pi < pb2; ++pi)
+      {
+        int pb = pi & m_phi_mask;
+
+        for (uint16_t hi = m_phi_bin_infos[qi][pb].first; hi < m_phi_bin_infos[qi][pb].second; ++hi)
+        {
+          // Here one could enforce some further selection on hits
+    if (Config::usePhiQArrays)
+    {
+      float ddq   = std::abs(q   - m_hit_qs[hi]);
+      float ddphi = std::abs(phi - m_hit_phis[hi]);
+      if (ddphi > Const::PI) ddphi = Const::TwoPI - ddphi;
+
+      if (dump)
+        printf("     SHI %3d %4d %4d %5d  %6.3f %6.3f %6.4f %7.5f   %s\n",
+        qi, pi, pb, hi,
+        m_hit_qs[hi], m_hit_phis[hi], ddq, ddphi,
+        (ddq < dq && ddphi < dphi) ? "PASS" : "FAIL");
+
+      if (ddq < dq && ddphi < dphi)
+      {
+        idcs.push_back(hi);
+      }
+    }
+    else // do not use phi-q arrays
+    {
+      idcs.push_back(hi);
+    }
+        }
+      }
+    }
+  }
+  */
+
+  void LayerOfHits::printBins() {
+    // Prints the (first, second) pair of every phi bin, eight bins per output line, for each q bin.
+    const char q_axis = is_barrel() ? 'Z' : 'R';
+    for (int iq = 0; iq < m_nq; ++iq) {
+      printf("%c bin %d\n", q_axis, iq);
+      for (int ip = 0; ip < Config::m_nphi; ++ip) {
+        const auto &bin = m_phi_bin_infos[iq][ip];
+        if (ip % 8 == 0)
+          printf(" Phi %4d: ", ip);
+        printf("%5d,%4d   %s", bin.first, bin.second, (ip % 8 == 7) ? "\n" : "");
+      }
+    }
+  }
+
+  //==============================================================================
+  // EventOfHits
+  //==============================================================================
+
+  EventOfHits::EventOfHits(const TrackerInfo &trk_inf)  // Allocates one LayerOfHits per layer reported by trk_inf.
+      : m_layers_of_hits(trk_inf.n_layers()), m_n_layers(trk_inf.n_layers()) {
+    for (int ii = 0; ii < trk_inf.n_layers(); ++ii) {
+      const LayerInfo &li = trk_inf.layer(ii);
+      m_layers_of_hits[li.layer_id()].setupLayer(li);  // slot is selected by layer_id(), not by the loop index ii
+    }
+  }
+
+  //==============================================================================
+  // TrackCand
+  //==============================================================================
+
+  Track TrackCand::exportTrack(bool remove_missing_hits) const {  // Copies this candidate and its hit chain into a Track.
+    dprintf("TrackCand::exportTrack label=%5d, total_hits=%2d, overlaps=%2d -- n_seed_hits=%d,prod_type=%d\n",
+            label(),
+            nTotalHits(),
+            nOverlapHits_,
+            getNSeedHits(),
+            (int)prodType());
+
+    Track res(*this);
+    res.resizeHits(remove_missing_hits ? nFoundHits() : nTotalHits(), nFoundHits());
+    res.setNOverlapHits(nOverlapHits());
+
+    int nh = nTotalHits();
+    int ch = lastHitIdx_;  // start at the last node; nodes are chained backwards through m_prev_idx
+    int good_hits_pos = nFoundHits();
+    while (--nh >= 0) {  // fills res from its last hit position towards the first
+      const HoTNode &hot_node = m_comb_candidate->hot_node(ch);
+      if (remove_missing_hits) {
+        if (hot_node.m_hot.index >= 0)  // only nodes with a non-negative hit index are exported in this mode
+          res.setHitIdxAtPos(--good_hits_pos, hot_node.m_hot);
+      } else {
+        res.setHitIdxAtPos(nh, hot_node.m_hot);
+      }
+      dprintf("  nh=%2d, ch=%d, idx=%d lyr=%d prev_idx=%d\n",
+              nh,
+              ch,
+              hot_node.m_hot.index,
+              hot_node.m_hot.layer,
+              hot_node.m_prev_idx);
+      ch = hot_node.m_prev_idx;
+    }
+
+    return res;
+  }
+
+  //==============================================================================
+  // CombCandidate
+  //==============================================================================
+
+  void CombCandidate::importSeed(const Track &seed, int region) {  // Adds a TrackCand built from seed, with seed's hits.
+    m_trk_cands.emplace_back(seed, this);  // construct in place; no temporary TrackCand to move from
+
+    m_state = CombCandidate::Dormant;
+    m_pickup_layer = seed.getLastHitLyr();
+#ifdef DUMPHITWINDOW
+    m_seed_algo = seed.algoint();
+    m_seed_label = seed.label();
+#endif
+
+    TrackCand &cand = m_trk_cands.back();
+    cand.setNSeedHits(seed.nTotalHits());
+    cand.setEtaRegion(region);
+
+    dprintf("Importing pt=%f eta=%f, lastCcIndex=%d\n", cand.pT(), cand.momEta(), cand.lastCcIndex());
+
+    for (const HitOnTrack *hp = seed.beginHitsOnTrack(); hp != seed.endHitsOnTrack(); ++hp) {
+      dprintf(" hit idx=%d lyr=%d\n", hp->index, hp->layer);
+      cand.addHitIdx(hp->index, hp->layer, 0.0f);  // seed hits are registered with a chi2 of 0
+    }
+
+    cand.setScore(getScoreCand(cand));  // score is computed only after all seed hits have been added
+  }
+
+  void CombCandidate::mergeCandsAndBestShortOne(const IterationParams &params, bool update_score, bool sort_cands) {
+    TrackCand *best_short = m_best_short_cand.combCandidate() ? &m_best_short_cand : nullptr;
+    // Re-score (update_score) and sort (sort_cands) the candidates, then merge in the best short candidate, if set.
+    if (!empty()) {
+      if (update_score) {
+        for (auto &c : m_trk_cands)
+          c.setScore(getScoreCand(c));
+        if (best_short)
+          best_short->setScore(getScoreCand(*best_short));
+      }
+      if (sort_cands) {
+        std::sort(m_trk_cands.begin(), m_trk_cands.end(), sortByScoreTrackCand);
+      }
+
+      if (best_short && best_short->score() > m_trk_cands.back().score()) {
+        // Make room first: pop_back() invalidates iterators to the last element, so search for the slot afterwards.
+        if ((int)m_trk_cands.size() >= params.maxCandsPerSeed)
+          m_trk_cands.pop_back();
+
+        auto ci = m_trk_cands.begin();
+        while (ci != m_trk_cands.end() && ci->score() > best_short->score())
+          ++ci;
+          // To print out what has been replaced -- remove when done with short track handling.
+#ifdef DEBUG
+        if (ci == m_trk_cands.begin() && !m_trk_cands.empty()) {
+          printf("FindTracksStd -- Replacing best cand (%f) with short one (%f) in final sorting\n",
+                 m_trk_cands.front().score(),
+                 best_short->score());
+        }
+#endif
+
+        m_trk_cands.insert(ci, *best_short);
+      }
+
+    } else if (best_short) {
+      m_trk_cands.push_back(*best_short);
+    }
+
+    if (best_short)
+      best_short->resetShortTrack();
+
+    // assert(capacity() == (size_t)Config::maxCandsPerSeed);
+  }
+
+  void CombCandidate::compactifyHitStorageForBestCand(bool remove_seed_hits, int backward_fit_min_hits) {
+    // The best candidate is assumed to be in position 0 (i.e. after
+    // mergeCandsAndBestShortOne() has been called).
+    // Other cands are dropped, their hits are dropped as well.
+    // Seed hits are dropped if remove_seed_hits is true.
+
+    /* The following considerations are related to the following implementation:
+  minNrOfHitsForRebuild (checked against "nHits - nseed") has a default at 5, except
+  1 in initialStep
+  4 in tobTec and pixelLess
+  https://github.com/cms-sw/cmssw/blob/master/RecoTracker/CkfPattern/plugins/GroupedCkfTrajectoryBuilder.cc#L1015
+
+  NOTE: some of those can be matched hits !!!
+
+  the hit splitting is triggered here: https://github.com/cms-sw/cmssw/blob/master/RecoTracker/CkfPattern/src/CkfTrackCandidateMakerBase.cc#L468
+  after the rebuild has already happened: https://github.com/cms-sw/cmssw/blob/master/RecoTracker/CkfPattern/src/CkfTrackCandidateMakerBase.cc#L313
+  */
+
+    assert(!m_trk_cands.empty());
+    m_trk_cands.resize(1);  // keep only the candidate in position 0
+    TrackCand &tc = m_trk_cands[0];
+
+    // Do NOT remove any seed hits if fewer than backward_fit_min_hits hits are available.
+    if (remove_seed_hits && tc.nFoundHits() <= backward_fit_min_hits) {
+      remove_seed_hits = false;
+    }
+
+    // Stash HoTNodes at the end of m_hots.
+    int stash_end = m_hots.size();  // the stash occupies [stash_pos, stash_end) and grows downwards
+    int stash_pos = stash_end;
+
+    int idx = tc.lastCcIndex();
+
+    if (remove_seed_hits) {
+      // Skip invalid hits that would now be at the head of the candidate.
+      // Make sure to subtract / recount number of hits:
+      // as this is rather involved, just call addHitIdx() repeatedly so counts
+      // of holes get updated correctly.
+      // Though one should not care super much ... it's only relevant for relative scores
+      // and here we are trimming everything down to a single candidate.
+
+      int n_hits_to_pick = std::max(tc.nFoundHits() - tc.getNSeedHits(), backward_fit_min_hits);
+      while (n_hits_to_pick > 0) {
+        m_hots[--stash_pos] = m_hots[idx];
+        if (m_hots[idx].m_hot.index >= 0)  // only nodes with a non-negative hit index count towards the quota
+          --n_hits_to_pick;
+        idx = m_hots[idx].m_prev_idx;
+      }
+
+      m_hots_size = 0;
+      m_hots.clear();  // NOTE(review): the stashed nodes are still read from m_hots below, after this clear() -- confirm
+      tc.setLastCcIndex(-1);
+      tc.setNFoundHits(0);
+      tc.setNMissingHits(0);
+      tc.setNInsideMinusOneHits(0);
+      tc.setNTailMinusOneHits(0);
+      while (stash_pos != stash_end && m_hots[stash_pos].m_hot.index < 0)
+        ++stash_pos;
+      while (stash_pos != stash_end) {  // re-add the stashed nodes through addHitIdx(), in stash order
+        HoTNode &hn = m_hots[stash_pos];
+        tc.addHitIdx(hn.m_hot.index, hn.m_hot.layer, hn.m_chi2);
+        ++stash_pos;
+      }
+    } else {
+      while (idx != -1) {  // stash the whole chain; a m_prev_idx of -1 terminates it
+        m_hots[--stash_pos] = m_hots[idx];
+        idx = m_hots[idx].m_prev_idx;
+      }
+
+      // If we are not removing seed_hits, track is good as it is,
+      // just fixup m_hots and tc.lastCcIndex.
+      int pos = 0;
+      while (stash_pos != stash_end) {  // copy the stash to the front; node at pos links back to pos - 1
+        m_hots[pos].m_hot = m_hots[stash_pos].m_hot;
+        m_hots[pos].m_chi2 = m_hots[stash_pos].m_chi2;
+        m_hots[pos].m_prev_idx = pos - 1;
+        ++pos;
+        ++stash_pos;
+      }
+      m_hots.resize(pos);
+      m_hots_size = pos;
+      tc.setLastCcIndex(pos - 1);
+    }
+  }
+
+  void CombCandidate::beginBkwSearch() {
+    // Assumes compactifyHitStorageForBestCand() has already been called.
+    //
+    // This is to be called before backward-search to start with a single
+    // input candidate for backward combinatorial search.
+    //
+    // m_state and m_pickup_layer are also set.
+
+    TrackCand &tc = m_trk_cands[0];
+
+    m_state = Dormant;
+    m_pickup_layer = m_hots[0].m_hot.layer;  // layer of the node stored first in m_hots
+    m_lastHitIdx_before_bkwsearch = tc.lastCcIndex();  // the three *_before_bkwsearch values are used by endBkwSearch()
+    m_nInsideMinusOneHits_before_bkwsearch = tc.nInsideMinusOneHits();
+    m_nTailMinusOneHits_before_bkwsearch = tc.nTailMinusOneHits();
+    tc.setLastCcIndex(0);  // the candidate now ends at node 0 of m_hots
+    tc.setNInsideMinusOneHits(0);
+    tc.setNTailMinusOneHits(0);
+  }
+
+  void CombCandidate::endBkwSearch() {
+    // mergeCandsAndBestShortOne() has already been called (from MkBuilder::FindXxx()).
+    // We have to fixup the best candidate.
+
+    TrackCand &tc = m_trk_cands[0];
+
+    int curr_idx = tc.lastCcIndex();
+    if (curr_idx != 0) {  // 0 is the value beginBkwSearch() set; in that case the chain is left untouched
+      int last_idx = -1, prev_idx;
+      do {  // walk the m_prev_idx chain starting at curr_idx and reverse every link in place
+        prev_idx = m_hots[curr_idx].m_prev_idx;
+
+        m_hots[curr_idx].m_prev_idx = last_idx;
+
+        last_idx = curr_idx;
+        curr_idx = prev_idx;
+      } while (prev_idx != -1);
+    }
+
+    tc.setLastCcIndex(m_lastHitIdx_before_bkwsearch);  // restore the index saved by beginBkwSearch()
+    tc.setNInsideMinusOneHits(m_nInsideMinusOneHits_before_bkwsearch + tc.nInsideMinusOneHits());  // saved + current
+    tc.setNTailMinusOneHits(m_nTailMinusOneHits_before_bkwsearch + tc.nTailMinusOneHits());
+    m_lastHitIdx_before_bkwsearch = -1;  // the saved values are reset to -1 once consumed
+    m_nInsideMinusOneHits_before_bkwsearch = -1;
+    m_nTailMinusOneHits_before_bkwsearch = -1;
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/Ice/IceFPU.h b/RecoTracker/MkFitCore/src/Ice/IceFPU.h
new file mode 100644
index 0000000000000..f19b622e428c0
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Ice/IceFPU.h
@@ -0,0 +1,278 @@
+//----------------------------------------------------------------------
+/**
+ *	Contains FPU related code.
+ *	\file		IceFPU.h
+ *	\author		Pierre Terdiman
+ *	\date		April, 4, 2000
+ */
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// Include Guard
+#ifndef RecoTracker_MkFitCore_src_Ice_IceFPU_h
+#define RecoTracker_MkFitCore_src_Ice_IceFPU_h
+
+#define SIGN_BITMASK 0x80000000
+
+//! Integer representation of a floating-point value.
+#define IR(x) ((udword&)(x))
+
+//! Signed integer representation of a floating-point value.
+#define SIR(x) ((sdword&)(x))
+
+//! Absolute integer representation of a floating-point value
+#define AIR(x) (IR(x) & 0x7fffffff)
+
+//! Floating-point representation of an integer value.
+#define FR(x) ((float&)(x))
+
+//! Integer-based comparison of a floating point value.
+//! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
+#define IS_NEGATIVE_FLOAT(x) (IR(x) & 0x80000000)
+
+//! Fast fabs for floating-point values. It just clears the sign bit.
+//! Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
+inline_ float FastFabs(float x) {
+  udword FloatBits = IR(x) & 0x7fffffff;
+  return FR(FloatBits);
+}
+
+#ifdef WIN32
+//! Fast square root for floating-point values.
+inline_ float FastSqrt(float square) {
+  float retval;
+
+  __asm {
+      mov             eax, square
+      sub             eax, 0x3F800000
+      sar             eax, 1
+      add             eax, 0x3F800000
+      mov             [retval], eax
+  }
+  return retval;
+}
+#endif
+
+//! Saturates negative values to zero; non-negative values are returned unchanged.
+inline_ float fsat(float f) {
+  udword y = (udword&)f & ~((sdword&)f >> 31);
+  return (float&)y;
+}
+
+//! Computes 1.0f / sqrtf(x).
+inline_ float frsqrt(float f) {
+  float x = f * 0.5f;
+  udword y = 0x5f3759df - ((udword&)f >> 1);
+  // Iteration...
+  (float&)y = (float&)y * (1.5f - (x * (float&)y * (float&)y));
+  // Result
+  return (float&)y;
+}
+
+//! Computes 1.0f / sqrtf(x). Comes from NVIDIA.
+inline_ float InvSqrt(const float& x) {
+  udword tmp = (udword(IEEE_1_0 << 1) + IEEE_1_0 - *(udword*)&x) >> 1;
+  float y = *(float*)&tmp;
+  return y * (1.47f - 0.47f * x * y * y);
+}
+
+//! Computes 1.0f / sqrtf(x). Comes from Quake3. Looks like the first one I had above.
+//! See http://www.magic-software.com/3DGEDInvSqrt.html
+inline_ float RSqrt(float number) {
+  sdword i;  // float-sized integer (see SIR); 'long' is 8 bytes on LP64 and would read past the 4-byte float
+  float x2, y;
+  const float threehalfs = 1.5f;
+
+  x2 = number * 0.5f;
+  y = number;
+  i = *(sdword*)&y;  // reinterpret the float's bit pattern as an integer
+  i = 0x5f3759df - (i >> 1);  // magic-constant initial guess for 1/sqrt(number)
+  y = *(float*)&i;
+  y = y * (threehalfs - (x2 * y * y));  // one refinement iteration, same form as in frsqrt()
+
+  return y;
+}
+
+//! Approximates sqrtf(f) by halving the bit-pattern offset from 1.0f (0x3f800000); no refinement step is applied.
+inline_ float fsqrt(float f) {
+  udword y = (((sdword&)f - 0x3f800000) >> 1) + 0x3f800000;
+  // Iteration...?
+  // (float&)y = (3.0f - ((float&)y * (float&)y) / f) * (float&)y * 0.5f;
+  // Result
+  return (float&)y;
+}
+
+//! Returns the float ranged epsilon value.
+inline_ float fepsilon(float f) {
+  udword b = (udword&)f & 0xff800000;
+  udword a = b | 0x00000001;
+  (float&)a -= (float&)b;
+  // Result
+  return (float&)a;
+}
+
+//! Is the float valid ? IsNAN also requires a non-zero mantissa, so +/-infinity is not reported as NaN.
+inline_ bool IsNAN(float value) { return (IR(value) & 0x7f800000) == 0x7f800000 && (IR(value) & 0x007fffff) != 0; }
+inline_ bool IsIndeterminate(float value) { return IR(value) == 0xffc00000; }
+inline_ bool IsPlusInf(float value) { return IR(value) == 0x7f800000; }
+inline_ bool IsMinusInf(float value) { return IR(value) == 0xff800000; }
+
+inline_ bool IsValidFloat(float value) {
+  if (IsNAN(value))
+    return false;
+  if (IsIndeterminate(value))
+    return false;
+  if (IsPlusInf(value))
+    return false;
+  if (IsMinusInf(value))
+    return false;
+  return true;
+}
+
+#define CHECK_VALID_FLOAT(x) ASSERT(IsValidFloat(x));
+
+/*
+//! FPU precision setting function.
+inline_ void SetFPU()
+{
+// This function evaluates whether the floating-point
+// control word is set to single precision/round to nearest/
+// exceptions disabled. If these conditions don't hold, the
+// function changes the control word to set them and returns
+// true, putting the old control word value in the passback
+// location pointed to by pwOldCW.
+{
+uword wTemp, wSave;
+
+__asm fstcw wSave
+if (wSave & 0x300 ||            // Not single mode
+0x3f != (wSave & 0x3f) ||   // Exceptions enabled
+wSave & 0xC00)              // Not round to nearest mode
+{
+__asm
+{
+mov ax, wSave
+and ax, not 300h    ;; single mode
+or  ax, 3fh         ;; disable all exceptions
+and ax, not 0xC00   ;; round to nearest mode
+mov wTemp, ax
+fldcw   wTemp
+}
+}
+}
+}
+*/
+//! This function computes the smallest float increment above 1.0f (you can also directly use FLT_EPSILON)
+inline_ float ComputeFloatEpsilon() {
+  float f = 1.0f;
+  ((udword&)f) ^= 1;
+  return f - 1.0f;  // You can check it's the same as FLT_EPSILON
+}
+
+inline_ bool IsFloatZero(float x, float epsilon = 1e-6f) { return x * x < epsilon; }
+
+#ifdef WIN32
+#define FCOMI_ST0 _asm _emit 0xdb _asm _emit 0xf0
+#define FCOMIP_ST0 _asm _emit 0xdf _asm _emit 0xf0
+#define FCMOVB_ST0 _asm _emit 0xda _asm _emit 0xc0
+#define FCMOVNB_ST0 _asm _emit 0xdb _asm _emit 0xc0
+
+#define FCOMI_ST1 _asm _emit 0xdb _asm _emit 0xf1
+#define FCOMIP_ST1 _asm _emit 0xdf _asm _emit 0xf1
+#define FCMOVB_ST1 _asm _emit 0xda _asm _emit 0xc1
+#define FCMOVNB_ST1 _asm _emit 0xdb _asm _emit 0xc1
+
+#define FCOMI_ST2 _asm _emit 0xdb _asm _emit 0xf2
+#define FCOMIP_ST2 _asm _emit 0xdf _asm _emit 0xf2
+#define FCMOVB_ST2 _asm _emit 0xda _asm _emit 0xc2
+#define FCMOVNB_ST2 _asm _emit 0xdb _asm _emit 0xc2
+
+#define FCOMI_ST3 _asm _emit 0xdb _asm _emit 0xf3
+#define FCOMIP_ST3 _asm _emit 0xdf _asm _emit 0xf3
+#define FCMOVB_ST3 _asm _emit 0xda _asm _emit 0xc3
+#define FCMOVNB_ST3 _asm _emit 0xdb _asm _emit 0xc3
+
+#define FCOMI_ST4 _asm _emit 0xdb _asm _emit 0xf4
+#define FCOMIP_ST4 _asm _emit 0xdf _asm _emit 0xf4
+#define FCMOVB_ST4 _asm _emit 0xda _asm _emit 0xc4
+#define FCMOVNB_ST4 _asm _emit 0xdb _asm _emit 0xc4
+
+#define FCOMI_ST5 _asm _emit 0xdb _asm _emit 0xf5
+#define FCOMIP_ST5 _asm _emit 0xdf _asm _emit 0xf5
+#define FCMOVB_ST5 _asm _emit 0xda _asm _emit 0xc5
+#define FCMOVNB_ST5 _asm _emit 0xdb _asm _emit 0xc5
+
+#define FCOMI_ST6 _asm _emit 0xdb _asm _emit 0xf6
+#define FCOMIP_ST6 _asm _emit 0xdf _asm _emit 0xf6
+#define FCMOVB_ST6 _asm _emit 0xda _asm _emit 0xc6
+#define FCMOVNB_ST6 _asm _emit 0xdb _asm _emit 0xc6
+
+#define FCOMI_ST7 _asm _emit 0xdb _asm _emit 0xf7
+#define FCOMIP_ST7 _asm _emit 0xdf _asm _emit 0xf7
+#define FCMOVB_ST7 _asm _emit 0xda _asm _emit 0xc7
+#define FCMOVNB_ST7 _asm _emit 0xdb _asm _emit 0xc7
+
+//! A global function to find MAX(a,b) using FCOMI/FCMOV
+inline_ float FCMax2(float a, float b) {
+  float Res;
+  _asm fld[a] _asm fld[b] FCOMI_ST1 FCMOVB_ST1 _asm fstp[Res] _asm fcomp return Res;
+}
+
+//! A global function to find MIN(a,b) using FCOMI/FCMOV
+inline_ float FCMin2(float a, float b) {
+  float Res;
+  _asm fld[a] _asm fld[b] FCOMI_ST1 FCMOVNB_ST1 _asm fstp[Res] _asm fcomp return Res;
+}
+
+//! A global function to find MAX(a,b,c) using FCOMI/FCMOV
+inline_ float FCMax3(float a, float b, float c) {
+  float Res;
+  _asm fld[a] _asm fld[b] _asm fld[c] FCOMI_ST1 FCMOVB_ST1 FCOMI_ST2 FCMOVB_ST2 _asm fstp[Res] _asm fcompp return Res;
+}
+
+//! A global function to find MIN(a,b,c) using FCOMI/FCMOV
+inline_ float FCMin3(float a, float b, float c) {
+  float Res;
+  _asm fld[a] _asm fld[b] _asm fld[c] FCOMI_ST1 FCMOVNB_ST1 FCOMI_ST2 FCMOVNB_ST2 _asm fstp[Res] _asm fcompp return Res;
+}
+#endif
+
+inline_ int ConvertToSortable(float f) {
+  int& Fi = (int&)f;
+  int Fmask = (Fi >> 31);
+  Fi ^= Fmask;
+  Fmask &= ~(1 << 31);
+  Fi -= Fmask;
+  return Fi;
+}
+
+enum FPUMode {
+  FPU_FLOOR = 0,
+  FPU_CEIL = 1,
+  FPU_BEST = 2,
+
+  FPU_FORCE_DWORD = 0x7fffffff
+};
+
+#ifdef WIN32
+FUNCTION ICECORE_API FPUMode GetFPUMode();
+FUNCTION ICECORE_API void SaveFPU();
+FUNCTION ICECORE_API void RestoreFPU();
+FUNCTION ICECORE_API void SetFPUFloorMode();
+FUNCTION ICECORE_API void SetFPUCeilMode();
+FUNCTION ICECORE_API void SetFPUBestMode();
+
+FUNCTION ICECORE_API void SetFPUPrecision24();
+FUNCTION ICECORE_API void SetFPUPrecision53();
+FUNCTION ICECORE_API void SetFPUPrecision64();
+FUNCTION ICECORE_API void SetFPURoundingChop();
+FUNCTION ICECORE_API void SetFPURoundingUp();
+FUNCTION ICECORE_API void SetFPURoundingDown();
+FUNCTION ICECORE_API void SetFPURoundingNear();
+
+FUNCTION ICECORE_API int intChop(const float& f);
+FUNCTION ICECORE_API int intFloor(const float& f);
+FUNCTION ICECORE_API int intCeil(const float& f);
+#endif
+
+#endif  // RecoTracker_MkFitCore_src_Ice_IceFPU_h
diff --git a/RecoTracker/MkFitCore/src/Ice/IceMemoryMacros.h b/RecoTracker/MkFitCore/src/Ice/IceMemoryMacros.h
new file mode 100644
index 0000000000000..65e114cf62043
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Ice/IceMemoryMacros.h
@@ -0,0 +1,123 @@
+//----------------------------------------------------------------------
+/**
+ *	Contains all memory macros.
+ *	\file		IceMemoryMacros.h
+ *	\author		Pierre Terdiman
+ *	\date		April, 4, 2000
+ */
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// Include Guard
+#ifndef RecoTracker_MkFitCore_src_Ice_IceMemoryMacros_h
+#define RecoTracker_MkFitCore_src_Ice_IceMemoryMacros_h
+
+#undef ZeroMemory
+#undef CopyMemory
+#undef MoveMemory
+#undef FillMemory
+
+#include <cstring>
+
+//!	Clears a buffer.
+//!	\param		addr	[in] buffer address
+//!	\param		size	[in] buffer length
+//!	\see		FillMemory
+//!	\see		StoreDwords
+//!	\see		CopyMemory
+//!	\see		MoveMemory
+inline void ZeroMemory(void* addr, udword size) { memset(addr, 0, size); }
+
+//!	Fills a buffer with a given byte.
+//!	\param		dest	[in] buffer address
+//!	\param		size	[in] buffer length
+//!	\param		val		[in] the byte value
+//!	\see		StoreDwords
+//!	\see		ZeroMemory
+//!	\see		CopyMemory
+//!	\see		MoveMemory
+inline void FillMemory(void* dest, udword size, ubyte val) { memset(dest, val, size); }
+
+#ifdef WIN32
+//!	Fills a buffer with a given dword.
+//!	\param		addr	[in] buffer address
+//!	\param		nb		[in] number of dwords to write
+//!	\param		value	[in] the dword value
+//!	\see		FillMemory
+//!	\see		ZeroMemory
+//!	\see		CopyMemory
+//!	\see		MoveMemory
+//!	\warning	writes nb*4 bytes !
+inline_ void StoreDwords(udword* dest, udword nb, udword value) {
+  // The asm code below **SHOULD** be equivalent to one of those C versions
+  // or the other if your compiler is good: (checked on VC++ 6.0)
+  //
+  //	1) while(nb--)	*dest++ = value;
+  //
+  //	2) for(udword i=0;i<nb;i++)	dest[i] = value;
+  //
+  _asm push eax _asm push ecx _asm push edi _asm mov edi, dest _asm mov ecx, nb _asm mov eax,
+      value _asm rep stosd _asm pop edi _asm pop ecx _asm pop eax
+}
+#endif
+
+//!	Copies a buffer.
+//!	\param		dest	[in] destination buffer address
+//!	\param		src		[in] source buffer address
+//!	\param		size	[in] buffer length
+//!	\see		ZeroMemory
+//!	\see		FillMemory
+//!	\see		StoreDwords
+//!	\see		MoveMemory
+inline void CopyMemory(void* dest, const void* src, udword size) { memcpy(dest, src, size); }
+
+//!	Moves a buffer.
+//!	\param		dest	[in] destination buffer address
+//!	\param		src		[in] source buffer address
+//!	\param		size	[in] buffer length
+//!	\see		ZeroMemory
+//!	\see		FillMemory
+//!	\see		StoreDwords
+//!	\see		CopyMemory
+inline void MoveMemory(void* dest, const void* src, udword size) { memmove(dest, src, size); }
+
+#define SIZEOFOBJECT sizeof(*this)  //!< Gives the size of current object. Avoid some mistakes (e.g. "sizeof(this)").
+//#define CLEAROBJECT		{ memset(this, 0, SIZEOFOBJECT); } //!< Clears current object. Laziness is my business. HANDLE WITH CARE.
+#define DELETESINGLE(x) \
+  if (x) {              \
+    delete x;           \
+    x = 0;              \
+  }  //!< Deletes an instance of a class.
+#define DELETEARRAY(x)    \
+  if (x) {                \
+    operator delete[](x); \
+    x = 0;                \
+  }  //!< Deletes an array.
+#define SAFE_RELEASE(x) \
+  if (x) {              \
+    (x)->release();     \
+    (x) = 0;            \
+  }  //!< Safe D3D-style release
+#define SAFE_DESTRUCT(x) \
+  if (x) {               \
+    (x)->SelfDestruct(); \
+    (x) = 0;             \
+  }  //!< Safe ICE-style release
+
+#ifdef __ICEERROR_H__
+#define CHECKALLOC(x) \
+  if (!x)             \
+    return SetIceError("Out of memory.", EC_OUT_OF_MEMORY);  //!< Standard alloc checking. HANDLE WITH CARE.
+#else
+#define CHECKALLOC(x) \
+  if (!x)             \
+    return false;
+#endif
+
+//! Standard allocation cycle
+#define SAFE_ALLOC(ptr, type, count) \
+  DELETEARRAY(ptr);                  \
+  ptr = new type[count];             \
+  CHECKALLOC(ptr);
+
+#endif  // RecoTracker_MkFitCore_src_Ice_IceMemoryMacros_h
diff --git a/RecoTracker/MkFitCore/src/Ice/IcePreprocessor.h b/RecoTracker/MkFitCore/src/Ice/IcePreprocessor.h
new file mode 100644
index 0000000000000..1587c63c721a0
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Ice/IcePreprocessor.h
@@ -0,0 +1,23 @@
+//----------------------------------------------------------------------
+/**
+ *	Contains preprocessor stuff. This should be the first included header.
+ *	\file		IcePreprocessor.h
+ *	\author		Pierre Terdiman
+ *	\date		April, 4, 2000
+ */
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// Include Guard
+#ifndef RecoTracker_MkFitCore_src_Ice_IcePreprocessor_h
+#define RecoTracker_MkFitCore_src_Ice_IcePreprocessor_h
+
+#define FUNCTION extern "C"
+
+// Cosmetic stuff [mainly useful with multiple inheritance]
+#define override(base_class) virtual
+
+// Down the hatch
+// #pragma inline_depth( 255 ) // MT: this annoys gcc.
+
+#endif  // RecoTracker_MkFitCore_src_Ice_IcePreprocessor_h
diff --git a/RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.cc b/RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.cc
new file mode 100644
index 0000000000000..87581d3345e94
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.cc
@@ -0,0 +1,547 @@
+//----------------------------------------------------------------------
+/**
+ *	Contains source code from the article "Radix Sort Revisited".
+ *	\file		IceRevisitedRadix.cc
+ *	\author		Pierre Terdiman
+ *	\date		April, 4, 2000
+ */
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+/**
+ *	Revisited Radix Sort.
+ *	This is my new radix routine:
+ *  - it uses indices and doesn't recopy the values anymore, hence wasting less ram
+ *  - it creates all the histograms in one run instead of four
+ *  - it sorts words faster than dwords and bytes faster than words
+ *  - it correctly sorts negative floating-point values by patching the offsets
+ *  - it automatically takes advantage of temporal coherence
+ *  - multiple keys support is a side effect of temporal coherence
+ *  - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
+ *
+ *	History:
+ *	- 08.15.98: very first version
+ *	- 04.04.00: recoded for the radix article
+ *	- 12.xx.00: code lifting
+ *	- 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
+ *	- 10.11.01: added local ram support
+ *	- 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
+ *	- 01.02.02:	- "mIndices" renamed => "mRanks". That's a rank sorter after all.
+ *			- ranks are not "reset" anymore, but implicit on first calls
+ *	- 07.05.02:	- offsets rewritten with one less indirection.
+ *	- 11.03.02:	- "bool" replaced with RadixHint enum
+ *
+ *	\class		RadixSort
+ *	\author		Pierre Terdiman
+ *	\version	1.4
+ *	\date		August, 15, 1998
+ */
+//----------------------------------------------------------------------
+
+/*
+To do:
+	- add an offset parameter between two input values (avoid some data recopy sometimes)
+	- unroll ? asm ?
+	- 11 bits trick & 3 passes as Michael did
+	- prefetch stuff the day I have a P3
+	- make a version with 16-bits indices ?
+*/
+
+//------------------------------------------------------------------------------
+
+// Snatch from Opcode.h in Gled::Var1
+
+#include "IceRevisitedRadix.h"
+
+#include "IceMemoryMacros.h"
+
+//------------------------------------------------------------------------------
+
+#define INVALIDATE_RANKS mCurrentSize |= 0x80000000  // Set bit 31 of mCurrentSize: marks the ranks as invalid
+#define VALIDATE_RANKS mCurrentSize &= 0x7fffffff    // Clear bit 31 of mCurrentSize: marks the ranks as valid
+#define CURRENT_SIZE (mCurrentSize & 0x7fffffff)     // mCurrentSize with the flag bit masked off
+#define INVALID_RANKS (mCurrentSize & 0x80000000)    // Non-zero while the invalid-ranks bit is set
+
+#define CHECK_RESIZE(n)     \
+  if (n != mPreviousSize) { \
+    if (n > mCurrentSize)   \
+      Resize(n);            \
+    else                    \
+      ResetRanks();         \
+    mPreviousSize = n;      \
+  }  // NOTE(review): unused in this file; mPreviousSize/ResetRanks are not declared in RadixSort
+
+#define CREATE_HISTOGRAMS(type, buffer)                                                \
+  /* Clear counters/histograms */                                                      \
+  ZeroMemory(mHistogram, 256 * 4 * sizeof(udword));                                    \
+                                                                                       \
+  /* Prepare to count */                                                               \
+  ubyte* p = (ubyte*)input;                                                            \
+  ubyte* pe = &p[nb * 4];                                                              \
+  udword* h0 = &mHistogram[0];   /* Histogram for first pass (LSB) */                  \
+  udword* h1 = &mHistogram[256]; /* Histogram for second pass      */                  \
+  udword* h2 = &mHistogram[512]; /* Histogram for third pass       */                  \
+  udword* h3 = &mHistogram[768]; /* Histogram for last pass (MSB)  */                  \
+                                                                                       \
+  bool AlreadySorted = true; /* Optimism... */                                         \
+                                                                                       \
+  if (INVALID_RANKS) {                                                                 \
+    /* No valid ranks: scan in memory order */                                         \
+    type* Running = (type*)buffer;                                                     \
+    type PrevVal = *Running;                                                           \
+                                                                                       \
+    while (p != pe) {                                                                  \
+      /* Read input buffer in sequential order */                                      \
+      type Val = *Running++;                                                           \
+      /* Check whether already sorted or not */                                        \
+      if (Val < PrevVal) {                                                             \
+        AlreadySorted = false;                                                         \
+        break;                                                                         \
+      } /* Early out */                                                                \
+      /* Update for next iteration */                                                  \
+      PrevVal = Val;                                                                   \
+                                                                                       \
+      /* Create histograms */                                                          \
+      h0[*p++]++;                                                                      \
+      h1[*p++]++;                                                                      \
+      h2[*p++]++;                                                                      \
+      h3[*p++]++;                                                                      \
+    }                                                                                  \
+                                                                                       \
+    /* If all input values are already sorted, no sorting pass is needed. Ranks are */ \
+    /* not valid yet in this branch, so the identity permutation is written before  */ \
+    /* returning.                                                                   */ \
+    if (AlreadySorted) {                                                               \
+      mNbHits++;                                                                       \
+      for (udword i = 0; i < nb; i++)                                                  \
+        mRanks[i] = i;                                                                 \
+      return *this;                                                                    \
+    }                                                                                  \
+  } else {                                                                             \
+    /* Valid ranks: scan in previous sorted order */                                   \
+    udword* Indices = mRanks;                                                          \
+    type PrevVal = (type)buffer[*Indices];                                             \
+                                                                                       \
+    while (p != pe) {                                                                  \
+      /* Read input buffer in previous sorted order */                                 \
+      type Val = (type)buffer[*Indices++];                                             \
+      /* Check whether already sorted or not */                                        \
+      if (Val < PrevVal) {                                                             \
+        AlreadySorted = false;                                                         \
+        break;                                                                         \
+      } /* Early out */                                                                \
+      /* Update for next iteration */                                                  \
+      PrevVal = Val;                                                                   \
+                                                                                       \
+      /* Create histograms */                                                          \
+      h0[*p++]++;                                                                      \
+      h1[*p++]++;                                                                      \
+      h2[*p++]++;                                                                      \
+      h3[*p++]++;                                                                      \
+    }                                                                                  \
+                                                                                       \
+    /* If all input values are already sorted, we just have to return and leave the */ \
+    /* previous list unchanged. That way the routine may take advantage of temporal */ \
+    /* coherence, for example when used to sort transparent faces.                  */ \
+    if (AlreadySorted) {                                                               \
+      mNbHits++;                                                                       \
+      return *this;                                                                    \
+    }                                                                                  \
+  }                                                                                    \
+                                                                                       \
+  /* Else there has been an early out and we must finish computing the histograms */   \
+  while (p != pe) {                                                                    \
+    /* Create histograms without the previous overhead */                              \
+    h0[*p++]++;                                                                        \
+    h1[*p++]++;                                                                        \
+    h2[*p++]++;                                                                        \
+    h3[*p++]++;                                                                        \
+  }
+
+#define CHECK_PASS_VALIDITY(pass)                                                         \
+  /* Shortcut to current counters */                                                      \
+  udword* CurCount = &mHistogram[pass << 8];                                              \
+                                                                                          \
+  /* Reset flag. The sorting pass is supposed to be performed. (default) */               \
+  bool PerformPass = true;                                                                \
+                                                                                          \
+  /* Check pass validity */                                                               \
+                                                                                          \
+  /* If all values have the same byte, sorting is useless. */                             \
+  /* It may happen when sorting bytes or words instead of dwords. */                      \
+  /* This routine actually sorts words faster than dwords, and bytes */                   \
+  /* faster than words. Standard running time (O(4*n)) is reduced to O(2*n) */            \
+  /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
+                                                                                          \
+  /* Byte 'pass' of the first input value (memory order) */                               \
+  ubyte UniqueVal = *(((ubyte*)input) + pass);                                            \
+                                                                                          \
+  /* If every value has that byte, the pass would not reorder anything */                 \
+  if (CurCount[UniqueVal] == nb)                                                          \
+    PerformPass = false;
+
+//----------------------------------------------------------------------
+/**
+ *	Constructor.
+ */
+//----------------------------------------------------------------------
+RadixSort::RadixSort() : mCurrentSize(0), mRanks(nullptr), mRanks2(nullptr), mTotalCalls(0), mNbHits(0) {
+#ifndef RADIX_LOCAL_RAM
+  // Heap-allocate the four histograms and the link table (Sort() declares them locally otherwise)
+  mHistogram = new udword[256 * 4];
+  mLink = new udword[256];
+#endif
+  // Start with the invalid-ranks flag set; no rank list has been allocated yet
+  INVALIDATE_RANKS;
+}
+
+//----------------------------------------------------------------------
+/**
+ *	Destructor.
+ */
+//----------------------------------------------------------------------
+RadixSort::~RadixSort() {
+  // Release the heap arrays (link/histogram are members only without RADIX_LOCAL_RAM)
+#ifndef RADIX_LOCAL_RAM
+  DELETEARRAY(mLink);
+  DELETEARRAY(mHistogram);
+#endif
+  DELETEARRAY(mRanks2);
+  DELETEARRAY(mRanks);
+}
+
+//----------------------------------------------------------------------
+/**
+ * Detach mRanks. After this the caller is responsible for
+ * freeing this array via delete [] operator.
+ */
+//----------------------------------------------------------------------
+udword* RadixSort::RelinquishRanks() {
+  // Hand the primary rank list to the caller and forget about it here
+  udword* released = mRanks;
+  DELETEARRAY(mRanks2);  // the scratch list is not handed out, free it
+  mRanks = nullptr;
+  mCurrentSize = 0;
+  return released;
+}
+
+//----------------------------------------------------------------------
+/**
+ *	Resizes the inner lists.
+ *	\param		nb	[in] new size (number of dwords)
+ *	\return		true if success
+ */
+//----------------------------------------------------------------------
+bool RadixSort::Resize(udword nb) {
+  // Drop both rank lists; nothing is copied over to the new arrays
+  DELETEARRAY(mRanks2);
+  DELETEARRAY(mRanks);
+
+  // NOTE(review): plain new[] reports failure by throwing, so these CHECKALLOCs look unreachable - confirm
+  mRanks = new udword[nb];
+  CHECKALLOC(mRanks);
+  mRanks2 = new udword[nb];
+  CHECKALLOC(mRanks2);
+
+  return true;
+}
+
+inline_ void RadixSort::CheckResize(udword nb) {
+  const udword held = CURRENT_SIZE;
+  if (nb == held)
+    return;  // same element count as the last call: keep lists and flag as they are
+  if (held < nb)
+    Resize(nb);  // growing: reallocate both rank lists
+  mCurrentSize = nb;
+  INVALIDATE_RANKS;
+}
+
+//----------------------------------------------------------------------
+/**
+ *	Main sort routine.
+ *	This one is for integer values. After the call, mRanks
+ *	contains a list of indices in sorted order, i.e. in the order
+ *	you may process your data.
+ *	\param		input	[in] a list of integer values to sort
+ *	\param		nb	[in] number of values to sort, must be < 2^31
+ *	\param		hint	[in] RADIX_SIGNED to handle negative values, 
+ *                                 RADIX_UNSIGNED if you know your input buffer only contains positive values
+ *	\return		Self-Reference
+ */
+//----------------------------------------------------------------------
+RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint) {
+  // Reject null/empty input and sizes that use bit 31 (reserved as the invalid-ranks flag)
+  if (!input || !nb || nb & 0x80000000)
+    return *this;
+
+  // Stats
+  mTotalCalls++;
+
+  // Resize lists if needed
+  CheckResize(nb);
+
+#ifdef RADIX_LOCAL_RAM
+  // Allocate histograms & offsets on the stack
+  udword mHistogram[256 * 4];
+  udword* mLink[256];
+#endif
+
+  // Create histograms (counters). Counters for all passes are created in one run.
+  // Pros: read input buffer once instead of four times
+  // Cons: mHistogram is 4Kb instead of 1Kb
+  // We must take care of signed/unsigned values for temporal
+  // coherence.... I just have 2 code paths even if just a single
+  // opcode changes. Self-modifying code, someone?
+  if (hint == RADIX_UNSIGNED) {
+    CREATE_HISTOGRAMS(udword, input);
+  } else {
+    CREATE_HISTOGRAMS(sdword, input);
+  }
+
+  // Compute #negative values involved if needed
+  udword NbNegativeValues = 0;
+  if (hint == RADIX_SIGNED) {
+    // An efficient way to compute the number of negatives values
+    // we'll have to deal with is simply to sum the 128 last values
+    // of the last histogram. Last histogram because that's the one
+    // for the Most Significant Byte, responsible for the sign. 128
+    // last values because the 128 first ones are related to
+    // positive numbers.
+    udword* h3 = &mHistogram[768];
+    for (udword i = 128; i < 256; i++)
+      NbNegativeValues += h3[i];  // 768 for last histogram, 128 for negative part
+  }
+
+  // Radix sort, j is the pass number (0=LSB, 3=MSB)
+  for (udword j = 0; j < 4; j++) {
+    CHECK_PASS_VALIDITY(j);
+
+    // Sometimes the fourth (negative) pass is skipped because all
+    // numbers are negative and the MSB is 0xFF (for example). This
+    // is not a problem, numbers are correctly sorted anyway.
+    if (PerformPass) {
+      // Should we care about negative values?
+      if (j != 3 || hint == RADIX_UNSIGNED) {
+        // Passes 0-2, or any pass of unsigned input: plain ascending bucket offsets
+
+        // Create offsets
+        mLink[0] = mRanks2;
+        for (udword i = 1; i < 256; i++)
+          mLink[i] = mLink[i - 1] + CurCount[i - 1];
+      } else {
+        // This is a special case to correctly handle negative
+        // integers. They're sorted in the right order but at
+        // the wrong place.
+
+        // Create biased offsets, in order for negative numbers to be sorted as well
+        mLink[0] = &mRanks2[NbNegativeValues];  // First positive number takes place after the negative ones
+        for (udword i = 1; i < 128; i++)
+          mLink[i] = mLink[i - 1] + CurCount[i - 1];  // Buckets 1 to 127 for positive numbers
+
+        // Fixing the wrong place for negative values
+        mLink[128] = mRanks2;
+        for (udword i = 129; i < 256; i++)
+          mLink[i] = mLink[i - 1] + CurCount[i - 1];
+      }
+
+      // Perform Radix Sort
+      ubyte* InputBytes = (ubyte*)input;
+      InputBytes += j;
+      if (INVALID_RANKS) {
+        for (udword i = 0; i < nb; i++)
+          *mLink[InputBytes[i << 2]]++ = i;
+        VALIDATE_RANKS;
+      } else {
+        udword* Indices = mRanks;
+        udword* IndicesEnd = &mRanks[nb];
+        while (Indices != IndicesEnd) {
+          udword id = *Indices++;
+          *mLink[InputBytes[id << 2]]++ = id;
+        }
+      }
+
+      // Swap pointers for next pass. Valid indices - the most
+      // recent ones - are in mRanks after the swap.
+      udword* Tmp = mRanks;
+      mRanks = mRanks2;
+      mRanks2 = Tmp;
+    }
+  }
+  return *this;
+}
+
+//----------------------------------------------------------------------
+/**
+ *	Main sort routine.
+ *	This one is for floating-point values. After the call, mRanks
+ *	contains a list of indices in sorted order, i.e. in the order
+ *	you may process your data.
+ *	\param		input		[in] a list of floating-point values to sort
+ *	\param		nb		[in] number of values to sort, must be < 2^31
+ *	\return		Self-Reference
+ *	\warning	only sorts IEEE floating-point values
+ */
+//----------------------------------------------------------------------
+RadixSort& RadixSort::Sort(const float* input2, udword nb) {
+  // Reject null/empty input and sizes that use bit 31 (reserved as the invalid-ranks flag)
+  if (!input2 || !nb || nb & 0x80000000)
+    return *this;
+
+  // Stats
+  mTotalCalls++;
+
+  udword* input = (udword*)input2;  // Bit-pattern view of the floats (this cast also drops const)
+
+  // Resize lists if needed
+  CheckResize(nb);
+
+#ifdef RADIX_LOCAL_RAM
+  // Allocate histograms & offsets on the stack
+  udword mHistogram[256 * 4];
+  udword* mLink[256];
+#endif
+
+  // Create histograms (counters). Counters for all passes are created
+  // in one run.
+  // Pros: read input buffer once instead of four times
+  // Cons: mHistogram is 4Kb instead of 1Kb
+  //
+  // Floating-point values are always supposed to be signed values, so
+  // there's only one code path there.
+  // Please note the floating point comparison needed for temporal
+  // coherence! Although the resulting asm code is dreadful, this is
+  // surprisingly not such a performance hit - well, I suppose that's
+  // a big one on first generation Pentiums....We can't make
+  // comparison on integer representations because, as Chris said, it
+  // just wouldn't work with mixed positive/negative values....
+  { CREATE_HISTOGRAMS(float, input2); }
+
+  // Compute #negative values involved if needed
+  udword NbNegativeValues = 0;
+  // An efficient way to compute the number of negatives values we'll
+  // have to deal with is simply to sum the 128 last values of the
+  // last histogram. Last histogram because that's the one for the
+  // Most Significant Byte, responsible for the sign. 128 last values
+  // because the 128 first ones are related to positive numbers.
+  udword* h3 = &mHistogram[768];
+  for (udword i = 128; i < 256; i++)
+    NbNegativeValues += h3[i];  // 768 for last histogram, 128 for negative part
+
+  // Radix sort, j is the pass number (0=LSB, 3=MSB)
+  for (udword j = 0; j < 4; j++) {
+    // Should we care about negative values?
+    if (j != 3) {
+      // Passes 0-2 do not involve the sign byte: plain ascending bucket offsets
+      CHECK_PASS_VALIDITY(j);
+
+      if (PerformPass) {
+        // Create offsets
+        mLink[0] = mRanks2;
+        for (udword i = 1; i < 256; i++)
+          mLink[i] = mLink[i - 1] + CurCount[i - 1];
+
+        // Perform Radix Sort
+        ubyte* InputBytes = (ubyte*)input;
+        InputBytes += j;
+        if (INVALID_RANKS) {
+          for (udword i = 0; i < nb; i++)
+            *mLink[InputBytes[i << 2]]++ = i;
+          VALIDATE_RANKS;
+        } else {
+          udword* Indices = mRanks;
+          udword* IndicesEnd = &mRanks[nb];
+          while (Indices != IndicesEnd) {
+            udword id = *Indices++;
+            *mLink[InputBytes[id << 2]]++ = id;
+          }
+        }
+
+        // Swap pointers for next pass. Valid indices - the most
+        // recent ones - are in mRanks after the swap.
+        udword* Tmp = mRanks;
+        mRanks = mRanks2;
+        mRanks2 = Tmp;
+      }
+    } else {
+      // This is a special case to correctly handle negative values
+      CHECK_PASS_VALIDITY(j);
+
+      if (PerformPass) {
+        // Create biased offsets, in order for negative numbers
+        // to be sorted as well
+        mLink[0] = &mRanks2[NbNegativeValues];  // First positive number takes place after the negative ones
+        for (udword i = 1; i < 128; i++)
+          mLink[i] = mLink[i - 1] + CurCount[i - 1];  // Buckets 1 to 127 for positive numbers
+
+        // We must reverse the sorting order for negative numbers!
+        mLink[255] = mRanks2;
+        for (udword i = 0; i < 127; i++)
+          mLink[254 - i] = mLink[255 - i] + CurCount[255 - i];  // Fixing the wrong order for negative values
+        for (udword i = 128; i < 256; i++)
+          mLink[i] += CurCount[i];  // Fixing the wrong place for negative values
+
+        // Perform Radix Sort
+        if (INVALID_RANKS) {
+          for (udword i = 0; i < nb; i++) {
+            udword Radix = input[i] >> 24;  // Radix byte, same as above. AND is useless here (udword).
+            // ### cmp to be killed. Not good. Later.
+            if (Radix < 128)
+              *mLink[Radix]++ = i;  // Number is positive, same as above
+            else
+              *(--mLink[Radix]) = i;  // Number is negative, flip the sorting order
+          }
+          VALIDATE_RANKS;
+        } else {
+          for (udword i = 0; i < nb; i++) {
+            udword Radix = input[mRanks[i]] >> 24;  // Radix byte, same as above. AND is useless here (udword).
+            // ### cmp to be killed. Not good. Later.
+            if (Radix < 128)
+              *mLink[Radix]++ = mRanks[i];  // Number is positive, same as above
+            else
+              *(--mLink[Radix]) = mRanks[i];  // Number is negative, flip the sorting order
+          }
+        }
+        // Swap pointers for next pass. Valid indices - the most
+        // recent ones - are in mRanks after the swap.
+        udword* Tmp = mRanks;
+        mRanks = mRanks2;
+        mRanks2 = Tmp;
+      } else {
+        // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
+        if (UniqueVal >= 128) {
+          if (INVALID_RANKS) {
+            // ###Possible?
+            for (udword i = 0; i < nb; i++)
+              mRanks2[i] = nb - i - 1;
+            VALIDATE_RANKS;
+          } else {
+            for (udword i = 0; i < nb; i++)
+              mRanks2[i] = mRanks[nb - i - 1];
+          }
+
+          // Swap pointers for next pass. Valid indices - the
+          // most recent ones - are in mRanks after the swap.
+          udword* Tmp = mRanks;
+          mRanks = mRanks2;
+          mRanks2 = Tmp;
+        }
+      }
+    }
+  }
+  return *this;
+}
+
+//----------------------------------------------------------------------
+/**
+ *	Gets the ram used.
+ *	\return		memory used in bytes
+ */
+//----------------------------------------------------------------------
+udword RadixSort::GetUsedRam() const {
+  // The object itself plus the two lists of indices, CURRENT_SIZE entries each
+  udword Total = sizeof(RadixSort) + 2 * CURRENT_SIZE * sizeof(udword);
+#ifndef RADIX_LOCAL_RAM
+  Total += 256 * sizeof(udword);      // Link
+  Total += 256 * 4 * sizeof(udword);  // Histograms
+#endif
+  return Total;
+}
diff --git a/RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.h b/RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.h
new file mode 100644
index 0000000000000..790d35f1a9397
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Ice/IceRevisitedRadix.h
@@ -0,0 +1,73 @@
+//----------------------------------------------------------------------
+/**
+ *	Contains source code from the article "Radix Sort Revisited".
+ *	\file		IceRevisitedRadix.h
+ *	\author		Pierre Terdiman
+ *	\date		April, 4, 2000
+ */
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// Include Guard
+#ifndef RecoTracker_MkFitCore_src_Ice_IceRevisitedRadix_h
+#define RecoTracker_MkFitCore_src_Ice_IceRevisitedRadix_h
+
+#include "IcePreprocessor.h"
+#include "IceTypes.h"
+
+//! Allocate histograms & offsets locally
+#define RADIX_LOCAL_RAM
+
+enum RadixHint {
+  RADIX_SIGNED,    //!< Input values are signed
+  RADIX_UNSIGNED,  //!< Input values are unsigned
+
+  RADIX_FORCE_DWORD = 0x7fffffff  //!< Not a real hint; presumably pads the enum to 32 bits - TODO confirm
+};
+
+class RadixSort {
+public:
+  // Constructor/Destructor
+  RadixSort();
+  ~RadixSort();
+  // Sorting methods
+  RadixSort& Sort(const udword* input, udword nb, RadixHint hint = RADIX_SIGNED);
+  RadixSort& Sort(const float* input, udword nb);
+
+  //! Access to results. mRanks is a list of indices in sorted order,
+  //! i.e. in the order you may further process your data
+  const udword* GetRanks() const { return mRanks; }
+
+  //! Detach mRanks. After this the caller is responsible for
+  //! freeing this array via delete [] operator.
+  udword* RelinquishRanks();
+
+  //! mRanks2 gets trashed on calling the sort routine, but
+  //! otherwise you can recycle it the way you want.
+  udword* GetRecyclable() const { return mRanks2; }
+
+  // Stats
+  udword GetUsedRam() const;
+  //! Returns the total number of calls to the radix sorter.
+  udword GetNbTotalCalls() const { return mTotalCalls; }
+  //! Returns the number of early exits due to temporal coherence.
+  udword GetNbHits() const { return mNbHits; }
+
+private:
+#ifndef RADIX_LOCAL_RAM
+  udword* mHistogram;  //!< Counters for each byte
+  udword* mLink;       //!< offsets (nearly a cumulative distribution function)
+#endif
+  udword mCurrentSize;  //!< Current size of the indices list; bit 31 flags invalid ranks
+  udword* mRanks;       //!< Two lists, swapped each pass
+  udword* mRanks2;
+  // Stats
+  udword mTotalCalls;  //!< Total number of calls to the sort routine
+  udword mNbHits;      //!< Number of early exits due to coherence
+
+  // Internal methods
+  void CheckResize(udword nb);
+  bool Resize(udword nb);
+};
+
+#endif  // RecoTracker_MkFitCore_src_Ice_IceRevisitedRadix_h
diff --git a/RecoTracker/MkFitCore/src/Ice/IceTypes.h b/RecoTracker/MkFitCore/src/Ice/IceTypes.h
new file mode 100644
index 0000000000000..fcb4f2209074b
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Ice/IceTypes.h
@@ -0,0 +1,119 @@
+//----------------------------------------------------------------------
+/**
+ *	Contains custom types.
+ *	\file		IceTypes.h
+ *	\author		Pierre Terdiman
+ *	\date		April, 4, 2000
+ */
+//----------------------------------------------------------------------
+
+//----------------------------------------------------------------------
+// Include Guard
+#ifndef RecoTracker_MkFitCore_src_Ice_IceTypes_h
+#define RecoTracker_MkFitCore_src_Ice_IceTypes_h
+
+#include <cfloat>
+#include <cstdlib>
+
+#define inline_ inline
+
+// Constants
+const float PI = 3.14159265358979323846f;      //!< PI
+const float HALFPI = 1.57079632679489661923f;  //!< 0.5 * PI
+const float TWOPI = 6.28318530717958647692f;   //!< 2.0 * PI
+const float INVPI = 0.31830988618379067154f;   //!< 1.0 / PI
+
+const float RADTODEG = 57.2957795130823208768f;  //!< 180.0 / PI
+const float DEGTORAD = 0.01745329251994329577f;  //!< PI / 180.0
+
+const float EXP = 2.71828182845904523536f;      //!< e
+const float INVLOG2 = 3.32192809488736234787f;  //!< 1.0 / log10(2)
+const float LN2 = 0.693147180559945f;           //!< ln(2)
+const float INVLN2 = 1.44269504089f;            //!< 1.0f / ln(2)
+
+const float INV3 = 0.33333333333333333333f;    //!< 1/3
+const float INV6 = 0.16666666666666666666f;    //!< 1/6
+const float INV7 = 0.14285714285714285714f;    //!< 1/7
+const float INV9 = 0.11111111111111111111f;    //!< 1/9
+const float INV255 = 0.00392156862745098039f;  //!< 1/255
+
+const float SQRT2 = 1.41421356237f;      //!< sqrt(2)
+const float INVSQRT2 = 0.707106781188f;  //!< 1 / sqrt(2)
+
+const float SQRT3 = 1.73205080757f;      //!< sqrt(3)
+const float INVSQRT3 = 0.577350269189f;  //!< 1 / sqrt(3)
+
+// Custom types used in ICE
+typedef signed char sbyte;          //!< sizeof(sbyte)	must be 1
+typedef unsigned char ubyte;        //!< sizeof(ubyte)	must be 1
+typedef signed short sword;         //!< sizeof(sword)	must be 2
+typedef unsigned short uword;       //!< sizeof(uword)	must be 2
+typedef signed int sdword;          //!< sizeof(sdword)	must be 4
+typedef unsigned int udword;        //!< sizeof(udword)	must be 4
+typedef signed long long sqword;    //!< sizeof(sqword)	must be 8
+typedef unsigned long long uqword;  //!< sizeof(uqword)	must be 8
+
+// Added by M. Tadel (needed for 64-bit port); sxword is the signed counterpart of uxword
+typedef signed long sxword;    //!< pointer-sized   signed integer
+typedef unsigned long uxword;  //!< pointer-sized unsigned integer
+
+const udword OPC_INVALID_ID = 0xffffffff;  //!< Invalid dword ID (counterpart of 0 pointers)
+const udword INVALID_NUMBER = 0xDEADBEEF;  //!< Standard junk value
+
+// Type ranges (signed minima are written as in-range negative expressions, not wrapped positive literals)
+const sbyte MAX_SBYTE = 0x7f;               //!< max possible sbyte value
+const sbyte MIN_SBYTE = -0x80;              //!< min possible sbyte value
+const ubyte MAX_UBYTE = 0xff;               //!< max possible ubyte value
+const ubyte MIN_UBYTE = 0x00;               //!< min possible ubyte value
+const sword MAX_SWORD = 0x7fff;             //!< max possible sword value
+const sword MIN_SWORD = -0x8000;            //!< min possible sword value
+const uword MAX_UWORD = 0xffff;             //!< max possible uword value
+const uword MIN_UWORD = 0x0000;             //!< min possible uword value
+const sdword MAX_SDWORD = 0x7fffffff;       //!< max possible sdword value
+const sdword MIN_SDWORD = -0x7fffffff - 1;  //!< min possible sdword value (0x80000000 itself is an unsigned literal)
+const udword MAX_UDWORD = 0xffffffff;       //!< max possible udword value
+const udword MIN_UDWORD = 0x00000000;       //!< min possible udword value
+
+const float MAX_FLOAT = FLT_MAX;                         //!< max possible float value
+const float MIN_FLOAT = -FLT_MAX;                        //!< min possible float value (most negative finite)
+const float ONE_OVER_RAND_MAX = 1.0f / float(RAND_MAX);  //!< Inverse of the max possible value returned by rand()
+
+const udword IEEE_1_0 = 0x3f800000;        //!< integer representation of 1.0
+const udword IEEE_255_0 = 0x437f0000;      //!< integer representation of 255.0
+const udword IEEE_MAX_FLOAT = 0x7f7fffff;  //!< integer representation of MAX_FLOAT
+const udword IEEE_MIN_FLOAT = 0xff7fffff;  //!< integer representation of MIN_FLOAT
+const udword IEEE_UNDERFLOW_LIMIT = 0x1a000000;
+
+#undef MIN
+#undef MAX
+#define MIN(a, b) ((a) < (b) ? (a) : (b))                    //!< Returns the min value between a and b
+#define MAX(a, b) ((a) > (b) ? (a) : (b))                    //!< Returns the max value between a and b
+#define MAXMAX(a, b, c) ((a) > (b) ? MAX(a, c) : MAX(b, c))  //!< Returns the max value between a, b and c
+
+template <class T>  // Returns b only when b < a, otherwise a
+inline_ const T& TMin(const T& a, const T& b) {
+  return b < a ? b : a;
+}
+template <class T>  // Returns b only when a < b, otherwise a
+inline_ const T& TMax(const T& a, const T& b) {
+  return a < b ? b : a;
+}
+template <class T>  // Lowers a to b when a > b
+inline_ void TSetMin(T& a, const T& b) {
+  if (a > b)
+    a = b;
+}
+template <class T>  // Raises a to b when a < b
+inline_ void TSetMax(T& a, const T& b) {
+  if (a < b)
+    a = b;
+}
+
+#ifdef _WIN32
+#define srand48(x) srand((unsigned int)(x))
+#define srandom(x) srand((unsigned int)(x))
+#define random() ((double)rand())
+#define drand48() ((double)(((double)rand()) / ((double)RAND_MAX)))
+#endif
+
+#endif  // RecoTracker_MkFitCore_src_Ice_IceTypes_h
diff --git a/RecoTracker/MkFitCore/src/IterationConfig.cc b/RecoTracker/MkFitCore/src/IterationConfig.cc
new file mode 100644
index 0000000000000..f0ddbd06390ea
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/IterationConfig.cc
@@ -0,0 +1,686 @@
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+
+#include "nlohmann/json.hpp"
+
+#include <fstream>
+#include <regex>
+#include <iostream>
+#include <iomanip>
+
+// Local variant of nlohmann's NON_INTRUSIVE helper that also covers ordered_json, to keep member order in saved JSON.
+#define ITCONF_DEFINE_TYPE_NON_INTRUSIVE(Type, ...)                                             \
+  inline void to_json(nlohmann::json &nlohmann_json_j, const Type &nlohmann_json_t) {           \
+    NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__))                    \
+  }                                                                                             \
+  inline void from_json(const nlohmann::json &nlohmann_json_j, Type &nlohmann_json_t) {         \
+    NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__))                  \
+  }                                                                                             \
+  inline void to_json(nlohmann::ordered_json &nlohmann_json_j, const Type &nlohmann_json_t) {   \
+    NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__))                    \
+  }                                                                                             \
+  inline void from_json(const nlohmann::ordered_json &nlohmann_json_j, Type &nlohmann_json_t) { \
+    NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__))                  \
+  }
+
+namespace mkfit {
+
+  // Begin AUTO code, some members commented out.
+
+  ITCONF_DEFINE_TYPE_NON_INTRUSIVE(mkfit::LayerControl,
+                                   /* int   */ m_layer)
+
+  ITCONF_DEFINE_TYPE_NON_INTRUSIVE(mkfit::SteeringParams,
+                                   /* std::vector<LayerControl> */ m_layer_plan,
+                                   /* int */ m_region,
+                                   /* int */ m_fwd_search_pickup,
+                                   /* int */ m_bkw_fit_last,
+                                   /* int */ m_bkw_search_pickup
+
+  )
+
+  ITCONF_DEFINE_TYPE_NON_INTRUSIVE(mkfit::IterationLayerConfig,
+                                   /* float */ m_select_min_dphi,
+                                   /* float */ m_select_max_dphi,
+                                   /* float */ m_select_min_dq,
+                                   /* float */ m_select_max_dq,
+                                   /* float */ c_dp_sf,
+                                   /* float */ c_dp_0,
+                                   /* float */ c_dp_1,
+                                   /* float */ c_dp_2,
+                                   /* float */ c_dq_sf,
+                                   /* float */ c_dq_0,
+                                   /* float */ c_dq_1,
+                                   /* float */ c_dq_2,
+                                   /* float */ c_c2_sf,
+                                   /* float */ c_c2_0,
+                                   /* float */ c_c2_1,
+                                   /* float */ c_c2_2)
+
+  ITCONF_DEFINE_TYPE_NON_INTRUSIVE(mkfit::IterationParams,
+                                   /* int */ nlayers_per_seed,
+                                   /* int */ maxCandsPerSeed,
+                                   /* int */ maxHolesPerCand,
+                                   /* int */ maxConsecHoles,
+                                   /* float */ chi2Cut_min,
+                                   /* float */ chi2CutOverlap,
+                                   /* float */ pTCutOverlap,
+                                   /* float */ c_ptthr_hpt,
+                                   /* float */ c_drmax_bh,
+                                   /* float */ c_dzmax_bh,
+                                   /* float */ c_drmax_eh,
+                                   /* float */ c_dzmax_eh,
+                                   /* float */ c_drmax_bl,
+                                   /* float */ c_dzmax_bl,
+                                   /* float */ c_drmax_el,
+                                   /* float */ c_dzmax_el,
+                                   /* int */ minHitsQF,
+                                   /* float */ fracSharedHits,
+                                   /* float */ drth_central,
+                                   /* float */ drth_obarrel,
+                                   /* float */ drth_forward
+
+  )
+
+  ITCONF_DEFINE_TYPE_NON_INTRUSIVE(
+      mkfit::IterationConfig,
+      /* int */ m_iteration_index,
+      /* int */ m_track_algorithm,
+      /* bool */ m_requires_seed_hit_sorting,
+      /* bool */ m_requires_quality_filter,
+      /* bool */ m_requires_dupclean_tight,
+      /* bool */ m_backward_search,
+      /* bool */ m_backward_drop_seed_hits,
+      /* int */ m_backward_fit_min_hits,
+      /* mkfit::IterationParams */ m_params,
+      /* mkfit::IterationParams */ m_backward_params,
+      /* int */ m_n_regions,
+      /* vector<int> */ m_region_order,
+      /* vector<mkfit::SteeringParams> */ m_steering_params,
+      /* vector<mkfit::IterationLayerConfig> */ m_layer_configs
+      // /* function<void(const TrackerInfo&,const TrackVec&,const EventOfHits&,IterationSeedPartition&)> */   m_partition_seeds
+  )
+
+  ITCONF_DEFINE_TYPE_NON_INTRUSIVE(mkfit::IterationsInfo,
+                                   /* vector<mkfit::IterationConfig> */ m_iterations)
+
+  // End AUTO code.
+
+  // ============================================================================
+  // ConfigJsonPatcher
+  // ============================================================================
+
+  ConfigJsonPatcher::ConfigJsonPatcher(bool verbose) : m_verbose(verbose) {}
+
+  ConfigJsonPatcher::~ConfigJsonPatcher() = default;
+
+  std::string ConfigJsonPatcher::get_abs_path() const {  // joins the m_path_stack entries in order
+    std::string joined;
+    joined.reserve(64);
+    for (const auto &segment : m_path_stack)
+      joined.append(segment);
+    return joined;
+  }
+
+  std::string ConfigJsonPatcher::exc_hdr(const char *func) const {
+    // Builds the prefix used by the exception messages of this class:
+    //   ConfigJsonPatcher[::<func>] '<absolute path>'
+    // The "::<func>" part is left out when func is null.
+    std::string header("ConfigJsonPatcher");
+    header.reserve(128);
+    if (func != nullptr)
+      header.append("::").append(func);
+    header.append(" '");
+    header.append(get_abs_path());
+    header.append("' ");
+    return header;
+  }
+
+  template <class T>
+  void ConfigJsonPatcher::load(const T &o) {
+    m_json = std::make_unique<nlohmann::json>();
+    *m_json = o;
+    cd_top();
+  }
+  template void ConfigJsonPatcher::load<IterationsInfo>(const IterationsInfo &o);
+  template void ConfigJsonPatcher::load<IterationConfig>(const IterationConfig &o);
+
+  template <class T>
+  void ConfigJsonPatcher::save(T &o) {
+    from_json(*m_json, o);
+  }
+  template void ConfigJsonPatcher::save<IterationConfig>(IterationConfig &o);
+
+  // Specialization that patches the existing IterationConfig elements of IterationsInfo in place:
+  // the default deserializer apparently reinitializes vector elements with default c-tors.
+  template <>
+  void ConfigJsonPatcher::save<IterationsInfo>(IterationsInfo &o) {
+    auto &itc_arr = m_json->at("m_iterations");
+    for (int i = 0; i < o.size(); ++i) {
+      from_json(itc_arr[i], o[i]);
+    }
+  }
+
+  void ConfigJsonPatcher::cd(const std::string &path) {
+    nlohmann::json *target = &m_current->at(nlohmann::json::json_pointer(path));  // throws on a bad path
+    m_json_stack.push_back(m_current);  // stacks are only touched once the path is known to be valid
+    m_path_stack.push_back(path);
+    m_current = target;
+  }
+
+  void ConfigJsonPatcher::cd_up(const std::string &path) {
+    // Return to the location saved by the last cd(); if path is non-empty, cd(path) from there.
+    if (m_json_stack.empty())
+      throw std::runtime_error("JSON stack empty on cd_up");
+    m_current = m_json_stack.back();
+    m_json_stack.pop_back();
+    m_path_stack.pop_back();
+    if (!path.empty())
+      cd(path);
+  }
+
+  void ConfigJsonPatcher::cd_top(const std::string &path) {  // reset to the document root, then optional cd(path)
+    m_current = m_json.get();
+    m_json_stack.clear();
+    m_path_stack.clear();
+    if (!path.empty())
+      cd(path);
+  }
+
+  template <typename T>
+  void ConfigJsonPatcher::replace(const std::string &path, T val) {  // path is resolved relative to m_current
+    nlohmann::json::json_pointer jp(path);
+    m_current->at(jp) = val;  // at() throws when the path does not exist
+  }
+  template void ConfigJsonPatcher::replace<int>(const std::string &path, int val);
+  template void ConfigJsonPatcher::replace<float>(const std::string &path, float val);
+  template void ConfigJsonPatcher::replace<double>(const std::string &path, double val);
+
+  template <typename T>
+  void ConfigJsonPatcher::replace(int first, int last, const std::string &path, T val) {
+    nlohmann::json::json_pointer jp(path);
+    for (int i = first; i <= last; ++i) {  // inclusive range of element indices under m_current
+      m_current->at(i).at(jp) = val;       // path is resolved relative to each element
+    }
+  }
+  template void ConfigJsonPatcher::replace<int>(int first, int last, const std::string &path, int val);
+  template void ConfigJsonPatcher::replace<float>(int first, int last, const std::string &path, float val);
+  template void ConfigJsonPatcher::replace<double>(int first, int last, const std::string &path, double val);
+
+  nlohmann::json &ConfigJsonPatcher::get(const std::string &path) {
+    nlohmann::json::json_pointer jp(path);
+    return m_current->at(jp);
+  }
+
+  int ConfigJsonPatcher::replace(const nlohmann::json &j) {
+    // Recursively applies patch j on top of the JSON at m_current and returns the number of
+    // leaf values that were actually changed. Throws std::runtime_error on malformed patches.
+    if (j.is_null())
+      throw std::runtime_error(exc_hdr(__func__) + "null not expected");
+    if (j.is_boolean() || j.is_number() || j.is_string())
+      throw std::runtime_error(exc_hdr(__func__) + "value not expected on this parsing level");
+
+    int n_replaced = 0;
+
+    if (j.is_object()) {
+      // Keys of the form "[first..last]" select an inclusive index range; the dots are matched literally.
+      static const std::regex index_range_re("^\\[(\\d+)\\.\\.(\\d+)\\]$", std::regex::optimize);
+      for (auto &[key, value] : j.items()) {
+        std::smatch m;
+        std::regex_search(key, m, index_range_re);
+
+        if (m.size() == 3) {
+          if (!m_current->is_array())
+            throw std::runtime_error(exc_hdr(__func__) + "array range encountered when current json is not an array");
+          int first = std::stoi(m.str(1));
+          int last = std::stoi(m.str(2));
+          for (int i = first; i <= last; ++i) {
+            std::string s("/");
+            s += std::to_string(i);
+            cd(s);
+            if (value.is_array()) {
+              for (auto &el : value)
+                n_replaced += replace(el);
+            } else {
+              n_replaced += replace(value);
+            }
+            cd_up();
+          }
+        } else if (value.is_array() || value.is_object()) {
+          std::string s("/");
+          s += key;
+          cd(s);
+          n_replaced += replace(value);
+          cd_up();
+        } else if (value.is_number() || value.is_boolean() || value.is_string()) {
+          std::string s("/");
+          s += key;
+          nlohmann::json::json_pointer jp(s);
+          if (m_current->at(jp) != value) {
+            if (m_verbose)
+              std::cout << "  " << get_abs_path() << s << ": " << m_current->at(jp) << " -> " << value << "\n";
+
+            m_current->at(jp) = value;
+            ++n_replaced;
+          }
+        } else {
+          throw std::runtime_error(exc_hdr(__func__) + "unexpected value type");
+        }
+      }
+    } else if (j.is_array() && j.empty()) {  // empty array: nothing to patch
+    } else if (j.is_array()) {
+      // Arrays are somewhat tricky.
+      // At the moment all elements are expected to be objects.
+      //    This means arrays of basic types are not supported (like layer index arrays).
+      //    Should not be too hard to add support for this.
+      // Now, the objects in the array can be of two kinds:
+      // a) Their keys can be json_pointer strings starting with numbers or ranges [i_low..i_high].
+      // b) They can be actual elements of the array. In this case we require the length of
+      //    the array to be equal to existing length in the configuration.
+      // It is not allowed for these two kinds to mix.
+
+      // Determine the kind of array: json_ptr or object
+
+      static const std::regex index_re("^(?:\\[\\d+\\.\\.\\d+\\]|\\d+(?:/.*)?)$", std::regex::optimize);
+
+      bool has_index = false, has_plain = false;
+      for (int i = 0; i < (int)j.size(); ++i) {
+        const nlohmann::json &el = j[i];
+
+        if (!el.is_object())
+          throw std::runtime_error(exc_hdr(__func__) + "array elements expected to be objects");
+
+        for (nlohmann::json::const_iterator it = el.begin(); it != el.end(); ++it) {
+          if (std::regex_search(it.key(), index_re)) {
+            has_index = true;
+            if (has_plain)
+              throw std::runtime_error(exc_hdr(__func__) + "indexed array entry following plain one");
+          } else {
+            has_plain = true;
+            if (has_index)
+              throw std::runtime_error(exc_hdr(__func__) + "plain array entry following indexed one");
+          }
+        }
+      }
+      if (has_index) {
+        for (auto &element : j) {
+          n_replaced += replace(element);
+        }
+      } else {
+        if (!m_current || !m_current->is_array())
+          throw std::runtime_error(exc_hdr(__func__) + "plain array detected when current is not an array");
+        if (m_current->size() != j.size())
+          throw std::runtime_error(exc_hdr(__func__) + "plain array of different size than at current pos");
+
+        std::string s;
+        for (int i = 0; i < (int)j.size(); ++i) {
+          s = "/";
+          s += std::to_string(i);
+          cd(s);
+          n_replaced += replace(j[i]);
+          cd_up();
+        }
+      }
+    } else {
+      throw std::runtime_error(exc_hdr(__func__) + "unexpected json type");
+    }
+
+    return n_replaced;
+  }
+
+  std::string ConfigJsonPatcher::dump(int indent) { return m_json->dump(indent); }
+
+  // ============================================================================
+  // patch_File steering function
+  // ============================================================================
+  /*
+    See example JSON patcher input: "mkFit/config-parse/test.json"
+
+    The file can contain several valid JSON dumps in sequence.
+
+    '/' character can be used to descend more than one level at a time.
+
+    A number can be used to specify an array index. This can be combined with
+    the '/' syntax.
+
+    "[first..last]" key (as string) can be used to denote an inclusive range of
+    array elements. Such a key must not be combined with the '/' syntax.
+*/
+
+  namespace {
+    // Open fname for writing (truncating it); throws std::runtime_error, prefixed with pfx if given, on failure.
+    void open_ofstream(std::ofstream &ofs, const std::string &fname, const char *pfx = nullptr) {
+      ofs.open(fname, std::ofstream::trunc);
+      if (!ofs) {
+        char m[2048];  // %m below is a glibc printf extension that expands to strerror(errno)
+        snprintf(m, 2048, "%s%sError opening %s for write: %m", pfx ? pfx : "", pfx ? " " : "", fname.c_str());
+        throw std::runtime_error(m);
+      }
+    }
+
+    // Open fname for reading; throws std::runtime_error, prefixed with pfx if given, on failure.
+    void open_ifstream(std::ifstream &ifs, const std::string &fname, const char *pfx = nullptr) {
+      ifs.open(fname);
+      if (!ifs) {
+        char m[2048];  // %m below is a glibc printf extension that expands to strerror(errno)
+        snprintf(m, 2048, "%s%sError opening %s for read: %m", pfx ? pfx : "", pfx ? " " : "", fname.c_str());
+        throw std::runtime_error(m);
+      }
+    }
+
+    // Skip white-space, return true if more characters are available, false if eof.
+    bool skipws_ifstream(std::ifstream &ifs) {
+      while (std::isspace(ifs.peek()))
+        ifs.get();
+      return !ifs.eof();  // peek() at the end of input sets eofbit
+    }
+  }  // namespace
+
+  void ConfigJson::patch_Files(IterationsInfo &its_info,
+                               const std::vector<std::string> &fnames,
+                               ConfigJsonPatcher::PatchReport *report) {
+    ConfigJsonPatcher cjp(m_verbose);
+    cjp.load(its_info);
+    // Patches from all files in fnames are applied to cjp's JSON image of its_info, in the given order.
+    ConfigJsonPatcher::PatchReport rep;
+
+    for (auto &fname : fnames) {
+      std::ifstream ifs;
+      open_ifstream(ifs, fname, __func__);
+
+      if (m_verbose) {
+        printf("%s begin reading from file %s.\n", __func__, fname.c_str());
+      }
+      // A file may hold several JSON entities in sequence; each one is applied as a separate patch.
+      int n_read = 0, n_tot_replaced = 0;
+      while (skipws_ifstream(ifs)) {
+        nlohmann::json j;
+        ifs >> j;
+        ++n_read;
+
+        if (m_verbose) {
+          std::cout << " Read JSON entity " << n_read << " -- applying patch:\n";
+          // std::cout << j.dump(3) << "\n";
+        }
+
+        int n_replaced = cjp.replace(j);
+
+        if (m_verbose) {
+          std::cout << " Replaced " << n_replaced << " entries.\n";
+        }
+        cjp.cd_top();  // back to the document root before the next entity
+        n_tot_replaced += n_replaced;
+      }
+
+      if (m_verbose) {
+        printf("%s read %d JSON entities from file %s, replaced %d parameters.\n",
+               __func__,
+               n_read,
+               fname.c_str(),
+               n_tot_replaced);
+      }
+
+      ifs.close();
+
+      rep.inc_counts(1, n_read, n_tot_replaced);
+    }
+    // The patched JSON is copied back into its_info only when rep reports at least one replacement.
+    if (rep.n_replacements > 0) {
+      cjp.save(its_info);
+    }
+    // Add this call's counts to the caller-supplied report, if any.
+    if (report)
+      report->inc_counts(rep);
+  }
+
+  std::unique_ptr<IterationConfig> ConfigJson::patchLoad_File(const IterationsInfo &its_info,
+                                                              const std::string &fname,
+                                                              ConfigJsonPatcher::PatchReport *report) {
+    // Reads one JSON entity from fname, clones the IterationConfig of its_info that has the same
+    // m_track_algorithm, applies the entity as a patch to the clone and returns the clone.
+    std::ifstream ifs;
+    open_ifstream(ifs, fname, __func__);
+
+    if (m_verbose) {
+      printf("%s begin reading from file %s.\n", __func__, fname.c_str());
+    }
+
+    if (!skipws_ifstream(ifs))
+      throw std::runtime_error("empty file");
+
+    nlohmann::json j;
+    ifs >> j;
+    int track_algo = j["m_track_algorithm"];
+
+    int iii = -1;
+    for (int i = 0; i < its_info.size(); ++i) {
+      if (its_info[i].m_track_algorithm == track_algo) {
+        iii = i;
+        break;
+      }
+    }
+    if (iii == -1)
+      throw std::runtime_error("matching IterationConfig not found");
+
+    if (m_verbose) {
+      std::cout << " Read JSON entity, Iteration index is " << iii << " -- cloning and applying JSON patch:\n";
+    }
+    // Owned by unique_ptr from the start so that the clone is released if patching throws.
+    auto icp = std::make_unique<IterationConfig>(its_info[iii]);
+    IterationConfig &ic = *icp;
+
+    ConfigJsonPatcher cjp(m_verbose);
+    cjp.load(ic);
+
+    int n_replaced = cjp.replace(j);
+
+    cjp.cd_top();
+
+    if (m_verbose) {
+      printf("%s read 1 JSON entity from file %s, replaced %d parameters.\n", __func__, fname.c_str(), n_replaced);
+    }
+
+    ifs.close();
+    ConfigJsonPatcher::PatchReport rep;
+    rep.inc_counts(1, 1, n_replaced);
+
+    if (rep.n_replacements > 0) {
+      cjp.save(ic);
+    }
+
+    if (report)
+      report->inc_counts(rep);
+
+    return icp;
+  }
+
+  std::unique_ptr<IterationConfig> ConfigJson::load_File(const std::string &fname) {
+    // Reads one JSON entity from fname and deserializes it into a default-constructed IterationConfig.
+    std::ifstream ifs;
+    open_ifstream(ifs, fname, __func__);
+    if (m_verbose) {
+      printf("%s begin reading from file %s.\n", __func__, fname.c_str());
+    }
+
+    if (!skipws_ifstream(ifs))
+      throw std::runtime_error("empty file");
+
+    nlohmann::json j;
+    ifs >> j;
+
+    if (m_verbose) {
+      std::cout << " Read JSON entity, iteration index is " << j["m_iteration_index"] << ", track algorithm is "
+                << j["m_track_algorithm"] << ". Instantiating IterationConfig object and over-laying it with JSON.\n";
+    }
+    // unique_ptr owns the object from the start so that it is released if from_json() throws.
+    auto icp = std::make_unique<IterationConfig>();
+
+    from_json(j, *icp);
+
+    return icp;
+  }
+
+  // ============================================================================
+  // Save each IterationConfig into a separate json file
+  // ============================================================================
+
+  void ConfigJson::save_Iterations(IterationsInfo &its_info,
+                                   const std::string &fname_fmt,
+                                   bool include_iter_info_preamble) {
+    bool has_pct_d = fname_fmt.find("%d") != std::string::npos;
+    bool has_pct_s = fname_fmt.find("%s") != std::string::npos;
+    // fname_fmt is passed to snprintf as the format string below, hence exactly one of %d / %s is required.
+    assert((has_pct_d || has_pct_s) && "JSON save filename-format must include a %d or %s substring");
+    assert(!(has_pct_d && has_pct_s) && "JSON save filename-format must include only one of %d or %s substrings");
+    // NOTE: assert() is compiled out when NDEBUG is defined, so such builds do not check the format.
+    for (int ii = 0; ii < its_info.size(); ++ii) {
+      const IterationConfig &itconf = its_info[ii];
+
+      char fname[1024];
+      if (has_pct_d)
+        snprintf(fname, 1024, fname_fmt.c_str(), ii);
+      else
+        snprintf(fname, 1024, fname_fmt.c_str(), TrackBase::algoint_to_cstr(itconf.m_track_algorithm));
+
+      std::ofstream ofs;
+      open_ofstream(ofs, fname, __func__);
+      // With the preamble the output reads { "m_iterations/<ii>": <config> }.
+      if (include_iter_info_preamble) {
+        ofs << "{ \"m_iterations/" << ii << "\": ";
+      }
+      // ordered_json keeps the members in the order in which to_json() inserts them.
+      nlohmann::ordered_json j;
+      to_json(j, itconf);
+      // nlohmann's operator<< uses the stream width as indentation: setw(1) pretty-prints with indent 1.
+      ofs << std::setw(1);
+      ofs << j;
+
+      if (include_iter_info_preamble) {
+        ofs << " }";
+      }
+
+      ofs << "\n";
+      ofs.close();
+    }
+  }
+
+  void ConfigJson::dump(IterationsInfo &its_info) {
+    nlohmann::ordered_json j = its_info;
+    std::cout << j.dump(3) << "\n";
+  }
+
+  // ============================================================================
+  // Tests for ConfigJson stuff
+  // ============================================================================
+
+  void ConfigJson::test_Direct(IterationConfig &it_cfg) {
+    using nlohmann::json;
+
+    std::string lojz("/m_select_max_dphi");
+
+    json j = it_cfg;
+    std::cout << j.dump(1) << "\n";
+
+    std::cout << "Layer 43, m_select_max_dphi = " << j["/m_layer_configs/43/m_select_max_dphi"_json_pointer] << "\n";
+    std::cout << "Patching it to pi ...\n";
+    json p = R"([
+        { "op": "replace", "path": "/m_layer_configs/43/m_select_max_dphi", "value": 3.141 }
+    ])"_json;
+    j = j.patch(p);
+    std::cout << "Layer 43, m_select_max_dphi = " << j["/m_layer_configs/43/m_select_max_dphi"_json_pointer] << "\n";
+
+    auto &jx = j["/m_layer_configs/60"_json_pointer];
+    // jx["m_select_max_dphi"] = 99.876;
+    json::json_pointer jp(lojz);
+    jx[jp] = 99.876;
+
+    // try loading it back, see what happens to vector m_layer_configs.
+
+    from_json(j, it_cfg);
+    printf("Layer 43 : m_select_max_dphi = %f, size_of_layer_vec=%d, m_n_regions=%d, size_of_steering_params=%d\n",
+           it_cfg.m_layer_configs[43].m_select_max_dphi,
+           (int)it_cfg.m_layer_configs.size(),
+           it_cfg.m_n_regions,
+           (int)it_cfg.m_steering_params.size());
+
+    printf("Layer 60 : m_select_max_dphi = %f, size_of_layer_vec=%d, m_n_regions=%d, size_of_steering_params=%d\n",
+           it_cfg.m_layer_configs[60].m_select_max_dphi,
+           (int)it_cfg.m_layer_configs.size(),
+           it_cfg.m_n_regions,
+           (int)it_cfg.m_steering_params.size());
+
+    // try accessing something that does not exist
+
+    // std::cout << "Non-existent path " << j["/m_layer_configs/143/m_select_max_dphi"_json_pointer] << "\n";
+
+    auto &x = j["/m_layer_configs"_json_pointer];
+    std::cout << "Typename /m_layer_configs " << x.type_name() << "\n";
+    auto &y = j["/m_layer_configs/143"_json_pointer];
+    std::cout << "Typename /m_layer_configs/143 " << y.type_name() << ", is_null=" << y.is_null() << "\n";
+  }
+
+  void ConfigJson::test_Patcher(IterationConfig &it_cfg) {
+    // Exercises ConfigJsonPatcher on it_cfg (cd / get / replace / save) and prints the results.
+    ConfigJsonPatcher cjp;
+    cjp.load(it_cfg);
+    std::cout << cjp.dump(1) << "\n";
+
+    {
+      cjp.cd("/m_layer_configs/43/m_select_max_dphi");
+      std::cout << "Layer 43, m_select_max_dphi = " << cjp.get("") << "\n";
+      std::cout << "Setting it to pi ...\n";
+      cjp.replace("", 3.141);
+      cjp.cd_top();
+      std::cout << "Layer 43, m_select_max_dphi = " << cjp.get("/m_layer_configs/43/m_select_max_dphi") << "\n";
+    }
+    {
+      std::cout << "Replacing layer 60 m_select_max_dphi with full path\n";
+      cjp.replace("/m_layer_configs/60/m_select_max_dphi", 99.876);
+    }
+    try {
+      std::cout << "Trying to replace a non-existent array entry\n";
+      cjp.replace("/m_layer_configs/1460/m_select_max_dphi", 666.666);
+    } catch (std::exception &exc) {
+      std::cout << "Caught exception: " << exc.what() << "\n";
+    }
+    try {
+      std::cout << "Trying to replace a non-existent object entry\n";
+      cjp.replace("/m_layer_configs/1/moo_select_max_dphi", 666.666);
+    } catch (std::exception &exc) {
+      std::cout << "Caught exception: " << exc.what() << "\n";
+    }
+    {
+      std::cout << "Replacing m_select_max_dphi on layers 1 to 3 to 7.7\n";
+      cjp.cd("/m_layer_configs");
+      cjp.replace(1, 3, "/m_select_max_dphi", 7.7);
+      cjp.cd_top();
+    }
+
+    // try getting it back into c++, see what happens to vector m_layer_configs.
+
+    cjp.save(it_cfg);
+
+    printf("Layer 43: m_select_max_dphi = %f, size_of_layer_vec=%d, m_n_regions=%d, size_of_steering_params=%d\n",
+           it_cfg.m_layer_configs[43].m_select_max_dphi,
+           (int)it_cfg.m_layer_configs.size(),
+           it_cfg.m_n_regions,
+           (int)it_cfg.m_steering_params.size());
+
+    printf("Layer 60: m_select_max_dphi = %f\n", it_cfg.m_layer_configs[60].m_select_max_dphi);
+    for (int i = 0; i < 5; ++i)
+      printf("Layer %2d: m_select_max_dphi = %f\n", i, it_cfg.m_layer_configs[i].m_select_max_dphi);
+
+    // try accessing something that does not exist
+
+    // std::cout << "Non-existent path " << j["/m_layer_configs/143/m_select_max_dphi"_json_pointer] << "\n";
+
+    auto &j = cjp.get("");
+
+    auto &x = j["/m_layer_configs"_json_pointer];
+    std::cout << "Typename /m_layer_configs " << x.type_name() << "\n";
+    auto &y = j["/m_layer_configs/143"_json_pointer];
+    std::cout << "Typename /m_layer_configs/143 " << y.type_name() << ", is_null=" << y.is_null() << "\n";
+  }
+
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/K62HC.ah b/RecoTracker/MkFitCore/src/K62HC.ah
new file mode 100644
index 0000000000000..bb10bba9f722f
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/K62HC.ah
@@ -0,0 +1,131 @@
+#ifdef MPLEX_INTRINSICS
+
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t b_1 = LD(b, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+
+      IntrVec_t a_2 = LD(a, 2);
+      IntrVec_t c_1 = MUL(a_2, b_0);
+      IntrVec_t c_2 = MUL(a_2, b_1);
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_1 = FMA(a_3, b_1, c_1);
+      ST(c, 0, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_2 = FMA(a_3, b_2, c_2);
+
+      IntrVec_t a_4 = LD(a, 4);
+      IntrVec_t c_3 = MUL(a_4, b_0);
+      ST(c, 1, c_1);
+      IntrVec_t c_4 = MUL(a_4, b_1);
+      ST(c, 2, c_2);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_5 = MUL(a_4, b_3);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_3 = FMA(a_5, b_1, c_3);
+      c_4 = FMA(a_5, b_2, c_4);
+      IntrVec_t b_4 = LD(b, 4);
+      c_5 = FMA(a_5, b_4, c_5);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_6 = MUL(a_6, b_0);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_7 = MUL(a_6, b_1);
+      IntrVec_t c_8 = MUL(a_6, b_3);
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_9 = MUL(a_6, b_6);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_6 = FMA(a_7, b_1, c_6);
+      c_7 = FMA(a_7, b_2, c_7);
+      c_8 = FMA(a_7, b_4, c_8);
+      IntrVec_t b_7 = LD(b, 7);
+      c_9 = FMA(a_7, b_7, c_9);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+
+      IntrVec_t a_8 = LD(a, 8);
+      IntrVec_t c_10 = MUL(a_8, b_0);
+      ST(c, 9, c_9);
+      IntrVec_t c_11 = MUL(a_8, b_1);
+      IntrVec_t c_12 = MUL(a_8, b_3);
+      IntrVec_t c_13 = MUL(a_8, b_6);
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_14 = MUL(a_8, b_10);
+
+      IntrVec_t a_9 = LD(a, 9);
+      c_10 = FMA(a_9, b_1, c_10);
+      c_11 = FMA(a_9, b_2, c_11);
+      c_12 = FMA(a_9, b_4, c_12);
+      c_13 = FMA(a_9, b_7, c_13);
+      IntrVec_t b_11 = LD(b, 11);
+      c_14 = FMA(a_9, b_11, c_14);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+
+      IntrVec_t a_10 = LD(a, 10);
+      IntrVec_t c_15 = MUL(a_10, b_0);
+      IntrVec_t c_16 = MUL(a_10, b_1);
+      IntrVec_t c_17 = MUL(a_10, b_3);
+      IntrVec_t c_18 = MUL(a_10, b_6);
+      IntrVec_t c_19 = MUL(a_10, b_10);
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_20 = MUL(a_10, b_15);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_15 = FMA(a_11, b_1, c_15);
+      c_16 = FMA(a_11, b_2, c_16);
+      c_17 = FMA(a_11, b_4, c_17);
+      c_18 = FMA(a_11, b_7, c_18);
+      c_19 = FMA(a_11, b_11, c_19);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      IntrVec_t b_16 = LD(b, 16);
+      c_20 = FMA(a_11, b_16, c_20);
+      ST(c, 20, c_20);
+   }
+
+#else
+
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n];
+      c[ 1*N+n] = a[ 2*N+n]*b[ 0*N+n] + a[ 3*N+n]*b[ 1*N+n];
+      c[ 2*N+n] = a[ 2*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 2*N+n];
+      c[ 3*N+n] = a[ 4*N+n]*b[ 0*N+n] + a[ 5*N+n]*b[ 1*N+n];
+      c[ 4*N+n] = a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 2*N+n];
+      c[ 5*N+n] = a[ 4*N+n]*b[ 3*N+n] + a[ 5*N+n]*b[ 4*N+n];
+      c[ 6*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n];
+      c[ 7*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n];
+      c[ 8*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n];
+      c[ 9*N+n] = a[ 6*N+n]*b[ 6*N+n] + a[ 7*N+n]*b[ 7*N+n];
+      c[10*N+n] = a[ 8*N+n]*b[ 0*N+n] + a[ 9*N+n]*b[ 1*N+n];
+      c[11*N+n] = a[ 8*N+n]*b[ 1*N+n] + a[ 9*N+n]*b[ 2*N+n];
+      c[12*N+n] = a[ 8*N+n]*b[ 3*N+n] + a[ 9*N+n]*b[ 4*N+n];
+      c[13*N+n] = a[ 8*N+n]*b[ 6*N+n] + a[ 9*N+n]*b[ 7*N+n];
+      c[14*N+n] = a[ 8*N+n]*b[10*N+n] + a[ 9*N+n]*b[11*N+n];
+      c[15*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 1*N+n];
+      c[16*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 2*N+n];
+      c[17*N+n] = a[10*N+n]*b[ 3*N+n] + a[11*N+n]*b[ 4*N+n];
+      c[18*N+n] = a[10*N+n]*b[ 6*N+n] + a[11*N+n]*b[ 7*N+n];
+      c[19*N+n] = a[10*N+n]*b[10*N+n] + a[11*N+n]*b[11*N+n];
+      c[20*N+n] = a[10*N+n]*b[15*N+n] + a[11*N+n]*b[16*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/KH.ah b/RecoTracker/MkFitCore/src/KH.ah
new file mode 100644
index 0000000000000..eef6d9a9efa24
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KH.ah
@@ -0,0 +1,140 @@
+#ifdef MPLEX_INTRINSICS
+   // Include-fragment, vector path: a, b, c, N and T are not declared here; the including scope must provide them.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      #ifdef AVX512_INTRINSICS
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };  // 16 zero lanes
+      #else
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };  // 8 zero lanes
+      #endif
+
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t c_2 = a_1;  // plain copy, no multiply
+
+      ST(c, 3, all_zeros);  // c planes 3..5 are zero
+      ST(c, 4, all_zeros);
+      ST(c, 5, all_zeros);
+
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t c_6 = MUL(a_3, b_0);
+      IntrVec_t c_7 = MUL(a_3, b_1);
+
+      IntrVec_t a_4 = LD(a, 4);
+      IntrVec_t c_8 = a_4;
+      ST(c, 0, c_0);  // stores of c_0..c_2 are issued only here, after the next group's loads
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+
+      ST(c, 9, all_zeros);
+      ST(c, 10, all_zeros);
+      ST(c, 11, all_zeros);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_12 = MUL(a_6, b_0);
+      IntrVec_t c_13 = MUL(a_6, b_1);
+
+      IntrVec_t a_7 = LD(a, 7);
+      IntrVec_t c_14 = a_7;
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+
+      ST(c, 15, all_zeros);
+      ST(c, 16, all_zeros);
+      ST(c, 17, all_zeros);
+
+      IntrVec_t a_9 = LD(a, 9);
+      IntrVec_t c_18 = MUL(a_9, b_0);
+      IntrVec_t c_19 = MUL(a_9, b_1);
+
+      IntrVec_t a_10 = LD(a, 10);
+      IntrVec_t c_20 = a_10;
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+
+      ST(c, 21, all_zeros);
+      ST(c, 22, all_zeros);
+      ST(c, 23, all_zeros);
+
+      IntrVec_t a_12 = LD(a, 12);
+      IntrVec_t c_24 = MUL(a_12, b_0);
+      IntrVec_t c_25 = MUL(a_12, b_1);
+
+      IntrVec_t a_13 = LD(a, 13);
+      IntrVec_t c_26 = a_13;
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+
+      ST(c, 27, all_zeros);
+      ST(c, 28, all_zeros);
+      ST(c, 29, all_zeros);
+
+      IntrVec_t a_15 = LD(a, 15);
+      IntrVec_t c_30 = MUL(a_15, b_0);
+      IntrVec_t c_31 = MUL(a_15, b_1);
+
+      IntrVec_t a_16 = LD(a, 16);
+      IntrVec_t c_32 = a_16;
+      ST(c, 24, c_24);
+      ST(c, 25, c_25);
+      ST(c, 26, c_26);
+
+      ST(c, 33, all_zeros);
+      ST(c, 34, all_zeros);
+      ST(c, 35, all_zeros);
+      ST(c, 30, c_30);
+      ST(c, 31, c_31);
+      ST(c, 32, c_32);
+   }
+
+#else
+   // Scalar path; for i = 0..5: c[6i] = a[3i]*b[0], c[6i+1] = a[3i]*b[1], c[6i+2] = a[3i+1], c[6i+3..6i+5] = 0.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n];
+      c[ 2*N+n] = a[ 1*N+n];
+      c[ 3*N+n] = 0;
+      c[ 4*N+n] = 0;
+      c[ 5*N+n] = 0;
+      c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n];
+      c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n];
+      c[ 8*N+n] = a[ 4*N+n];
+      c[ 9*N+n] = 0;
+      c[10*N+n] = 0;
+      c[11*N+n] = 0;
+      c[12*N+n] = a[ 6*N+n]*b[ 0*N+n];
+      c[13*N+n] = a[ 6*N+n]*b[ 1*N+n];
+      c[14*N+n] = a[ 7*N+n];
+      c[15*N+n] = 0;
+      c[16*N+n] = 0;
+      c[17*N+n] = 0;
+      c[18*N+n] = a[ 9*N+n]*b[ 0*N+n];
+      c[19*N+n] = a[ 9*N+n]*b[ 1*N+n];
+      c[20*N+n] = a[10*N+n];
+      c[21*N+n] = 0;
+      c[22*N+n] = 0;
+      c[23*N+n] = 0;
+      c[24*N+n] = a[12*N+n]*b[ 0*N+n];
+      c[25*N+n] = a[12*N+n]*b[ 1*N+n];
+      c[26*N+n] = a[13*N+n];
+      c[27*N+n] = 0;
+      c[28*N+n] = 0;
+      c[29*N+n] = 0;
+      c[30*N+n] = a[15*N+n]*b[ 0*N+n];
+      c[31*N+n] = a[15*N+n]*b[ 1*N+n];
+      c[32*N+n] = a[16*N+n];
+      c[33*N+n] = 0;
+      c[34*N+n] = 0;
+      c[35*N+n] = 0;
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/KHC.ah b/RecoTracker/MkFitCore/src/KHC.ah
new file mode 100644
index 0000000000000..3cb56108acd62
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KHC.ah
@@ -0,0 +1,186 @@
+#ifdef MPLEX_INTRINSICS
+   // Include-fragment, vector path: a, b, c, N and T come from the including scope. Fills c planes 0..20.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t b_1 = LD(b, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+
+      IntrVec_t a_2 = LD(a, 2);
+      IntrVec_t b_3 = LD(b, 3);
+      c_0 = FMA(a_2, b_3, c_0);
+
+
+
+      // Next group: c_1, c_2 accumulate a planes 6..8.
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_1 = MUL(a_6, b_0);
+      IntrVec_t c_2 = MUL(a_6, b_1);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_1 = FMA(a_7, b_1, c_1);
+      ST(c, 0, c_0);  // store of the previous group is interleaved with this group's arithmetic
+      IntrVec_t b_2 = LD(b, 2);
+      c_2 = FMA(a_7, b_2, c_2);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_1 = FMA(a_8, b_3, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_8, b_4, c_2);
+
+
+
+      // Next group: c_3..c_5 accumulate a planes 12..14.
+      IntrVec_t a_12 = LD(a, 12);
+      IntrVec_t c_3 = MUL(a_12, b_0);
+      IntrVec_t c_4 = MUL(a_12, b_1);
+      IntrVec_t c_5 = MUL(a_12, b_3);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+
+      IntrVec_t a_13 = LD(a, 13);
+      c_3 = FMA(a_13, b_1, c_3);
+      c_4 = FMA(a_13, b_2, c_4);
+      c_5 = FMA(a_13, b_4, c_5);
+
+      IntrVec_t a_14 = LD(a, 14);
+      c_3 = FMA(a_14, b_3, c_3);
+      c_4 = FMA(a_14, b_4, c_4);
+      IntrVec_t b_5 = LD(b, 5);
+      c_5 = FMA(a_14, b_5, c_5);
+
+
+
+      // Next group: c_6..c_9 accumulate a planes 18..20.
+      IntrVec_t a_18 = LD(a, 18);
+      IntrVec_t c_6 = MUL(a_18, b_0);
+      IntrVec_t c_7 = MUL(a_18, b_1);
+      IntrVec_t c_8 = MUL(a_18, b_3);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_9 = MUL(a_18, b_6);
+
+      IntrVec_t a_19 = LD(a, 19);
+      c_6 = FMA(a_19, b_1, c_6);
+      c_7 = FMA(a_19, b_2, c_7);
+      c_8 = FMA(a_19, b_4, c_8);
+      IntrVec_t b_7 = LD(b, 7);
+      c_9 = FMA(a_19, b_7, c_9);
+
+      IntrVec_t a_20 = LD(a, 20);
+      c_6 = FMA(a_20, b_3, c_6);
+      c_7 = FMA(a_20, b_4, c_7);
+      c_8 = FMA(a_20, b_5, c_8);
+      IntrVec_t b_8 = LD(b, 8);
+      c_9 = FMA(a_20, b_8, c_9);
+
+
+
+      // Next group: c_10..c_14 accumulate a planes 24..26.
+      IntrVec_t a_24 = LD(a, 24);
+      IntrVec_t c_10 = MUL(a_24, b_0);
+      IntrVec_t c_11 = MUL(a_24, b_1);
+      IntrVec_t c_12 = MUL(a_24, b_3);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      IntrVec_t c_13 = MUL(a_24, b_6);
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_14 = MUL(a_24, b_10);
+
+      IntrVec_t a_25 = LD(a, 25);
+      c_10 = FMA(a_25, b_1, c_10);
+      c_11 = FMA(a_25, b_2, c_11);
+      c_12 = FMA(a_25, b_4, c_12);
+      c_13 = FMA(a_25, b_7, c_13);
+      IntrVec_t b_11 = LD(b, 11);
+      c_14 = FMA(a_25, b_11, c_14);
+
+      IntrVec_t a_26 = LD(a, 26);
+      c_10 = FMA(a_26, b_3, c_10);
+      c_11 = FMA(a_26, b_4, c_11);
+      c_12 = FMA(a_26, b_5, c_12);
+      c_13 = FMA(a_26, b_8, c_13);
+      IntrVec_t b_12 = LD(b, 12);
+      c_14 = FMA(a_26, b_12, c_14);
+
+
+
+      // Last group: c_15..c_20 accumulate a planes 30..32.
+      IntrVec_t a_30 = LD(a, 30);
+      IntrVec_t c_15 = MUL(a_30, b_0);
+      IntrVec_t c_16 = MUL(a_30, b_1);
+      IntrVec_t c_17 = MUL(a_30, b_3);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      IntrVec_t c_18 = MUL(a_30, b_6);
+      IntrVec_t c_19 = MUL(a_30, b_10);
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_20 = MUL(a_30, b_15);
+
+      IntrVec_t a_31 = LD(a, 31);
+      c_15 = FMA(a_31, b_1, c_15);
+      c_16 = FMA(a_31, b_2, c_16);
+      c_17 = FMA(a_31, b_4, c_17);
+      c_18 = FMA(a_31, b_7, c_18);
+      c_19 = FMA(a_31, b_11, c_19);
+      IntrVec_t b_16 = LD(b, 16);
+      c_20 = FMA(a_31, b_16, c_20);
+
+      IntrVec_t a_32 = LD(a, 32);
+      c_15 = FMA(a_32, b_3, c_15);
+      c_16 = FMA(a_32, b_4, c_16);
+      c_17 = FMA(a_32, b_5, c_17);
+      c_18 = FMA(a_32, b_8, c_18);
+      c_19 = FMA(a_32, b_12, c_19);
+      IntrVec_t b_17 = LD(b, 17);
+      c_20 = FMA(a_32, b_17, c_20);
+
+
+      // Remaining stores for the last group.
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+   }
+
+#else
+   // Scalar path: c planes 0..20, each a sum of three a*b products; a is read at planes 6*i + {0, 1, 2}, i = 0..5.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 3*N+n];
+      c[ 1*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n];
+      c[ 2*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n];
+      c[ 3*N+n] = a[12*N+n]*b[ 0*N+n] + a[13*N+n]*b[ 1*N+n] + a[14*N+n]*b[ 3*N+n];
+      c[ 4*N+n] = a[12*N+n]*b[ 1*N+n] + a[13*N+n]*b[ 2*N+n] + a[14*N+n]*b[ 4*N+n];
+      c[ 5*N+n] = a[12*N+n]*b[ 3*N+n] + a[13*N+n]*b[ 4*N+n] + a[14*N+n]*b[ 5*N+n];
+      c[ 6*N+n] = a[18*N+n]*b[ 0*N+n] + a[19*N+n]*b[ 1*N+n] + a[20*N+n]*b[ 3*N+n];
+      c[ 7*N+n] = a[18*N+n]*b[ 1*N+n] + a[19*N+n]*b[ 2*N+n] + a[20*N+n]*b[ 4*N+n];
+      c[ 8*N+n] = a[18*N+n]*b[ 3*N+n] + a[19*N+n]*b[ 4*N+n] + a[20*N+n]*b[ 5*N+n];
+      c[ 9*N+n] = a[18*N+n]*b[ 6*N+n] + a[19*N+n]*b[ 7*N+n] + a[20*N+n]*b[ 8*N+n];
+      c[10*N+n] = a[24*N+n]*b[ 0*N+n] + a[25*N+n]*b[ 1*N+n] + a[26*N+n]*b[ 3*N+n];
+      c[11*N+n] = a[24*N+n]*b[ 1*N+n] + a[25*N+n]*b[ 2*N+n] + a[26*N+n]*b[ 4*N+n];
+      c[12*N+n] = a[24*N+n]*b[ 3*N+n] + a[25*N+n]*b[ 4*N+n] + a[26*N+n]*b[ 5*N+n];
+      c[13*N+n] = a[24*N+n]*b[ 6*N+n] + a[25*N+n]*b[ 7*N+n] + a[26*N+n]*b[ 8*N+n];
+      c[14*N+n] = a[24*N+n]*b[10*N+n] + a[25*N+n]*b[11*N+n] + a[26*N+n]*b[12*N+n];
+      c[15*N+n] = a[30*N+n]*b[ 0*N+n] + a[31*N+n]*b[ 1*N+n] + a[32*N+n]*b[ 3*N+n];
+      c[16*N+n] = a[30*N+n]*b[ 1*N+n] + a[31*N+n]*b[ 2*N+n] + a[32*N+n]*b[ 4*N+n];
+      c[17*N+n] = a[30*N+n]*b[ 3*N+n] + a[31*N+n]*b[ 4*N+n] + a[32*N+n]*b[ 5*N+n];
+      c[18*N+n] = a[30*N+n]*b[ 6*N+n] + a[31*N+n]*b[ 7*N+n] + a[32*N+n]*b[ 8*N+n];
+      c[19*N+n] = a[30*N+n]*b[10*N+n] + a[31*N+n]*b[11*N+n] + a[32*N+n]*b[12*N+n];
+      c[20*N+n] = a[30*N+n]*b[15*N+n] + a[31*N+n]*b[16*N+n] + a[32*N+n]*b[17*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/KalmanGain.ah b/RecoTracker/MkFitCore/src/KalmanGain.ah
new file mode 100644
index 0000000000000..e12fbc7953434
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KalmanGain.ah
@@ -0,0 +1,147 @@
+#ifdef MPLEX_INTRINSICS
+   // Include-fragment, vector path: a, b, c, N and T come from the including scope. Fills c planes 0..17.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      #ifdef AVX512_INTRINSICS
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };  // 16 zero lanes
+      #else
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };  // 8 zero lanes
+      #endif
+
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t b_3 = LD(b, 3);
+      c_0 = FMA(a_1, b_3, c_0);
+      IntrVec_t b_4 = LD(b, 4);
+      c_1 = FMA(a_1, b_4, c_1);
+
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t b_6 = LD(b, 6);
+      c_0 = FMA(a_3, b_6, c_0);
+      IntrVec_t b_7 = LD(b, 7);
+      c_1 = FMA(a_3, b_7, c_1);
+
+
+
+      ST(c, 2, all_zeros);  // every third c plane (2, 5, 8, 11, 14, 17) is written as zero
+
+      IntrVec_t c_3 = MUL(a_1, b_0);  // a_1 and a_3 are reused from above rather than reloaded
+      IntrVec_t c_4 = MUL(a_1, b_1);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_3 = FMA(a_2, b_3, c_3);
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      c_4 = FMA(a_2, b_4, c_4);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_3 = FMA(a_4, b_6, c_3);
+      c_4 = FMA(a_4, b_7, c_4);
+
+
+
+      ST(c, 5, all_zeros);
+
+      IntrVec_t c_6 = MUL(a_3, b_0);
+      IntrVec_t c_7 = MUL(a_3, b_1);
+
+      c_6 = FMA(a_4, b_3, c_6);
+      c_7 = FMA(a_4, b_4, c_7);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_6 = FMA(a_5, b_6, c_6);
+      c_7 = FMA(a_5, b_7, c_7);
+
+
+
+      ST(c, 8, all_zeros);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_9 = MUL(a_6, b_0);
+      IntrVec_t c_10 = MUL(a_6, b_1);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_9 = FMA(a_7, b_3, c_9);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      c_10 = FMA(a_7, b_4, c_10);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_9 = FMA(a_8, b_6, c_9);
+      c_10 = FMA(a_8, b_7, c_10);
+
+
+
+      ST(c, 11, all_zeros);
+
+      IntrVec_t a_10 = LD(a, 10);
+      IntrVec_t c_12 = MUL(a_10, b_0);
+      IntrVec_t c_13 = MUL(a_10, b_1);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_12 = FMA(a_11, b_3, c_12);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      c_13 = FMA(a_11, b_4, c_13);
+
+      IntrVec_t a_12 = LD(a, 12);
+      c_12 = FMA(a_12, b_6, c_12);
+      c_13 = FMA(a_12, b_7, c_13);
+
+
+
+      ST(c, 14, all_zeros);
+
+      IntrVec_t a_15 = LD(a, 15);
+      IntrVec_t c_15 = MUL(a_15, b_0);
+      IntrVec_t c_16 = MUL(a_15, b_1);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_15 = FMA(a_16, b_3, c_15);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      c_16 = FMA(a_16, b_4, c_16);
+
+      IntrVec_t a_17 = LD(a, 17);
+      c_15 = FMA(a_17, b_6, c_15);
+      c_16 = FMA(a_17, b_7, c_16);
+
+
+
+      ST(c, 17, all_zeros);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+   }
+
+#else
+   // Scalar path: c[3i] and c[3i+1] are three-term sums using b planes {0,3,6} and {1,4,7}; c[3i+2] = 0 (i = 0..5).
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 3*N+n] + a[ 3*N+n]*b[ 6*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 7*N+n];
+      c[ 2*N+n] = 0;
+      c[ 3*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 3*N+n] + a[ 4*N+n]*b[ 6*N+n];
+      c[ 4*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 4*N+n]*b[ 7*N+n];
+      c[ 5*N+n] = 0;
+      c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 3*N+n] + a[ 5*N+n]*b[ 6*N+n];
+      c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 5*N+n]*b[ 7*N+n];
+      c[ 8*N+n] = 0;
+      c[ 9*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 3*N+n] + a[ 8*N+n]*b[ 6*N+n];
+      c[10*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 7*N+n];
+      c[11*N+n] = 0;
+      c[12*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 3*N+n] + a[12*N+n]*b[ 6*N+n];
+      c[13*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 4*N+n] + a[12*N+n]*b[ 7*N+n];
+      c[14*N+n] = 0;
+      c[15*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 3*N+n] + a[17*N+n]*b[ 6*N+n];
+      c[16*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 4*N+n] + a[17*N+n]*b[ 7*N+n];
+      c[17*N+n] = 0;
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/KalmanGain62.ah b/RecoTracker/MkFitCore/src/KalmanGain62.ah
new file mode 100644
index 0000000000000..0b88848689c9d
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KalmanGain62.ah
@@ -0,0 +1,87 @@
+#ifdef MPLEX_INTRINSICS
+   // Include-fragment, vector path: a, b, c, N and T come from the including scope. Fills c planes 0..11.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);  // b planes 0..2 are the only b planes read
+      c_1 = FMA(a_1, b_2, c_1);
+
+      IntrVec_t c_2 = MUL(a_1, b_0);  // a_1 is reused here as the first factor of the next pair
+      IntrVec_t c_3 = MUL(a_1, b_1);
+      ST(c, 0, c_0);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_2 = FMA(a_2, b_1, c_2);
+      ST(c, 1, c_1);
+      c_3 = FMA(a_2, b_2, c_3);
+
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t c_4 = MUL(a_3, b_0);
+      ST(c, 2, c_2);
+      IntrVec_t c_5 = MUL(a_3, b_1);
+      ST(c, 3, c_3);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_4 = FMA(a_4, b_1, c_4);
+      c_5 = FMA(a_4, b_2, c_5);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_6 = MUL(a_6, b_0);
+      IntrVec_t c_7 = MUL(a_6, b_1);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_6 = FMA(a_7, b_1, c_6);
+      c_7 = FMA(a_7, b_2, c_7);
+
+      IntrVec_t a_10 = LD(a, 10);
+      IntrVec_t c_8 = MUL(a_10, b_0);
+      IntrVec_t c_9 = MUL(a_10, b_1);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_8 = FMA(a_11, b_1, c_8);
+      c_9 = FMA(a_11, b_2, c_9);
+
+      IntrVec_t a_15 = LD(a, 15);
+      IntrVec_t c_10 = MUL(a_15, b_0);
+      IntrVec_t c_11 = MUL(a_15, b_1);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_10 = FMA(a_16, b_1, c_10);
+      c_11 = FMA(a_16, b_2, c_11);
+
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+   }
+
+#else
+   // Scalar path: c[2i] = p*b[0] + q*b[1], c[2i+1] = p*b[1] + q*b[2]; (p, q) = a planes (0,1),(1,2),(3,4),(6,7),(10,11),(15,16).
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n];
+      c[ 2*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 1*N+n];
+      c[ 3*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n];
+      c[ 4*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n];
+      c[ 5*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n];
+      c[ 6*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n];
+      c[ 7*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n];
+      c[ 8*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 1*N+n];
+      c[ 9*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 2*N+n];
+      c[10*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 1*N+n];
+      c[11*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 2*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/KalmanHTG.ah b/RecoTracker/MkFitCore/src/KalmanHTG.ah
new file mode 100644
index 0000000000000..33380656ab56b
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KalmanHTG.ah
@@ -0,0 +1,64 @@
+#ifdef MPLEX_INTRINSICS
+   // Include-fragment, vector path: a, b, c, N and T come from the including scope. Fills c planes 0..8.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+
+      // a plane 1 is never loaded; the second term of c_0..c_2 uses a plane 2.
+      IntrVec_t a_2 = LD(a, 2);
+      c_0 = FMA(a_2, b_3, c_0);
+      IntrVec_t b_4 = LD(b, 4);
+      c_1 = FMA(a_2, b_4, c_1);
+      IntrVec_t b_5 = LD(b, 5);
+      c_2 = FMA(a_2, b_5, c_2);
+      ST(c, 0, c_0);
+
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t c_3 = MUL(a_3, b_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      IntrVec_t c_4 = MUL(a_3, b_1);
+      IntrVec_t c_5 = MUL(a_3, b_3);
+
+      // a plane 4 is never loaded; the second term of c_3..c_5 uses a plane 5.
+      IntrVec_t a_5 = LD(a, 5);
+      c_3 = FMA(a_5, b_3, c_3);
+      c_4 = FMA(a_5, b_4, c_4);
+      c_5 = FMA(a_5, b_5, c_5);
+
+      // c planes 6..8 are plain copies of b planes 1, 2 and 4 (no a factor).
+      IntrVec_t c_6 = b_1;
+      IntrVec_t b_2 = LD(b, 2);
+      IntrVec_t c_7 = b_2;
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_8 = b_4;
+
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+   }
+
+#else
+   // Scalar path: c[0..2] from a planes 0 and 2, c[3..5] from a planes 3 and 5, c[6..8] copied from b planes 1, 2, 4.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 3*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 4*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 5*N+n];
+      c[ 3*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 5*N+n]*b[ 3*N+n];
+      c[ 4*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 4*N+n];
+      c[ 5*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 5*N+n]*b[ 5*N+n];
+      c[ 6*N+n] = b[ 1*N+n];
+      c[ 7*N+n] = b[ 2*N+n];
+      c[ 8*N+n] = b[ 4*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc
new file mode 100644
index 0000000000000..7934de60c3e00
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.cc
@@ -0,0 +1,902 @@
+#include "KalmanUtilsMPlex.h"
+#include "PropagationMPlex.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+#include "KalmanUtilsMPlex.icc"
+
+#include "RecoTracker/MkFitCore/interface/cms_common_macros.h"
+
+namespace {
+  using namespace mkfit;
+  using idx_t = Matriplex::idx_t;
+
+  inline void MultResidualsAdd(const MPlexLH& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
+    // outPar = psPar + kalmanGain*(dPar)
+    //   D    =   B         A         C
+    // where the right half of the Kalman gain is 0.
+    // Thin wrapper: the arithmetic lives in MultResidualsAdd_imp, which is defined outside this block.
+    // XXX Regenerate with a script.
+    // NOTE(review): the trailing arguments 0 and NN presumably select the slot range [0, NN) -- confirm in the _imp.
+    MultResidualsAdd_imp(A, B, C, D, 0, NN);
+  }
+
+  inline void MultResidualsAdd(const MPlexL2& A, const MPlexLV& B, const MPlex2V& C, MPlexLV& D) {
+    // Parameter update with a two-column gain:
+    //   outPar (D) = psPar (B) + kalmanGain (A) * dPar (C)
+    // A is read as planes 2*i and 2*i+1 for output plane i = 0..5; C contributes planes 0 and 1.
+
+    // XXX Regenerate with a script.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* gain = A.fArray;
+    ASSUME_ALIGNED(gain, 64);
+    const T* par_in = B.fArray;
+    ASSUME_ALIGNED(par_in, 64);
+    const T* dpar = C.fArray;
+    ASSUME_ALIGNED(dpar, 64);
+    T* par_out = D.fArray;
+    ASSUME_ALIGNED(par_out, 64);
+
+#pragma omp simd
+    for (idx_t n = 0; n < N; ++n) {
+      // Written out by hand for the six output planes (small enough not to need the generator).
+      par_out[0 * N + n] = par_in[0 * N + n] + gain[0 * N + n] * dpar[0 * N + n] + gain[1 * N + n] * dpar[1 * N + n];
+      par_out[1 * N + n] = par_in[1 * N + n] + gain[2 * N + n] * dpar[0 * N + n] + gain[3 * N + n] * dpar[1 * N + n];
+      par_out[2 * N + n] = par_in[2 * N + n] + gain[4 * N + n] * dpar[0 * N + n] + gain[5 * N + n] * dpar[1 * N + n];
+      par_out[3 * N + n] = par_in[3 * N + n] + gain[6 * N + n] * dpar[0 * N + n] + gain[7 * N + n] * dpar[1 * N + n];
+      par_out[4 * N + n] = par_in[4 * N + n] + gain[8 * N + n] * dpar[0 * N + n] + gain[9 * N + n] * dpar[1 * N + n];
+      par_out[5 * N + n] = par_in[5 * N + n] + gain[10 * N + n] * dpar[0 * N + n] + gain[11 * N + n] * dpar[1 * N + n];
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  inline void Chi2Similarity(const MPlex2V& A,  //resPar
+                             const MPlex2S& C,  //resErr
+                             MPlexQF& D)        //outChi2
+  {
+    // Quadratic form of the two-component residual with the three packed planes of resErr:
+    //   outChi2 (D) = resPar (A) * resErr (C) * resPar (A)
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* res = A.fArray;
+    ASSUME_ALIGNED(res, 64);
+    const T* err = C.fArray;
+    ASSUME_ALIGNED(err, 64);
+    T* chi2 = D.fArray;
+    ASSUME_ALIGNED(chi2, 64);
+
+#pragma omp simd
+    for (idx_t n = 0; n < N; ++n) {
+      // Two diagonal terms (err planes 0 and 2) plus twice the cross term (err plane 1).
+      const T r0 = res[0 * N + n], r1 = res[1 * N + n];
+      chi2[0 * N + n] = err[0 * N + n] * r0 * r0 + err[2 * N + n] * r1 * r1 + 2 * (err[1 * N + n] * r1 * r0);
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  inline void AddIntoUpperLeft3x3(const MPlexLS& A, const MPlexHS& B, MPlexHS& C) {
+    // Plane-wise C = A + B for planes 0..5; no other plane of A, B or C is accessed here.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* lhs = A.fArray;
+    ASSUME_ALIGNED(lhs, 64);
+    const T* rhs = B.fArray;
+    ASSUME_ALIGNED(rhs, 64);
+    T* sum = C.fArray;
+    ASSUME_ALIGNED(sum, 64);
+
+#pragma omp simd
+    for (idx_t n = 0; n < N; ++n) {
+      sum[0 * N + n] = lhs[0 * N + n] + rhs[0 * N + n];
+      sum[1 * N + n] = lhs[1 * N + n] + rhs[1 * N + n];
+      sum[2 * N + n] = lhs[2 * N + n] + rhs[2 * N + n];
+      sum[3 * N + n] = lhs[3 * N + n] + rhs[3 * N + n];
+      sum[4 * N + n] = lhs[4 * N + n] + rhs[4 * N + n];
+      sum[5 * N + n] = lhs[5 * N + n] + rhs[5 * N + n];
+    }
+  }
+
+  inline void AddIntoUpperLeft2x2(const MPlexLS& A, const MPlexHS& B, MPlex2S& C) {
+    // Plane-wise C = A + B for planes 0..2 only; higher planes of A and B are not read.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* lhs = A.fArray;
+    ASSUME_ALIGNED(lhs, 64);
+    const T* rhs = B.fArray;
+    ASSUME_ALIGNED(rhs, 64);
+    T* sum = C.fArray;
+    ASSUME_ALIGNED(sum, 64);
+
+#pragma omp simd
+    for (idx_t n = 0; n < N; ++n) {
+      sum[0 * N + n] = lhs[0 * N + n] + rhs[0 * N + n];
+      sum[1 * N + n] = lhs[1 * N + n] + rhs[1 * N + n];
+      sum[2 * N + n] = lhs[2 * N + n] + rhs[2 * N + n];
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  inline void SubtractFirst3(const MPlexHV& A, const MPlexLV& B, MPlexHV& C) {
+    // Plane-wise C = A - B for planes 0..2; planes of B beyond 2 are not read.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* minuend = A.fArray;
+    ASSUME_ALIGNED(minuend, 64);
+    const T* subtrahend = B.fArray;
+    ASSUME_ALIGNED(subtrahend, 64);
+    T* diff = C.fArray;
+    ASSUME_ALIGNED(diff, 64);
+
+#pragma omp simd
+    for (idx_t n = 0; n < N; ++n) {
+      diff[0 * N + n] = minuend[0 * N + n] - subtrahend[0 * N + n];
+      diff[1 * N + n] = minuend[1 * N + n] - subtrahend[1 * N + n];
+      diff[2 * N + n] = minuend[2 * N + n] - subtrahend[2 * N + n];
+    }
+  }
+
+  inline void SubtractFirst2(const MPlexHV& A, const MPlexLV& B, MPlex2V& C) {
+    // Plane-wise C = A - B for planes 0 and 1; higher planes of A and B are not read.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* minuend = A.fArray;
+    ASSUME_ALIGNED(minuend, 64);
+    const T* subtrahend = B.fArray;
+    ASSUME_ALIGNED(subtrahend, 64);
+    T* diff = C.fArray;
+    ASSUME_ALIGNED(diff, 64);
+
+#pragma omp simd
+    for (idx_t n = 0; n < N; ++n) {
+      diff[0 * N + n] = minuend[0 * N + n] - subtrahend[0 * N + n];
+      diff[1 * N + n] = minuend[1 * N + n] - subtrahend[1 * N + n];
+    }
+  }
+
+  //==============================================================================
+
+  inline void ProjectResErr(const MPlexQF& A00, const MPlexQF& A01, const MPlexHS& B, MPlexHH& C) {
+    // C = A * B: B is read as 6 packed planes (3x3 sym), C is written as 9 planes (full 3x3).
+    // Reading B with plane order 00,10,11,20,21,22, the rows of A are (A00, A01, 0), (0, 0, 1), (A01, -A00, 0).
+
+    // Based on script generation and adapted to custom sizes.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* r00 = A00.fArray;
+    ASSUME_ALIGNED(r00, 64);
+    const T* r01 = A01.fArray;
+    ASSUME_ALIGNED(r01, 64);
+    const T* sym = B.fArray;
+    ASSUME_ALIGNED(sym, 64);
+    T* out = C.fArray;
+    ASSUME_ALIGNED(out, 64);
+
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      out[0 * N + n] = r00[n] * sym[0 * N + n] + r01[n] * sym[1 * N + n];
+      out[1 * N + n] = r00[n] * sym[1 * N + n] + r01[n] * sym[2 * N + n];
+      out[2 * N + n] = r00[n] * sym[3 * N + n] + r01[n] * sym[4 * N + n];
+      out[3 * N + n] = sym[3 * N + n];
+      out[4 * N + n] = sym[4 * N + n];
+      out[5 * N + n] = sym[5 * N + n];
+      out[6 * N + n] = r01[n] * sym[0 * N + n] - r00[n] * sym[1 * N + n];
+      out[7 * N + n] = r01[n] * sym[1 * N + n] - r00[n] * sym[2 * N + n];
+      out[8 * N + n] = r01[n] * sym[3 * N + n] - r00[n] * sym[4 * N + n];
+    }
+  }
+
+  inline void ProjectResErrTransp(const MPlexQF& A00, const MPlexQF& A01, const MPlexHH& B, MPlex2S& C) {
+    // Writes only the three planes of the MPlex2S output C from the 3x3 input B (planes 0, 1, 3, 4, 5 are read).
+
+    // Based on script generation and adapted to custom sizes.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* r00 = A00.fArray;
+    ASSUME_ALIGNED(r00, 64);
+    const T* r01 = A01.fArray;
+    ASSUME_ALIGNED(r01, 64);
+    const T* full = B.fArray;
+    ASSUME_ALIGNED(full, 64);
+    T* out = C.fArray;
+    ASSUME_ALIGNED(out, 64);
+
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      out[0 * N + n] = full[0 * N + n] * r00[n] + full[1 * N + n] * r01[n];
+      out[1 * N + n] = full[3 * N + n] * r00[n] + full[4 * N + n] * r01[n];
+      out[2 * N + n] = full[5 * N + n];
+    }
+  }
+
+  inline void RotateResidualsOnTangentPlane(const MPlexQF& R00,  // r00 (input)
+                                            const MPlexQF& R01,  // r01 (input)
+                                            const MPlexHV& A,    // res_glo (input)
+                                            MPlex2V& B)          // res_loc (output)
+  {
+    RotateResidualsOnTangentPlane_impl(R00, R01, A, B, 0, NN);  // forwards as-is; 0 and NN presumably bound the slot range -- confirm
+  }
+
+  inline void KalmanHTG(const MPlexQF& A00, const MPlexQF& A01, const MPlex2S& B, MPlexHH& C) {
+    // HTG (C) = rot (A) * res_loc (B)
+    // Planes 2, 5 and 8 of C are set to zero; planes 6 and 7 are copies of B planes 1 and 2.
+
+    // Based on script generation and adapted to custom sizes.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* r00 = A00.fArray;
+    ASSUME_ALIGNED(r00, 64);
+    const T* r01 = A01.fArray;
+    ASSUME_ALIGNED(r01, 64);
+    const T* loc = B.fArray;
+    ASSUME_ALIGNED(loc, 64);
+    T* htg = C.fArray;
+    ASSUME_ALIGNED(htg, 64);
+
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      htg[0 * N + n] = r00[n] * loc[0 * N + n];
+      htg[1 * N + n] = r00[n] * loc[1 * N + n];
+      htg[2 * N + n] = 0.;
+      htg[3 * N + n] = r01[n] * loc[0 * N + n];
+      htg[4 * N + n] = r01[n] * loc[1 * N + n];
+      htg[5 * N + n] = 0.;
+      htg[6 * N + n] = loc[1 * N + n];
+      htg[7 * N + n] = loc[2 * N + n];
+      htg[8 * N + n] = 0.;
+    }
+  }
+
+  inline void KalmanGain(const MPlexLS& A, const MPlexHH& B, MPlexLH& C) {
+    // C = A * B with C 6x3, A 6x6 sym (packed) and B 3x3; every third plane of C (2, 5, ..., 17) is set to 0.
+
+    using T = float;
+    const idx_t N = NN;
+
+    const T* sym = A.fArray;
+    ASSUME_ALIGNED(sym, 64);
+    const T* rhs = B.fArray;
+    ASSUME_ALIGNED(rhs, 64);
+    T* out = C.fArray;
+    ASSUME_ALIGNED(out, 64);
+
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      out[0 * N + n] = sym[0 * N + n] * rhs[0 * N + n] + sym[1 * N + n] * rhs[3 * N + n] + sym[3 * N + n] * rhs[6 * N + n];
+      out[1 * N + n] = sym[0 * N + n] * rhs[1 * N + n] + sym[1 * N + n] * rhs[4 * N + n] + sym[3 * N + n] * rhs[7 * N + n];
+      out[2 * N + n] = 0;
+      out[3 * N + n] = sym[1 * N + n] * rhs[0 * N + n] + sym[2 * N + n] * rhs[3 * N + n] + sym[4 * N + n] * rhs[6 * N + n];
+      out[4 * N + n] = sym[1 * N + n] * rhs[1 * N + n] + sym[2 * N + n] * rhs[4 * N + n] + sym[4 * N + n] * rhs[7 * N + n];
+      out[5 * N + n] = 0;
+      out[6 * N + n] = sym[3 * N + n] * rhs[0 * N + n] + sym[4 * N + n] * rhs[3 * N + n] + sym[5 * N + n] * rhs[6 * N + n];
+      out[7 * N + n] = sym[3 * N + n] * rhs[1 * N + n] + sym[4 * N + n] * rhs[4 * N + n] + sym[5 * N + n] * rhs[7 * N + n];
+      out[8 * N + n] = 0;
+      out[9 * N + n] = sym[6 * N + n] * rhs[0 * N + n] + sym[7 * N + n] * rhs[3 * N + n] + sym[8 * N + n] * rhs[6 * N + n];
+      out[10 * N + n] = sym[6 * N + n] * rhs[1 * N + n] + sym[7 * N + n] * rhs[4 * N + n] + sym[8 * N + n] * rhs[7 * N + n];
+      out[11 * N + n] = 0;
+      out[12 * N + n] = sym[10 * N + n] * rhs[0 * N + n] + sym[11 * N + n] * rhs[3 * N + n] + sym[12 * N + n] * rhs[6 * N + n];
+      out[13 * N + n] = sym[10 * N + n] * rhs[1 * N + n] + sym[11 * N + n] * rhs[4 * N + n] + sym[12 * N + n] * rhs[7 * N + n];
+      out[14 * N + n] = 0;
+      out[15 * N + n] = sym[15 * N + n] * rhs[0 * N + n] + sym[16 * N + n] * rhs[3 * N + n] + sym[17 * N + n] * rhs[6 * N + n];
+      out[16 * N + n] = sym[15 * N + n] * rhs[1 * N + n] + sym[16 * N + n] * rhs[4 * N + n] + sym[17 * N + n] * rhs[7 * N + n];
+      out[17 * N + n] = 0;
+    }
+  }
+
+  void KalmanGain(const MPlexLS& A, const MPlex2S& B, MPlexL2& C) {
+    // C = A * B, C is 6x2, A is 6x6 sym , B is 2x2 sym (KalmanGain62.ah reads b planes 0..2 only)
+    // The loop body comes from KalmanGain62.ah, which refers to T, N, a, b and c by exactly these names.
+    typedef float T;
+    const idx_t N = NN;
+    // Do not rename the locals below without regenerating the included fragment.
+    const T* a = A.fArray;
+    ASSUME_ALIGNED(a, 64);
+    const T* b = B.fArray;
+    ASSUME_ALIGNED(b, 64);
+    T* c = C.fArray;
+    ASSUME_ALIGNED(c, 64);
+
+#include "KalmanGain62.ah"
+  }
+
+  inline void KHMult(const MPlexLH& A, const MPlexQF& B00, const MPlexQF& B01, MPlexLL& C) {
+    // C = A * B, C is 6x6, A is 6x3 , B is 3x6; B is supplied only through the two MPlexQF arguments B00 and B01.
+    KHMult_imp(A, B00, B01, C, 0, NN);  // forwards as-is; 0 and NN presumably bound the slot range -- confirm
+  }
+
+  inline void KHC(const MPlexLL& A, const MPlexLS& B, MPlexLS& C) {
+    // C = A * B, C is 6x6 sym (KHC.ah writes its 21 packed planes), A is 6x6 , B is 6x6 sym
+    // KHC.ah reads A only at planes 6*i + {0, 1, 2}; it refers to T, N, a, b and c by exactly these names.
+    typedef float T;
+    const idx_t N = NN;
+    // Do not rename the locals below without regenerating the included fragment.
+    const T* a = A.fArray;
+    ASSUME_ALIGNED(a, 64);
+    const T* b = B.fArray;
+    ASSUME_ALIGNED(b, 64);
+    T* c = C.fArray;
+    ASSUME_ALIGNED(c, 64);
+
+#include "KHC.ah"
+  }
+
+  inline void KHC(const MPlexL2& A, const MPlexLS& B, MPlexLS& C) {
+    // C = A * B, C is 6x6 sym, A is 6x2 , B is 6x6 sym
+    // The loop body comes from K62HC.ah, which refers to T, N, a, b and c by exactly these names.
+    typedef float T;
+    const idx_t N = NN;
+    // Do not rename the locals below without regenerating the included fragment.
+    const T* a = A.fArray;
+    ASSUME_ALIGNED(a, 64);
+    const T* b = B.fArray;
+    ASSUME_ALIGNED(b, 64);
+    T* c = C.fArray;
+    ASSUME_ALIGNED(c, 64);
+
+#include "K62HC.ah"
+  }
+
+  // Warning: MultFull is a plain scalar triple loop over all NN slots -- it is not vectorized!
+  template <typename T1, typename T2, typename T3>
+  void MultFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
+#ifdef DEBUG
+    assert(nja == nib);
+    assert(nia == nic);
+    assert(njb == njc);
+#endif
+    for (int slot = 0; slot < NN; ++slot) {
+      for (int row = 0; row < nia; ++row) {
+        for (int col = 0; col < njb; ++col) {
+          C(slot, row, col) = 0.;
+          for (int inner = 0; inner < nja; ++inner)
+            C(slot, row, col) += A.constAt(slot, row, inner) * B.constAt(slot, inner, col);
+        }
+      }
+    }
+  }
+
+  // Warning: MultTranspFull is a plain scalar triple loop over all NN slots -- it is not vectorized!
+  // B is the operand indexed transposed: C(i, j) = sum_k A(i, k) * B(j, k), i.e. C = A * B^T.
+  template <typename T1, typename T2, typename T3>
+  void MultTranspFull(const T1& A, int nia, int nja, const T2& B, int nib, int njb, T3& C, int nic, int njc) {
+#ifdef DEBUG
+    assert(nja == njb);
+    assert(nia == nic);
+    assert(nib == njc);
+#endif
+    for (int slot = 0; slot < NN; ++slot) {
+      for (int row = 0; row < nia; ++row) {
+        for (int col = 0; col < nib; ++col) {
+          C(slot, row, col) = 0.;
+          for (int inner = 0; inner < nja; ++inner)
+            C(slot, row, col) += A.constAt(slot, row, inner) * B.constAt(slot, col, inner);
+        }
+      }
+    }
+  }
+
+}  // namespace
+
+//==============================================================================
+// Kalman operations - common dummy variables
+//==============================================================================
+
+namespace {
+  // Placeholder arguments that keep the kalmanOperation() / kalmanOperationEndcap() signatures uniform.
+  // Each wrapper passes them only for outputs that are touched inside a branch its KalmanFilterOperation
+  // flag does not enable, so they are never accessed in the code (read from or written into).
+
+  CMS_SA_ALLOW mkfit::MPlexLS dummy_err;   // stands in for outErr in chi2-only calls
+  CMS_SA_ALLOW mkfit::MPlexLV dummy_par;   // stands in for outPar in chi2-only calls
+  CMS_SA_ALLOW mkfit::MPlexQF dummy_chi2;  // stands in for outChi2 in update-only calls
+}  // namespace
+
+namespace mkfit {
+
+  //==============================================================================
+  // Kalman operations - Barrel
+  //==============================================================================
+
+  void kalmanUpdate(const MPlexLS& psErr,
+                    const MPlexLV& psPar,
+                    const MPlexHS& msErr,
+                    const MPlexHV& msPar,
+                    MPlexLS& outErr,
+                    MPlexLV& outPar,
+                    const int N_proc) {  // update-only wrapper; dummy_chi2 fills the unused outChi2 slot
+    kalmanOperation(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
+  }
+
+  void kalmanPropagateAndUpdate(const MPlexLS& psErr,
+                                const MPlexLV& psPar,
+                                MPlexQI& Chg,
+                                const MPlexHS& msErr,
+                                const MPlexHV& msPar,
+                                MPlexLS& outErr,
+                                MPlexLV& outPar,
+                                const int N_proc,
+                                const PropagationFlags propFlags,
+                                const bool propToHit) {
+    if (!propToHit) {
+      kalmanOperation(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
+    } else {
+      // Target radius per slot: hypot of the first two measurement coordinates.
+      MPlexQF hitR;
+#pragma omp simd
+      for (int slot = 0; slot < NN; ++slot) {
+        hitR.At(slot, 0, 0) = std::hypot(msPar.constAt(slot, 0, 0), msPar.constAt(slot, 1, 0));
+      }
+      MPlexLS errAtHit;
+      MPlexLV parAtHit;
+      propagateHelixToRMPlex(psErr, psPar, Chg, hitR, errAtHit, parAtHit, N_proc, propFlags);
+      kalmanOperation(KFO_Update_Params, errAtHit, parAtHit, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
+    }
+    // A negative parameter 3 is made positive and the charge sign is flipped along with it.
+    for (int slot = 0; slot < NN; ++slot) {
+      if (outPar.At(slot, 3, 0) < 0) {
+        outPar.At(slot, 3, 0) = -outPar.At(slot, 3, 0);
+        Chg.At(slot, 0, 0) = -Chg.At(slot, 0, 0);
+      }
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  void kalmanComputeChi2(const MPlexLS& psErr,
+                         const MPlexLV& psPar,
+                         const MPlexQI& inChg,  // not referenced in the body
+                         const MPlexHS& msErr,
+                         const MPlexHV& msPar,
+                         MPlexQF& outChi2,
+                         const int N_proc) {  // chi2-only wrapper; dummy_err / dummy_par fill the unused outputs
+    kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
+  }
+
+  void kalmanPropagateAndComputeChi2(const MPlexLS& psErr,
+                                     const MPlexLV& psPar,
+                                     const MPlexQI& inChg,
+                                     const MPlexHS& msErr,
+                                     const MPlexHV& msPar,
+                                     MPlexQF& outChi2,
+                                     MPlexLV& propPar,
+                                     const int N_proc,
+                                     const PropagationFlags propFlags,
+                                     const bool propToHit) {
+    propPar = psPar;  // default output: a copy of the input parameters
+    if (!propToHit) {
+      kalmanOperation(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
+      return;
+    }
+
+    // Propagate to the hit radius first; propPar is handed to the propagator as its parameter output.
+    MPlexQF hitR;
+#pragma omp simd
+    for (int slot = 0; slot < NN; ++slot) {
+      hitR.At(slot, 0, 0) = std::hypot(msPar.constAt(slot, 0, 0), msPar.constAt(slot, 1, 0));
+    }
+    MPlexLS errAtHit;
+    propagateHelixToRMPlex(psErr, psPar, inChg, hitR, errAtHit, propPar, N_proc, propFlags);
+    kalmanOperation(KFO_Calculate_Chi2, errAtHit, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
+  }
+
+  //------------------------------------------------------------------------------
+
+  void kalmanOperation(const int kfOp,  // bit mask of KalmanFilterOperation flags, tested with '&' below
+                       const MPlexLS& psErr,
+                       const MPlexLV& psPar,
+                       const MPlexHS& msErr,
+                       const MPlexHV& msPar,
+                       MPlexLS& outErr,
+                       MPlexLV& outPar,
+                       MPlexQF& outChi2,
+                       const int N_proc) {  // only forwarded to squashPhiMPlex(); the loop below spans all NN slots
+#ifdef DEBUG
+    {
+      dmutex_guard;
+      printf("psPar:\n");
+      for (int i = 0; i < 6; ++i) {
+        printf("%8f ", psPar.constAt(0, 0, i));  // NOTE(review): indexed (0, 0, i); outPar below uses (0, i, 0)
+        printf("\n");
+      }
+      printf("\n");
+      printf("psErr:\n");
+      for (int i = 0; i < 6; ++i) {
+        for (int j = 0; j < 6; ++j)
+          printf("%8f ", psErr.constAt(0, i, j));
+        printf("\n");
+      }
+      printf("\n");
+      printf("msPar:\n");
+      for (int i = 0; i < 3; ++i) {
+        printf("%8f ", msPar.constAt(0, 0, i));
+        printf("\n");
+      }
+      printf("\n");
+      printf("msErr:\n");
+      for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 3; ++j)
+          printf("%8f ", msErr.constAt(0, i, j));
+        printf("\n");
+      }
+      printf("\n");
+    }
+#endif
+
+    // Rotate global point on tangent plane to cylinder
+    // Tangent point is half way between hit and propagate position
+
+    // Rotation matrix
+    //  rotT00  0  rotT01
+    //  rotT01  0 -rotT00
+    //     0    1    0
+    // Minimize temporaries: only two floats are needed!
+
+    MPlexQF rotT00;
+    MPlexQF rotT01;
+    for (int n = 0; n < NN; ++n) {
+      const float r = std::hypot(msPar.constAt(n, 0, 0), msPar.constAt(n, 1, 0));  // no guard against r == 0
+      rotT00.At(n, 0, 0) = -(msPar.constAt(n, 1, 0) + psPar.constAt(n, 1, 0)) / (2 * r);
+      rotT01.At(n, 0, 0) = (msPar.constAt(n, 0, 0) + psPar.constAt(n, 0, 0)) / (2 * r);
+    }
+
+    MPlexHV res_glo;  //position residual in global coordinates
+    SubtractFirst3(msPar, psPar, res_glo);
+
+    MPlexHS resErr_glo;  //covariance sum in global position coordinates
+    AddIntoUpperLeft3x3(psErr, msErr, resErr_glo);
+
+    MPlex2V res_loc;  //position residual in local coordinates
+    RotateResidualsOnTangentPlane(rotT00, rotT01, res_glo, res_loc);
+    MPlex2S resErr_loc;  //covariance sum in local position coordinates
+    MPlexHH tempHH;
+    ProjectResErr(rotT00, rotT01, resErr_glo, tempHH);
+    ProjectResErrTransp(rotT00, rotT01, tempHH, resErr_loc);
+
+#ifdef DEBUG
+    {
+      dmutex_guard;
+      printf("resErr_loc:\n");
+      for (int i = 0; i < 2; ++i) {
+        for (int j = 0; j < 2; ++j)
+          printf("%8f ", resErr_loc.At(0, i, j));
+        printf("\n");
+      }
+      printf("\n");
+    }
+#endif
+
+    // invert the 2x2 matrix in place: resErr_loc holds its inverse from here on
+    Matriplex::invertCramerSym(resErr_loc);
+
+    if (kfOp & KFO_Calculate_Chi2) {
+      Chi2Similarity(res_loc, resErr_loc, outChi2);
+
+#ifdef DEBUG
+      {
+        dmutex_guard;
+        printf("resErr_loc (Inv):\n");
+        for (int i = 0; i < 2; ++i) {
+          for (int j = 0; j < 2; ++j)
+            printf("%8f ", resErr_loc.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
+      }
+#endif
+    }
+
+    if (kfOp & KFO_Update_Params) {
+      MPlexLH K;                                      // kalman gain, fixme should be L2
+      KalmanHTG(rotT00, rotT01, resErr_loc, tempHH);  // intermediate term to get kalman gain (H^T*G)
+      KalmanGain(psErr, tempHH, K);
+
+      MultResidualsAdd(K, psPar, res_loc, outPar);
+      MPlexLL tempLL;  // filled by KHMult() below, then consumed by KHC()
+
+      squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
+
+      KHMult(K, rotT00, rotT01, tempLL);
+      KHC(tempLL, psErr, outErr);
+      outErr.subtract(psErr, outErr);  // presumably outErr = psErr - outErr; confirm in Matriplex
+
+#ifdef DEBUG
+      {
+        dmutex_guard;
+        printf("res_glo:\n");
+        for (int i = 0; i < 3; ++i) {
+          printf("%8f ", res_glo.At(0, i, 0));
+        }
+        printf("\n");
+        printf("res_loc:\n");
+        for (int i = 0; i < 2; ++i) {
+          printf("%8f ", res_loc.At(0, i, 0));
+        }
+        printf("\n");
+        printf("resErr_loc (Inv):\n");
+        for (int i = 0; i < 2; ++i) {
+          for (int j = 0; j < 2; ++j)
+            printf("%8f ", resErr_loc.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+        printf("K:\n");
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 3; ++j)
+            printf("%8f ", K.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+        printf("outPar:\n");
+        for (int i = 0; i < 6; ++i) {
+          printf("%8f  ", outPar.At(0, i, 0));
+        }
+        printf("\n");
+        printf("outErr:\n");
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            printf("%8f ", outErr.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+      }
+#endif
+    }
+  }
+
+  //==============================================================================
+  // Kalman operations - Endcap
+  //==============================================================================
+
+  void kalmanUpdateEndcap(const MPlexLS& psErr,
+                          const MPlexLV& psPar,
+                          const MPlexHS& msErr,
+                          const MPlexHV& msPar,
+                          MPlexLS& outErr,
+                          MPlexLV& outPar,
+                          const int N_proc) {  // update-only wrapper; dummy_chi2 fills the unused outChi2 slot
+    kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
+  }
+
+  void kalmanPropagateAndUpdateEndcap(const MPlexLS& psErr,
+                                      const MPlexLV& psPar,
+                                      MPlexQI& Chg,
+                                      const MPlexHS& msErr,
+                                      const MPlexHV& msPar,
+                                      MPlexLS& outErr,
+                                      MPlexLV& outPar,
+                                      const int N_proc,
+                                      const PropagationFlags propFlags,
+                                      const bool propToHit) {
+    if (!propToHit) {
+      kalmanOperationEndcap(KFO_Update_Params, psErr, psPar, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
+    } else {
+      // Target z per slot: the third measurement coordinate.
+      MPlexQF hitZ;
+#pragma omp simd
+      for (int slot = 0; slot < NN; ++slot) {
+        hitZ.At(slot, 0, 0) = msPar.constAt(slot, 2, 0);
+      }
+      MPlexLS errAtHit;
+      MPlexLV parAtHit;
+      propagateHelixToZMPlex(psErr, psPar, Chg, hitZ, errAtHit, parAtHit, N_proc, propFlags);
+      kalmanOperationEndcap(KFO_Update_Params, errAtHit, parAtHit, msErr, msPar, outErr, outPar, dummy_chi2, N_proc);
+    }
+    // A negative parameter 3 is made positive and the charge sign is flipped along with it.
+    for (int slot = 0; slot < NN; ++slot) {
+      if (outPar.At(slot, 3, 0) < 0) {
+        outPar.At(slot, 3, 0) = -outPar.At(slot, 3, 0);
+        Chg.At(slot, 0, 0) = -Chg.At(slot, 0, 0);
+      }
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  void kalmanComputeChi2Endcap(const MPlexLS& psErr,
+                               const MPlexLV& psPar,
+                               const MPlexQI& inChg,  // not referenced in the body
+                               const MPlexHS& msErr,
+                               const MPlexHV& msPar,
+                               MPlexQF& outChi2,
+                               const int N_proc) {  // chi2-only wrapper; dummy_err / dummy_par fill unused outputs
+    kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
+  }
+
+  void kalmanPropagateAndComputeChi2Endcap(const MPlexLS& psErr,
+                                           const MPlexLV& psPar,
+                                           const MPlexQI& inChg,
+                                           const MPlexHS& msErr,
+                                           const MPlexHV& msPar,
+                                           MPlexQF& outChi2,
+                                           MPlexLV& propPar,
+                                           const int N_proc,
+                                           const PropagationFlags propFlags,
+                                           const bool propToHit) {
+    propPar = psPar;  // default output: a copy of the input parameters
+    if (!propToHit) {
+      kalmanOperationEndcap(KFO_Calculate_Chi2, psErr, psPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
+      return;
+    }
+
+    // Propagate to the hit z first; propPar is handed to the propagator as its parameter output.
+    MPlexQF hitZ;
+#pragma omp simd
+    for (int slot = 0; slot < NN; ++slot) {
+      hitZ.At(slot, 0, 0) = msPar.constAt(slot, 2, 0);
+    }
+    MPlexLS errAtHit;
+    propagateHelixToZMPlex(psErr, psPar, inChg, hitZ, errAtHit, propPar, N_proc, propFlags);
+    kalmanOperationEndcap(KFO_Calculate_Chi2, errAtHit, propPar, msErr, msPar, dummy_err, dummy_par, outChi2, N_proc);
+  }
+
+  //------------------------------------------------------------------------------
+
+  void kalmanOperationEndcap(const int kfOp,  // bit mask of KalmanFilterOperation flags, tested with '&' below
+                             const MPlexLS& psErr,
+                             const MPlexLV& psPar,
+                             const MPlexHS& msErr,
+                             const MPlexHV& msPar,
+                             MPlexLS& outErr,
+                             MPlexLV& outPar,
+                             MPlexQF& outChi2,
+                             const int N_proc) {  // in this body only forwarded to squashPhiMPlex()
+#ifdef DEBUG
+    {
+      dmutex_guard;
+      printf("updateParametersEndcapMPlex\n");  // NOTE(review): printed name differs from this function's name
+      printf("psPar:\n");
+      for (int i = 0; i < 6; ++i) {
+        printf("%8f ", psPar.constAt(0, 0, i));  // NOTE(review): indexed (0, 0, i); outPar below uses (0, i, 0)
+        printf("\n");
+      }
+      printf("\n");
+      printf("msPar:\n");
+      for (int i = 0; i < 3; ++i) {
+        printf("%8f ", msPar.constAt(0, 0, i));
+        printf("\n");
+      }
+      printf("\n");
+      printf("psErr:\n");
+      for (int i = 0; i < 6; ++i) {
+        for (int j = 0; j < 6; ++j)
+          printf("%8f ", psErr.constAt(0, i, j));
+        printf("\n");
+      }
+      printf("\n");
+      printf("msErr:\n");
+      for (int i = 0; i < 3; ++i) {
+        for (int j = 0; j < 3; ++j)
+          printf("%8f ", msErr.constAt(0, i, j));
+        printf("\n");
+      }
+      printf("\n");
+    }
+#endif
+
+    MPlex2V res;  // two-component residual filled by SubtractFirst2(msPar, psPar, ...)
+    SubtractFirst2(msPar, psPar, res);
+
+    MPlex2S resErr;  // 2x2 symmetric matrix filled by AddIntoUpperLeft2x2(psErr, msErr, ...)
+    AddIntoUpperLeft2x2(psErr, msErr, resErr);
+
+#ifdef DEBUG
+    {
+      dmutex_guard;
+      printf("resErr:\n");
+      for (int i = 0; i < 2; ++i) {
+        for (int j = 0; j < 2; ++j)
+          printf("%8f ", resErr.At(0, i, j));
+        printf("\n");
+      }
+      printf("\n");
+    }
+#endif
+
+    // invert the 2x2 matrix in place: resErr holds its inverse from here on
+    Matriplex::invertCramerSym(resErr);
+
+    if (kfOp & KFO_Calculate_Chi2) {
+      Chi2Similarity(res, resErr, outChi2);
+
+#ifdef DEBUG
+      {
+        dmutex_guard;
+        printf("resErr_loc (Inv):\n");
+        for (int i = 0; i < 2; ++i) {
+          for (int j = 0; j < 2; ++j)
+            printf("%8f ", resErr.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+        printf("chi2: %8f\n", outChi2.At(0, 0, 0));
+      }
+#endif
+    }
+
+    if (kfOp & KFO_Update_Params) {
+      MPlexL2 K;  // gain matrix filled by KalmanGain() from psErr and the inverted resErr
+      KalmanGain(psErr, resErr, K);
+
+      MultResidualsAdd(K, psPar, res, outPar);
+
+      squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
+
+      KHC(K, psErr, outErr);
+
+#ifdef DEBUG
+      {
+        dmutex_guard;
+        printf("outErr before subtract:\n");
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            printf("%8f ", outErr.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+      }
+#endif
+
+      outErr.subtract(psErr, outErr);  // presumably outErr = psErr - outErr; confirm in Matriplex
+
+#ifdef DEBUG
+      {
+        dmutex_guard;
+        printf("res:\n");
+        for (int i = 0; i < 2; ++i) {
+          printf("%8f ", res.At(0, i, 0));
+        }
+        printf("\n");
+        printf("resErr (Inv):\n");
+        for (int i = 0; i < 2; ++i) {
+          for (int j = 0; j < 2; ++j)
+            printf("%8f ", resErr.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+        printf("K:\n");
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 2; ++j)
+            printf("%8f ", K.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+        printf("outPar:\n");
+        for (int i = 0; i < 6; ++i) {
+          printf("%8f  ", outPar.At(0, i, 0));
+        }
+        printf("\n");
+        printf("outErr:\n");
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            printf("%8f ", outErr.At(0, i, j));
+          printf("\n");
+        }
+        printf("\n");
+      }
+#endif
+    }
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h
new file mode 100644
index 0000000000000..c45d0c5a4c92b
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.h
@@ -0,0 +1,114 @@
+#ifndef RecoTracker_MkFitCore_src_KalmanUtilsMPlex_h
+#define RecoTracker_MkFitCore_src_KalmanUtilsMPlex_h
+
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "Matrix.h"
+
+namespace mkfit {
+
+  //------------------------------------------------------------------------------
+
+  enum KalmanFilterOperation { KFO_Calculate_Chi2 = 1, KFO_Update_Params = 2 };  // distinct bits, tested with '&'
+
+  //------------------------------------------------------------------------------
+
+  void kalmanUpdate(const MPlexLS& psErr,
+                    const MPlexLV& psPar,
+                    const MPlexHS& msErr,
+                    const MPlexHV& msPar,
+                    MPlexLS& outErr,
+                    MPlexLV& outPar,
+                    const int N_proc);  // barrel: runs kalmanOperation() with KFO_Update_Params
+
+  void kalmanPropagateAndUpdate(const MPlexLS& psErr,
+                                const MPlexLV& psPar,
+                                MPlexQI& Chg,  // in/out: negated wherever a negative outPar(3) is flipped
+                                const MPlexHS& msErr,
+                                const MPlexHV& msPar,
+                                MPlexLS& outErr,
+                                MPlexLV& outPar,
+                                const int N_proc,
+                                const PropagationFlags propFlags,
+                                const bool propToHit);  // true: propagate to the hit radius before updating
+
+  void kalmanComputeChi2(const MPlexLS& psErr,
+                         const MPlexLV& psPar,
+                         const MPlexQI& inChg,  // not used by the implementation
+                         const MPlexHS& msErr,
+                         const MPlexHV& msPar,
+                         MPlexQF& outChi2,
+                         const int N_proc);  // barrel: runs kalmanOperation() with KFO_Calculate_Chi2
+
+  void kalmanPropagateAndComputeChi2(const MPlexLS& psErr,
+                                     const MPlexLV& psPar,
+                                     const MPlexQI& inChg,
+                                     const MPlexHS& msErr,
+                                     const MPlexHV& msPar,
+                                     MPlexQF& outChi2,
+                                     MPlexLV& propPar,  // out: set to psPar, then handed to the propagator if propToHit
+                                     const int N_proc,
+                                     const PropagationFlags propFlags,
+                                     const bool propToHit);  // true: propagate to the hit radius first
+
+  void kalmanOperation(const int kfOp,  // bit mask of KalmanFilterOperation values
+                       const MPlexLS& psErr,
+                       const MPlexLV& psPar,
+                       const MPlexHS& msErr,
+                       const MPlexHV& msPar,
+                       MPlexLS& outErr,    // touched only when KFO_Update_Params is set
+                       MPlexLV& outPar,    // touched only when KFO_Update_Params is set
+                       MPlexQF& outChi2,   // touched only when KFO_Calculate_Chi2 is set
+                       const int N_proc);
+
+  //------------------------------------------------------------------------------
+
+  void kalmanUpdateEndcap(const MPlexLS& psErr,
+                          const MPlexLV& psPar,
+                          const MPlexHS& msErr,
+                          const MPlexHV& msPar,
+                          MPlexLS& outErr,
+                          MPlexLV& outPar,
+                          const int N_proc);  // endcap: runs kalmanOperationEndcap() with KFO_Update_Params
+
+  void kalmanPropagateAndUpdateEndcap(const MPlexLS& psErr,
+                                      const MPlexLV& psPar,
+                                      MPlexQI& Chg,  // in/out: negated wherever a negative outPar(3) is flipped
+                                      const MPlexHS& msErr,
+                                      const MPlexHV& msPar,
+                                      MPlexLS& outErr,
+                                      MPlexLV& outPar,
+                                      const int N_proc,
+                                      const PropagationFlags propFlags,
+                                      const bool propToHit);  // true: propagate to the hit z before updating
+
+  void kalmanComputeChi2Endcap(const MPlexLS& psErr,
+                               const MPlexLV& psPar,
+                               const MPlexQI& inChg,  // not used by the implementation
+                               const MPlexHS& msErr,
+                               const MPlexHV& msPar,
+                               MPlexQF& outChi2,
+                               const int N_proc);  // endcap: runs kalmanOperationEndcap() with KFO_Calculate_Chi2
+
+  void kalmanPropagateAndComputeChi2Endcap(const MPlexLS& psErr,
+                                           const MPlexLV& psPar,
+                                           const MPlexQI& inChg,
+                                           const MPlexHS& msErr,
+                                           const MPlexHV& msPar,
+                                           MPlexQF& outChi2,
+                                           MPlexLV& propPar,  // out: set to psPar, then propagated if propToHit
+                                           const int N_proc,
+                                           const PropagationFlags propFlags,
+                                           const bool propToHit);  // true: propagate to the hit z first
+
+  void kalmanOperationEndcap(const int kfOp,  // bit mask of KalmanFilterOperation values
+                             const MPlexLS& psErr,
+                             const MPlexLV& psPar,
+                             const MPlexHS& msErr,
+                             const MPlexHV& msPar,
+                             MPlexLS& outErr,    // touched only when KFO_Update_Params is set
+                             MPlexLV& outPar,    // touched only when KFO_Update_Params is set
+                             MPlexQF& outChi2,   // touched only when KFO_Calculate_Chi2 is set
+                             const int N_proc);
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.icc b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.icc
new file mode 100644
index 0000000000000..b6d5b73469a87
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/KalmanUtilsMPlex.icc
@@ -0,0 +1,205 @@
+///////////////////////////////////////////////////////////////////////////////
+/// KHMult_imp: row i of c = [a(i,0) * b00, a(i,0) * b01, a(i,1), 0, 0, 0], i = 0..5 (a: flat 3*i+k, c: flat 6*i+j)
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename TfLH, typename TfQF1, typename TfQF2, typename TfLL>
+static inline void KHMult_imp(
+    const TfLH& a, const TfQF1& b00, const TfQF2& b01, TfLL& c, const int nmin, const int nmax) {
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    c(n, 0, 0) = a(n, 0, 0) * b00(n, 0, 0);  // row 0; the third entry of each a row (flat 3*i+2) is never read
+    c(n, 0, 1) = a(n, 0, 0) * b01(n, 0, 0);
+    c(n, 0, 2) = a(n, 0, 1);
+    c(n, 0, 3) = 0;
+    c(n, 0, 4) = 0;
+    c(n, 0, 5) = 0;
+    c(n, 0, 6) = a(n, 0, 3) * b00(n, 0, 0);  // row 1
+    c(n, 0, 7) = a(n, 0, 3) * b01(n, 0, 0);
+    c(n, 0, 8) = a(n, 0, 4);
+    c(n, 0, 9) = 0;
+    c(n, 0, 10) = 0;
+    c(n, 0, 11) = 0;
+    c(n, 0, 12) = a(n, 0, 6) * b00(n, 0, 0);  // row 2
+    c(n, 0, 13) = a(n, 0, 6) * b01(n, 0, 0);
+    c(n, 0, 14) = a(n, 0, 7);
+    c(n, 0, 15) = 0;
+    c(n, 0, 16) = 0;
+    c(n, 0, 17) = 0;
+    c(n, 0, 18) = a(n, 0, 9) * b00(n, 0, 0);  // row 3
+    c(n, 0, 19) = a(n, 0, 9) * b01(n, 0, 0);
+    c(n, 0, 20) = a(n, 0, 10);
+    c(n, 0, 21) = 0;
+    c(n, 0, 22) = 0;
+    c(n, 0, 23) = 0;
+    c(n, 0, 24) = a(n, 0, 12) * b00(n, 0, 0);  // row 4
+    c(n, 0, 25) = a(n, 0, 12) * b01(n, 0, 0);
+    c(n, 0, 26) = a(n, 0, 13);
+    c(n, 0, 27) = 0;
+    c(n, 0, 28) = 0;
+    c(n, 0, 29) = 0;
+    c(n, 0, 30) = a(n, 0, 15) * b00(n, 0, 0);  // row 5
+    c(n, 0, 31) = a(n, 0, 15) * b01(n, 0, 0);
+    c(n, 0, 32) = a(n, 0, 16);
+    c(n, 0, 33) = 0;
+    c(n, 0, 34) = 0;
+    c(n, 0, 35) = 0;
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// ConvertToCCS_imp: b = (a0, a1, a2, 1/pt, getPhi(a3, a4), getTheta(pt, a5)), c = 6x6 matrix (flat index 6*i+j)
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename TfLV1, typename TfLV2, typename TfLL>
+static inline void ConvertToCCS_imp(const TfLV1& a, TfLV2& b, TfLL& c, const int nmin, const int nmax) {
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const float pt = getHypot(a(n, 0, 3), a(n, 0, 4));  // used as a divisor below; no guard against pt == 0
+    const float p2 = pt * pt + a(n, 0, 5) * a(n, 0, 5);
+    // converted parameters: the first three entries are copied through unchanged
+    b(n, 0, 0) = a(n, 0, 0);
+    b(n, 0, 1) = a(n, 0, 1);
+    b(n, 0, 2) = a(n, 0, 2);
+    b(n, 0, 3) = 1.0f / pt;
+    b(n, 0, 4) = getPhi(a(n, 0, 3), a(n, 0, 4));  //fixme: use trig approx
+    b(n, 0, 5) = getTheta(pt, a(n, 0, 5));
+    // c: rows 0-2 are identity; rows 3-5 look like d(b3..b5)/d(a3..a5) if getHypot/getPhi/getTheta are hypot/atan2 - confirm
+    c(n, 0, 0) = 1.;
+    c(n, 0, 1) = 0.;
+    c(n, 0, 2) = 0.;
+    c(n, 0, 3) = 0.;
+    c(n, 0, 4) = 0.;
+    c(n, 0, 5) = 0.;
+    c(n, 0, 6) = 0.;
+    c(n, 0, 7) = 1.;
+    c(n, 0, 8) = 0.;
+    c(n, 0, 9) = 0.;
+    c(n, 0, 10) = 0.;
+    c(n, 0, 11) = 0.;
+    c(n, 0, 12) = 0.;
+    c(n, 0, 13) = 0.;
+    c(n, 0, 14) = 1.;
+    c(n, 0, 15) = 0.;
+    c(n, 0, 16) = 0.;
+    c(n, 0, 17) = 0.;
+    c(n, 0, 18) = 0.;  // row 3 (for b3 = 1/pt)
+    c(n, 0, 19) = 0.;
+    c(n, 0, 20) = 0.;
+    c(n, 0, 21) = -a(n, 0, 3) / (pt * pt * pt);
+    c(n, 0, 22) = -a(n, 0, 4) / (pt * pt * pt);
+    c(n, 0, 23) = 0.;
+    c(n, 0, 24) = 0.;  // row 4 (for b4 = getPhi(a3, a4))
+    c(n, 0, 25) = 0.;
+    c(n, 0, 26) = 0.;
+    c(n, 0, 27) = -a(n, 0, 4) / (pt * pt);
+    c(n, 0, 28) = a(n, 0, 3) / (pt * pt);
+    c(n, 0, 29) = 0.;
+    c(n, 0, 30) = 0.;  // row 5 (for b5 = getTheta(pt, a5))
+    c(n, 0, 31) = 0.;
+    c(n, 0, 32) = 0.;
+    c(n, 0, 33) = a(n, 0, 3) * a(n, 0, 5) / (pt * p2);
+    c(n, 0, 34) = a(n, 0, 4) * a(n, 0, 5) / (pt * p2);
+    c(n, 0, 35) = -pt / p2;
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// ConvertToCartesian_imp: b = (a0, a1, a2, cos(a4)/a3, sin(a4)/a3, cos(a5)/(sin(a5)*a3)), c = d(b)/d(a) (flat 6*i+j)
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename TfLV1, typename TfLV2, typename TfLL>
+static inline void ConvertToCartesian_imp(const TfLV1& a, TfLV2& b, TfLL& c, const int nmin, const int nmax) {
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    const float cosP = std::cos(a(n, 0, 4));  //fixme: use trig approx
+    const float sinP = std::sin(a(n, 0, 4));
+    const float cosT = std::cos(a(n, 0, 5));
+    const float sinT = std::sin(a(n, 0, 5));
+    // converted parameters; a3 and sinT are used as divisors without a guard against zero
+    b(n, 0, 0) = a(n, 0, 0);
+    b(n, 0, 1) = a(n, 0, 1);
+    b(n, 0, 2) = a(n, 0, 2);
+    b(n, 0, 3) = cosP / a(n, 0, 3);
+    b(n, 0, 4) = sinP / a(n, 0, 3);
+    b(n, 0, 5) = cosT / (sinT * a(n, 0, 3));
+    // c: rows 0-2 are identity; rows 3-5 hold the partial derivatives of b3..b5 with respect to a3..a5
+    c(n, 0, 0) = 1.;
+    c(n, 0, 1) = 0.;
+    c(n, 0, 2) = 0.;
+    c(n, 0, 3) = 0.;
+    c(n, 0, 4) = 0.;
+    c(n, 0, 5) = 0.;
+    c(n, 0, 6) = 0.;
+    c(n, 0, 7) = 1.;
+    c(n, 0, 8) = 0.;
+    c(n, 0, 9) = 0.;
+    c(n, 0, 10) = 0.;
+    c(n, 0, 11) = 0.;
+    c(n, 0, 12) = 0.;
+    c(n, 0, 13) = 0.;
+    c(n, 0, 14) = 1.;
+    c(n, 0, 15) = 0.;
+    c(n, 0, 16) = 0.;
+    c(n, 0, 17) = 0.;
+    c(n, 0, 18) = 0.;  // row 3: derivatives of b3 = cos(a4) / a3
+    c(n, 0, 19) = 0.;
+    c(n, 0, 20) = 0.;
+    c(n, 0, 21) = -cosP / (a(n, 0, 3) * a(n, 0, 3));
+    c(n, 0, 22) = -sinP / a(n, 0, 3);
+    c(n, 0, 23) = 0.;
+    c(n, 0, 24) = 0.;  // row 4: derivatives of b4 = sin(a4) / a3
+    c(n, 0, 25) = 0.;
+    c(n, 0, 26) = 0.;
+    c(n, 0, 27) = -sinP / (a(n, 0, 3) * a(n, 0, 3));
+    c(n, 0, 28) = cosP / a(n, 0, 3);
+    c(n, 0, 29) = 0.;
+    c(n, 0, 30) = 0.;  // row 5: derivatives of b5 = cos(a5) / (sin(a5) * a3)
+    c(n, 0, 31) = 0.;
+    c(n, 0, 32) = 0.;
+    c(n, 0, 33) = -cosT / (sinT * a(n, 0, 3) * a(n, 0, 3));
+    c(n, 0, 34) = 0.;
+    c(n, 0, 35) = -1.0f / (sinT * sinT * a(n, 0, 3));
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// MultResidualsAdd_imp: d(i) = b(i) + a(i,0) * c(0) + a(i,1) * c(1) for i = 0..5 (a read at flat index 3*i+k)
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename TfLH, typename TfLV1, typename Tf2V, typename TfLV2>
+static inline void MultResidualsAdd_imp(
+    const TfLH& a, const TfLV1& b, const Tf2V& c, TfLV2& d, const int nmin, const int nmax) {
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    d(n, 0, 0) = b(n, 0, 0) + a(n, 0, 0) * c(n, 0, 0) + a(n, 0, 1) * c(n, 0, 1);  // a entries 3*i+2 are never read
+    d(n, 0, 1) = b(n, 0, 1) + a(n, 0, 3) * c(n, 0, 0) + a(n, 0, 4) * c(n, 0, 1);
+    d(n, 0, 2) = b(n, 0, 2) + a(n, 0, 6) * c(n, 0, 0) + a(n, 0, 7) * c(n, 0, 1);
+    d(n, 0, 3) = b(n, 0, 3) + a(n, 0, 9) * c(n, 0, 0) + a(n, 0, 10) * c(n, 0, 1);
+    d(n, 0, 4) = b(n, 0, 4) + a(n, 0, 12) * c(n, 0, 0) + a(n, 0, 13) * c(n, 0, 1);
+    d(n, 0, 5) = b(n, 0, 5) + a(n, 0, 15) * c(n, 0, 0) + a(n, 0, 16) * c(n, 0, 1);
+  }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// AddIntoUpperLeft3x3_imp: currently a stub - the body contains no statements, C is left untouched
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename TfLS, typename TfHS1, typename TfHS2>
+static inline void AddIntoUpperLeft3x3_imp(
+    const TfLS& A, const TfHS1& B, TfHS2& C, const int aN, const int bN, const int cN, const int nmin, const int nmax) {
+  // Not implemented. Problem here: (n, i, j) uses indirection -> slow on the GPU
+}
+
+///////////////////////////////////////////////////////////////////////////////
+///  RotateResidualsOnTangentPlane_impl: b(0) = r00 * a(0) + r01 * a(1), b(1) = a(2), for each n in [nmin, nmax)
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename TfQF1, typename TfQF2, typename TfHV, typename Tf2V>
+static inline void RotateResidualsOnTangentPlane_impl(
+    const TfQF1& r00, const TfQF2& r01, const TfHV& a, Tf2V& b, const int nmin, const int nmax) {
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    b(n, 0, 0) = r00(n, 0, 0) * a(n, 0, 0) + r01(n, 0, 0) * a(n, 0, 1);  // combines the first two input components
+    b(n, 0, 1) = a(n, 0, 2);                                             // third input component is copied through
+  }
+}
diff --git a/RecoTracker/MkFitCore/src/MaterialEffects.cc b/RecoTracker/MkFitCore/src/MaterialEffects.cc
new file mode 100644
index 0000000000000..ec1c7fe505180
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MaterialEffects.cc
@@ -0,0 +1,51 @@
+#include "RecoTracker/MkFitCore/src/MaterialEffects.h"
+
+namespace mkfit {
+
+  namespace Config {
+    // derivation documented in https://indico.cern.ch/event/924564/contributions/3885164/attachments/2097314/
+    // radiation length array; Rl and Xi below are both indexed by the id returned by MaterialEffects::getDetId()
+    constexpr float Rl[136] = {
+        0.018, 0.031, 0.017, 0.023, 0.018, 0.028, 0.021, 0.040, 0.066, 0.039, 0.069, 0.040, 0.103, 0.098, 0.028, 0.038,
+        0.025, 0.034, 0.037, 0.078, 0.048, 0.064, 0.168, 0.085, 0.144, 0.033, 0.157, 0.078, 0.014, 0.032, 0.052, 0.012,
+        0.026, 0.038, 0.015, 0.035, 0.061, 0.015, 0.035, 0.043, 0.015, 0.036, 0.033, 0.010, 0.021, 0.022, 0.093, 0.084,
+        0.100, 0.194, 0.093, 0.108, 0.200, 0.093, 0.084, 0.100, 0.194, 0.093, 0.108, 0.200, 0.038, 0.075, 0.038, 0.075,
+        0.038, 0.075, 0.038, 0.075, 0.038, 0.075, 0.038, 0.075, 0.039, 0.078, 0.039, 0.078, 0.039, 0.078, 0.039, 0.078,
+        0.039, 0.078, 0.039, 0.078, 0.046, 0.023, 0.046, 0.023, 0.046, 0.046, 0.023, 0.046, 0.023, 0.046, 0.048, 0.024,
+        0.048, 0.024, 0.048, 0.048, 0.024, 0.048, 0.024, 0.048, 0.055, 0.027, 0.055, 0.027, 0.055, 0.055, 0.027, 0.055,
+        0.027, 0.055, 0.043, 0.021, 0.043, 0.043, 0.043, 0.021, 0.043, 0.043, 0.040, 0.020, 0.040, 0.040, 0.040, 0.020,
+        0.040, 0.040, 0.014, 0.028, 0.028, 0.014, 0.028, 0.028};
+
+    constexpr float Xi[136] = {
+        0.039e-03, 0.062e-03, 0.029e-03, 0.037e-03, 0.032e-03, 0.049e-03, 0.044e-03, 0.080e-03, 0.147e-03, 0.086e-03,
+        0.162e-03, 0.092e-03, 0.214e-03, 0.207e-03, 0.062e-03, 0.081e-03, 0.051e-03, 0.068e-03, 0.078e-03, 0.155e-03,
+        0.110e-03, 0.138e-03, 0.321e-03, 0.166e-03, 0.311e-03, 0.077e-03, 0.371e-03, 0.185e-03, 0.035e-03, 0.069e-03,
+        0.104e-03, 0.025e-03, 0.051e-03, 0.072e-03, 0.033e-03, 0.069e-03, 0.114e-03, 0.033e-03, 0.071e-03, 0.083e-03,
+        0.033e-03, 0.073e-03, 0.064e-03, 0.021e-03, 0.043e-03, 0.043e-03, 0.216e-03, 0.209e-03, 0.185e-03, 0.309e-03,
+        0.216e-03, 0.255e-03, 0.369e-03, 0.216e-03, 0.209e-03, 0.185e-03, 0.309e-03, 0.216e-03, 0.255e-03, 0.369e-03,
+        0.083e-03, 0.166e-03, 0.083e-03, 0.166e-03, 0.083e-03, 0.166e-03, 0.083e-03, 0.166e-03, 0.083e-03, 0.166e-03,
+        0.083e-03, 0.166e-03, 0.088e-03, 0.175e-03, 0.088e-03, 0.175e-03, 0.088e-03, 0.175e-03, 0.088e-03, 0.175e-03,
+        0.088e-03, 0.175e-03, 0.088e-03, 0.175e-03, 0.104e-03, 0.052e-03, 0.104e-03, 0.052e-03, 0.104e-03, 0.104e-03,
+        0.052e-03, 0.104e-03, 0.052e-03, 0.104e-03, 0.110e-03, 0.055e-03, 0.110e-03, 0.055e-03, 0.110e-03, 0.110e-03,
+        0.055e-03, 0.110e-03, 0.055e-03, 0.110e-03, 0.130e-03, 0.065e-03, 0.130e-03, 0.065e-03, 0.130e-03, 0.130e-03,
+        0.065e-03, 0.130e-03, 0.065e-03, 0.130e-03, 0.097e-03, 0.048e-03, 0.097e-03, 0.097e-03, 0.097e-03, 0.048e-03,
+        0.097e-03, 0.097e-03, 0.089e-03, 0.045e-03, 0.089e-03, 0.089e-03, 0.089e-03, 0.045e-03, 0.089e-03, 0.089e-03,
+        0.030e-03, 0.061e-03, 0.061e-03, 0.030e-03, 0.061e-03, 0.061e-03};
+  }  // namespace Config
+
+  MaterialEffects::MaterialEffects() {
+    for (int iz = 0; iz < Config::nBinsZME; ++iz) {
+      const float zLow = (iz * Config::rangeZME) / Config::nBinsZME;  // z at the lower edge of bin iz
+      for (int ir = 0; ir < Config::nBinsRME; ++ir) {
+        const float rLow = (ir * Config::rangeRME) / Config::nBinsRME;  // r at the lower edge of bin ir
+        const int id = getDetId(zLow, rLow);  // -1 when no material region covers (z, r)
+        mRlgridME[iz][ir] = (id < 0) ? 0.f : Config::Rl[id];
+        mXigridME[iz][ir] = (id < 0) ? 0.f : Config::Xi[id];
+      }
+    }
+  }
+
+  namespace Config {
+    const MaterialEffects materialEff;
+  }
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/MaterialEffects.h b/RecoTracker/MkFitCore/src/MaterialEffects.h
new file mode 100644
index 0000000000000..4d6f199f3cb11
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MaterialEffects.h
@@ -0,0 +1,443 @@
+#ifndef RecoTracker_MkFitCore_src_MaterialEffects_h
+#define RecoTracker_MkFitCore_src_MaterialEffects_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+
+#include <cmath>
+
+namespace mkfit {
+
+  namespace Config {
+    // config for material effects in cmssw
+    constexpr float rangeZME = 300.;
+    constexpr int nBinsZME = 300;
+    constexpr float rangeRME = 120.;
+    constexpr int nBinsRME = 120;
+  }  // namespace Config
+
+  class MaterialEffects {  // lookup tables of the Rl and Xi values on a (|z|, r) grid
+  public:
+    MaterialEffects();  // fills both grids by sampling getDetId() at the lower (z, r) edge of every bin
+    // getZbin()/getRbin() truncate to int and do no range check: validate the bins before calling get*Val().
+    int getZbin(const float z) const { return (std::abs(z) * Config::nBinsZME) / (Config::rangeZME); }
+    int getRbin(const float r) const { return (r * Config::nBinsRME) / (Config::rangeRME); }
+    float getRlVal(const int zb, const int rb) const { return mRlgridME[zb][rb]; }
+    float getXiVal(const int zb, const int rb) const { return mXigridME[zb][rb]; }
+
+    /// (z,r) grid to material bin/det mapping for Rad length (Rl) and Xi arrays; returns 0..135, or -1 if unmapped
+    // see https://indico.cern.ch/event/924564/contributions/3885164/attachments/2097314/
+    int getDetId(const float zin, const float r) const {
+      const float z = std::abs(zin);  // the mapping is symmetric in z
+
+      //pixel
+      if (r < 17) {
+        //pixel barrel
+        if (z < 28) {
+          if (r < 4) {
+            if (z < 20)
+              return 0;
+            else
+              return 1;
+          }
+          if (r < 8) {
+            if (z < 20)
+              return 2;
+            else
+              return 3;
+          }
+          if (r < 12) {
+            if (z < 20)
+              return 4;
+            else
+              return 5;
+          }
+          if (z < 20)
+            return 6;
+          else
+            return 7;
+        }
+
+        //pixel endcap
+        if (z < 36) {
+          if (r > 9.5 && z < 32.5)
+            return 8;
+          else
+            return 9;
+        }
+        if (z < 45) {
+          if (r > 9.5 && z < 40)
+            return 10;
+          else
+            return 11;
+        }
+        if (z >= 45) {  // always true here (z < 45 returned above), so r < 17 never falls through
+          if (r > 9.5 && z < 49)
+            return 12;
+          else
+            return 13;
+        }
+      }
+
+      //TIB & TID
+      if (r < 55) {
+        //TIB
+        if (z < 70) {
+          if (r < 29) {
+            if (z < 22)
+              return 14;
+            else
+              return 15;
+          }
+          if (r < 38) {
+            if (z < 25)
+              return 16;
+            else
+              return 17;
+          }
+          if (r < 46) {
+            if (z < 44)
+              return 18;
+            else
+              return 19;
+          }
+          if (z < 50)
+            return 20;
+          else
+            return 21;
+        }
+
+        //TID
+        if (z >= 70 && z < 120) {  // z == 70 belongs to TID: the TIB branch above only takes z < 70
+          if (r > 35 && z < 80)
+            return 22;
+          else if (z < 86)
+            return 23;
+          else if (r > 35 && z < 92)
+            return 24;
+          else if (z < 98)
+            return 25;
+          else if (r > 35 && z < 104)
+            return 26;
+          else
+            return 27;
+        }
+      }
+
+      //TOB
+      if (r < 120 && z < 120) {
+        if (r < 65) {
+          if (z < 17)
+            return 28;
+          else if (z < 70)
+            return 29;
+          else
+            return 30;
+        }
+        if (r < 75) {
+          if (z < 17)
+            return 31;
+          else if (z < 70)
+            return 32;
+          else
+            return 33;
+        }
+        if (r < 82) {
+          if (z < 17)
+            return 34;
+          else if (z < 70)
+            return 35;
+          else
+            return 36;
+        }
+        if (r < 90) {
+          if (z < 17)
+            return 37;
+          else if (z < 70)
+            return 38;
+          else
+            return 39;
+        }
+        if (r < 100) {
+          if (z < 17)
+            return 40;
+          else if (z < 70)
+            return 41;
+          else
+            return 42;
+        }
+        if (z < 17)
+          return 43;
+        else if (z < 70)
+          return 44;
+        else
+          return 45;
+      }
+
+      //TEC
+      if (z >= 120 && r < 120) {  // z == 120 belongs to TEC: the TOB branch above only takes z < 120
+        if (z < 128) {
+          if (r < 35)
+            return 46;
+          else if (r < 55)
+            return 47;
+          else if (r < 80)
+            return 48;
+          else
+            return 49;
+        }
+        if (z < 132) {
+          if (r < 45)
+            return 50;
+          else if (r < 70)
+            return 51;
+          else
+            return 52;
+        }
+        if (z < 136) {
+          if (r < 35)
+            return 53;
+          else if (r < 55)
+            return 54;
+          else if (r < 80)
+            return 55;
+          else
+            return 56;
+        }
+        if (z < 138) {
+          if (r < 45)
+            return 57;
+          else if (r < 70)
+            return 58;
+          else
+            return 59;
+        }
+        if (z < 142) {
+          if (r < 35)
+            return 60;
+          else if (r < 55)
+            return 61;
+          else if (r < 80)
+            return 62;
+          else
+            return 63;
+        }
+        if (z < 146) {
+          if (r < 45)
+            return 64;
+          else
+            return 65;
+        }
+        if (z < 150) {
+          if (r < 35)
+            return 66;
+          else if (r < 55)
+            return 67;
+          else if (r < 80)
+            return 68;
+          else
+            return 69;
+        }
+        if (z < 153) {
+          if (r < 45)
+            return 70;
+          else
+            return 71;
+        }
+        if (z < 156) {
+          if (r < 35)
+            return 72;
+          else if (r < 55)
+            return 73;
+          else if (r < 80)
+            return 74;
+          else
+            return 75;
+        }
+        if (z < 160) {
+          if (r < 45)
+            return 76;
+          else
+            return 77;
+        }
+        if (z < 164) {
+          if (r < 35)
+            return 78;
+          else if (r < 55)
+            return 79;
+          else if (r < 80)
+            return 80;
+          else
+            return 81;
+        }
+        if (z < 167) {
+          if (r < 45)
+            return 82;
+          else
+            return 83;
+        }
+
+        if (z < 170) {
+          if (r < 55)
+            return 84;
+          else if (r < 80)
+            return 85;
+          else
+            return 86;
+        }
+        if (z < 174) {
+          if (r < 45)
+            return 87;
+          else
+            return 88;
+        }
+
+        if (z < 177.3) {
+          if (r < 55)
+            return 89;
+          else if (r < 80)
+            return 90;
+          else
+            return 91;
+        }
+        if (z < 181) {
+          if (r < 45)
+            return 92;
+          else
+            return 93;
+        }
+
+        if (z < 185) {
+          if (r < 55)
+            return 94;
+          else if (r < 80)
+            return 95;
+          else
+            return 96;
+        }
+        if (z < 188.5) {
+          if (r < 45)
+            return 97;
+          else
+            return 98;
+        }
+
+        if (z < 192) {
+          if (r < 55)
+            return 99;
+          else if (r < 80)
+            return 100;
+          else
+            return 101;
+        }
+        if (z < 195) {
+          if (r < 45)
+            return 102;
+          else
+            return 103;
+        }
+
+        if (z < 202) {
+          if (r < 55)
+            return 104;
+          else if (r < 80)
+            return 105;
+          else
+            return 106;
+        }
+        if (z < 206) {
+          if (r < 45)
+            return 107;
+          else
+            return 108;
+        }
+
+        if (z < 210) {
+          if (r < 55)
+            return 109;
+          else if (r < 80)
+            return 110;
+          else
+            return 111;
+        }
+        if (z < 212) {
+          if (r < 45)
+            return 112;
+          else
+            return 113;
+        }
+
+        if (z < 222) {
+          if (r < 55)
+            return 114;
+          else if (r < 80)
+            return 115;
+          else
+            return 116;
+        }
+        if (z < 224)
+          return 117;
+
+        if (z < 228) {
+          if (r < 55)
+            return 118;
+          else if (r < 80)
+            return 119;
+          else
+            return 120;
+        }
+        if (z < 232)
+          return 121;
+
+        if (z < 241) {
+          if (r < 55)
+            return 122;
+          else if (r < 80)
+            return 123;
+          else
+            return 124;
+        }
+        if (z < 245)
+          return 125;
+
+        if (z < 248) {
+          if (r < 55)
+            return 126;
+          else if (r < 80)
+            return 127;
+          else
+            return 128;
+        }
+        if (z < 252)
+          return 129;
+
+        if (z < 264) {
+          if (r < 80)
+            return 130;
+          else
+            return 131;
+        }
+        if (z < 267)
+          return 132;
+
+        if (z < 270) {
+          if (r < 80)
+            return 133;
+          else
+            return 134;
+        }
+        if (z < 280)
+          return 135;
+      }
+      return -1;  // no region above covers this (|z|, r), e.g. r >= 120 or |z| >= 280 with r >= 17
+    }
+
+  private:
+    float mRlgridME[Config::nBinsZME][Config::nBinsRME];  // Config::Rl value per [z bin][r bin], 0 if unmapped
+    float mXigridME[Config::nBinsZME][Config::nBinsRME];  // Config::Xi value per [z bin][r bin], 0 if unmapped
+  };  // class MaterialEffects
+
+  namespace Config {
+    extern const MaterialEffects materialEff;
+  }
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/Matriplex/GenMPlexOps.pl b/RecoTracker/MkFitCore/src/Matriplex/GenMPlexOps.pl
new file mode 100755
index 0000000000000..4efbf74c8613c
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/GenMPlexOps.pl
@@ -0,0 +1,407 @@
+#!/usr/bin/perl
+
+use lib "../Matriplex";
+
+use GenMul;
+use warnings;
+
+#------------------------------------------------------------------------------
+### simple general 3x3 matrix times 3-vector multiplication for CF MPlex (c = a * b)
+
+$A = GenMul::Matrix->new(name => 'a', M => 3, N => 3);
+
+$B = GenMul::Matrix->new(name => 'b', M => 3, N => 1);
+
+$C = GenMul::Matrix->new(name => 'c', M => 3, N => 1);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("CFMatrix33Vector3.ah",
+                                    $A, $B, $C);
+
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- propagated errors in CCS coordinates
+# propErr_ccs = jac_ccs * propErr * jac_ccsT, generated as two products
+
+$jac_ccs = GenMul::Matrix->new(name => 'a', M => 6, N => 6);
+$jac_ccs->set_pattern(<<'FNORD');
+1 0 0 0 0 0
+0 1 0 0 0 0
+0 0 1 0 0 0
+0 0 0 x x 0
+0 0 0 x x 0
+0 0 0 x x x
+FNORD
+
+$propErr = GenMul::MatrixSym->new(name => 'b', M => 6, N => 6);
+
+$temp = GenMul::Matrix->new(name => 'c', M => 6, N => 6);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("CCSErr.ah",
+                                    $jac_ccs, $propErr, $temp);
+
+$jac_ccsT = GenMul::MatrixTranspose->new($jac_ccs);
+$propErr_ccs = GenMul::MatrixSym->new(name => 'c', M => 6, N => 6);
+$temp->{name} = 'b';
+
+$m->dump_multiply_std_and_intrinsic("CCSErrTransp.ah",
+                                    $temp, $jac_ccsT, $propErr_ccs);
+
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- updated errors in cartesian coordinates
+# outErr = jac_back_ccs * outErr_ccs * jac_back_ccsT, generated as two products
+
+$jac_back_ccs = GenMul::Matrix->new(name => 'a', M => 6, N => 6);
+$jac_back_ccs->set_pattern(<<'FNORD');
+1 0 0 0 0 0
+0 1 0 0 0 0
+0 0 1 0 0 0
+0 0 0 x x 0
+0 0 0 x x 0
+0 0 0 x 0 x
+FNORD
+
+$outErr_ccs = GenMul::MatrixSym->new(name => 'b', M => 6, N => 6);
+
+$temp = GenMul::Matrix->new(name => 'c', M => 6, N => 6);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("CartesianErr.ah",
+                                    $jac_back_ccs, $outErr_ccs, $temp);
+
+$jac_back_ccsT = GenMul::MatrixTranspose->new($jac_back_ccs);
+$outErr = GenMul::MatrixSym->new(name => 'c', M => 6, N => 6);
+$temp->{name} = 'b';
+
+$m->dump_multiply_std_and_intrinsic("CartesianErrTransp.ah",
+                                    $temp, $jac_back_ccsT, $outErr);
+
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- first term to get kalman gain (H^T*G)
+# temp = rot * resErr_loc
+
+$rot = GenMul::Matrix->new(name => 'a', M => 3, N => 3);
+$rot->set_pattern(<<'FNORD');
+x 0 x
+x 0 x
+0 1 0
+FNORD
+
+$resErr_loc = GenMul::MatrixSym->new(name => 'b', M => 3, N => 3);
+
+$temp = GenMul::Matrix->new(name => 'c', M => 3, N => 3);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("KalmanHTG.ah",
+                                    $rot, $resErr_loc, $temp);
+
+
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- kalman gain
+# K = propErr_ccs * resErrTmpLH
+
+$propErr_ccs = GenMul::MatrixSym->new(name => 'a', M => 6, N => 6);
+
+$resErrTmpLH = GenMul::Matrix->new(name => 'b', M => 6, N => 3);
+$resErrTmpLH->set_pattern(<<'FNORD');
+x x 0
+x x 0
+x x 0
+0 0 0
+0 0 0
+0 0 0
+FNORD
+
+$K = GenMul::Matrix->new(name => 'c', M => 6, N => 3);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("KalmanGain.ah",
+                                    $propErr_ccs, $resErrTmpLH, $K);
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- kalman gain, 6x2 variant
+# K = propErr * resErr2x2 (dimensions do not match, hence no_size_check)
+
+$propErr = GenMul::MatrixSym->new(name => 'a', M => 6, N => 6);
+
+$resErr2x2 = GenMul::MatrixSym->new(name => 'b', M => 2, N => 2);
+
+$K = GenMul::Matrix->new(name => 'c', M => 6, N => 2);
+
+{
+  my $m_kg = GenMul::Multiply->new(no_size_check => 1);
+
+  $m_kg->dump_multiply_std_and_intrinsic("KalmanGain62.ah",
+                                         $propErr, $resErr2x2, $K);
+}
+
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- KH
+# KH = K * H
+
+$K = GenMul::Matrix->new(name => 'a', M => 6, N => 3);
+$K->set_pattern(<<'FNORD');
+x x 0
+x x 0
+x x 0
+x x 0
+x x 0
+x x 0
+FNORD
+
+$H = GenMul::Matrix->new(name => 'b', M => 3, N => 6);
+$H->set_pattern(<<'FNORD');
+x x 0 0 0 0
+0 0 1 0 0 0
+x x 0 0 0 0
+FNORD
+
+$KH = GenMul::Matrix->new(name => 'c', M => 6, N => 6);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("KH.ah",
+                                    $K, $H, $KH);
+
+#------------------------------------------------------------------------------
+### updateParametersMPlex -- KH * C
+# temp = KH * propErr_ccs
+
+$KH = GenMul::Matrix->new(name => 'a', M => 6, N => 6);
+$KH->set_pattern(<<'FNORD');
+x x x 0 0 0
+x x x 0 0 0
+x x x 0 0 0
+x x x 0 0 0
+x x x 0 0 0
+x x x 0 0 0
+FNORD
+
+$propErr_ccs = GenMul::MatrixSym->new(name => 'b', M => 6, N => 6);
+
+$temp = GenMul::MatrixSym->new(name => 'c', M => 6, N => 6);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("KHC.ah",
+                                    $KH, $propErr_ccs, $temp);
+
+#------------------------------------------------------------------------------
+
+### updateParametersMPlex -- KH * C with KH=K dim 6x2
+# temp = KH * propErr (dimensions do not match, hence no_size_check)
+
+$KH = GenMul::Matrix->new(name => 'a', M => 6, N => 2);
+$KH->set_pattern(<<'FNORD');
+x x
+x x
+x x
+x x
+x x
+x x
+FNORD
+
+$propErr = GenMul::MatrixSym->new(name => 'b', M => 6, N => 6);
+
+$temp = GenMul::MatrixSym->new(name => 'c', M => 6, N => 6);
+
+{
+  my $m_kg = GenMul::Multiply->new(no_size_check => 1);
+
+  $m_kg->dump_multiply_std_and_intrinsic("K62HC.ah",
+                                         $KH, $propErr, $temp);
+}
+
+#------------------------------------------------------------------------------
+
+### computeChi2MPlex -- similarity to rotate errors, two ops.
+# resErr_loc = rotT * resErr_glo * rotTT
+
+$rotT = GenMul::Matrix->new(name => 'a', M => 3, N => 3);
+$rotT->set_pattern(<<'FNORD');
+x x 0
+0 0 1
+x x 0
+FNORD
+
+$resErr_glo = GenMul::MatrixSym->new(name => 'b', M => 3, N => 3);
+
+$temp = GenMul::Matrix->new(name => 'c', M => 3, N => 3);
+
+$m = GenMul::Multiply->new;
+
+$m->dump_multiply_std_and_intrinsic("ProjectResErr.ah",
+                                    $rotT, $resErr_glo, $temp);
+
+$rotTT = GenMul::MatrixTranspose->new($rotT);  # was misspelled $roTT; name now matches the formula above
+$resErr_loc = GenMul::MatrixSym->new(name => 'c', M => 3, N => 3);
+$temp->{name} = 'b';
+
+$m->dump_multiply_std_and_intrinsic("ProjectResErrTransp.ah",
+                                    $temp, $rotTT, $resErr_loc);
+
+#------------------------------------------------------------------------------
+
+### Propagate Helix To R -- final similarity, two ops.
+
+# outErr = errProp * outErr * errPropT
+#   outErr is symmetric
+
+my $DIM = 6;
+
+$errProp = GenMul::Matrix->new(name => 'a', M => $DIM, N => $DIM);
+$errProp->set_pattern(<<'FNORD');
+x x 0 x x 0
+x x 0 x x 0
+x x 1 x x x
+x x 0 x x 0
+x x 0 x x 0
+0 0 0 0 0 1
+FNORD
+#switch to the one below when moving to CCS coordinates only
+#x x 0 x x 0
+#x x 0 x x 0
+#x x 1 x x x
+#0 0 0 1 0 0
+#x x 0 x x 0
+#0 0 0 0 0 1
+#FNORD
+
+$outErr = GenMul::MatrixSym->new(name => 'b', M => $DIM, N => $DIM);
+
+$temp = GenMul::Matrix->new(name => 'c', M => $DIM, N => $DIM);
+
+# The transpose is built up front and its info and pattern are printed to stdout.
+$errPropT = GenMul::MatrixTranspose->new($errProp);
+$errPropT->print_info();
+$errPropT->print_pattern();
+
+# ----------------------------------------------------------------------
+
+$m = GenMul::Multiply->new;
+
+# outErr and c are just templates ...
+
+$m->dump_multiply_std_and_intrinsic("MultHelixProp.ah",
+                                    $errProp, $outErr, $temp);
+
+$temp->{name}   = 'b';
+$outErr->{name} = 'c';
+
+### XXX fix this ... in accordance with what is in Propagation.cc
+$m->dump_multiply_std_and_intrinsic("MultHelixPropTransp.ah",
+                                    $temp, $errPropT, $outErr);
+
+#######################################
+###          ENDCAP version         ###
+#######################################
+
+$errProp->set_pattern(<<'FNORD');
+1 0 x x x x
+0 1 x x x x
+0 0 0 0 0 0
+0 0 0 1 0 0
+0 0 x x 1 x
+0 0 0 0 0 1
+FNORD
+
+$temp->{name}   = 'c';
+$outErr->{name} = 'b';
+
+$errPropT = GenMul::MatrixTranspose->new($errProp);
+$m->dump_multiply_std_and_intrinsic("MultHelixPropEndcap.ah",
+                                    $errProp, $outErr, $temp);
+
+$temp->{name}   = 'b';
+$outErr->{name} = 'c';
+
+### XXX fix this ... in accordance with what is in Propagation.cc
+$m->dump_multiply_std_and_intrinsic("MultHelixPropTranspEndcap.ah",
+                                    $temp, $errPropT, $outErr);
+
+
+##############################
+### updateParameters       ###
+##############################
+
+# declared first on its own because propErr sees many uses
+my $propErr_M = 6;
+$propErr = GenMul::MatrixSym->new(name => 'a',
+                                  M    => $propErr_M); # will have to remember to re'name' it based on location in function
+
+my $propErrT_M = 6;
+$propErrT = GenMul::MatrixTranspose->new($propErr); # will have to remember to re'name' it based on location in function
+
+
+
+### kalmanGain = propErr * (projMatrixT * resErrInv)
+$resErrInv = GenMul::MatrixSym->new(name => 'b', M => 3, N => 3);
+
+$kalmanGain = GenMul::Matrix->new(name => 'c', M => 6, N => 3);
+
+{
+  my $m_kg = GenMul::Multiply->new(no_size_check => 1);
+
+  $m_kg->dump_multiply_std_and_intrinsic("upParam_MultKalmanGain.ah",
+                                         $propErr, $resErrInv, $kalmanGain);
+}
+
+
+### updatedErrs = propErr - propErr^T * simil * propErr
+# Going to skip the subtraction for now
+my $simil_M = 6;
+$simil = GenMul::MatrixSym->new(name => 'a', M => $simil_M);
+$simil->set_pattern(<<'FNORD');
+x
+x x
+x x x
+0 0 0 0
+0 0 0 0 0
+0 0 0 0 0 0
+FNORD
+
+$propErr->{name} = 'b';
+
+my $temp_simil_x_propErr_M = 6;
+my $temp_simil_x_propErr_N = 6;
+$temp_simil_x_propErr = GenMul::Matrix->new(name => 'c',
+                                            M    => $temp_simil_x_propErr_M,
+                                            N    => $temp_simil_x_propErr_N);
+
+$m->dump_multiply_std_and_intrinsic("upParam_simil_x_propErr.ah",
+                                    $simil, $propErr, $temp_simil_x_propErr);
+
+$temp_simil_x_propErr->{name} = 'b';
+$temp_simil_x_propErr->set_pattern(<<'FNORD');
+x x x x x x
+x x x x x x
+x x x x x x
+0 0 0 0 0 0
+0 0 0 0 0 0
+0 0 0 0 0 0
+FNORD
+
+#? This one is symmetric but the output can't handle it... need to fix
+#$temp_propErrT_x_simil_propErr = new GenMul::MatrixSym('name'=>'c', 'M'=>$propErrT_M, 'N'=>$temp_simil_x_propErr_N);
+
+
+$temp_propErrT_x_simil_propErr = GenMul::MatrixSym->new(name => 'c', M => $propErrT_M);
+
+$m->dump_multiply_std_and_intrinsic("upParam_propErrT_x_simil_propErr.ah",
+                                    $propErrT, $temp_simil_x_propErr, $temp_propErrT_x_simil_propErr);
+
+
+{
+  my $temp = GenMul::MatrixSym->new(name => 'c', M => 6);
+
+  my $m_kg = GenMul::Multiply->new(no_size_check => 1);
+
+  $kalmanGain->{name} = 'a';
+
+  $m_kg->dump_multiply_std_and_intrinsic("upParam_kalmanGain_x_propErr.ah",
+                                         $kalmanGain, $propErr, $temp);
+}
diff --git a/RecoTracker/MkFitCore/src/Matriplex/GenMul.pm b/RecoTracker/MkFitCore/src/Matriplex/GenMul.pm
new file mode 100644
index 0000000000000..1968137b5f72f
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/GenMul.pm
@@ -0,0 +1,870 @@
+########################################################################
+########################################################################
+# Top level package
+########################################################################
+########################################################################
+
+package GenMul;
+
+my $G_vec_width = 1;
+
+########################################################################
+########################################################################
+# MATRIX CLASSES
+########################################################################
+########################################################################
+
+########################################################################
+# MBase -- matrix base class
+########################################################################
+
+package GenMul::MBase;
+
+use Carp;
+
+# Required arguments:
+# - M
+# - N (for non-symmetric matrices)
+# - name: name of array
+#
+# Created members:
+# - class
+#
+#
+# Input matrix pattern can be set via function set_pattern(). The argument is
+# a white-space separated string of x, 0, 1, describing the matrix elements.
+# For symmetric matrices lower-left triangle must be given.
+# Support for -1 could be added (but isn't trivial (unless unary - changes
+# the preceding addition into subtraction; also this is a tough call
+# for intrinsics)).
+#
+# Pattern could also be set for output matrix but is currently not supported.
+
+sub new
+{
+  my $proto = shift;
+  my $class = ref($proto) || $proto;
+  my $S = {@_};
+  bless($S, $class);
+
+  # M, N checked in concrete classes
+
+  croak "name must be set" unless defined $S->{name};
+
+  $S->{class} = $class;
+
+  return $S;
+}
+
+sub mat_size
+{
+  die "mat_size() should be overridden in concrete matrix class";
+}
+
+sub idx
+{
+  die "idx() should be overriden in concrete matrix class";
+}
+
+sub row_col_in_range
+{
+  my ($S, $i, $j) = @_;
+  # Use && here: "return A and B" parses as "(return A) and B", which would only ever test $i >= 0.
+  return ($i >= 0 && $i < $S->{M} && $j >= 0 && $j < $S->{N});
+}
+
+sub set_pattern
+{
+  my ($S, $pstr) = @_;
+  # split ' ' skips leading whitespace, so an indented pattern does not produce an empty first entry.
+  @{$S->{pattern}} = split ' ', $pstr;
+
+  croak "set_pattern number of entries does not match matrix size"
+      unless scalar @{$S->{pattern}} == $S->mat_size();
+  # Each entry must be exactly one of 0, 1, x (anchored, so tokens like "10" or "xx" are rejected).
+  croak "set_pattern() input string contains invalid entry"
+      if grep {$_ !~ /^[01x]\z/} @{$S->{pattern}};
+}
+
+sub pattern
+{
+  my ($S, $idx) = @_;
+
+  die "pattern called with bad index."
+      unless $idx >=0 and $idx < $S->mat_size();
+  # Without an explicit pattern every element is treated as a general value 'x'.
+  return defined $S->{pattern} ? $S->{pattern}[$idx] : 'x';
+}
+
+sub reg_name
+{
+  my ($S, $idx) = @_;
+
+  die "reg_name called with bad index."
+      unless $idx >=0 and $idx < $S->mat_size();
+  # Result is "<name>_<idx>", e.g. "a_3".
+  return "$S->{name}_${idx}";
+}
+
+sub print_info
+{
+  my ($S) = @_;
+
+  print "Class='$S->{class}', M=$S->{M}, N=$S->{N}, name='$S->{name}'\n";
+}
+
+sub print_pattern
+{
+  my ($S) = @_;
+  # Prints the full M x N pattern row by row, mapping (i, j) through the concrete class's idx().
+  for (my $i = 0; $i < $S->{M}; ++$i)
+  {
+    for (my $j = 0; $j < $S->{N}; ++$j)
+    {
+      print $S->pattern($S->idx($i, $j)), " ";
+    }
+    print "\n";
+  }
+}
+
+########################################################################
+# Matrix -- standard MxN matrix
+########################################################################
+
+package GenMul::Matrix; our @ISA = ('GenMul::MBase');
+
+use Carp;
+
+sub new
+{
+  my ($proto, @args) = @_;
+  my $S = $proto->SUPER::new(@args);
+  # A general matrix needs both dimensions.
+  defined $S->{M} or croak "M not set for $S->{class}";
+
+  defined $S->{N} or croak "N not set for $S->{class}";
+
+  return $S;
+}
+
+sub mat_size
+{
+  my $S = shift;
+  # All M x N elements are stored.
+  return $S->{M} * $S->{N};
+}
+
+sub idx
+{
+  my ($S, $row, $col) = @_;
+
+  confess "$S->{class}::idx() i out of range"
+      unless $row >= 0 and $row < $S->{M};
+
+  confess "$S->{class}::idx() j out of range"
+      unless $col >= 0 and $col < $S->{N};
+  # Row-major linear index.
+  return $row * $S->{N} + $col;
+}
+
+########################################################################
+# MatrixSym -- symmetric square matrix
+########################################################################
+
+package GenMul::MatrixSym; @ISA = ('GenMul::MBase');
+
+use Carp;
+
+# Offsets converting from full matrix indices to symmetric ones, keyed by dimension M (2..6 supported):
+my @Offs;
+$Offs[2] = [ 0, 1, 1, 2 ];
+$Offs[3] = [ 0, 1, 3, 1, 2, 4, 3, 4, 5 ];
+$Offs[4] = [ 0, 1, 3, 6, 1, 2, 4, 7, 3, 4, 5, 8, 6, 7, 8, 9 ];
+$Offs[5] = [ 0, 1, 3, 6, 10, 1, 2, 4, 7, 11, 3, 4, 5, 8, 12, 6, 7, 8, 9, 13, 10, 11, 12, 13, 14 ];
+$Offs[6] = [ 0, 1, 3, 6, 10, 15, 1, 2, 4, 7, 11, 16, 3, 4, 5, 8, 12, 17, 6, 7, 8, 9, 13, 18, 10, 11, 12, 13, 14, 19, 15, 16, 17, 18, 19, 20 ];
+
+sub new
+{
+  my $proto = shift;
+  my $S = $proto->SUPER::new(@_);
+
+  croak "M not set for $S->{class}" unless defined $S->{M};
+
+  croak "N should not be set or should be equal to M for $S->{class}"
+      if defined $S->{N} and $S->{N} != $S->{M};
+  # Scalar element access ($Offs[...]) rather than a one-element slice (@Offs[...]).
+  die "Offset array not defined for this dimension"
+      unless defined $Offs[$S->{M}];
+
+  die "Offset array of wrong dimension"
+      unless scalar @{$Offs[$S->{M}]} == $S->{M} * $S->{M};
+  # N defaults to M so code that reads {N} directly works for symmetric matrices too.
+  $S->{N} = $S->{M} unless defined $S->{N};
+
+  return $S;
+}
+
+sub mat_size
+{
+  my ($S) = @_;
+  # Only one triangle is stored: M * (M + 1) / 2 elements.
+  return ($S->{M} + 1) * $S->{M} / 2;
+}
+
+sub idx
+{
+  my ($S, $i, $j) = @_;
+
+  confess "$S->{class}::idx() i out of range"
+      if $i < 0 or $i >= $S->{M};
+
+  confess "$S->{class}::idx() j out of range"
+      if $j < 0 or $j >= $S->{N};
+  # (i, j) and (j, i) map to the same packed index.
+  return $Offs[$S->{M}][$i * $S->{N} + $j];
+}
+
+
+########################################################################
+# MatrixTranspose -- wrapper for transpose of a matrix
+########################################################################
+
+package GenMul::MatrixTranspose; @ISA = ('GenMul::MBase');
+
+use Carp;
+use Scalar::Util 'blessed';
+
+# Usage: GenMul::MatrixTranspose->new($matrix), where $matrix is any GenMul::MBase object.
+sub new
+{
+  my $proto = shift;
+  my $mat   = shift;
+  # $S does not exist yet at this point, so take the class name for the message from $proto.
+  croak "Argument for " . (ref($proto) || $proto) . " is not a GenMul::MBase"
+      unless blessed $mat and $mat->isa("GenMul::MBase");
+
+  my $S = $proto->SUPER::new(@_, 'name'=>$mat->{name});
+
+  # The name is copied once here; later changes to the wrapped matrix's {name} are not picked up.
+  $S->{matrix} = $mat;
+
+  # Hack around dimensions -- these are accessed directly, everything
+  # else goes through methods.
+
+  $S->{M} = $S->{matrix}{N};
+  $S->{N} = $S->{matrix}{M};
+
+  return $S;
+}
+
+sub mat_size
+{
+  my ($S) = @_;
+
+  return $S->{matrix}->mat_size();
+}
+
+sub idx
+{
+  my ($S, $i, $j) = @_;
+  # Element (i, j) of the transpose is element (j, i) of the wrapped matrix.
+  return $S->{matrix}->idx($j, $i);
+}
+
+sub pattern
+{
+  my ($S, $idx) = @_;
+
+  return $S->{matrix}->pattern($idx);
+}
+
+sub print_info
+{
+  my ($S) = @_;
+
+  print "Transpose of ";
+  $S->{matrix}->print_info();
+  print "    ";
+  $S->SUPER::print_info();
+}
+
+
+########################################################################
+########################################################################
+# CODE GENERATION CLASSES
+########################################################################
+########################################################################
+
+package GenMul::Multiply;
+
+use Carp;
+use Scalar::Util 'blessed';
+
+use warnings;
+
+# Optional arguments:
+# - no_size_check: elements out of range are assumed to be 0
+
+sub new
+{
+  my ($proto, %args) = @_;
+  my $class = ref($proto) || $proto;
+  my $S = bless { %args }, $class;
+
+  # Defaults: prefix put before each emitted line and the vector type name.
+  defined $S->{prefix}  or $S->{prefix}  = "      ";
+  defined $S->{vectype} or $S->{vectype} = "IntrVec_t";
+
+  $S->{class} = $class;
+
+  return $S;
+}
+
+sub check_multiply_arguments
+{
+  # Validates the operands of c = a * b and remembers a and b in $S->{a}{mat}
+  # and $S->{b}{mat}. Croaks on unusable input; with no_size_check set,
+  # dimension mismatches only produce warnings.
+
+  my ($S, $a, $b, $c) = @_;
+
+  for my $arg (['a', $a], ['b', $b], ['c', $c])
+  {
+    croak "Input $arg->[0] is not a GenMul::MBase"
+        unless blessed $arg->[1] and $arg->[1]->isa("GenMul::MBase");
+  }
+
+  my $ab_ok = $a->{N} == $b->{M};
+  my $c_ok  = ($c->{M} == $a->{M} and $c->{N} == $b->{N});
+
+  if ($S->{no_size_check})
+  {
+    carp "Input matrices a and b not compatible -- running with no_size_check"
+        unless $ab_ok;
+    carp "Result matrix c of wrong dimensions -- running with no_size_check"
+        unless $c_ok;
+  }
+  else
+  {
+    croak "Input matrices a and b not compatible" unless $ab_ok;
+    croak "Result matrix c of wrong dimensions" unless $c_ok;
+  }
+
+  croak "Result matrix c should not be a transpose (or check & implement this case in GenMul code)"
+      if $c->isa("GenMul::MatrixTranspose");
+
+  croak "Result matrix c has a pattern defined, this is not yet supported (but shouldn't be too hard)."
+      if defined $c->{pattern};
+
+  carp "Result matrix c is symmetric, GenMul hopes you know what you're doing"
+      if $c->isa("GenMul::MatrixSym");
+
+  $S->{a}{mat} = $a;
+  $S->{b}{mat} = $b;
+}
+
+sub push_out
+{
+  my ($S, @pieces) = @_;
+  # Concatenate the pieces into one line and append it to the output buffer.
+  push @{$S->{out}}, join("", @pieces);
+}
+
+sub unshift_out
+{
+  my ($S, @pieces) = @_;
+  # Concatenate the pieces into one line and put it in front of the buffer.
+  unshift @{$S->{out}}, join("", @pieces);
+}
+
+sub handle_all_zeros_ones
+{
+  # Prepends declarations of the all_zeros / all_ones vector constants to the
+  # output buffer when they are needed: 16 elements under AVX512_INTRINSICS,
+  # 8 elements otherwise.
+
+  my ($S, $zeros, $ones) = @_;
+
+  return unless $zeros or $ones;
+
+  # Constants to declare, as [ name (padded for alignment), element value ].
+  my @wanted;
+  push @wanted, ['all_zeros', 0] if $zeros;
+  push @wanted, ['all_ones ', 1] if $ones;
+
+  my @zo = ("#ifdef AVX512_INTRINSICS");
+
+  for my $width (16, 8)
+  {
+    push @zo, "#else" if $width == 8;
+
+    for my $w (@wanted)
+    {
+      my ($name, $val) = @$w;
+
+      push @zo, "$S->{vectype} $name = { " . join(", ", ($val) x $width) . " };";
+    }
+  }
+
+  push @zo, "#endif", "";
+
+  # Prepend in reverse so that the lines end up in their original order.
+  $S->unshift_out($_) for reverse @zo;
+}
+
+sub delete_temporaries
+{
+  # Drops everything kept under $S->{a} and $S->{b} (operand references,
+  # indices, patterns and use counters) once a multiplication is generated.
+  my ($S) = @_;
+
+  delete $S->{a};
+  delete $S->{b};
+  return;
+}
+
+sub delete_loop_temporaries
+{
+  # Clears only the per-element index and pattern of both operands.
+  my $S = shift;
+
+  foreach my $key (qw(idx pat))
+  {
+    delete $S->{$_}{$key} for qw(a b);
+  }
+}
+
+sub generate_index_and_pattern
+{
+  my ($S, $x, $i1, $i2) = @_;
+  # With no_size_check, element ($i1, $i2) outside operand $x counts as zero.
+  if ($S->{no_size_check} and not $S->{$x}{mat}->row_col_in_range($i1, $i2))
+  {
+    $S->{$x}{pat} = '0';
+  }
+  else
+  {
+    $S->{$x}{idx} = $S->{$x}{mat}->idx($i1, $i2);
+    $S->{$x}{pat} = $S->{$x}{mat}->pattern ($S->{$x}{idx});
+  }
+}
+
+sub generate_indices_and_patterns_for_multiplication
+{
+  # For the product term a(row, inner) * b(inner, col): reset the per-element
+  # state, then compute index and pattern of both factors.
+
+  my ($S, $row, $col, $inner) = @_;
+
+  $S->delete_loop_temporaries();
+  $S->generate_index_and_pattern('a', $row, $inner);
+  $S->generate_index_and_pattern('b', $inner, $col);
+}
+
+# ----------------------------------------------------------------------
+
+sub generate_addend_standard
+{
+  my ($S, $x, $y) = @_;
+  my ($px, $py) = ($S->{$x}{pat}, $S->{$y}{pat});
+  # A zero factor kills the term; two unit factors give the constant 1.
+  return undef if $px eq '0' or  $py eq '0';
+  return "1"   if $px eq '1' and $py eq '1';
+
+  # C expression reading one factor; a unit factor is left out of the product.
+  my $ref = sub { sprintf "$_[0]{mat}{name}\[%2d*N+n]", $_[0]{idx} };
+
+  return $ref->($S->{$x}) if $py eq '1';
+  return $ref->($S->{$y}) if $px eq '1';
+  return $ref->($S->{$x}) . "*" . $ref->($S->{$y});
+}
+
+sub multiply_standard
+{
+  # Emits plain C code for c = a * b, fully unrolled: one assignment per
+  # element of c (for a symmetric c only the elements with j <= i).
+  # Arguments: a, b, c   -- all GenMul::MBase with right dimensions.
+  # Does:      c = a * b
+  # Terms with a zero factor are left out, unit factors are simplified away.
+
+  check_multiply_arguments(@_);
+
+  my ($S, $a, $b, $c) = @_;
+
+  my $c_is_sym = $c->isa("GenMul::MatrixSym");
+
+  # With no_size_check matrices do not have to be compatible, so the sum
+  # runs over the smaller of the two inner dimensions.
+  my $n_inner = $b->{M} < $a->{N} ? $b->{M} : $a->{N};
+
+  foreach my $i (0 .. $c->{M} - 1)
+  {
+    my $n_cols = $c_is_sym ? $i + 1 : $c->{N};
+
+    foreach my $j (0 .. $n_cols - 1)
+    {
+      # Left-hand side: the target element of c.
+      my $x = $c->idx($i, $j);
+
+      printf "$S->{prefix}$c->{name}\[%2d*N+n\] = ", $x;
+
+      # Right-hand side: the surviving terms of sum_k a(i,k) * b(k,j).
+      my @terms;
+
+      foreach my $k (0 .. $n_inner - 1)
+      {
+        $S->generate_indices_and_patterns_for_multiplication($i, $j, $k);
+
+        my $term = $S->generate_addend_standard('a', 'b');
+
+        push @terms, $term if defined $term;
+      }
+
+      # No surviving term means the element is identically zero.
+      my $rhs = @terms ? join(" + ", @terms) : "0";
+
+      print $rhs, ";\n";
+    }
+  }
+
+  $S->delete_temporaries();
+}
+
+# ----------------------------------------------------------------------
+
+sub generate_addend_gpu
+{
+  my ($S, $x, $y) = @_;
+  my ($px, $py) = ($S->{$x}{pat}, $S->{$y}{pat});
+  # A zero factor kills the term; two unit factors give the constant 1.
+  return undef if $px eq '0' or  $py eq '0';
+  return "1"   if $px eq '1' and $py eq '1';
+
+  # C expression for one factor; its N and n variables carry the matrix name.
+  my $ref = sub { my $nm = $_[0]{mat}{name}; sprintf "${nm}\[%2d*${nm}N+${nm}n]", $_[0]{idx} };
+
+  return $ref->($S->{$x}) if $py eq '1';
+  return $ref->($S->{$y}) if $px eq '1';
+  return $ref->($S->{$x}) . "*" . $ref->($S->{$y});
+}
+
+sub multiply_gpu
+{
+  # GPU flavour of the unrolled multiplication: one line of C code per
+  # element of c (for a symmetric c only the elements with j <= i). Each
+  # matrix is indexed with its own <name>N and <name>n variables.
+  # Arguments: a, b, c   -- all GenMul::MBase with right dimensions.
+  # Does:      c = a * b
+
+  check_multiply_arguments(@_);
+
+  my ($S, $a, $b, $c) = @_;
+
+  my $c_is_sym = $c->isa("GenMul::MatrixSym");
+
+  # With no_size_check matrices do not have to be compatible, so the sum
+  # runs over the smaller of the two inner dimensions.
+  my $n_inner = $b->{M} < $a->{N} ? $b->{M} : $a->{N};
+
+  foreach my $i (0 .. $c->{M} - 1)
+  {
+    my $n_cols = $c_is_sym ? $i + 1 : $c->{N};
+
+    foreach my $j (0 .. $n_cols - 1)
+    {
+      # Left-hand side: the target element of c.
+      my $x = $c->idx($i, $j);
+
+      printf "$S->{prefix}$c->{name}\[%2d*$c->{name}N+$c->{name}n\] = ", $x;
+
+      # Right-hand side: the surviving terms of sum_k a(i,k) * b(k,j).
+      my @terms;
+
+      foreach my $k (0 .. $n_inner - 1)
+      {
+        $S->generate_indices_and_patterns_for_multiplication($i, $j, $k);
+
+        my $term = $S->generate_addend_gpu('a', 'b');
+
+        push @terms, $term if defined $term;
+      }
+
+      # No surviving term means the element is identically zero.
+      my $rhs = @terms ? join(" + ", @terms) : "0";
+
+      print $rhs, ";\n";
+    }
+  }
+
+  $S->delete_temporaries();
+}
+
+# ----------------------------------------------------------------------
+
+sub load_if_needed
+{
+  # Returns the register name for the current element of operand $x; on first
+  # use of that element it also emits the load and advances the tick counter.
+  my ($S, $x) = @_;
+
+  my $opnd = $S->{$x};
+  my $idx  = $opnd->{idx};
+  my $reg  = $opnd->{mat}->reg_name($idx);
+
+  unless ($opnd->{cnt}[$idx]++)
+  {
+    $S->push_out("$S->{vectype} ${reg} = LD($opnd->{mat}{name}, $idx);");
+    ++$S->{tick};
+  }
+
+  return $reg;
+}
+
+sub store
+{
+  # Emits the store of element $idx of $mat from its register; returns the
+  # register name.
+  my ($S, $mat, $idx) = @_;
+
+  my $reg = $mat->reg_name($idx);
+  $S->push_out("ST(", $mat->{name}, ", ", $idx, ", ", $reg, ");");
+  return $reg;
+}
+
+sub multiply_intrinsic
+{
+  check_multiply_arguments(@_);
+  # Generates code for c = a * b using the vector macros LD, MUL, FMA, ADD, ST.
+  my ($S, $a, $b, $c) = @_;
+  # tick counts emitted operations; out buffers the generated lines.
+  $S->{tick} = 0;
+
+  $S->{out}  = [];
+
+  # Counts of use. For a and b to fetch, for c to assign / add / mult / fma.
+  # cc is used as tick at which store can be performed afterwards.
+  my (@cc, @to_store);
+  @cc = (0) x $c->mat_size();
+
+  $S->{a}{cnt} = [ (0) x $a->mat_size() ];
+  $S->{b}{cnt} = [ (0) x $b->mat_size() ];
+
+  my $need_all_zeros = 0;
+  my $need_all_ones  = 0;
+
+  my $is_c_symmetric = $c->isa("GenMul::MatrixSym");
+
+  # With no_size_check matrices do not have to be compatible.
+  my $k_max = $a->{N} <= $b->{M} ? $a->{N} : $b->{M};
+
+  for (my $i = 0; $i < $c->{M}; ++$i)
+  {
+    my $j_max = $is_c_symmetric ?  $i + 1 : $c->{N};
+
+    for (my $k = 0; $k < $k_max; ++$k)
+    {
+      for (my $j = 0; $j < $j_max; ++$j)
+      {
+        my $x = $c->idx($i, $j);
+
+        $S->generate_indices_and_patterns_for_multiplication($i, $j, $k);
+        # Only terms where neither factor is a known zero contribute.
+        if ($S->{a}{pat} ne '0' and $S->{b}{pat} ne '0')
+        {
+          my ($areg, $breg, $sreg);
+          # sreg is set when the product reduces to a single operand.
+          if ($S->{a}{pat} eq '1' and $S->{b}{pat} eq '1')
+          {
+            $need_all_ones = 1;
+            $sreg = "all_ones";
+          }
+          elsif ($S->{b}{pat} eq '1')
+          {
+            $sreg = $S->load_if_needed('a');
+          }
+          elsif ($S->{a}{pat} eq '1')
+          {
+            $sreg = $S->load_if_needed('b');
+          }
+          else
+          {
+            $areg = $S->load_if_needed('a');
+            $breg = $S->load_if_needed('b');
+          }
+
+          my $creg = $c->reg_name($x);
+          # The first contribution declares the accumulator, later ones add.
+          if ($cc[$x] == 0)
+          {
+            my $op = defined $sreg ? "${sreg}" : "MUL(${areg}, ${breg})";
+
+            $S->push_out("$S->{vectype} ${creg} = ", $op, ";");
+          }
+          else
+          {
+            my $op = defined $sreg ?
+                "ADD(${sreg}, ${creg})" :
+                "FMA(${areg}, ${breg}, ${creg})";
+
+            $S->push_out("${creg} = ", $op, ";");
+          }
+
+          ++$cc[$x];
+          ++$S->{tick};
+        }
+        # After the last k the element is complete.
+        if ($k + 1 == $k_max)
+        {
+          if ($cc[$x] == 0)
+          {
+            $need_all_zeros = 1;
+
+            $S->push_out("ST($c->{name}, $x, all_zeros);");
+          }
+          else
+          {
+            $cc[$x] = $S->{tick} + 4; #### Will be ready to store in 4 cycles. Really 4?
+            push @to_store, $x;
+          }
+        }
+
+        # Try to store the finished ones.
+        while (1)
+        {
+          last unless @to_store;
+          my $s = $to_store[0];
+          last if $S->{tick} < $cc[$s];
+
+          $S->store($c, $s);
+          shift @to_store;
+          ++$S->{tick};
+        }
+
+      }
+
+      $S->push_out("") unless $i + 1 == $a->{M} and $k + 1 == $a->{N}; # NOTE(review): uses a's dimensions, not $c->{M} / $k_max -- confirm for no_size_check
+    }
+  }
+  # Flush the stores that were still waiting for their tick.
+  for my $s (@to_store)
+  {
+    $S->store($c, $s);
+
+    ++$S->{tick};
+  }
+
+  $S->handle_all_zeros_ones($need_all_zeros, $need_all_ones);
+  # Print the buffer; empty separator lines get no prefix.
+  for (@{$S->{out}})
+  {
+    print $S->{prefix} unless /^$/;
+    print;
+    print "\n";
+  }
+
+  $S->delete_temporaries();
+}
+
+# ----------------------------------------------------------------------
+
+sub dump_multiply_std_and_intrinsic
+{
+  my ($S, $fname, $a, $b, $c) = @_;
+  # Dumps both CPU variants of c = a * b to $fname ('-' = selected handle).
+  unless ($fname eq '-')
+  {
+    open(FF, '>', $fname) or croak "Can not open '$fname' for writing: $!";
+    select FF;
+  }
+
+  print <<"FNORD";
+#ifdef MPLEX_INTRINSICS
+
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+FNORD
+
+  $S->multiply_intrinsic($a, $b, $c);
+
+  print <<"FNORD";
+   }
+
+#else
+
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+FNORD
+
+  $S->multiply_standard($a, $b, $c);
+
+  print <<"FNORD";
+   }
+#endif
+FNORD
+
+  unless ($fname eq '-')
+  {
+    select STDOUT;
+    close FF or croak "Error while closing '$fname': $!";
+  }
+}
+
+# ----------------------------------------------------------------------
+
+sub dump_multiply_std_and_intrinsic_and_gpu
+{
+  my ($S, $fname, $a, $b, $c) = @_;
+  # Dumps CPU and GPU variants of c = a * b to $fname ('-' = selected handle).
+  unless ($fname eq '-')
+  {
+    open(FF, '>', $fname) or croak "Can not open '$fname' for writing: $!";
+    select FF;
+  }
+
+  print <<"FNORD";
+#ifndef __CUDACC__
+#ifdef MPLEX_INTRINSICS
+
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+FNORD
+
+  $S->multiply_intrinsic($a, $b, $c);
+
+  print <<"FNORD";
+   }
+
+#else
+
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+FNORD
+
+  $S->multiply_standard($a, $b, $c);
+
+  print <<"FNORD";
+   }
+#endif
+#else  // __CUDACC__
+FNORD
+  $S->multiply_gpu($a, $b, $c);
+  print <<"FNORD";
+#endif  // __CUDACC__
+FNORD
+
+  unless ($fname eq '-')
+  {
+    select STDOUT;
+    close FF or croak "Error while closing '$fname': $!";
+  }
+}
+
+########################################################################
+########################################################################
+# THE END
+########################################################################
+########################################################################
+
+1;
diff --git a/RecoTracker/MkFitCore/src/Matriplex/Makefile b/RecoTracker/MkFitCore/src/Matriplex/Makefile
new file mode 100644
index 0000000000000..cddddc7c6e3bd
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/Makefile
@@ -0,0 +1,31 @@
+all: auto
+
+auto: std_sym intr_sym
+
+clean:
+	rm -f *.ah
+
+distclean: clean
+
+# ---------------------------------------------------------------- #
+
+std_sym:  std_sym_3x3.ah std_sym_6x6.ah
+
+intr_sym: intr_sym_3x3.ah intr_sym_6x6.ah
+
+.PHONY: all auto clean distclean std_sym intr_sym
+# ================================================================ #
+.DELETE_ON_ERROR:  # never keep a header whose generator run failed
+GM := ./gen_mul.pl
+# Generated headers are rebuilt whenever the generator script changes.
+std_sym_3x3.ah: ${GM}
+	${GM} "mult_sym(3);" > $@
+
+std_sym_6x6.ah: ${GM}
+	${GM} "mult_sym(6);" > $@
+
+intr_sym_3x3.ah: ${GM}
+	${GM} "mult_sym_fma_intrinsic(3);" > $@
+
+intr_sym_6x6.ah: ${GM}
+	${GM} "mult_sym_fma_intrinsic(6);" > $@
diff --git a/RecoTracker/MkFitCore/src/Matriplex/Matriplex.h b/RecoTracker/MkFitCore/src/Matriplex/Matriplex.h
new file mode 100644
index 0000000000000..d7fea243db2ce
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/Matriplex.h
@@ -0,0 +1,484 @@
+#ifndef RecoTracker_MkFitCore_src_Matriplex_Matriplex_h
+#define RecoTracker_MkFitCore_src_Matriplex_Matriplex_h
+
+#include "MatriplexCommon.h"
+
+namespace Matriplex {
+
+  //------------------------------------------------------------------------------
+
+  template <typename T, idx_t D1, idx_t D2, idx_t N>
+  class Matriplex {  // N matrices of D1 x D2 elements of type T
+  public:
+    typedef T value_type;
+
+    /// return no. of matrix rows
+    static constexpr int kRows = D1;
+    /// return no. of matrix columns
+    static constexpr int kCols = D2;
+    /// return no of elements: rows*columns
+    static constexpr int kSize = D1 * D2;
+    /// size of the whole matriplex
+    static constexpr int kTotSize = N * kSize;
+    /// storage: element (i, j) of matrix n lives at fArray[(i * D2 + j) * N + n]
+    T fArray[kTotSize] __attribute__((aligned(64)));
+
+    Matriplex() {}
+    Matriplex(T v) { setVal(v); }
+    /// number of matrices held (N)
+    idx_t plexSize() const { return N; }
+    /// set every element of every matrix to v
+    void setVal(T v) {
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] = v;
+      }
+    }
+    /// element-wise in-place addition of v
+    void add(const Matriplex& v) {
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] += v.fArray[i];
+      }
+    }
+    /// multiply every element by scale
+    void scale(T scale) {
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] *= scale;
+      }
+    }
+    /// flat access into fArray, no bounds checking
+    T operator[](idx_t xx) const { return fArray[xx]; }
+    T& operator[](idx_t xx) { return fArray[xx]; }
+    /// access to element (i, j) of matrix n, no bounds checking
+    const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
+
+    T& At(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
+
+    T& operator()(idx_t n, idx_t i, idx_t j) { return fArray[(i * D2 + j) * N + n]; }
+    const T& operator()(idx_t n, idx_t i, idx_t j) const { return fArray[(i * D2 + j) * N + n]; }
+    /// byte-wise copy of the whole array from m
+    Matriplex& operator=(const Matriplex& m) {
+      memcpy(fArray, m.fArray, sizeof(T) * kTotSize);
+      return *this;
+    }
+    /// copy matrix n of m into matrix n of this
+    void copySlot(idx_t n, const Matriplex& m) {
+      for (idx_t i = n; i < kTotSize; i += N) {
+        fArray[i] = m.fArray[i];
+      }
+    }
+    /// fill matrix n from kSize consecutive values starting at arr
+    void copyIn(idx_t n, const T* arr) {
+      for (idx_t i = n; i < kTotSize; i += N) {
+        fArray[i] = *(arr++);
+      }
+    }
+    /// copy matrix 'in' of m into matrix n of this
+    void copyIn(idx_t n, const Matriplex& m, idx_t in) {
+      for (idx_t i = n; i < kTotSize; i += N, in += N) {
+        fArray[i] = m[in];
+      }
+    }
+    /// copy matrix 'in' of this into matrix n of this
+    void copy(idx_t n, idx_t in) {
+      for (idx_t i = n; i < kTotSize; i += N, in += N) {
+        fArray[i] = fArray[in];
+      }
+    }
+
+#if defined(AVX512_INTRINSICS)
+    // Masked gather: only the first N_proc lanes are loaded (offsets vi, scale sizeof(U)) and stored.
+    template <typename U>
+    void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
+      //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
+
+      const __m512 src = {0};
+      const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
+
+      for (int i = 0; i < kSize; ++i, ++arr) {
+        //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
+
+        __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
+        _mm512_mask_store_ps(&fArray[i * N], k, reg);
+      }
+    }
+
+    // Experimental methods, slurpIn() seems to be at least as fast.
+    // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
+    void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
+      // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
+
+      for (int i = 0; i < N; ++i) {
+        __m512 reg = _mm512_load_ps(arr + vi[i]);
+        _mm512_store_ps((void*)(tmp + 64 * i), reg);
+      }
+
+      for (int i = 0; i < kSize; ++i) {
+        __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
+        _mm512_store_ps(&fArray[i * N], reg);
+      }
+    }
+    // First half of ChewIn(): copies N 64-byte records from arr + vi[i] into tmp (written through a cast).
+    void Contaginate(const char* arr, int vi[N], const char* tmp) {
+      // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
+
+      for (int i = 0; i < N; ++i) {
+        __m512 reg = _mm512_load_ps(arr + vi[i]);
+        _mm512_store_ps((void*)(tmp + 64 * i), reg);
+      }
+    }
+    // Second half of ChewIn() with off = 0: gathers from tmp via ui into fArray.
+    void Plexify(const char* tmp, __m512i& ui) {
+      for (int i = 0; i < kSize; ++i) {
+        __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
+        _mm512_store_ps(&fArray[i * N], reg);
+      }
+    }
+
+#elif defined(AVX2_INTRINSICS)
+    // Masked gather: only lanes with index below N_proc are loaded and stored.
+    template <typename U>
+    void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
+      // Casts to float* needed to "support" also T=HitOnTrack.
+      // Note that sizeof(float) == sizeof(HitOnTrack) == 4.
+
+      const __m256 src = {0};
+
+      __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+      __m256i k_sel = _mm256_set1_epi32(N_proc);
+      __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
+
+      k = k_master;
+      for (int i = 0; i < kSize; ++i, ++arr) {
+        __m256 reg = _mm256_mask_i32gather_ps(src, (float*)arr, vi, (__m256)k, sizeof(U));
+        // Restore mask (docs say gather clears it but it doesn't seem to).
+        k = k_master;
+        _mm256_maskstore_ps((float*)&fArray[i * N], k, reg);
+      }
+    }
+
+#else
+    // Scalar fallback: element i of matrix j is read from arr[i + vi[j]].
+    void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
+      // Separate N_proc == N case (gains about 7% in fit test).
+      if (N_proc == N) {
+        for (int i = 0; i < kSize; ++i) {
+          for (int j = 0; j < N; ++j) {
+            fArray[i * N + j] = *(arr + i + vi[j]);
+          }
+        }
+      } else {
+        for (int i = 0; i < kSize; ++i) {
+          for (int j = 0; j < N_proc; ++j) {
+            fArray[i * N + j] = *(arr + i + vi[j]);
+          }
+        }
+      }
+    }
+
+#endif
+    /// write the kSize elements of matrix n to consecutive positions at arr
+    void copyOut(idx_t n, T* arr) const {
+      for (idx_t i = n; i < kTotSize; i += N) {
+        *(arr++) = fArray[i];
+      }
+    }
+  };
+
+  template <typename T, idx_t D1, idx_t D2, idx_t N>
+  using MPlex = Matriplex<T, D1, D2, N>;  // short alias for Matriplex
+
+  //==============================================================================
+  // Multiplications
+  //==============================================================================
+
+  template <typename T, idx_t D1, idx_t D2, idx_t D3, idx_t N>
+  void multiplyGeneral(const MPlex<T, D1, D2, N>& A, const MPlex<T, D2, D3, N>& B, MPlex<T, D1, D3, N>& C) {
+    // C = A * B for each of the N matrices; the innermost loops run over the matrices.
+    for (idx_t row = 0; row < D1; ++row) {
+      for (idx_t col = 0; col < D3; ++col) {
+        T* c = &C.fArray[N * (row * D3 + col)];
+#pragma omp simd
+        for (idx_t n = 0; n < N; ++n) {
+          c[n] = 0;
+        }
+
+        for (idx_t k = 0; k < D2; ++k) {
+          const T* a = &A.fArray[N * (row * D2 + k)];
+          const T* b = &B.fArray[N * (k * D3 + col)];
+
+#pragma omp simd
+          for (idx_t n = 0; n < N; ++n) {
+            c[n] += a[n] * b[n];
+          }
+        }
+      }
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  template <typename T, idx_t D, idx_t N>
+  struct MultiplyCls {  // primary template: always throws; specializations for D = 3 and D = 6 follow
+    static void multiply(const MPlex<T, D, D, N>& A, const MPlex<T, D, D, N>& B, MPlex<T, D, D, N>& C) {
+      throw std::runtime_error("general multiplication not supported, well, call multiplyGeneral()");
+    }
+  };
+
+  template <typename T, idx_t N>
+  struct MultiplyCls<T, 3, N> {
+    static void multiply(const MPlex<T, 3, 3, N>& A, const MPlex<T, 3, 3, N>& B, MPlex<T, 3, 3, N>& C) {
+      const T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+      const T* b = B.fArray;
+      ASSUME_ALIGNED(b, 64);
+      T* c = C.fArray;
+      ASSUME_ALIGNED(c, 64);
+      // Fully unrolled C = A * B: c[3 * i + j] = sum over k of a[3 * i + k] * b[3 * k + j], for each n.
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[3 * N + n] + a[2 * N + n] * b[6 * N + n];
+        c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[4 * N + n] + a[2 * N + n] * b[7 * N + n];
+        c[2 * N + n] = a[0 * N + n] * b[2 * N + n] + a[1 * N + n] * b[5 * N + n] + a[2 * N + n] * b[8 * N + n];
+        c[3 * N + n] = a[3 * N + n] * b[0 * N + n] + a[4 * N + n] * b[3 * N + n] + a[5 * N + n] * b[6 * N + n];
+        c[4 * N + n] = a[3 * N + n] * b[1 * N + n] + a[4 * N + n] * b[4 * N + n] + a[5 * N + n] * b[7 * N + n];
+        c[5 * N + n] = a[3 * N + n] * b[2 * N + n] + a[4 * N + n] * b[5 * N + n] + a[5 * N + n] * b[8 * N + n];
+        c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[3 * N + n] + a[8 * N + n] * b[6 * N + n];
+        c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[4 * N + n] + a[8 * N + n] * b[7 * N + n];
+        c[8 * N + n] = a[6 * N + n] * b[2 * N + n] + a[7 * N + n] * b[5 * N + n] + a[8 * N + n] * b[8 * N + n];
+      }
+    }
+  };
+
+  template <typename T, idx_t N>
+  struct MultiplyCls<T, 6, N> {  // fully unrolled 6x6 product: c[6 * i + j] = sum over k of a[6 * i + k] * b[6 * k + j]
+    static void multiply(const MPlex<T, 6, 6, N>& A, const MPlex<T, 6, 6, N>& B, MPlex<T, 6, 6, N>& C) {
+      const T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+      const T* b = B.fArray;
+      ASSUME_ALIGNED(b, 64);
+      T* c = C.fArray;
+      ASSUME_ALIGNED(c, 64);
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[6 * N + n] + a[2 * N + n] * b[12 * N + n] +
+                       a[3 * N + n] * b[18 * N + n] + a[4 * N + n] * b[24 * N + n] + a[5 * N + n] * b[30 * N + n];
+        c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[7 * N + n] + a[2 * N + n] * b[13 * N + n] +
+                       a[3 * N + n] * b[19 * N + n] + a[4 * N + n] * b[25 * N + n] + a[5 * N + n] * b[31 * N + n];
+        c[2 * N + n] = a[0 * N + n] * b[2 * N + n] + a[1 * N + n] * b[8 * N + n] + a[2 * N + n] * b[14 * N + n] +
+                       a[3 * N + n] * b[20 * N + n] + a[4 * N + n] * b[26 * N + n] + a[5 * N + n] * b[32 * N + n];
+        c[3 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[9 * N + n] + a[2 * N + n] * b[15 * N + n] +
+                       a[3 * N + n] * b[21 * N + n] + a[4 * N + n] * b[27 * N + n] + a[5 * N + n] * b[33 * N + n];
+        c[4 * N + n] = a[0 * N + n] * b[4 * N + n] + a[1 * N + n] * b[10 * N + n] + a[2 * N + n] * b[16 * N + n] +
+                       a[3 * N + n] * b[22 * N + n] + a[4 * N + n] * b[28 * N + n] + a[5 * N + n] * b[34 * N + n];
+        c[5 * N + n] = a[0 * N + n] * b[5 * N + n] + a[1 * N + n] * b[11 * N + n] + a[2 * N + n] * b[17 * N + n] +
+                       a[3 * N + n] * b[23 * N + n] + a[4 * N + n] * b[29 * N + n] + a[5 * N + n] * b[35 * N + n];
+        c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[6 * N + n] + a[8 * N + n] * b[12 * N + n] +
+                       a[9 * N + n] * b[18 * N + n] + a[10 * N + n] * b[24 * N + n] + a[11 * N + n] * b[30 * N + n];
+        c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[7 * N + n] + a[8 * N + n] * b[13 * N + n] +
+                       a[9 * N + n] * b[19 * N + n] + a[10 * N + n] * b[25 * N + n] + a[11 * N + n] * b[31 * N + n];
+        c[8 * N + n] = a[6 * N + n] * b[2 * N + n] + a[7 * N + n] * b[8 * N + n] + a[8 * N + n] * b[14 * N + n] +
+                       a[9 * N + n] * b[20 * N + n] + a[10 * N + n] * b[26 * N + n] + a[11 * N + n] * b[32 * N + n];
+        c[9 * N + n] = a[6 * N + n] * b[3 * N + n] + a[7 * N + n] * b[9 * N + n] + a[8 * N + n] * b[15 * N + n] +
+                       a[9 * N + n] * b[21 * N + n] + a[10 * N + n] * b[27 * N + n] + a[11 * N + n] * b[33 * N + n];
+        c[10 * N + n] = a[6 * N + n] * b[4 * N + n] + a[7 * N + n] * b[10 * N + n] + a[8 * N + n] * b[16 * N + n] +
+                        a[9 * N + n] * b[22 * N + n] + a[10 * N + n] * b[28 * N + n] + a[11 * N + n] * b[34 * N + n];
+        c[11 * N + n] = a[6 * N + n] * b[5 * N + n] + a[7 * N + n] * b[11 * N + n] + a[8 * N + n] * b[17 * N + n] +
+                        a[9 * N + n] * b[23 * N + n] + a[10 * N + n] * b[29 * N + n] + a[11 * N + n] * b[35 * N + n];
+        c[12 * N + n] = a[12 * N + n] * b[0 * N + n] + a[13 * N + n] * b[6 * N + n] + a[14 * N + n] * b[12 * N + n] +
+                        a[15 * N + n] * b[18 * N + n] + a[16 * N + n] * b[24 * N + n] + a[17 * N + n] * b[30 * N + n];
+        c[13 * N + n] = a[12 * N + n] * b[1 * N + n] + a[13 * N + n] * b[7 * N + n] + a[14 * N + n] * b[13 * N + n] +
+                        a[15 * N + n] * b[19 * N + n] + a[16 * N + n] * b[25 * N + n] + a[17 * N + n] * b[31 * N + n];
+        c[14 * N + n] = a[12 * N + n] * b[2 * N + n] + a[13 * N + n] * b[8 * N + n] + a[14 * N + n] * b[14 * N + n] +
+                        a[15 * N + n] * b[20 * N + n] + a[16 * N + n] * b[26 * N + n] + a[17 * N + n] * b[32 * N + n];
+        c[15 * N + n] = a[12 * N + n] * b[3 * N + n] + a[13 * N + n] * b[9 * N + n] + a[14 * N + n] * b[15 * N + n] +
+                        a[15 * N + n] * b[21 * N + n] + a[16 * N + n] * b[27 * N + n] + a[17 * N + n] * b[33 * N + n];
+        c[16 * N + n] = a[12 * N + n] * b[4 * N + n] + a[13 * N + n] * b[10 * N + n] + a[14 * N + n] * b[16 * N + n] +
+                        a[15 * N + n] * b[22 * N + n] + a[16 * N + n] * b[28 * N + n] + a[17 * N + n] * b[34 * N + n];
+        c[17 * N + n] = a[12 * N + n] * b[5 * N + n] + a[13 * N + n] * b[11 * N + n] + a[14 * N + n] * b[17 * N + n] +
+                        a[15 * N + n] * b[23 * N + n] + a[16 * N + n] * b[29 * N + n] + a[17 * N + n] * b[35 * N + n];
+        c[18 * N + n] = a[18 * N + n] * b[0 * N + n] + a[19 * N + n] * b[6 * N + n] + a[20 * N + n] * b[12 * N + n] +
+                        a[21 * N + n] * b[18 * N + n] + a[22 * N + n] * b[24 * N + n] + a[23 * N + n] * b[30 * N + n];
+        c[19 * N + n] = a[18 * N + n] * b[1 * N + n] + a[19 * N + n] * b[7 * N + n] + a[20 * N + n] * b[13 * N + n] +
+                        a[21 * N + n] * b[19 * N + n] + a[22 * N + n] * b[25 * N + n] + a[23 * N + n] * b[31 * N + n];
+        c[20 * N + n] = a[18 * N + n] * b[2 * N + n] + a[19 * N + n] * b[8 * N + n] + a[20 * N + n] * b[14 * N + n] +
+                        a[21 * N + n] * b[20 * N + n] + a[22 * N + n] * b[26 * N + n] + a[23 * N + n] * b[32 * N + n];
+        c[21 * N + n] = a[18 * N + n] * b[3 * N + n] + a[19 * N + n] * b[9 * N + n] + a[20 * N + n] * b[15 * N + n] +
+                        a[21 * N + n] * b[21 * N + n] + a[22 * N + n] * b[27 * N + n] + a[23 * N + n] * b[33 * N + n];
+        c[22 * N + n] = a[18 * N + n] * b[4 * N + n] + a[19 * N + n] * b[10 * N + n] + a[20 * N + n] * b[16 * N + n] +
+                        a[21 * N + n] * b[22 * N + n] + a[22 * N + n] * b[28 * N + n] + a[23 * N + n] * b[34 * N + n];
+        c[23 * N + n] = a[18 * N + n] * b[5 * N + n] + a[19 * N + n] * b[11 * N + n] + a[20 * N + n] * b[17 * N + n] +
+                        a[21 * N + n] * b[23 * N + n] + a[22 * N + n] * b[29 * N + n] + a[23 * N + n] * b[35 * N + n];
+        c[24 * N + n] = a[24 * N + n] * b[0 * N + n] + a[25 * N + n] * b[6 * N + n] + a[26 * N + n] * b[12 * N + n] +
+                        a[27 * N + n] * b[18 * N + n] + a[28 * N + n] * b[24 * N + n] + a[29 * N + n] * b[30 * N + n];
+        c[25 * N + n] = a[24 * N + n] * b[1 * N + n] + a[25 * N + n] * b[7 * N + n] + a[26 * N + n] * b[13 * N + n] +
+                        a[27 * N + n] * b[19 * N + n] + a[28 * N + n] * b[25 * N + n] + a[29 * N + n] * b[31 * N + n];
+        c[26 * N + n] = a[24 * N + n] * b[2 * N + n] + a[25 * N + n] * b[8 * N + n] + a[26 * N + n] * b[14 * N + n] +
+                        a[27 * N + n] * b[20 * N + n] + a[28 * N + n] * b[26 * N + n] + a[29 * N + n] * b[32 * N + n];
+        c[27 * N + n] = a[24 * N + n] * b[3 * N + n] + a[25 * N + n] * b[9 * N + n] + a[26 * N + n] * b[15 * N + n] +
+                        a[27 * N + n] * b[21 * N + n] + a[28 * N + n] * b[27 * N + n] + a[29 * N + n] * b[33 * N + n];
+        c[28 * N + n] = a[24 * N + n] * b[4 * N + n] + a[25 * N + n] * b[10 * N + n] + a[26 * N + n] * b[16 * N + n] +
+                        a[27 * N + n] * b[22 * N + n] + a[28 * N + n] * b[28 * N + n] + a[29 * N + n] * b[34 * N + n];
+        c[29 * N + n] = a[24 * N + n] * b[5 * N + n] + a[25 * N + n] * b[11 * N + n] + a[26 * N + n] * b[17 * N + n] +
+                        a[27 * N + n] * b[23 * N + n] + a[28 * N + n] * b[29 * N + n] + a[29 * N + n] * b[35 * N + n];
+        c[30 * N + n] = a[30 * N + n] * b[0 * N + n] + a[31 * N + n] * b[6 * N + n] + a[32 * N + n] * b[12 * N + n] +
+                        a[33 * N + n] * b[18 * N + n] + a[34 * N + n] * b[24 * N + n] + a[35 * N + n] * b[30 * N + n];
+        c[31 * N + n] = a[30 * N + n] * b[1 * N + n] + a[31 * N + n] * b[7 * N + n] + a[32 * N + n] * b[13 * N + n] +
+                        a[33 * N + n] * b[19 * N + n] + a[34 * N + n] * b[25 * N + n] + a[35 * N + n] * b[31 * N + n];
+        c[32 * N + n] = a[30 * N + n] * b[2 * N + n] + a[31 * N + n] * b[8 * N + n] + a[32 * N + n] * b[14 * N + n] +
+                        a[33 * N + n] * b[20 * N + n] + a[34 * N + n] * b[26 * N + n] + a[35 * N + n] * b[32 * N + n];
+        c[33 * N + n] = a[30 * N + n] * b[3 * N + n] + a[31 * N + n] * b[9 * N + n] + a[32 * N + n] * b[15 * N + n] +
+                        a[33 * N + n] * b[21 * N + n] + a[34 * N + n] * b[27 * N + n] + a[35 * N + n] * b[33 * N + n];
+        c[34 * N + n] = a[30 * N + n] * b[4 * N + n] + a[31 * N + n] * b[10 * N + n] + a[32 * N + n] * b[16 * N + n] +
+                        a[33 * N + n] * b[22 * N + n] + a[34 * N + n] * b[28 * N + n] + a[35 * N + n] * b[34 * N + n];
+        c[35 * N + n] = a[30 * N + n] * b[5 * N + n] + a[31 * N + n] * b[11 * N + n] + a[32 * N + n] * b[17 * N + n] +
+                        a[33 * N + n] * b[23 * N + n] + a[34 * N + n] * b[29 * N + n] + a[35 * N + n] * b[35 * N + n];
+      }
+    }
+  };
+
+  template <typename T, idx_t D, idx_t N>
+  void multiply(const MPlex<T, D, D, N>& A, const MPlex<T, D, D, N>& B, MPlex<T, D, D, N>& C) {
+#ifdef DEBUG
+    printf("Multipl %d %d\n", D, N);
+#endif
+    // C = A * B for square matrices; forwards to MultiplyCls for this dimension D.
+    MultiplyCls<T, D, N>::multiply(A, B, C);
+  }
+
+  //==============================================================================
+  // Cramer inversion
+  //==============================================================================
+
+  template <typename T, idx_t D, idx_t N>
+  struct CramerInverter {  // primary template: always throws; a specialization for D = 2 follows
+    static void invert(MPlex<T, D, D, N>& A, double* determ = nullptr) {
+      throw std::runtime_error("general cramer inversion not supported");
+    }
+  };
+  // 2x2 specialization: closed-form, in-place inverse of every matrix in the plex.
+  template <typename T, idx_t N>
+  struct CramerInverter<T, 2, N> {
+    static void invert(MPlex<T, 2, 2, N>& A, double* determ = nullptr) {
+      typedef T TT;
+      // Element i of matrix n is a[i * N + n]. determ, when non-null, receives N determinants.
+      T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        // Force determinant calculation in double precision.
+        const double det = (double)a[0 * N + n] * a[3 * N + n] - (double)a[2 * N + n] * a[1 * N + n];
+        if (determ)
+          determ[n] = det;
+        // No singularity check is made: the result is the adjugate scaled by 1 / det.
+        const TT s = TT(1) / det;
+        const TT tmp = s * a[3 * N + n];
+        a[1 * N + n] *= -s;
+        a[2 * N + n] *= -s;
+        a[3 * N + n] = s * a[0 * N + n];
+        a[0 * N + n] = tmp;
+      }
+    }
+  };
+  // 3x3 specialization: in-place inverse via cofactors (adjugate / determinant), one matrix per slot n.
+  template <typename T, idx_t N>
+  struct CramerInverter<T, 3, N> {
+    static void invert(MPlex<T, 3, 3, N>& A, double* determ = nullptr) {
+      typedef T TT;
+      // Element i of matrix n is a[i * N + n]. determ, when non-null, receives N determinants.
+      T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        const TT c00 = a[4 * N + n] * a[8 * N + n] - a[5 * N + n] * a[7 * N + n];
+        const TT c01 = a[5 * N + n] * a[6 * N + n] - a[3 * N + n] * a[8 * N + n];
+        const TT c02 = a[3 * N + n] * a[7 * N + n] - a[4 * N + n] * a[6 * N + n];
+        const TT c10 = a[7 * N + n] * a[2 * N + n] - a[8 * N + n] * a[1 * N + n];
+        const TT c11 = a[8 * N + n] * a[0 * N + n] - a[6 * N + n] * a[2 * N + n];
+        const TT c12 = a[6 * N + n] * a[1 * N + n] - a[7 * N + n] * a[0 * N + n];
+        const TT c20 = a[1 * N + n] * a[5 * N + n] - a[2 * N + n] * a[4 * N + n];
+        const TT c21 = a[2 * N + n] * a[3 * N + n] - a[0 * N + n] * a[5 * N + n];
+        const TT c22 = a[0 * N + n] * a[4 * N + n] - a[1 * N + n] * a[3 * N + n];
+        // All nine cofactors are computed before any element of a is overwritten.
+        // Force determinant calculation in double precision.
+        const double det = (double)a[0 * N + n] * c00 + (double)a[1 * N + n] * c01 + (double)a[2 * N + n] * c02;
+        if (determ)
+          determ[n] = det;
+        // No singularity check. The cofactors are written back transposed (c10 -> element 1, c01 -> element 3).
+        const TT s = TT(1) / det;
+        a[0 * N + n] = s * c00;
+        a[1 * N + n] = s * c10;
+        a[2 * N + n] = s * c20;
+        a[3 * N + n] = s * c01;
+        a[4 * N + n] = s * c11;
+        a[5 * N + n] = s * c21;
+        a[6 * N + n] = s * c02;
+        a[7 * N + n] = s * c12;
+        a[8 * N + n] = s * c22;
+      }
+    }
+  };
+  // In-place Cramer inverse of A; determ, when non-null, is passed on to receive the determinants.
+  template <typename T, idx_t D, idx_t N>
+  void invertCramer(MPlex<T, D, D, N>& A, double* determ = nullptr) {
+    CramerInverter<T, D, N>::invert(A, determ);
+  }
+
+  //==============================================================================
+  // Cholesky inversion
+  //==============================================================================
+  // Primary template, used for every D that has no specialization: invert() always throws.
+  template <typename T, idx_t D, idx_t N>
+  struct CholeskyInverter {
+    static void invert(MPlex<T, D, D, N>& A) { throw std::runtime_error("general cholesky inversion not supported"); }
+  };
+  // 3x3 specialization: in-place inverse through a Cholesky factorization A = L * L^T.
+  template <typename T, idx_t N>
+  struct CholeskyInverter<T, 3, N> {
+    // Note: this only works on symmetric matrices.
+    // Optimized version for positive definite matrices, no checks.
+    static void invert(MPlex<T, 3, 3, N>& A) {
+      typedef T TT;
+      // Only elements 0, 3, 4, 6, 7, 8 of each matrix are read; all nine are written.
+      T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        TT l0 = std::sqrt(T(1) / a[0 * N + n]);
+        TT l1 = a[3 * N + n] * l0;
+        TT l2 = a[4 * N + n] - l1 * l1;
+        l2 = std::sqrt(T(1) / l2);
+        TT l3 = a[6 * N + n] * l0;
+        TT l4 = (a[7 * N + n] - l1 * l3) * l2;
+        TT l5 = a[8 * N + n] - (l3 * l3 + l4 * l4);
+        l5 = std::sqrt(T(1) / l5);
+        // l0, l2, l5 hold the reciprocals of L's diagonal; l1, l3, l4 hold its off-diagonal entries.
+        // decomposition done
+        // Turn l1, l3, l4 into the off-diagonal entries of L^-1 (l3 first: it needs the old l1 and l4).
+        l3 = (l1 * l4 * l2 - l3) * l0 * l5;
+        l1 = -l1 * l0 * l2;
+        l4 = -l4 * l2 * l5;
+        // A^-1 = (L^-1)^T * L^-1; each off-diagonal value is written to both mirrored positions.
+        a[0 * N + n] = l3 * l3 + l1 * l1 + l0 * l0;
+        a[1 * N + n] = a[3 * N + n] = l3 * l4 + l1 * l2;
+        a[4 * N + n] = l4 * l4 + l2 * l2;
+        a[2 * N + n] = a[6 * N + n] = l3 * l5;
+        a[5 * N + n] = a[7 * N + n] = l4 * l5;
+        a[8 * N + n] = l5 * l5;
+        // NOTE(review): nothing in this loop tests or zeroes anything; the two remarks below look inherited.
+        // m(2,x) are all zero if anything went wrong at l5.
+        // all zero, if anything went wrong already for l0 or l2.
+      }
+    }
+  };
+  // In-place Cholesky inverse of A; forwards to the CholeskyInverter specialization for D.
+  template <typename T, idx_t D, idx_t N>
+  void invertCholesky(MPlex<T, D, D, N>& A) {
+    CholeskyInverter<T, D, N>::invert(A);
+  }
+
+}  // namespace Matriplex
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.cc b/RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.cc
new file mode 100644
index 0000000000000..cc5109e04d3c8
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.cc
@@ -0,0 +1,7 @@
+#include <cstdio>
+// Debug helper: prints pref, the address adr in hex, and adr modulo 64.
+namespace Matriplex {
+  void align_check(const char *pref, void *adr) {
+    printf("%s 0x%llx  -  modulo 64 = %lld\n", pref, (long long unsigned)adr, (long long)adr % 64);
+  }
+}  // namespace Matriplex
diff --git a/RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.h b/RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.h
new file mode 100644
index 0000000000000..2aef8d0908fc2
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/MatriplexCommon.h
@@ -0,0 +1,87 @@
+#ifndef RecoTracker_MkFitCore_src_Matriplex_MatriplexCommon_h
+#define RecoTracker_MkFitCore_src_Matriplex_MatriplexCommon_h
+
+#include <cstring>
+
+// Use intrinsics version of code when available, done via CPP flags.
+// #define  MPLEX_USE_INTRINSICS
+
+//==============================================================================
+// Intrinsics -- preamble
+//==============================================================================
+
+#include "immintrin.h"
+
+#if defined(MPLEX_USE_INTRINSICS)
+// This seems unnecessary: __AVX__ is usually defined for all higher ISA extensions
+#if defined(__AVX__) || defined(__AVX512F__)
+
+#define MPLEX_INTRINSICS
+
+#endif
+// LD(a, i) / ST(a, i, r) address &a[i * N + n]: N and n must be in scope wherever they are expanded.
+#if defined(__AVX512F__)
+
+typedef __m512 IntrVec_t;
+#define MPLEX_INTRINSICS_WIDTH_BYTES 64
+#define MPLEX_INTRINSICS_WIDTH_BITS 512
+#define AVX512_INTRINSICS
+#define GATHER_INTRINSICS
+#define GATHER_IDX_LOAD(name, arr) __m512i name = _mm512_load_epi32(arr);
+
+#define LD(a, i) _mm512_load_ps(&(a)[(i) * N + n])
+#define ST(a, i, r) _mm512_store_ps(&(a)[(i) * N + n], (r))
+#define ADD(a, b) _mm512_add_ps(a, b)
+#define MUL(a, b) _mm512_mul_ps(a, b)
+#define FMA(a, b, v) _mm512_fmadd_ps(a, b, v)
+
+#elif defined(__AVX2__) && defined(__FMA__)
+
+typedef __m256 IntrVec_t;
+#define MPLEX_INTRINSICS_WIDTH_BYTES 32
+#define MPLEX_INTRINSICS_WIDTH_BITS 256
+#define AVX2_INTRINSICS
+#define GATHER_INTRINSICS
+// Previously used _mm256_load_epi32(arr) here, but that's part of AVX-512F, not AVX2
+#define GATHER_IDX_LOAD(name, arr) __m256i name = _mm256_load_si256(reinterpret_cast<const __m256i *>(arr));
+
+#define LD(a, i) _mm256_load_ps(&(a)[(i) * N + n])
+#define ST(a, i, r) _mm256_store_ps(&(a)[(i) * N + n], (r))
+#define ADD(a, b) _mm256_add_ps(a, b)
+#define MUL(a, b) _mm256_mul_ps(a, b)
+#define FMA(a, b, v) _mm256_fmadd_ps(a, b, v)
+
+#elif defined(__AVX__)
+
+typedef __m256 IntrVec_t;
+#define MPLEX_INTRINSICS_WIDTH_BYTES 32
+#define MPLEX_INTRINSICS_WIDTH_BITS 256
+#define AVX_INTRINSICS
+
+#define LD(a, i) _mm256_load_ps(&(a)[(i) * N + n])
+#define ST(a, i, r) _mm256_store_ps(&(a)[(i) * N + n], (r))
+#define ADD(a, b) _mm256_add_ps(a, b)
+#define MUL(a, b) _mm256_mul_ps(a, b)
+// This branch has no fused instruction: FMA(a, b, v) is a separate multiply followed by an add.
+inline __m256 FMA(const __m256 &a, const __m256 &b, const __m256 &v) {
+  __m256 temp = _mm256_mul_ps(a, b);
+  return _mm256_add_ps(temp, v);
+}
+
+#endif
+
+#endif
+// ASSUME_ALIGNED(a, b): tell the compiler pointer a is b-byte aligned. The non-Intel form assigns to a.
+#ifdef __INTEL_COMPILER
+#define ASSUME_ALIGNED(a, b) __assume_aligned(a, b)
+#else
+#define ASSUME_ALIGNED(a, b) a = static_cast<decltype(a)>(__builtin_assume_aligned(a, b))
+#endif
+// Shared declarations: idx_t is the index type used for sizes and loop counters in the Matriplex code.
+namespace Matriplex {
+  typedef int idx_t;
+  // Debug helper defined in MatriplexCommon.cc: prints pref, the address adr and adr modulo 64.
+  void align_check(const char *pref, void *adr);
+}  // namespace Matriplex
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/Matriplex/MatriplexSym.h b/RecoTracker/MkFitCore/src/Matriplex/MatriplexSym.h
new file mode 100644
index 0000000000000..6b8cf44222adc
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/MatriplexSym.h
@@ -0,0 +1,467 @@
+#ifndef RecoTracker_MkFitCore_src_Matriplex_MatriplexSym_h
+#define RecoTracker_MkFitCore_src_Matriplex_MatriplexSym_h
+
+#include "MatriplexCommon.h"
+#include "Matriplex.h"
+
+//==============================================================================
+// MatriplexSym
+//==============================================================================
+
+namespace Matriplex {
+  // gSymOffsets[D][i * D + j]: packed index of element (i, j). Rows other than D = 2, 3, 6 are empty (all zero).
+  const idx_t gSymOffsets[7][36] = {{},
+                                    {},
+                                    {0, 1, 1, 2},
+                                    {0, 1, 3, 1, 2, 4, 3, 4, 5},  // 3
+                                    {},
+                                    {},
+                                    {0, 1, 3, 6, 10, 15, 1,  2,  4,  7,  11, 16, 3,  4,  5,  8,  12, 17,
+                                     6, 7, 8, 9, 13, 18, 10, 11, 12, 13, 14, 19, 15, 16, 17, 18, 19, 20}};
+
+  //------------------------------------------------------------------------------
+  // N symmetric DxD matrices stored together; only kSize = D * (D + 1) / 2 packed elements are kept per matrix.
+  template <typename T, idx_t D, idx_t N>
+  class MatriplexSym {
+  public:
+    typedef T value_type;
+
+    /// no. of matrix rows
+    static constexpr int kRows = D;
+    /// no. of matrix columns
+    static constexpr int kCols = D;
+    /// no of elements: lower triangle
+    static constexpr int kSize = (D + 1) * D / 2;
+    /// size of the whole matriplex
+    static constexpr int kTotSize = N * kSize;
+    /// storage: packed element k of matrix n is fArray[k * N + n]
+    T fArray[kTotSize] __attribute__((aligned(64)));
+    // The default constructor leaves fArray uninitialized; the T overload fills it with v.
+    MatriplexSym() {}
+    MatriplexSym(T v) { setVal(v); }
+
+    idx_t plexSize() const { return N; }
+    // setVal / add / scale act element-wise on the whole storage, i.e. on all N matrices at once.
+    void setVal(T v) {
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] = v;
+      }
+    }
+
+    void add(const MatriplexSym& v) {
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] += v.fArray[i];
+      }
+    }
+
+    void scale(T scale) {
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] *= scale;
+      }
+    }
+    // Unchecked access by flat storage index.
+    T operator[](idx_t xx) const { return fArray[xx]; }
+    T& operator[](idx_t xx) { return fArray[xx]; }
+    // Packed index of full-matrix position i * D + j, looked up in gSymOffsets[D].
+    const idx_t* offsets() const { return gSymOffsets[D]; }
+    idx_t off(idx_t i) const { return gSymOffsets[D][i]; }
+    // Element (i, j) of matrix n; the offset table maps (i, j) and (j, i) to the same stored value.
+    const T& constAt(idx_t n, idx_t i, idx_t j) const { return fArray[off(i * D + j) * N + n]; }
+
+    T& At(idx_t n, idx_t i, idx_t j) { return fArray[off(i * D + j) * N + n]; }
+
+    T& operator()(idx_t n, idx_t i, idx_t j) { return At(n, i, j); }
+    const T& operator()(idx_t n, idx_t i, idx_t j) const { return constAt(n, i, j); }
+
+    MatriplexSym& operator=(const MatriplexSym& m) {
+      memcpy(fArray, m.fArray, sizeof(T) * kTotSize);
+      return *this;
+    }
+    // Copy matrix n of m into matrix n of *this.
+    void copySlot(idx_t n, const MatriplexSym& m) {
+      for (idx_t i = n; i < kTotSize; i += N) {
+        fArray[i] = m.fArray[i];
+      }
+    }
+    // Fill matrix n from consecutive packed values starting at arr (kSize of them for n < N).
+    void copyIn(idx_t n, const T* arr) {
+      for (idx_t i = n; i < kTotSize; i += N) {
+        fArray[i] = *(arr++);
+      }
+    }
+    // Fill matrix n from m, reading m's storage at flat indices in, in + N, in + 2 * N, ...
+    void copyIn(idx_t n, const MatriplexSym& m, idx_t in) {
+      for (idx_t i = n; i < kTotSize; i += N, in += N) {
+        fArray[i] = m[in];
+      }
+    }
+    // Same as the copyIn overload above, with *this as the source.
+    void copy(idx_t n, idx_t in) {
+      for (idx_t i = n; i < kTotSize; i += N, in += N) {
+        fArray[i] = fArray[in];
+      }
+    }
+
+#if defined(AVX512_INTRINSICS)
+    // Masked gather of the first N_proc matrices: lane j reads (arr + i) at byte offset vi[j] * sizeof(U).
+    template <typename U>
+    void slurpIn(const T* arr, __m512i& vi, const U&, const int N_proc = N) {
+      //_mm512_prefetch_i32gather_ps(vi, arr, 1, _MM_HINT_T0);
+
+      const __m512 src = {0};
+      const __mmask16 k = N_proc == N ? -1 : (1 << N_proc) - 1;
+
+      for (int i = 0; i < kSize; ++i, ++arr) {
+        //_mm512_prefetch_i32gather_ps(vi, arr+2, 1, _MM_HINT_NTA);
+
+        __m512 reg = _mm512_mask_i32gather_ps(src, k, vi, arr, sizeof(U));
+        _mm512_mask_store_ps(&fArray[i * N], k, reg);
+      }
+    }
+
+    // Experimental methods, slurpIn() seems to be at least as fast.
+    // See comments in mkFit/MkFitter.cc MkFitter::addBestHit().
+
+    void ChewIn(const char* arr, int off, int vi[N], const char* tmp, __m512i& ui) {
+      // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
+
+      for (int i = 0; i < N; ++i) {
+        __m512 reg = _mm512_load_ps(arr + vi[i]);
+        _mm512_store_ps((void*)(tmp + 64 * i), reg);
+      }
+
+      for (int i = 0; i < kSize; ++i) {
+        __m512 reg = _mm512_i32gather_ps(ui, tmp + off + i * sizeof(T), 1);
+        _mm512_store_ps(&fArray[i * N], reg);
+      }
+    }
+
+    void Contaginate(const char* arr, int vi[N], const char* tmp) {
+      // This is a hack ... we know sizeof(Hit) = 64 = cache line = vector width.
+
+      for (int i = 0; i < N; ++i) {
+        __m512 reg = _mm512_load_ps(arr + vi[i]);
+        _mm512_store_ps((void*)(tmp + 64 * i), reg);
+      }
+    }
+
+    void Plexify(const char* tmp, __m512i& ui) {
+      for (int i = 0; i < kSize; ++i) {
+        __m512 reg = _mm512_i32gather_ps(ui, tmp + i * sizeof(T), 1);
+        _mm512_store_ps(&fArray[i * N], reg);
+      }
+    }
+
+#elif defined(AVX2_INTRINSICS)
+    // AVX2 variant of the masked gather: the mask is a vector whose lanes below N_proc are all-ones.
+    template <typename U>
+    void slurpIn(const T* arr, __m256i& vi, const U&, const int N_proc = N) {
+      const __m256 src = {0};
+
+      __m256i k = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+      __m256i k_sel = _mm256_set1_epi32(N_proc);
+      __m256i k_master = _mm256_cmpgt_epi32(k_sel, k);
+
+      k = k_master;
+      for (int i = 0; i < kSize; ++i, ++arr) {
+        __m256 reg = _mm256_mask_i32gather_ps(src, arr, vi, (__m256)k, sizeof(U));
+        // Restore mask (docs say gather clears it but it doesn't seem to).
+        k = k_master;
+        _mm256_maskstore_ps(&fArray[i * N], k, reg);
+      }
+    }
+
+#else
+    // Portable variant (different signature): vi[j] is an offset from arr in units of T.
+    void slurpIn(const T* arr, int vi[N], const int N_proc = N) {
+      // Separate N_proc == N case (gains about 7% in fit test).
+      if (N_proc == N) {
+        for (int i = 0; i < kSize; ++i) {
+          for (int j = 0; j < N; ++j) {
+            fArray[i * N + j] = *(arr + i + vi[j]);
+          }
+        }
+      } else {
+        for (int i = 0; i < kSize; ++i) {
+          for (int j = 0; j < N_proc; ++j) {
+            fArray[i * N + j] = *(arr + i + vi[j]);
+          }
+        }
+      }
+    }
+
+#endif
+    // Write the packed values of matrix n to consecutive positions starting at arr.
+    void copyOut(idx_t n, T* arr) const {
+      for (idx_t i = n; i < kTotSize; i += N) {
+        *(arr++) = fArray[i];
+      }
+    }
+    // Set packed elements 0..5 of matrix n (its upper-left 3x3 block) to d times the identity.
+    void setDiagonal3x3(idx_t n, T d) {
+      T* p = fArray + n;
+
+      p[0 * N] = d;
+      p[1 * N] = 0;
+      p[2 * N] = d;
+      p[3 * N] = 0;
+      p[4 * N] = 0;
+      p[5 * N] = d;
+    }
+
+    MatriplexSym& subtract(const MatriplexSym& a, const MatriplexSym& b) {
+      // Does *this = a - b;
+
+#pragma omp simd
+      for (idx_t i = 0; i < kTotSize; ++i) {
+        fArray[i] = a.fArray[i] - b.fArray[i];
+      }
+
+      return *this;
+    }
+
+    // ==================================================================
+    // Operations specific to Kalman fit in 6 parameter space
+    // ==================================================================
+    // Add noise to packed elements 0, 2 and 5 (the diagonal of the upper-left 3x3 block) of every matrix.
+    void addNoiseIntoUpperLeft3x3(T noise) {
+      T* p = fArray;
+      ASSUME_ALIGNED(p, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        p[0 * N + n] += noise;
+        p[2 * N + n] += noise;
+        p[5 * N + n] += noise;
+      }
+    }
+    // In-place cofactor inverse of packed elements 0..5 of every matrix; later elements are not touched.
+    void invertUpperLeft3x3() {
+      typedef T TT;
+
+      T* a = fArray;
+      ASSUME_ALIGNED(a, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        const TT c00 = a[2 * N + n] * a[5 * N + n] - a[4 * N + n] * a[4 * N + n];
+        const TT c01 = a[4 * N + n] * a[3 * N + n] - a[1 * N + n] * a[5 * N + n];
+        const TT c02 = a[1 * N + n] * a[4 * N + n] - a[2 * N + n] * a[3 * N + n];
+        const TT c11 = a[5 * N + n] * a[0 * N + n] - a[3 * N + n] * a[3 * N + n];
+        const TT c12 = a[3 * N + n] * a[1 * N + n] - a[4 * N + n] * a[0 * N + n];
+        const TT c22 = a[0 * N + n] * a[2 * N + n] - a[1 * N + n] * a[1 * N + n];
+
+        // Force determinant calculation in double precision.
+        const double det = (double)a[0 * N + n] * c00 + (double)a[1 * N + n] * c01 + (double)a[3 * N + n] * c02;
+        const TT s = TT(1) / det;
+        // No singularity check is made before scaling by 1 / det.
+        a[0 * N + n] = s * c00;
+        a[1 * N + n] = s * c01;
+        a[2 * N + n] = s * c11;
+        a[3 * N + n] = s * c02;
+        a[4 * N + n] = s * c12;
+        a[5 * N + n] = s * c22;
+      }
+    }
+  };
+  // Short alias for MatriplexSym, used by the free functions below.
+  template <typename T, idx_t D, idx_t N>
+  using MPlexSym = MatriplexSym<T, D, N>;
+
+  //==============================================================================
+  // Multiplications
+  //==============================================================================
+  // Primary template, used for every D that has no specialization: multiply() always throws.
+  template <typename T, idx_t D, idx_t N>
+  struct SymMultiplyCls {
+    static void multiply(const MPlexSym<T, D, N>& A, const MPlexSym<T, D, N>& B, MPlex<T, D, D, N>& C) {
+      throw std::runtime_error("general symmetric multiplication not supported");
+    }
+  };
+  // 3x3 specialization: C = A * B with A, B packed symmetric and C a full 3x3 matriplex.
+  template <typename T, idx_t N>
+  struct SymMultiplyCls<T, 3, N> {
+    static void multiply(const MPlexSym<T, 3, N>& A, const MPlexSym<T, 3, N>& B, MPlex<T, 3, 3, N>& C) {
+      const T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+      const T* b = B.fArray;
+      ASSUME_ALIGNED(b, 64);
+      T* c = C.fArray;
+      ASSUME_ALIGNED(c, 64);
+
+#ifdef MPLEX_INTRINSICS
+      // Step by the lanes of one IntrVec_t; a fixed 64 bytes would skip slots with 256-bit vectors.
+      for (idx_t n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
+#include "intr_sym_3x3.ah"
+      }
+
+#else
+      // Portable path: one slot per iteration, left to the compiler to vectorize.
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+#include "std_sym_3x3.ah"
+      }
+
+#endif
+    }
+  };
+  // 6x6 specialization: C = A * B with A, B packed symmetric and C a full 6x6 matriplex.
+  template <typename T, idx_t N>
+  struct SymMultiplyCls<T, 6, N> {
+    static void multiply(const MPlexSym<T, 6, N>& A, const MPlexSym<T, 6, N>& B, MPlex<T, 6, 6, N>& C) {
+      const T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+      const T* b = B.fArray;
+      ASSUME_ALIGNED(b, 64);
+      T* c = C.fArray;
+      ASSUME_ALIGNED(c, 64);
+
+#ifdef MPLEX_INTRINSICS
+      // Step by the lanes of one IntrVec_t; a fixed 64 bytes would skip slots with 256-bit vectors.
+      for (idx_t n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T)) {
+#include "intr_sym_6x6.ah"
+      }
+
+#else
+      // Portable path: one slot per iteration, left to the compiler to vectorize.
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+#include "std_sym_6x6.ah"
+      }
+
+#endif
+    }
+  };
+  // C = A * B for packed symmetric inputs; forwards to the SymMultiplyCls specialization for D.
+  template <typename T, idx_t D, idx_t N>
+  void multiply(const MPlexSym<T, D, N>& A, const MPlexSym<T, D, N>& B, MPlex<T, D, D, N>& C) {
+    SymMultiplyCls<T, D, N>::multiply(A, B, C);
+  }
+
+  //==============================================================================
+  // Cramer inversion
+  //==============================================================================
+  // Primary template, used for every D that has no specialization: invert() always throws.
+  template <typename T, idx_t D, idx_t N>
+  struct CramerInverterSym {
+    static void invert(MPlexSym<T, D, N>& A, double* determ = nullptr) {
+      throw std::runtime_error("general cramer inversion not supported");
+    }
+  };
+  // 2x2 specialization on packed storage: element 0 = (0,0), 1 = the off-diagonal, 2 = (1,1).
+  template <typename T, idx_t N>
+  struct CramerInverterSym<T, 2, N> {
+    static void invert(MPlexSym<T, 2, N>& A, double* determ = nullptr) {
+      typedef T TT;
+      // Packed element k of matrix n is a[k * N + n]. determ, when non-null, receives N determinants.
+      T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        // Force determinant calculation in double precision.
+        const double det = (double)a[0 * N + n] * a[2 * N + n] - (double)a[1 * N + n] * a[1 * N + n];
+        if (determ)
+          determ[n] = det;
+        // No singularity check: swap the diagonal, negate the off-diagonal, scale by 1 / det.
+        const TT s = TT(1) / det;
+        const TT tmp = s * a[2 * N + n];
+        a[1 * N + n] *= -s;
+        a[2 * N + n] = s * a[0 * N + n];
+        a[0 * N + n] = tmp;
+      }
+    }
+  };
+  // 3x3 specialization on packed storage: 0 = (0,0), 1 = (1,0), 2 = (1,1), 3 = (2,0), 4 = (2,1), 5 = (2,2).
+  template <typename T, idx_t N>
+  struct CramerInverterSym<T, 3, N> {
+    static void invert(MPlexSym<T, 3, N>& A, double* determ = nullptr) {
+      typedef T TT;
+      // Packed element k of matrix n is a[k * N + n]. determ, when non-null, receives N determinants.
+      T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        const TT c00 = a[2 * N + n] * a[5 * N + n] - a[4 * N + n] * a[4 * N + n];
+        const TT c01 = a[4 * N + n] * a[3 * N + n] - a[1 * N + n] * a[5 * N + n];
+        const TT c02 = a[1 * N + n] * a[4 * N + n] - a[2 * N + n] * a[3 * N + n];
+        const TT c11 = a[5 * N + n] * a[0 * N + n] - a[3 * N + n] * a[3 * N + n];
+        const TT c12 = a[3 * N + n] * a[1 * N + n] - a[4 * N + n] * a[0 * N + n];
+        const TT c22 = a[0 * N + n] * a[2 * N + n] - a[1 * N + n] * a[1 * N + n];
+        // Only six cofactors are needed: the inverse is stored in the same packed symmetric form.
+        // Force determinant calculation in double precision.
+        const double det = (double)a[0 * N + n] * c00 + (double)a[1 * N + n] * c01 + (double)a[3 * N + n] * c02;
+        if (determ)
+          determ[n] = det;
+        // No singularity check is made before scaling by 1 / det.
+        const TT s = TT(1) / det;
+        a[0 * N + n] = s * c00;
+        a[1 * N + n] = s * c01;
+        a[2 * N + n] = s * c11;
+        a[3 * N + n] = s * c02;
+        a[4 * N + n] = s * c12;
+        a[5 * N + n] = s * c22;
+      }
+    }
+  };
+  // In-place Cramer inverse of packed symmetric A; determ, when non-null, receives the determinants.
+  template <typename T, idx_t D, idx_t N>
+  void invertCramerSym(MPlexSym<T, D, N>& A, double* determ = nullptr) {
+    CramerInverterSym<T, D, N>::invert(A, determ);
+  }
+
+  //==============================================================================
+  // Cholesky inversion
+  //==============================================================================
+  // Primary template, used for every D that has no specialization: invert() always throws.
+  template <typename T, idx_t D, idx_t N>
+  struct CholeskyInverterSym {
+    static void invert(MPlexSym<T, D, N>& A) { throw std::runtime_error("general cholesky inversion not supported"); }
+  };
+  // 3x3 specialization: in-place inverse of packed symmetric matrices via Cholesky, A = L * L^T. No checks.
+  template <typename T, idx_t N>
+  struct CholeskyInverterSym<T, 3, N> {
+    static void invert(MPlexSym<T, 3, N>& A) {
+      typedef T TT;
+      // Same alignment hint as the other kernels in this header.
+      T* a = A.fArray;
+      ASSUME_ALIGNED(a, 64);
+#pragma omp simd
+      for (idx_t n = 0; n < N; ++n) {
+        TT l0 = std::sqrt(T(1) / a[0 * N + n]);
+        TT l1 = a[1 * N + n] * l0;
+        TT l2 = a[2 * N + n] - l1 * l1;
+        l2 = std::sqrt(T(1) / l2);
+        TT l3 = a[3 * N + n] * l0;
+        TT l4 = (a[4 * N + n] - l1 * l3) * l2;
+        TT l5 = a[5 * N + n] - (l3 * l3 + l4 * l4);
+        l5 = std::sqrt(T(1) / l5);
+        // l0, l2, l5 hold the reciprocals of L's diagonal; l1, l3, l4 hold its off-diagonal entries.
+        // decomposition done
+        // Turn l1, l3, l4 into the off-diagonal entries of L^-1 (l3 first: it needs the old l1 and l4).
+        l3 = (l1 * l4 * l2 - l3) * l0 * l5;
+        l1 = -l1 * l0 * l2;
+        l4 = -l4 * l2 * l5;
+        // A^-1 = (L^-1)^T * L^-1, written back in packed form.
+        a[0 * N + n] = l3 * l3 + l1 * l1 + l0 * l0;
+        a[1 * N + n] = l3 * l4 + l1 * l2;
+        a[2 * N + n] = l4 * l4 + l2 * l2;
+        a[3 * N + n] = l3 * l5;
+        a[4 * N + n] = l4 * l5;
+        a[5 * N + n] = l5 * l5;
+        // NOTE(review): nothing in this loop tests or zeroes anything; the two remarks below look inherited.
+        // m(2,x) are all zero if anything went wrong at l5.
+        // all zero, if anything went wrong already for l0 or l2.
+      }
+    }
+  };
+  // In-place Cholesky inverse of packed symmetric A; forwards to the CholeskyInverterSym specialization.
+  template <typename T, idx_t D, idx_t N>
+  void invertCholeskySym(MPlexSym<T, D, N>& A) {
+    CholeskyInverterSym<T, D, N>::invert(A);
+  }
+
+}  // end namespace Matriplex
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/Matriplex/MatriplexVector.h b/RecoTracker/MkFitCore/src/Matriplex/MatriplexVector.h
new file mode 100644
index 0000000000000..8b3d0c33bf2c8
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/MatriplexVector.h
@@ -0,0 +1,154 @@
+#ifndef RecoTracker_MkFitCore_src_Matriplex_MatriplexVector_h
+#define RecoTracker_MkFitCore_src_Matriplex_MatriplexVector_h
+
+#include "Matriplex.h"
+
+#include <vector>
+#include <cassert>
+
+namespace Matriplex {
+
+  //------------------------------------------------------------------------------
+  // Heap-allocated, 64-byte aligned array of fN matriplexes of type MP, with per-matrix accessors.
+  template <class MP>
+  class MatriplexVector {
+    MP* fV;
+    const idx_t fN;
+
+    typedef typename MP::value_type T;
+    // Matrices per matriplex (assumes MP exposes kTotSize and kSize the way MatriplexSym does).
+    static constexpr idx_t kPlexSize = MP::kTotSize / MP::kSize;
+
+  public:
+    MatriplexVector(idx_t n) : fN(n) { fV = (MP*)std::aligned_alloc(64, sizeof(MP) * fN); }
+    // This object owns fV: a member-wise copy would free the same buffer twice.
+    MatriplexVector(const MatriplexVector&) = delete;
+    ~MatriplexVector() { std::free(fV); }
+
+    idx_t size() const { return fN; }
+
+    MP& mplex(int i) { return fV[i]; }
+    MP& operator[](int i) { return fV[i]; }
+    const MP& mplex(int i) const { return fV[i]; }
+    const MP& operator[](int i) const { return fV[i]; }
+    // Set every element of every matriplex to v.
+    void setVal(T v) {
+      for (idx_t i = 0; i < fN; ++i) {
+        fV[i].setVal(v);
+      }
+    }
+    // Global matrix index n lives in matriplex n / kPlexSize, slot n % kPlexSize.
+    T& At(idx_t n, idx_t i, idx_t j) { return fV[n / kPlexSize].At(n % kPlexSize, i, j); }
+    T& operator()(idx_t n, idx_t i, idx_t j) { return fV[n / kPlexSize].At(n % kPlexSize, i, j); }
+    void copyIn(idx_t n, T* arr) { fV[n / kPlexSize].copyIn(n % kPlexSize, arr); }
+    void copyOut(idx_t n, T* arr) { fV[n / kPlexSize].copyOut(n % kPlexSize, arr); }
+  };
+  // Short alias for MatriplexVector, used by the free functions below.
+  template <class MP>
+  using MPlexVec = MatriplexVector<MP>;
+
+  //==============================================================================
+  // C[i] = A[i] * B[i] for the first n_to_process matriplexes; 0 (the default) means all of them.
+  template <typename T, idx_t D1, idx_t D2, idx_t D3, idx_t N>
+  void multiply(const MPlexVec<MPlex<T, D1, D2, N>>& A,
+                const MPlexVec<MPlex<T, D2, D3, N>>& B,
+                MPlexVec<MPlex<T, D1, D3, N>>& C,
+                int n_to_process = 0) {
+    assert(A.size() == B.size());
+    assert(A.size() == C.size());
+
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      multiply(A[ip], B[ip], C[ip]);
+    }
+  }
+  // Applies multiplyGeneral() to the first n_to_process matriplexes; 0 (the default) means all of them.
+  template <typename T, idx_t D1, idx_t D2, idx_t D3, idx_t N>
+  void multiplyGeneral(const MPlexVec<MPlex<T, D1, D2, N>>& A,
+                       const MPlexVec<MPlex<T, D2, D3, N>>& B,
+                       MPlexVec<MPlex<T, D1, D3, N>>& C,
+                       int n_to_process = 0) {
+    assert(A.size() == B.size());
+    assert(A.size() == C.size());
+
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      multiplyGeneral(A[ip], B[ip], C[ip]);
+    }
+  }
+  // Per matriplex: C = A * B, then A = B * C, then B = C * A, so all three vectors are overwritten.
+  template <typename T, idx_t D1, idx_t D2, idx_t D3, idx_t N>
+  void multiply3in(MPlexVec<MPlex<T, D1, D2, N>>& A,
+                   MPlexVec<MPlex<T, D2, D3, N>>& B,
+                   MPlexVec<MPlex<T, D1, D3, N>>& C,
+                   int n_to_process = 0) {
+    assert(A.size() == B.size());
+    assert(A.size() == C.size());
+
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      multiply(A[ip], B[ip], C[ip]);
+      multiply(B[ip], C[ip], A[ip]);
+      multiply(C[ip], A[ip], B[ip]);
+    }
+  }
+  // Symmetric inputs: C[i] = A[i] * B[i] for the first n_to_process matriplexes; 0 means all of them.
+  template <typename T, idx_t D, idx_t N>
+  void multiply(const MPlexVec<MPlexSym<T, D, N>>& A,
+                const MPlexVec<MPlexSym<T, D, N>>& B,
+                MPlexVec<MPlex<T, D, D, N>>& C,
+                int n_to_process = 0) {
+    assert(A.size() == B.size());
+    assert(A.size() == C.size());
+
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      multiply(A[ip], B[ip], C[ip]);
+    }
+  }
+
+  //==============================================================================
+  // In-place Cramer inverse of the first n_to_process matriplexes; 0 (the default) means all of them.
+  template <typename T, idx_t D, idx_t N>
+  void invertCramer(MPlexVec<MPlex<T, D, D, N>>& A, int n_to_process = 0) {
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      invertCramer(A[ip]);
+    }
+  }
+  // In-place Cholesky inverse of the first n_to_process matriplexes; 0 (the default) means all of them.
+  template <typename T, idx_t D, idx_t N>
+  void invertCholesky(MPlexVec<MPlex<T, D, D, N>>& A, int n_to_process = 0) {
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      invertCholesky(A[ip]);
+    }
+  }
+  // In-place Cramer inverse of the first n_to_process symmetric matriplexes; 0 means all of them.
+  template <typename T, idx_t D, idx_t N>
+  void invertCramerSym(MPlexVec<MPlexSym<T, D, N>>& A, int n_to_process = 0) {
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      invertCramerSym(A[ip]);
+    }
+  }
+  // In-place Cholesky inverse of the first n_to_process symmetric matriplexes; 0 means all of them.
+  template <typename T, idx_t D, idx_t N>
+  void invertCholeskySym(MPlexVec<MPlexSym<T, D, N>>& A, int n_to_process = 0) {
+    // A zero count is the "process the whole vector" sentinel.
+    const int count = (n_to_process != 0) ? n_to_process : A.size();
+    for (int ip = 0; ip < count; ++ip) {
+      invertCholeskySym(A[ip]);
+    }
+  }
+
+}  // namespace Matriplex
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/Matriplex/gen_mul.pl b/RecoTracker/MkFitCore/src/Matriplex/gen_mul.pl
new file mode 100755
index 0000000000000..55fcfb62974e5
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/gen_mul.pl
@@ -0,0 +1,242 @@
+#!/usr/bin/perl
+# $Offs[D] maps full-matrix position i*D+j to the packed index of a symmetric DxD matrix.
+$Offs[3] = [ 0, 1, 3, 1, 2, 4, 3, 4, 5 ];
+$Offs[4] = [ 0, 1, 3, 6, 1, 2, 4, 7, 3, 4, 5, 8, 6, 7, 8, 9 ];
+$Offs[5] = [ 0, 1, 3, 6, 10, 1, 2, 4, 7, 11, 3, 4, 5, 8, 12, 6, 7, 8, 9, 13, 10, 11, 12, 13, 14 ];
+$Offs[6] = [ 0, 1, 3, 6, 10, 15, 1, 2, 4, 7, 11, 16, 3, 4, 5, 8, 12, 17, 6, 7, 8, 9, 13, 18, 10, 11, 12, 13, 14, 19, 15, 16, 17, 18, 19, 20 ];
+
+$PREF = "      ";
+$BR   = " ";
+$JOIN = "$BR";
+$POST = " ";
+
+# Names of the array variables that appear in the generated code.
+#$a = "A.fArray";
+#$b = "B.fArray";
+#$c = "C.fArray";
+$a = "a";
+$b = "b";
+$c = "c";
+
+$vectype = "IntrVec_t";
+# Not read anywhere else in this script.
+$SYMMETRIC = 1;
+
+################################################################################
+# mult_sym(D): print one "c[..] = a[..]*b[..] + ...;" line per element of C = A * B, A and B symmetric DxD.
+# a and b are addressed through the packed table $Offs[D]; c is a full matrix (index row * D + col).
+sub mult_sym
+{
+  my ($dim) = @_;
+
+  my @packed = @{$Offs[$dim]};
+
+  for (my $row = 0; $row < $dim; ++$row)
+  {
+    # (A packed, lower-triangle-only result would restrict $col to 0 .. $row and index c via @packed.)
+    for (my $col = 0; $col < $dim; ++$col)
+    {
+      # Collect the D products a(row, mid) * b(mid, col).
+      my @terms;
+
+      for (my $mid = 0; $mid < $dim; ++$mid)
+      {
+        my $a_idx = $packed[$row * $dim + $mid];
+        my $b_idx = $packed[$mid * $dim + $col];
+
+        push @terms, sprintf("${a}[%2d*N+n]*${b}[%2d*N+n]", $a_idx, $b_idx);
+      }
+
+      # Left-hand side first, then the joined products and the terminating semicolon.
+      printf "${PREF}${c}[%2d*N+n] =${POST}", $row * $dim + $col;
+      print join(" +$JOIN", @terms), ";\n";
+    }
+  }
+}
+# mult_sym_fma(D): same product as mult_sym, printed one term per statement ("=" for k == 0, "+=" after).
+sub mult_sym_fma
+{
+  # This actually runs quite horribly, twice slower than the
+  # fully expressed version.
+  # Changing order of k and i loops doesn't change anything.
+  #
+  # However, this should be close to what we need for auto-generated
+  # intrinsics!
+
+  my $D = shift;
+
+  my @Off = @{$Offs[$D]};
+
+  for (my $i = 0; $i < $D; ++$i)
+  {
+    for (my $k = 0; $k < $D; ++$k)
+    {
+      for (my $j = 0; $j < $D; ++$j)
+      {
+        my $x = $i * $D + $j;
+        my $iko = $Off[$i * $D + $k];
+        my $kjo = $Off[$k * $D + $j];
+        # The first term of each sum assigns, the remaining ones accumulate.
+        my $op = ($k == 0) ? " =" : "+=";
+
+        printf "${PREF}${c}[%2d*N+n] ${op} ${a}[%2d*N+n]*${b}[%2d*N+n];\n",
+           $x, $iko, $kjo;
+      }
+      print "\n";
+    }
+  }
+}
+# reg_name(var, idx): name of the generated register variable for element idx of array var, "var_idx".
+sub reg_name
+{
+  my $var = shift;
+  my $idx = shift;
+
+  return join("_", $var, $idx);
+}
+# load_if_needed(var, idx, counts): return the register name for element idx of var, printing its
+# LD() declaration the first time it is requested; counts->[idx] is the number of requests so far.
+sub load_if_needed
+{
+  my ($var, $idx, $arc) = @_;
+  my $reg = reg_name($var, $idx);
+
+  unless ($arc->[$idx])
+  {
+    # First request: emit the load, which also advances the caller's $tick counter.
+    print "${PREF}${vectype} ${reg} = LD($var, $idx);\n";
+    ++$tick;
+  }
+  ++$arc->[$idx];
+
+  return $reg;
+}
+# store(var, idx): print the ST() statement that writes register var_idx back to element idx of var.
+sub store
+{
+  my ($var, $idx) = @_;
+  my $reg = reg_name($var, $idx);
+
+  # The register name is also handed back to the caller.
+  print "${PREF}ST(${var}, ${idx}, ${reg});\n";
+
+  return $reg;
+}
+# mult_sym_fma_intrinsic(D): like mult_sym_fma, but prints LD/MUL/FMA/ST macro calls on named registers.
+sub mult_sym_fma_intrinsic
+{
+  # Order of k and i loops should be different for 3x3 and 6x6. (?)
+
+  my $D = shift;
+
+  my @Off = @{$Offs[$D]};
+  # Statement counter used to delay stores; "local" so that load_if_needed() can advance it too.
+  local $tick = 0;
+
+  # Counts of use. For a and b to fetch, for c to store
+  my (@ac, @bc, @cc, @to_store);
+
+  for (my $i = 0; $i < $D; ++$i)
+  {
+    for (my $k = 0; $k < $D; ++$k)
+    {
+      for (my $j = 0; $j < $D; ++$j)
+      {
+        my $x = $i * $D + $j;
+        my $iko = $Off[$i * $D + $k];
+        my $kjo = $Off[$k * $D + $j];
+
+        my $areg = load_if_needed("a", $iko, \@ac);
+        my $breg = load_if_needed("b", $kjo, \@bc);
+        my $creg = reg_name("c", $x);
+
+        # The first term of each sum declares the accumulator register; later terms fuse into it.
+
+        if ($k == 0)
+        {
+          print "${PREF}${vectype} ${creg} = MUL(${areg}, ${breg});\n";
+        }
+        else
+        {
+          print "${PREF}${creg} = FMA(${areg}, ${breg}, ${creg});\n";
+        }
+
+        ++$tick;
+
+        if ($k + 1 == $D)
+        {
+          $cc[$x] = $tick + 4; #### Will be ready to store in 4 cycles. Really 4?
+          push @to_store, $x;
+        }
+
+        # Try to store the finished ones.
+        while (1)
+        {
+          last unless @to_store;
+          my $s = $to_store[0];
+          last if $tick < $cc[$s];
+
+          store("c", $s);
+          shift @to_store;
+          ++$tick;
+        }
+
+      }
+      print "\n";
+    }
+  }
+
+  for my $s (@to_store)
+  {
+    store("c", $s);
+
+    ++$tick;
+  }
+}
+
+################################################################################
+# mult_std(D): print one "c[..] = a[..]*b[..] + ...;" line per element of C = A * B, all three full DxD.
+sub mult_std
+{
+  my ($dim) = @_;
+
+  for (my $row = 0; $row < $dim; ++$row)
+  {
+    for (my $col = 0; $col < $dim; ++$col)
+    {
+      # Collect the D products a(row, mid) * b(mid, col).
+      my @terms;
+
+      for (my $mid = 0; $mid < $dim; ++$mid)
+      {
+        my $a_idx = $row * $dim + $mid;
+        my $b_idx = $mid * $dim + $col;
+
+        push @terms, sprintf("${a}[%2d*N+n]*${b}[%2d*N+n]", $a_idx, $b_idx);
+      }
+
+      # Left-hand side first, then the joined products and the terminating semicolon.
+      printf "${PREF}${c}[%2d*N+n] =${POST}", $row * $dim + $col;
+      print join(" +$JOIN", @terms), ";\n";
+    }
+  }
+}
+
+################################################################################
+# Command line: the single argument is Perl code, normally one generator call, which is run with eval.
+if (scalar @ARGV != 1)
+{
+  print STDERR "Usage: $0 function_call\n";
+  print STDERR << "FNORD";
+Some options:
+  $0 "mult_sym(3);"
+  $0 "mult_sym_fma(3);"
+  $0 "mult_sym_fma_intrinsic(6);"
+
+  $0 "mult_std(3);"
+FNORD
+  exit(1);
+}
+
+eval $ARGV[0];
+die "$0: $@" if $@;  # e.g. a misspelled generator name; without this the script exits silently
diff --git a/RecoTracker/MkFitCore/src/Matriplex/intr_sym_3x3.ah b/RecoTracker/MkFitCore/src/Matriplex/intr_sym_3x3.ah
new file mode 100644
index 0000000000000..84c310ebddd63
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/intr_sym_3x3.ah
@@ -0,0 +1,57 @@
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_1 = FMA(a_1, b_2, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_1, b_4, c_2);
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(a_3, b_3, c_0);
+      c_1 = FMA(a_3, b_4, c_1);
+      IntrVec_t b_5 = LD(b, 5);
+      c_2 = FMA(a_3, b_5, c_2);
+
+      IntrVec_t c_3 = MUL(a_1, b_0);
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      IntrVec_t c_4 = MUL(a_1, b_1);
+      ST(c, 2, c_2);
+      IntrVec_t c_5 = MUL(a_1, b_3);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_3 = FMA(a_2, b_1, c_3);
+      c_4 = FMA(a_2, b_2, c_4);
+      c_5 = FMA(a_2, b_4, c_5);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_3 = FMA(a_4, b_3, c_3);
+      c_4 = FMA(a_4, b_4, c_4);
+      c_5 = FMA(a_4, b_5, c_5);
+
+      IntrVec_t c_6 = MUL(a_3, b_0);
+      IntrVec_t c_7 = MUL(a_3, b_1);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_8 = MUL(a_3, b_3);
+
+      c_6 = FMA(a_4, b_1, c_6);
+      c_7 = FMA(a_4, b_2, c_7);
+      c_8 = FMA(a_4, b_4, c_8);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_6 = FMA(a_5, b_3, c_6);
+      c_7 = FMA(a_5, b_4, c_7);
+      c_8 = FMA(a_5, b_5, c_8);
+
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
diff --git a/RecoTracker/MkFitCore/src/Matriplex/intr_sym_6x6.ah b/RecoTracker/MkFitCore/src/Matriplex/intr_sym_6x6.ah
new file mode 100644
index 0000000000000..96bc9e2ba83db
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/intr_sym_6x6.ah
@@ -0,0 +1,330 @@
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = MUL(a_0, b_6);
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_4 = MUL(a_0, b_10);
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_5 = MUL(a_0, b_15);
+
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_1 = FMA(a_1, b_2, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_1, b_4, c_2);
+      IntrVec_t b_7 = LD(b, 7);
+      c_3 = FMA(a_1, b_7, c_3);
+      IntrVec_t b_11 = LD(b, 11);
+      c_4 = FMA(a_1, b_11, c_4);
+      IntrVec_t b_16 = LD(b, 16);
+      c_5 = FMA(a_1, b_16, c_5);
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(a_3, b_3, c_0);
+      c_1 = FMA(a_3, b_4, c_1);
+      IntrVec_t b_5 = LD(b, 5);
+      c_2 = FMA(a_3, b_5, c_2);
+      IntrVec_t b_8 = LD(b, 8);
+      c_3 = FMA(a_3, b_8, c_3);
+      IntrVec_t b_12 = LD(b, 12);
+      c_4 = FMA(a_3, b_12, c_4);
+      IntrVec_t b_17 = LD(b, 17);
+      c_5 = FMA(a_3, b_17, c_5);
+
+      IntrVec_t a_6 = LD(a, 6);
+      c_0 = FMA(a_6, b_6, c_0);
+      c_1 = FMA(a_6, b_7, c_1);
+      c_2 = FMA(a_6, b_8, c_2);
+      IntrVec_t b_9 = LD(b, 9);
+      c_3 = FMA(a_6, b_9, c_3);
+      IntrVec_t b_13 = LD(b, 13);
+      c_4 = FMA(a_6, b_13, c_4);
+      IntrVec_t b_18 = LD(b, 18);
+      c_5 = FMA(a_6, b_18, c_5);
+
+      IntrVec_t a_10 = LD(a, 10);
+      c_0 = FMA(a_10, b_10, c_0);
+      c_1 = FMA(a_10, b_11, c_1);
+      c_2 = FMA(a_10, b_12, c_2);
+      c_3 = FMA(a_10, b_13, c_3);
+      IntrVec_t b_14 = LD(b, 14);
+      c_4 = FMA(a_10, b_14, c_4);
+      IntrVec_t b_19 = LD(b, 19);
+      c_5 = FMA(a_10, b_19, c_5);
+
+      IntrVec_t a_15 = LD(a, 15);
+      c_0 = FMA(a_15, b_15, c_0);
+      c_1 = FMA(a_15, b_16, c_1);
+      c_2 = FMA(a_15, b_17, c_2);
+      c_3 = FMA(a_15, b_18, c_3);
+      c_4 = FMA(a_15, b_19, c_4);
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      IntrVec_t b_20 = LD(b, 20);
+      c_5 = FMA(a_15, b_20, c_5);
+
+      IntrVec_t c_6 = MUL(a_1, b_0);
+      IntrVec_t c_7 = MUL(a_1, b_1);
+      IntrVec_t c_8 = MUL(a_1, b_3);
+      IntrVec_t c_9 = MUL(a_1, b_6);
+      ST(c, 5, c_5);
+      IntrVec_t c_10 = MUL(a_1, b_10);
+      IntrVec_t c_11 = MUL(a_1, b_15);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_6 = FMA(a_2, b_1, c_6);
+      c_7 = FMA(a_2, b_2, c_7);
+      c_8 = FMA(a_2, b_4, c_8);
+      c_9 = FMA(a_2, b_7, c_9);
+      c_10 = FMA(a_2, b_11, c_10);
+      c_11 = FMA(a_2, b_16, c_11);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_6 = FMA(a_4, b_3, c_6);
+      c_7 = FMA(a_4, b_4, c_7);
+      c_8 = FMA(a_4, b_5, c_8);
+      c_9 = FMA(a_4, b_8, c_9);
+      c_10 = FMA(a_4, b_12, c_10);
+      c_11 = FMA(a_4, b_17, c_11);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_6 = FMA(a_7, b_6, c_6);
+      c_7 = FMA(a_7, b_7, c_7);
+      c_8 = FMA(a_7, b_8, c_8);
+      c_9 = FMA(a_7, b_9, c_9);
+      c_10 = FMA(a_7, b_13, c_10);
+      c_11 = FMA(a_7, b_18, c_11);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_6 = FMA(a_11, b_10, c_6);
+      c_7 = FMA(a_11, b_11, c_7);
+      c_8 = FMA(a_11, b_12, c_8);
+      c_9 = FMA(a_11, b_13, c_9);
+      c_10 = FMA(a_11, b_14, c_10);
+      c_11 = FMA(a_11, b_19, c_11);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_6 = FMA(a_16, b_15, c_6);
+      c_7 = FMA(a_16, b_16, c_7);
+      c_8 = FMA(a_16, b_17, c_8);
+      c_9 = FMA(a_16, b_18, c_9);
+      c_10 = FMA(a_16, b_19, c_10);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      c_11 = FMA(a_16, b_20, c_11);
+
+      IntrVec_t c_12 = MUL(a_3, b_0);
+      IntrVec_t c_13 = MUL(a_3, b_1);
+      IntrVec_t c_14 = MUL(a_3, b_3);
+      IntrVec_t c_15 = MUL(a_3, b_6);
+      ST(c, 11, c_11);
+      IntrVec_t c_16 = MUL(a_3, b_10);
+      IntrVec_t c_17 = MUL(a_3, b_15);
+
+      c_12 = FMA(a_4, b_1, c_12);
+      c_13 = FMA(a_4, b_2, c_13);
+      c_14 = FMA(a_4, b_4, c_14);
+      c_15 = FMA(a_4, b_7, c_15);
+      c_16 = FMA(a_4, b_11, c_16);
+      c_17 = FMA(a_4, b_16, c_17);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_12 = FMA(a_5, b_3, c_12);
+      c_13 = FMA(a_5, b_4, c_13);
+      c_14 = FMA(a_5, b_5, c_14);
+      c_15 = FMA(a_5, b_8, c_15);
+      c_16 = FMA(a_5, b_12, c_16);
+      c_17 = FMA(a_5, b_17, c_17);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_12 = FMA(a_8, b_6, c_12);
+      c_13 = FMA(a_8, b_7, c_13);
+      c_14 = FMA(a_8, b_8, c_14);
+      c_15 = FMA(a_8, b_9, c_15);
+      c_16 = FMA(a_8, b_13, c_16);
+      c_17 = FMA(a_8, b_18, c_17);
+
+      IntrVec_t a_12 = LD(a, 12);
+      c_12 = FMA(a_12, b_10, c_12);
+      c_13 = FMA(a_12, b_11, c_13);
+      c_14 = FMA(a_12, b_12, c_14);
+      c_15 = FMA(a_12, b_13, c_15);
+      c_16 = FMA(a_12, b_14, c_16);
+      c_17 = FMA(a_12, b_19, c_17);
+
+      IntrVec_t a_17 = LD(a, 17);
+      c_12 = FMA(a_17, b_15, c_12);
+      c_13 = FMA(a_17, b_16, c_13);
+      c_14 = FMA(a_17, b_17, c_14);
+      c_15 = FMA(a_17, b_18, c_15);
+      c_16 = FMA(a_17, b_19, c_16);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      c_17 = FMA(a_17, b_20, c_17);
+
+      IntrVec_t c_18 = MUL(a_6, b_0);
+      IntrVec_t c_19 = MUL(a_6, b_1);
+      IntrVec_t c_20 = MUL(a_6, b_3);
+      IntrVec_t c_21 = MUL(a_6, b_6);
+      ST(c, 17, c_17);
+      IntrVec_t c_22 = MUL(a_6, b_10);
+      IntrVec_t c_23 = MUL(a_6, b_15);
+
+      c_18 = FMA(a_7, b_1, c_18);
+      c_19 = FMA(a_7, b_2, c_19);
+      c_20 = FMA(a_7, b_4, c_20);
+      c_21 = FMA(a_7, b_7, c_21);
+      c_22 = FMA(a_7, b_11, c_22);
+      c_23 = FMA(a_7, b_16, c_23);
+
+      c_18 = FMA(a_8, b_3, c_18);
+      c_19 = FMA(a_8, b_4, c_19);
+      c_20 = FMA(a_8, b_5, c_20);
+      c_21 = FMA(a_8, b_8, c_21);
+      c_22 = FMA(a_8, b_12, c_22);
+      c_23 = FMA(a_8, b_17, c_23);
+
+      IntrVec_t a_9 = LD(a, 9);
+      c_18 = FMA(a_9, b_6, c_18);
+      c_19 = FMA(a_9, b_7, c_19);
+      c_20 = FMA(a_9, b_8, c_20);
+      c_21 = FMA(a_9, b_9, c_21);
+      c_22 = FMA(a_9, b_13, c_22);
+      c_23 = FMA(a_9, b_18, c_23);
+
+      IntrVec_t a_13 = LD(a, 13);
+      c_18 = FMA(a_13, b_10, c_18);
+      c_19 = FMA(a_13, b_11, c_19);
+      c_20 = FMA(a_13, b_12, c_20);
+      c_21 = FMA(a_13, b_13, c_21);
+      c_22 = FMA(a_13, b_14, c_22);
+      c_23 = FMA(a_13, b_19, c_23);
+
+      IntrVec_t a_18 = LD(a, 18);
+      c_18 = FMA(a_18, b_15, c_18);
+      c_19 = FMA(a_18, b_16, c_19);
+      c_20 = FMA(a_18, b_17, c_20);
+      c_21 = FMA(a_18, b_18, c_21);
+      c_22 = FMA(a_18, b_19, c_22);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+      ST(c, 21, c_21);
+      ST(c, 22, c_22);
+      c_23 = FMA(a_18, b_20, c_23);
+
+      IntrVec_t c_24 = MUL(a_10, b_0);
+      IntrVec_t c_25 = MUL(a_10, b_1);
+      IntrVec_t c_26 = MUL(a_10, b_3);
+      IntrVec_t c_27 = MUL(a_10, b_6);
+      ST(c, 23, c_23);
+      IntrVec_t c_28 = MUL(a_10, b_10);
+      IntrVec_t c_29 = MUL(a_10, b_15);
+
+      c_24 = FMA(a_11, b_1, c_24);
+      c_25 = FMA(a_11, b_2, c_25);
+      c_26 = FMA(a_11, b_4, c_26);
+      c_27 = FMA(a_11, b_7, c_27);
+      c_28 = FMA(a_11, b_11, c_28);
+      c_29 = FMA(a_11, b_16, c_29);
+
+      c_24 = FMA(a_12, b_3, c_24);
+      c_25 = FMA(a_12, b_4, c_25);
+      c_26 = FMA(a_12, b_5, c_26);
+      c_27 = FMA(a_12, b_8, c_27);
+      c_28 = FMA(a_12, b_12, c_28);
+      c_29 = FMA(a_12, b_17, c_29);
+
+      c_24 = FMA(a_13, b_6, c_24);
+      c_25 = FMA(a_13, b_7, c_25);
+      c_26 = FMA(a_13, b_8, c_26);
+      c_27 = FMA(a_13, b_9, c_27);
+      c_28 = FMA(a_13, b_13, c_28);
+      c_29 = FMA(a_13, b_18, c_29);
+
+      IntrVec_t a_14 = LD(a, 14);
+      c_24 = FMA(a_14, b_10, c_24);
+      c_25 = FMA(a_14, b_11, c_25);
+      c_26 = FMA(a_14, b_12, c_26);
+      c_27 = FMA(a_14, b_13, c_27);
+      c_28 = FMA(a_14, b_14, c_28);
+      c_29 = FMA(a_14, b_19, c_29);
+
+      IntrVec_t a_19 = LD(a, 19);
+      c_24 = FMA(a_19, b_15, c_24);
+      c_25 = FMA(a_19, b_16, c_25);
+      c_26 = FMA(a_19, b_17, c_26);
+      c_27 = FMA(a_19, b_18, c_27);
+      c_28 = FMA(a_19, b_19, c_28);
+      ST(c, 24, c_24);
+      ST(c, 25, c_25);
+      ST(c, 26, c_26);
+      ST(c, 27, c_27);
+      ST(c, 28, c_28);
+      c_29 = FMA(a_19, b_20, c_29);
+
+      IntrVec_t c_30 = MUL(a_15, b_0);
+      IntrVec_t c_31 = MUL(a_15, b_1);
+      IntrVec_t c_32 = MUL(a_15, b_3);
+      IntrVec_t c_33 = MUL(a_15, b_6);
+      ST(c, 29, c_29);
+      IntrVec_t c_34 = MUL(a_15, b_10);
+      IntrVec_t c_35 = MUL(a_15, b_15);
+
+      c_30 = FMA(a_16, b_1, c_30);
+      c_31 = FMA(a_16, b_2, c_31);
+      c_32 = FMA(a_16, b_4, c_32);
+      c_33 = FMA(a_16, b_7, c_33);
+      c_34 = FMA(a_16, b_11, c_34);
+      c_35 = FMA(a_16, b_16, c_35);
+
+      c_30 = FMA(a_17, b_3, c_30);
+      c_31 = FMA(a_17, b_4, c_31);
+      c_32 = FMA(a_17, b_5, c_32);
+      c_33 = FMA(a_17, b_8, c_33);
+      c_34 = FMA(a_17, b_12, c_34);
+      c_35 = FMA(a_17, b_17, c_35);
+
+      c_30 = FMA(a_18, b_6, c_30);
+      c_31 = FMA(a_18, b_7, c_31);
+      c_32 = FMA(a_18, b_8, c_32);
+      c_33 = FMA(a_18, b_9, c_33);
+      c_34 = FMA(a_18, b_13, c_34);
+      c_35 = FMA(a_18, b_18, c_35);
+
+      c_30 = FMA(a_19, b_10, c_30);
+      c_31 = FMA(a_19, b_11, c_31);
+      c_32 = FMA(a_19, b_12, c_32);
+      c_33 = FMA(a_19, b_13, c_33);
+      c_34 = FMA(a_19, b_14, c_34);
+      c_35 = FMA(a_19, b_19, c_35);
+
+      IntrVec_t a_20 = LD(a, 20);
+      c_30 = FMA(a_20, b_15, c_30);
+      c_31 = FMA(a_20, b_16, c_31);
+      c_32 = FMA(a_20, b_17, c_32);
+      c_33 = FMA(a_20, b_18, c_33);
+      c_34 = FMA(a_20, b_19, c_34);
+      ST(c, 30, c_30);
+      ST(c, 31, c_31);
+      ST(c, 32, c_32);
+      ST(c, 33, c_33);
+      ST(c, 34, c_34);
+      c_35 = FMA(a_20, b_20, c_35);
+
+      ST(c, 35, c_35);
diff --git a/RecoTracker/MkFitCore/src/Matriplex/std_sym_3x3.ah b/RecoTracker/MkFitCore/src/Matriplex/std_sym_3x3.ah
new file mode 100644
index 0000000000000..ab42fba342a4f
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/std_sym_3x3.ah
@@ -0,0 +1,9 @@
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 3*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n] + a[ 3*N+n]*b[ 4*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 5*N+n];
+      c[ 3*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 3*N+n];
+      c[ 4*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n] + a[ 4*N+n]*b[ 4*N+n];
+      c[ 5*N+n] = a[ 1*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 4*N+n]*b[ 5*N+n];
+      c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n];
+      c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n];
+      c[ 8*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 5*N+n]*b[ 5*N+n];
diff --git a/RecoTracker/MkFitCore/src/Matriplex/std_sym_6x6.ah b/RecoTracker/MkFitCore/src/Matriplex/std_sym_6x6.ah
new file mode 100644
index 0000000000000..22602d7a22e64
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matriplex/std_sym_6x6.ah
@@ -0,0 +1,36 @@
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 3*N+n] + a[ 6*N+n]*b[ 6*N+n] + a[10*N+n]*b[10*N+n] + a[15*N+n]*b[15*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n] + a[ 3*N+n]*b[ 4*N+n] + a[ 6*N+n]*b[ 7*N+n] + a[10*N+n]*b[11*N+n] + a[15*N+n]*b[16*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 5*N+n] + a[ 6*N+n]*b[ 8*N+n] + a[10*N+n]*b[12*N+n] + a[15*N+n]*b[17*N+n];
+      c[ 3*N+n] = a[ 0*N+n]*b[ 6*N+n] + a[ 1*N+n]*b[ 7*N+n] + a[ 3*N+n]*b[ 8*N+n] + a[ 6*N+n]*b[ 9*N+n] + a[10*N+n]*b[13*N+n] + a[15*N+n]*b[18*N+n];
+      c[ 4*N+n] = a[ 0*N+n]*b[10*N+n] + a[ 1*N+n]*b[11*N+n] + a[ 3*N+n]*b[12*N+n] + a[ 6*N+n]*b[13*N+n] + a[10*N+n]*b[14*N+n] + a[15*N+n]*b[19*N+n];
+      c[ 5*N+n] = a[ 0*N+n]*b[15*N+n] + a[ 1*N+n]*b[16*N+n] + a[ 3*N+n]*b[17*N+n] + a[ 6*N+n]*b[18*N+n] + a[10*N+n]*b[19*N+n] + a[15*N+n]*b[20*N+n];
+      c[ 6*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 6*N+n] + a[11*N+n]*b[10*N+n] + a[16*N+n]*b[15*N+n];
+      c[ 7*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 7*N+n]*b[ 7*N+n] + a[11*N+n]*b[11*N+n] + a[16*N+n]*b[16*N+n];
+      c[ 8*N+n] = a[ 1*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 4*N+n]*b[ 5*N+n] + a[ 7*N+n]*b[ 8*N+n] + a[11*N+n]*b[12*N+n] + a[16*N+n]*b[17*N+n];
+      c[ 9*N+n] = a[ 1*N+n]*b[ 6*N+n] + a[ 2*N+n]*b[ 7*N+n] + a[ 4*N+n]*b[ 8*N+n] + a[ 7*N+n]*b[ 9*N+n] + a[11*N+n]*b[13*N+n] + a[16*N+n]*b[18*N+n];
+      c[10*N+n] = a[ 1*N+n]*b[10*N+n] + a[ 2*N+n]*b[11*N+n] + a[ 4*N+n]*b[12*N+n] + a[ 7*N+n]*b[13*N+n] + a[11*N+n]*b[14*N+n] + a[16*N+n]*b[19*N+n];
+      c[11*N+n] = a[ 1*N+n]*b[15*N+n] + a[ 2*N+n]*b[16*N+n] + a[ 4*N+n]*b[17*N+n] + a[ 7*N+n]*b[18*N+n] + a[11*N+n]*b[19*N+n] + a[16*N+n]*b[20*N+n];
+      c[12*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n] + a[ 8*N+n]*b[ 6*N+n] + a[12*N+n]*b[10*N+n] + a[17*N+n]*b[15*N+n];
+      c[13*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 7*N+n] + a[12*N+n]*b[11*N+n] + a[17*N+n]*b[16*N+n];
+      c[14*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 5*N+n]*b[ 5*N+n] + a[ 8*N+n]*b[ 8*N+n] + a[12*N+n]*b[12*N+n] + a[17*N+n]*b[17*N+n];
+      c[15*N+n] = a[ 3*N+n]*b[ 6*N+n] + a[ 4*N+n]*b[ 7*N+n] + a[ 5*N+n]*b[ 8*N+n] + a[ 8*N+n]*b[ 9*N+n] + a[12*N+n]*b[13*N+n] + a[17*N+n]*b[18*N+n];
+      c[16*N+n] = a[ 3*N+n]*b[10*N+n] + a[ 4*N+n]*b[11*N+n] + a[ 5*N+n]*b[12*N+n] + a[ 8*N+n]*b[13*N+n] + a[12*N+n]*b[14*N+n] + a[17*N+n]*b[19*N+n];
+      c[17*N+n] = a[ 3*N+n]*b[15*N+n] + a[ 4*N+n]*b[16*N+n] + a[ 5*N+n]*b[17*N+n] + a[ 8*N+n]*b[18*N+n] + a[12*N+n]*b[19*N+n] + a[17*N+n]*b[20*N+n];
+      c[18*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n] + a[ 9*N+n]*b[ 6*N+n] + a[13*N+n]*b[10*N+n] + a[18*N+n]*b[15*N+n];
+      c[19*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n] + a[ 9*N+n]*b[ 7*N+n] + a[13*N+n]*b[11*N+n] + a[18*N+n]*b[16*N+n];
+      c[20*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 5*N+n] + a[ 9*N+n]*b[ 8*N+n] + a[13*N+n]*b[12*N+n] + a[18*N+n]*b[17*N+n];
+      c[21*N+n] = a[ 6*N+n]*b[ 6*N+n] + a[ 7*N+n]*b[ 7*N+n] + a[ 8*N+n]*b[ 8*N+n] + a[ 9*N+n]*b[ 9*N+n] + a[13*N+n]*b[13*N+n] + a[18*N+n]*b[18*N+n];
+      c[22*N+n] = a[ 6*N+n]*b[10*N+n] + a[ 7*N+n]*b[11*N+n] + a[ 8*N+n]*b[12*N+n] + a[ 9*N+n]*b[13*N+n] + a[13*N+n]*b[14*N+n] + a[18*N+n]*b[19*N+n];
+      c[23*N+n] = a[ 6*N+n]*b[15*N+n] + a[ 7*N+n]*b[16*N+n] + a[ 8*N+n]*b[17*N+n] + a[ 9*N+n]*b[18*N+n] + a[13*N+n]*b[19*N+n] + a[18*N+n]*b[20*N+n];
+      c[24*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 1*N+n] + a[12*N+n]*b[ 3*N+n] + a[13*N+n]*b[ 6*N+n] + a[14*N+n]*b[10*N+n] + a[19*N+n]*b[15*N+n];
+      c[25*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 2*N+n] + a[12*N+n]*b[ 4*N+n] + a[13*N+n]*b[ 7*N+n] + a[14*N+n]*b[11*N+n] + a[19*N+n]*b[16*N+n];
+      c[26*N+n] = a[10*N+n]*b[ 3*N+n] + a[11*N+n]*b[ 4*N+n] + a[12*N+n]*b[ 5*N+n] + a[13*N+n]*b[ 8*N+n] + a[14*N+n]*b[12*N+n] + a[19*N+n]*b[17*N+n];
+      c[27*N+n] = a[10*N+n]*b[ 6*N+n] + a[11*N+n]*b[ 7*N+n] + a[12*N+n]*b[ 8*N+n] + a[13*N+n]*b[ 9*N+n] + a[14*N+n]*b[13*N+n] + a[19*N+n]*b[18*N+n];
+      c[28*N+n] = a[10*N+n]*b[10*N+n] + a[11*N+n]*b[11*N+n] + a[12*N+n]*b[12*N+n] + a[13*N+n]*b[13*N+n] + a[14*N+n]*b[14*N+n] + a[19*N+n]*b[19*N+n];
+      c[29*N+n] = a[10*N+n]*b[15*N+n] + a[11*N+n]*b[16*N+n] + a[12*N+n]*b[17*N+n] + a[13*N+n]*b[18*N+n] + a[14*N+n]*b[19*N+n] + a[19*N+n]*b[20*N+n];
+      c[30*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 1*N+n] + a[17*N+n]*b[ 3*N+n] + a[18*N+n]*b[ 6*N+n] + a[19*N+n]*b[10*N+n] + a[20*N+n]*b[15*N+n];
+      c[31*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 2*N+n] + a[17*N+n]*b[ 4*N+n] + a[18*N+n]*b[ 7*N+n] + a[19*N+n]*b[11*N+n] + a[20*N+n]*b[16*N+n];
+      c[32*N+n] = a[15*N+n]*b[ 3*N+n] + a[16*N+n]*b[ 4*N+n] + a[17*N+n]*b[ 5*N+n] + a[18*N+n]*b[ 8*N+n] + a[19*N+n]*b[12*N+n] + a[20*N+n]*b[17*N+n];
+      c[33*N+n] = a[15*N+n]*b[ 6*N+n] + a[16*N+n]*b[ 7*N+n] + a[17*N+n]*b[ 8*N+n] + a[18*N+n]*b[ 9*N+n] + a[19*N+n]*b[13*N+n] + a[20*N+n]*b[18*N+n];
+      c[34*N+n] = a[15*N+n]*b[10*N+n] + a[16*N+n]*b[11*N+n] + a[17*N+n]*b[12*N+n] + a[18*N+n]*b[13*N+n] + a[19*N+n]*b[14*N+n] + a[20*N+n]*b[19*N+n];
+      c[35*N+n] = a[15*N+n]*b[15*N+n] + a[16*N+n]*b[16*N+n] + a[17*N+n]*b[17*N+n] + a[18*N+n]*b[18*N+n] + a[19*N+n]*b[19*N+n] + a[20*N+n]*b[20*N+n];
diff --git a/RecoTracker/MkFitCore/src/MatriplexPackers.h b/RecoTracker/MkFitCore/src/MatriplexPackers.h
new file mode 100644
index 0000000000000..4b163af0ef289
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MatriplexPackers.h
@@ -0,0 +1,142 @@
+#ifndef RecoTracker_MkFitCore_src_MatriplexPackers_h
+#define RecoTracker_MkFitCore_src_MatriplexPackers_h
+
+#include "Matrix.h"
+
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+
+namespace mkfit {
+
+  //==============================================================================
+  // MatriplexPackerSlurpIn
+  //==============================================================================
+
+  // Records, for up to NN items, their offsets (in units of D) relative to a base
+  // item, then passes the base pointer and the offsets to a Matriplex's slurpIn().
+  template <typename D>
+  class MatriplexPackerSlurpIn {
+  protected:
+    alignas(64) int m_idx[NN];  // per-slot offset from m_base
+    const D* m_base;            // address the offsets are relative to
+    int m_pos;                  // next free slot, i.e. number of slots in use
+
+  public:
+    MatriplexPackerSlurpIn(const D& base) : m_base(&base), m_pos(0) {}
+
+    // Start over with no slots in use.
+    void reset() { m_pos = 0; }
+
+    // Fill the next slot with offset 0, i.e. make it refer to the base item.
+    void addNullInput() { m_idx[m_pos++] = 0; }
+
+    // Put item into the next slot. Prefetch requests could be issued here.
+    void addInput(const D& item) {
+      const auto offset = &item - m_base;
+      m_idx[m_pos++] = offset;
+    }
+
+    // Zero-fill slots below pos (we might not care about that, or could build an
+    // additional mask on top of N_proc), then put item into the next slot.
+    void addInputAt(int pos, const D& item) {
+      for (; m_pos < pos; ++m_pos) {
+        m_idx[m_pos] = 0;
+      }
+      addInput(item);
+    }
+
+    // Hand the recorded offsets, with the base displaced by base_offset, to mplex.
+    template <typename TM>
+    void pack(TM& mplex, int base_offset) {
+      assert(m_pos > 0 && m_pos <= NN);
+#if defined(GATHER_INTRINSICS)
+      GATHER_IDX_LOAD(vi, m_idx);
+      mplex.slurpIn(m_base + base_offset, vi, D(), m_pos);
+#else
+      mplex.slurpIn(m_base + base_offset, m_idx, m_pos);
+#endif
+    }
+  };
+
+  //==============================================================================
+  // MatriplexErrParPackerSlurpIn
+  //==============================================================================
+
+  // T - input class (Track or Hit), D - data type (float)
+
+  // Packer for inputs exposing errArray() and posArray(); fills two Matriplexes.
+  template <typename T, typename D>
+  class MatriplexErrParPackerSlurpIn : public MatriplexPackerSlurpIn<D> {
+    int m_off_param;  // offset of t.posArray() from t.errArray(), in units of D
+
+  public:
+    // The error array of t becomes the base that all offsets are measured from.
+    MatriplexErrParPackerSlurpIn(const T& t)
+        : MatriplexPackerSlurpIn<D>(*t.errArray()), m_off_param(t.posArray() - this->m_base) {}
+
+    // Put item's error array into the next slot. L1 prefetch requests could be issued here.
+    void addInput(const T& item) {
+      const auto offset = item.errArray() - this->m_base;
+      this->m_idx[this->m_pos++] = offset;
+    }
+
+    // Zero-fill slots below pos (we might not care about that, or could build an
+    // additional mask on top of N_proc), then put item into the next slot.
+    void addInputAt(int pos, const T& item) {
+      for (; this->m_pos < pos; ++this->m_pos) {
+        this->m_idx[this->m_pos] = 0;
+      }
+      addInput(item);
+    }
+
+    // Pass the recorded offsets to err.slurpIn() with the error-array base and to
+    // par.slurpIn() with that base shifted by m_off_param.
+    template <typename TMerr, typename TMpar>
+    void pack(TMerr& err, TMpar& par) {
+      assert(this->m_pos > 0 && this->m_pos <= NN);
+#if defined(GATHER_INTRINSICS)
+      GATHER_IDX_LOAD(vi, this->m_idx);
+      err.slurpIn(this->m_base, vi, D(), this->m_pos);
+      par.slurpIn(this->m_base + m_off_param, vi, D(), this->m_pos);
+#else
+      err.slurpIn(this->m_base, this->m_idx, this->m_pos);
+      par.slurpIn(this->m_base + m_off_param, this->m_idx, this->m_pos);
+#endif
+    }
+  };
+
+  //==============================================================================
+  // MatriplexTrackPackerPlexify
+  //==============================================================================
+
+  template <typename T, typename D>
+  class MatriplexTrackPackerPlexify  // : public MatriplexTrackPackerBase
+  {
+  public:
+    MatriplexTrackPackerPlexify(const T& t) {}
+    // Placeholder: every member of this class has an empty body, so it packs nothing.
+    void reset() {}
+
+    void addNullInput() {}
+
+    void addInput(const T& item) {}
+
+    void addInputAt(int pos, const T& item) {}
+    // Same signature as MatriplexErrParPackerSlurpIn::pack(), but leaves err and par untouched.
+    template <typename TMerr, typename TMpar>
+    void pack(TMerr& err, TMpar& par) {}
+  };
+
+  //==============================================================================
+  // Packer Selection
+  //==============================================================================
+
+  // Optionally ifdef with defines from Makefile.config
+
+  using MatriplexHitPacker = MatriplexErrParPackerSlurpIn<Hit, float>;
+  using MatriplexTrackPacker = MatriplexErrParPackerSlurpIn<TrackBase, float>;
+
+  using MatriplexHoTPacker = MatriplexPackerSlurpIn<HitOnTrack>;
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/Matrix.h b/RecoTracker/MkFitCore/src/Matrix.h
new file mode 100644
index 0000000000000..ed5e6939b8692
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Matrix.h
@@ -0,0 +1,73 @@
+#ifndef RecoTracker_MkFitCore_src_Matrix_h
+#define RecoTracker_MkFitCore_src_Matrix_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/interface/MatrixSTypes.h"
+
+namespace mkfit {
+
+  inline float hipo(float x, float y) { return std::sqrt(x * x + y * y); }  // sqrt(x^2 + y^2)
+  // Same as hipo() but without taking the square root, i.e. x^2 + y^2.
+  inline float hipo_sqr(float x, float y) { return x * x + y * y; }
+  // Truncated series for sin(x) (through x^3) and cos(x) (through x^4), written to the output refs.
+  inline void sincos4(const float x, float& sin, float& cos) {
+    // Had this written with explicit division by factorial.
+    // The *whole* fitting test ran like 2.5% slower on MIC, sigh.
+    // Hence the literal coefficients: 0.5f = 1/2!, 0.16666667f ~ 1/3!, 0.04166667f ~ 1/4!.
+    const float x2 = x * x;
+    cos = 1.f - 0.5f * x2 + 0.04166667f * x2 * x2;
+    sin = x - 0.16666667f * x * x2;
+  }
+}  // end namespace mkfit
+
+//==============================================================================
+
+// Matriplex dimensions and typedefs
+
+#include "Matriplex/MatriplexSym.h"
+
+#ifndef MPT_SIZE
+#if defined(__AVX512F__)
+#define MPT_SIZE 16
+#elif defined(__AVX__) || defined(__AVX2__)
+#define MPT_SIZE 8
+#elif defined(__SSE3__)
+#define MPT_SIZE 4
+#else
+#define MPT_SIZE 8
+#endif
+#endif
+
+namespace mkfit {
+
+  constexpr Matriplex::idx_t NN = MPT_SIZE;  // "Length" of MPlex.
+
+  constexpr Matriplex::idx_t LL = 6;  // Dimension of large/long  MPlex entities
+  constexpr Matriplex::idx_t HH = 3;  // Dimension of small/short MPlex entities
+
+  typedef Matriplex::Matriplex<float, LL, LL, NN> MPlexLL;
+  typedef Matriplex::Matriplex<float, LL, 1, NN> MPlexLV;
+  typedef Matriplex::MatriplexSym<float, LL, NN> MPlexLS;
+
+  typedef Matriplex::Matriplex<float, HH, HH, NN> MPlexHH;
+  typedef Matriplex::Matriplex<float, HH, 1, NN> MPlexHV;
+  typedef Matriplex::MatriplexSym<float, HH, NN> MPlexHS;
+
+  typedef Matriplex::Matriplex<float, 2, 2, NN> MPlex22;
+  typedef Matriplex::Matriplex<float, 2, 1, NN> MPlex2V;
+  typedef Matriplex::MatriplexSym<float, 2, NN> MPlex2S;
+
+  typedef Matriplex::Matriplex<float, LL, HH, NN> MPlexLH;
+  typedef Matriplex::Matriplex<float, HH, LL, NN> MPlexHL;
+
+  typedef Matriplex::Matriplex<float, LL, 2, NN> MPlexL2;
+
+  typedef Matriplex::Matriplex<float, 1, 1, NN> MPlexQF;
+  typedef Matriplex::Matriplex<int, 1, 1, NN> MPlexQI;
+  typedef Matriplex::Matriplex<unsigned int, 1, 1, NN> MPlexQUI;
+
+  typedef Matriplex::Matriplex<bool, 1, 1, NN> MPlexQB;
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/MkBase.h b/RecoTracker/MkFitCore/src/MkBase.h
new file mode 100644
index 0000000000000..b12efb754e5c8
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkBase.h
@@ -0,0 +1,100 @@
+#ifndef RecoTracker_MkFitCore_src_MkBase_h
+#define RecoTracker_MkFitCore_src_MkBase_h
+
+#include "Matrix.h"
+
+#include "PropagationMPlex.h"
+
+namespace mkfit {
+
+  //==============================================================================
+  // MkBase
+  //==============================================================================
+
+  class MkBase {
+  public:
+    static constexpr int iC = 0;  // current
+    static constexpr int iP = 1;  // propagated
+    // Parameter number par of track itrack, read from slot i (iC or iP) of m_Par.
+    float getPar(int itrack, int i, int par) const { return m_Par[i].constAt(itrack, par, 0); }
+    // Sum of squares of parameters 0 and 1 (presumably x and y, i.e. r^2 -- confirm).
+    float radiusSqr(int itrack, int i) const { return hipo_sqr(getPar(itrack, i, 0), getPar(itrack, i, 1)); }
+
+    //----------------------------------------------------------------------------
+
+    MkBase() {}
+
+    //----------------------------------------------------------------------------
+    // Calls propagateHelixToRMPlex with the iC and iP states and the same radius r in all NN slots.
+    void propagateTracksToR(float r, const int N_proc, const PropagationFlags pf) {
+      MPlexQF msRad;
+#pragma omp simd
+      for (int n = 0; n < NN; ++n) {
+        msRad.At(n, 0, 0) = r;
+      }
+
+      propagateHelixToRMPlex(m_Err[iC], m_Par[iC], m_Chg, msRad, m_Err[iP], m_Par[iP], N_proc, pf);
+    }
+    // As propagateTracksToR, but slot n uses the radius hypot(par(n,0), par(n,1)) taken from par.
+    void propagateTracksToHitR(const MPlexHV& par,
+                               const int N_proc,
+                               const PropagationFlags pf,
+                               const MPlexQI* noMatEffPtr = nullptr) {
+      MPlexQF msRad;
+#pragma omp simd
+      for (int n = 0; n < NN; ++n) {
+        msRad.At(n, 0, 0) = std::hypot(par.constAt(n, 0, 0), par.constAt(n, 1, 0));
+      }
+
+      propagateHelixToRMPlex(m_Err[iC], m_Par[iC], m_Chg, msRad, m_Err[iP], m_Par[iP], N_proc, pf, noMatEffPtr);
+    }
+
+    //----------------------------------------------------------------------------
+    // Calls propagateHelixToZMPlex with the iC and iP states and the same z in all NN slots.
+    void propagateTracksToZ(float z, const int N_proc, const PropagationFlags pf) {
+      MPlexQF msZ;
+#pragma omp simd
+      for (int n = 0; n < NN; ++n) {
+        msZ.At(n, 0, 0) = z;
+      }
+
+      propagateHelixToZMPlex(m_Err[iC], m_Par[iC], m_Chg, msZ, m_Err[iP], m_Par[iP], N_proc, pf);
+    }
+    // As propagateTracksToZ, but slot n uses the z value par(n,2) taken from par.
+    void propagateTracksToHitZ(const MPlexHV& par,
+                               const int N_proc,
+                               const PropagationFlags pf,
+                               const MPlexQI* noMatEffPtr = nullptr) {
+      MPlexQF msZ;
+#pragma omp simd
+      for (int n = 0; n < NN; ++n) {
+        msZ.At(n, 0, 0) = par.constAt(n, 2, 0);
+      }
+
+      propagateHelixToZMPlex(m_Err[iC], m_Par[iC], m_Chg, msZ, m_Err[iP], m_Par[iP], N_proc, pf, noMatEffPtr);
+    }
+    // Target z per slot: s*(s*z - r)/(1 + s*s), with s = tan(par 5), z = par 2, r = hypot(par 0, par 1) of iC.
+    void propagateTracksToPCAZ(const int N_proc, const PropagationFlags pf) {
+      MPlexQF msZ;  // PCA z-coordinate
+#pragma omp simd
+      for (int n = 0; n < NN; ++n) {
+        const float slope = std::tan(m_Par[iC].constAt(n, 5, 0));
+        //      msZ.At(n, 0, 0) = ( Config::beamspotz0 + slope * ( Config::beamspotr0 - std::hypot(m_Par[iC].constAt(n, 0, 0), m_Par[iC].constAt(n, 1, 0))) + slope * slope * m_Par[iC].constAt(n, 2, 0) ) / ( 1+slope*slope); // PCA w.r.t. z0, r0
+        msZ.At(n, 0, 0) = (slope * (slope * m_Par[iC].constAt(n, 2, 0) -
+                                    std::hypot(m_Par[iC].constAt(n, 0, 0), m_Par[iC].constAt(n, 1, 0)))) /
+                          (1 + slope * slope);  // PCA to origin
+      }
+
+      propagateHelixToZMPlex(m_Err[iC], m_Par[iC], m_Chg, msZ, m_Err[iP], m_Par[iP], N_proc, pf);
+    }
+
+    //----------------------------------------------------------------------------
+    // m_Err and m_Par are indexed by iC / iP; m_Chg is handed to the propagation calls as is.
+  protected:
+    MPlexLS m_Err[2];
+    MPlexLV m_Par[2];
+    MPlexQI m_Chg;
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/MkBuilder.cc b/RecoTracker/MkFitCore/src/MkBuilder.cc
new file mode 100644
index 0000000000000..bc903ac3689ea
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkBuilder.cc
@@ -0,0 +1,1362 @@
+#include <memory>
+#include <limits>
+#include <algorithm>
+
+#include "RecoTracker/MkFitCore/interface/cms_common_macros.h"
+
+#include "RecoTracker/MkFitCore/interface/MkBuilder.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+#include "Pool.h"
+#include "CandCloner.h"
+#include "FindingFoos.h"
+#include "MkFitter.h"
+#include "MkFinder.h"
+
+#ifdef MKFIT_STANDALONE
+#include "RecoTracker/MkFitCore/standalone/Event.h"
+#endif
+
+#include "Ice/IceRevisitedRadix.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+#include "oneapi/tbb/parallel_for.h"
+#include "oneapi/tbb/parallel_for_each.h"
+
+// Set this to select a single track for deep debugging:
+//#define SELECT_SEED_LABEL -494
+
+namespace mkfit {
+
+  //==============================================================================
+  // Execution context -- Pools of helper objects
+  //==============================================================================
+
+  struct ExecutionContext {  // Bundles the pools of helper objects (cloners, fitters, finders).
+    ExecutionContext() = default;
+    ~ExecutionContext() = default;
+
+    Pool<CandCloner> m_cloners;
+    Pool<MkFitter> m_fitters;
+    Pool<MkFinder> m_finders;
+
+    void populate(int n_thr) {  // Ask each pool for (n_thr - current size) more objects.
+      m_cloners.populate(n_thr - m_cloners.size());
+      m_fitters.populate(n_thr - m_fitters.size());
+      m_finders.populate(n_thr - m_finders.size());
+    }
+  };
+
+  CMS_SA_ALLOW ExecutionContext g_exe_ctx;
+
+}  // end namespace mkfit
+
+//------------------------------------------------------------------------------
+
+namespace {
+  using namespace mkfit;
+
+  // Range of indices processed within one iteration of a TBB parallel_for.
+  struct RangeOfSeedIndices {
+    int m_rng_beg, m_rng_end;  // half-open limits of the whole range
+    int m_beg, m_end;          // half-open limits of the current chunk (at most NN wide)
+
+    RangeOfSeedIndices(int rb, int re) : m_rng_beg(rb), m_rng_end(re) { reset(); }
+
+    void next_chunk() {  // step to the following chunk; the last one is clipped to m_rng_end
+      m_beg = m_end;
+      m_end = std::min(m_beg + NN, m_rng_end);
+    }
+
+    void reset() {  // rewind to the first chunk
+      m_end = m_rng_beg;
+      next_chunk();
+    }
+
+    bool valid() const { return !(m_beg >= m_rng_end); }  // false once the range is exhausted
+
+    int n_proc() const { return m_end - m_beg; }  // number of indices in the current chunk
+
+    RangeOfSeedIndices &operator++() {
+      next_chunk();
+      return *this;
+    }
+  };
+
+  // Region of seed indices processed in a single TBB parallel for.
+  struct RegionOfSeedIndices {
+    int m_reg_beg, m_reg_end, m_vec_cnt;  // [begin, end) seed indices and number of NN-wide chunks
+
+    RegionOfSeedIndices(const std::vector<int> &seedEtaSeparators, int region)
+        : m_reg_beg(region == 0 ? 0 : seedEtaSeparators[region - 1]),
+          m_reg_end(seedEtaSeparators[region]),
+          m_vec_cnt((m_reg_end - m_reg_beg + NN - 1) / NN) {}
+
+    int count() const { return m_reg_end - m_reg_beg; }  // number of seeds in the region
+
+    tbb::blocked_range<int> tbb_blk_rng_std(int thr_hint = -1) const {  // range over seed indices
+      const int grain = (thr_hint < 0) ? Config::numSeedsPerTask : thr_hint;
+      return tbb::blocked_range<int>(m_reg_beg, m_reg_end, grain);
+    }
+
+    tbb::blocked_range<int> tbb_blk_rng_vec() const {  // range over chunk numbers, not seed indices
+      const int grain_vec = std::max(1, Config::numSeedsPerTask / NN);
+      return tbb::blocked_range<int>(0, m_vec_cnt, grain_vec);
+    }
+
+    RangeOfSeedIndices seed_rng(const tbb::blocked_range<int> &i) const {  // chunk numbers -> seed indices
+      const int first = m_reg_beg + NN * i.begin();
+      return RangeOfSeedIndices(first, std::min(m_reg_beg + NN * i.end(), m_reg_end));
+    }
+  };
+
+#ifdef DEBUG  // Printout helpers, compiled only when DEBUG is defined.
+  void pre_prop_print(int ilay, MkBase *fir) {  // dumps slot 0 of parameter set 0 (state before propagation)
+    const float pt = 1.f / fir->getPar(0, 0, 3);  // pT is taken as the inverse of parameter 3
+    std::cout << "propagate to lay=" << ilay << " start from x=" << fir->getPar(0, 0, 0)
+              << " y=" << fir->getPar(0, 0, 1) << " z=" << fir->getPar(0, 0, 2)
+              << " r=" << getHypot(fir->getPar(0, 0, 0), fir->getPar(0, 0, 1))
+              << " px=" << pt * std::cos(fir->getPar(0, 0, 4)) << " py=" << pt * std::sin(fir->getPar(0, 0, 4))
+              << " pz=" << pt / std::tan(fir->getPar(0, 0, 5)) << " pT=" << pt << std::endl;
+  }
+
+  void post_prop_print(int ilay, MkBase *fir) {  // dumps slot 0 of parameter set 1 (state after propagation)
+    std::cout << "propagate to lay=" << ilay << " arrive at x=" << fir->getPar(0, 1, 0) << " y=" << fir->getPar(0, 1, 1)
+              << " z=" << fir->getPar(0, 1, 2) << " r=" << getHypot(fir->getPar(0, 1, 0), fir->getPar(0, 1, 1))
+              << std::endl;
+  }
+
+  void print_seed(const Track &seed) {  // one-line summary of a seed given as a Track
+    std::cout << "MX - found seed with label=" << seed.label() << " nHits=" << seed.nFoundHits()
+              << " chi2=" << seed.chi2() << " posEta=" << seed.posEta() << " posPhi=" << seed.posPhi()
+              << " posR=" << seed.posR() << " posZ=" << seed.z() << " pT=" << seed.pT() << std::endl;
+  }
+
+  void print_seed2(const TrackCand &seed) {  // one-line summary of a seed given as a TrackCand
+    std::cout << "MX - found seed with nFoundHits=" << seed.nFoundHits() << " chi2=" << seed.chi2() << " x=" << seed.x()
+              << " y=" << seed.y() << " z=" << seed.z() << " px=" << seed.px() << " py=" << seed.py()
+              << " pz=" << seed.pz() << " pT=" << seed.pT() << std::endl;
+  }
+
+  void print_seeds(const TrackVec &seeds) {  // total count followed by print_seed() for every entry
+    std::cout << "found total seeds=" << seeds.size() << std::endl;
+    for (auto &&seed : seeds) {
+      print_seed(seed);
+    }
+  }
+
+  void print_seeds(const EventOfCombCandidates &event_of_comb_cands) {  // first candidate of every entry
+    for (int iseed = 0; iseed < event_of_comb_cands.size(); iseed++) {
+      print_seed2(event_of_comb_cands[iseed].front());
+    }
+  }
+#endif
+
+  bool sortCandByScore(const TrackCand &cand1, const TrackCand &cand2) {  // comparator handed to std::sort
+    return mkfit::sortByScoreTrackCand(cand1, cand2);
+  }
+
+}  // end unnamed namespace
+
+//------------------------------------------------------------------------------
+// Constructor and destructor
+//------------------------------------------------------------------------------
+
+namespace mkfit {
+
+  std::unique_ptr<MkBuilder> MkBuilder::make_builder(bool silent) { return std::make_unique<MkBuilder>(silent); }  // factory
+
+  void MkBuilder::populate() { g_exe_ctx.populate(Config::numThreadsFinder); }  // fills the file-global helper pools
+
+  //------------------------------------------------------------------------------
+  // Common functions
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::begin_event(MkJob *job, Event *ev, const char *build_type) {
+    m_job = job;
+    m_event = ev;
+    m_nan_n_silly_per_layer_count = 0;
+
+    // Size the per-region bookkeeping and reset it: seed counts to zero and the
+    // min / max "last seed layer" to their initial sentinels (9999 and 0).
+    const int n_regions = m_job->num_regions();
+    m_seedEtaSeparators.resize(n_regions);
+    m_seedMinLastLayer.resize(n_regions);
+    m_seedMaxLastLayer.resize(n_regions);
+    for (int reg = 0; reg < n_regions; ++reg) {
+      m_seedEtaSeparators[reg] = 0;
+      m_seedMinLastLayer[reg] = 9999;
+      m_seedMaxLastLayer[reg] = 0;
+    }
+    if (m_silent)
+      return;
+    std::cout << "MkBuilder building tracks with '" << build_type << "'"
+              << ", iteration_index=" << job->m_iter_config.m_iteration_index
+              << ", track_algorithm=" << job->m_iter_config.m_track_algorithm << std::endl;
+  }
+
+  void MkBuilder::end_event() {  // drops the pointers stored by begin_event(); nothing is deleted here
+    m_job = nullptr;
+    m_event = nullptr;
+  }
+
+  void MkBuilder::release_memory() {
+    // Swap with an empty temporary so the storage held by m_tracks goes away with it.
+    TrackVec().swap(m_tracks);
+    m_event_of_comb_cands.releaseMemory();
+  }
+
+  void MkBuilder::import_seeds(const TrackVec &in_seeds, std::function<insert_seed_foo> insert_seed) {
+    // bool debug = true;
+
+    const int size = in_seeds.size();
+
+    IterationSeedPartition part(size);  // receives a region and a sort score for every seed
+
+    m_job->m_iter_config.m_partition_seeds(m_job->m_trk_info, in_seeds, m_job->m_event_of_hits, part);
+
+    RadixSort radix;
+    radix.Sort(&part.m_sort_score[0], size);
+
+    for (int i = 0; i < size; ++i) {  // visit the seeds in sort-score order
+      int j = radix.GetRanks()[i];
+
+      const Track &S = in_seeds[j];
+      HitOnTrack hot = S.getLastHitOnTrack();
+
+      int reg = part.m_region[j];
+
+      ++m_seedEtaSeparators[reg];  // plain per-region count for now, made cumulative further down
+
+      m_seedMinLastLayer[reg] = std::min(m_seedMinLastLayer[reg], hot.layer);
+      m_seedMaxLastLayer[reg] = std::max(m_seedMaxLastLayer[reg], hot.layer);
+
+      insert_seed(S, reg);  // the caller decides where the seed is stored
+    }
+
+    // Fix min/max layers: regions without seeds report -1. Decide on the seed count, not on
+    for (int i = 0; i < m_job->num_regions(); ++i) {  // the initial value 0, which is also a valid layer.
+      if (m_seedEtaSeparators[i] == 0) {
+        m_seedMinLastLayer[i] = -1;
+        m_seedMaxLastLayer[i] = -1;
+      }
+    }
+
+    dprintf(
+        "MkBuilder::import_seeds finished import of %d seeds (last seeding layer min, max):\n"
+        "  ec- = %d(%d,%d), t- = %d(%d,%d), brl = %d(%d,%d), t+ = %d(%d,%d), ec+ = %d(%d,%d).\n",
+        size,
+        m_seedEtaSeparators[0],
+        m_seedMinLastLayer[0],
+        m_seedMaxLastLayer[0],
+        m_seedEtaSeparators[1],
+        m_seedMinLastLayer[1],
+        m_seedMaxLastLayer[1],
+        m_seedEtaSeparators[2],
+        m_seedMinLastLayer[2],
+        m_seedMaxLastLayer[2],
+        m_seedEtaSeparators[3],
+        m_seedMinLastLayer[3],
+        m_seedMaxLastLayer[3],
+        m_seedEtaSeparators[4],
+        m_seedMinLastLayer[4],
+        m_seedMaxLastLayer[4]);
+
+    // Sum up region counts to contain actual separator indices.
+    for (int i = 1; i < m_job->num_regions(); ++i) {
+      m_seedEtaSeparators[i] += m_seedEtaSeparators[i - 1];
+    }
+
+    dcall(print_seeds(m_event_of_comb_cands));
+  }
+
+  //------------------------------------------------------------------------------
+
+  int MkBuilder::filter_comb_cands(std::function<filter_track_cand_foo> filter) {  // returns the number removed
+    EventOfCombCandidates &eoccs = m_event_of_comb_cands;
+    int i = 0, place_pos = 0;  // i scans all entries, place_pos is the next slot for a kept entry
+
+    dprintf("MkBuilder::filter_comb_cands Entering filter size eoccsm_size=%d\n", eoccs.size());
+
+    std::vector<int> removed_cnts(m_job->num_regions());  // rejected entries, counted per eta region
+    while (i < eoccs.size()) {
+      if (filter(eoccs[i].front())) {  // the decision is taken on the first candidate of the entry
+        if (place_pos != i)
+          std::swap(eoccs[place_pos], eoccs[i]);  // compact kept entries towards the front, order preserved
+        ++place_pos;
+      } else {
+        assert(eoccs[i].front().getEtaRegion() < m_job->num_regions());
+        ++removed_cnts[eoccs[i].front().getEtaRegion()];
+      }
+      ++i;
+    }
+
+    int n_removed = 0;
+    for (int reg = 0; reg < m_job->num_regions(); ++reg) {
+      dprintf("MkBuilder::filter_comb_cands reg=%d: n_rem_was=%d removed_in_r=%d n_rem=%d, es_was=%d es_new=%d\n",
+              reg,
+              n_removed,
+              removed_cnts[reg],
+              n_removed + removed_cnts[reg],
+              m_seedEtaSeparators[reg],
+              m_seedEtaSeparators[reg] - n_removed - removed_cnts[reg]);
+
+      n_removed += removed_cnts[reg];
+      m_seedEtaSeparators[reg] -= n_removed;  // separators are cumulative, so subtract the running total
+    }
+
+    eoccs.resizeAfterFiltering(n_removed);
+
+    dprintf("MkBuilder::filter_comb_cands n_removed = %d, eoccsm_size=%d\n", n_removed, eoccs.size());
+
+    return n_removed;
+  }
+
+  void MkBuilder::find_min_max_hots_size() {  // prints per-region and overall min/max of hotsSize()
+    const EventOfCombCandidates &eoccs = m_event_of_comb_cands;
+    int min[5], max[5], gmin = 9999, gmax = 0;  // gmin starts at the same sentinel as the per-region minima
+    int i = 0;  // runs on across regions; m_seedEtaSeparators[reg] is the end index of region reg
+    for (int reg = 0; reg < 5; ++reg) {
+      min[reg] = 9999;  // stays 9999 when the region holds no entries
+      max[reg] = 0;
+      for (; i < m_seedEtaSeparators[reg]; i++) {
+        min[reg] = std::min(min[reg], eoccs[i].hotsSize());
+        max[reg] = std::max(max[reg], eoccs[i].hotsSize());
+      }
+      gmin = std::min(gmin, min[reg]);  // overall minimum is the smallest of the regional minima
+      gmax = std::max(gmax, max[reg]);
+    }
+    printf(
+        "MkBuilder::find_min_max_hots_size MIN %3d -- [ %3d | %3d | %3d | %3d | %3d ]   MAX %3d -- [ %3d | %3d | %3d | "
+        "%3d | %3d ]\n",
+        gmin,
+        min[0],
+        min[1],
+        min[2],
+        min[3],
+        min[4],
+        gmax,
+        max[0],
+        max[1],
+        max[2],
+        max[3],
+        max[4]);
+  }
+
+  void MkBuilder::select_best_comb_cands(bool clear_m_tracks, bool remove_missing_hits) {
+    if (clear_m_tracks)  // otherwise the exported tracks are appended to what m_tracks already holds
+      m_tracks.clear();
+    export_best_comb_cands(m_tracks, remove_missing_hits);
+  }
+
+  void MkBuilder::export_best_comb_cands(TrackVec &out_vec, bool remove_missing_hits) {  // appends to out_vec
+    const EventOfCombCandidates &eoccs = m_event_of_comb_cands;
+    out_vec.reserve(out_vec.size() + eoccs.size());  // at most one track is added per entry
+    for (int i = 0; i < eoccs.size(); i++) {
+      // See MT-RATS comment below.
+      assert(!eoccs[i].empty() && "BackwardFitBH requires output tracks to align with seeds.");
+
+      // Take the first candidate, if it exists.
+      if (!eoccs[i].empty()) {  // still needed when asserts are compiled out
+        const TrackCand &bcand = eoccs[i].front();
+        out_vec.emplace_back(bcand.exportTrack(remove_missing_hits));
+      }
+    }
+  }
+
+  void MkBuilder::export_tracks(TrackVec &out_vec) {
+    // Append copies of all tracks held in m_tracks; whatever out_vec
+    // already contains is left in place.
+    out_vec.reserve(out_vec.size() + m_tracks.size());
+    out_vec.insert(out_vec.end(), m_tracks.begin(), m_tracks.end());
+  }
+
+  //------------------------------------------------------------------------------
+  // PrepareSeeds
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::seed_post_cleaning(TrackVec &tv) {  // optional single-seed selection, then silly-value check
+#ifdef SELECT_SEED_LABEL
+    {  // Select seed with the defined label for detailed debugging.
+      for (int i = 0; i < (int)tv.size(); ++i) {
+        if (tv[i].label() == SELECT_SEED_LABEL) {
+          printf("Preselect seed with label %d - found on pos %d\n", SELECT_SEED_LABEL, i);
+          if (i != 0)
+            tv[0] = tv[i];
+          tv.resize(1);  // keep only the selected seed
+          print("Label", tv[0].label(), tv[0], true);
+          break;
+        }
+      }
+      if (tv.size() != 1) {
+        printf("Preselect seed with label %d - NOT FOUND. Cleaning out seeds.\n", SELECT_SEED_LABEL);
+        tv.clear();
+      }
+    }
+#endif
+
+    if (Const::nan_n_silly_check_seeds) {
+      int count = 0;  // number of seeds flagged as silly, whether or not they get removed
+
+      for (int i = 0; i < (int)tv.size(); ++i) {
+        bool silly = tv[i].hasSillyValues(Const::nan_n_silly_print_bad_seeds,
+                                          Const::nan_n_silly_fixup_bad_seeds,
+                                          "Post-cleaning seed silly value check and fix");
+        if (silly) {
+          ++count;
+          if (Const::nan_n_silly_remove_bad_seeds) {
+            // XXXX MT
+            // Could do something smarter here: set as Stopped ?  check in seed cleaning ?
+            tv.erase(tv.begin() + i);
+            --i;  // the next seed has moved into position i, so revisit this index
+          }
+        }
+      }
+
+      if (count > 0 && !m_silent) {
+        printf("Nan'n'Silly detected %d silly seeds (fix=%d, remove=%d).\n",
+               count,
+               Const::nan_n_silly_fixup_bad_seeds,
+               Const::nan_n_silly_remove_bad_seeds);
+      }
+    }
+  }
+
+  //------------------------------------------------------------------------------
+  // FindTracksBestHit
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::find_tracks_load_seeds_BH(const TrackVec &in_seeds) {  // best-hit variant: seeds go to m_tracks
+    // bool debug = true;
+
+    m_tracks.reserve(in_seeds.size());
+    m_tracks.clear();
+
+    import_seeds(in_seeds, [&](const Track &seed, int region) {  // invoked once per seed by import_seeds()
+      m_tracks.push_back(seed);
+      m_tracks.back().setNSeedHits(seed.nTotalHits());
+      m_tracks.back().setEtaRegion(region);
+    });
+
+    //dump seeds
+    dcall(print_seeds(m_tracks));
+  }
+
+  void MkBuilder::findTracksBestHit(SteeringParams::IterationType_e iteration_dir) {  // best-hit finding on m_tracks
+    // bool debug = true;
+
+    TrackVec &cands = m_tracks;
+
+    tbb::parallel_for_each(m_job->regions_begin(), m_job->regions_end(), [&](int region) {
+      if (iteration_dir == SteeringParams::IT_BkwSearch && !m_job->steering_params(region).has_bksearch_plan()) {
+        printf("No backward search plan for region %d\n", region);
+        return;
+      }
+
+      // XXXXXX Select endcap / barrel only ...
+      // if (region != TrackerInfo::Reg_Endcap_Neg && region != TrackerInfo::Reg_Endcap_Pos)
+      // if (region != TrackerInfo::Reg_Barrel)
+      //   return;
+
+      const SteeringParams &st_par = m_job->steering_params(region);
+      const TrackerInfo &trk_info = m_job->m_trk_info;
+      const PropagationConfig &prop_config = PropagationConfig::get_default();
+
+      const RegionOfSeedIndices rosi(m_seedEtaSeparators, region);
+
+      tbb::parallel_for(rosi.tbb_blk_rng_vec(), [&](const tbb::blocked_range<int> &blk_rng) {
+        auto mkfndr = g_exe_ctx.m_finders.makeOrGet();
+
+        RangeOfSeedIndices rng = rosi.seed_rng(blk_rng);
+
+        std::vector<int> trk_idcs(NN);  // track indices in Matriplex
+        std::vector<int> trk_llay(NN);  // last layer on input track
+
+        while (rng.valid()) {
+          dprint(std::endl << "processing track=" << rng.m_beg << ", label=" << cands[rng.m_beg].label());
+
+          int prev_layer = 9999;
+
+          for (int i = rng.m_beg, ii = 0; i < rng.m_end; ++i, ++ii) {
+            int llay = cands[i].getLastHitLyr();
+            trk_llay[ii] = llay;
+            prev_layer = std::min(prev_layer, llay);
+
+            dprintf("  %2d %2d %2d lay=%3d prev_layer=%d\n", ii, i, cands[i].label(), llay, prev_layer);
+          }
+          int curr_tridx = 0;  // number of slots loaded so far in this chunk
+
+          auto layer_plan_it = st_par.make_iterator(iteration_dir);
+
+          dprintf("Made iterator for %d, first layer=%d ... end layer=%d\n",
+                  iteration_dir,
+                  layer_plan_it.layer(),
+                  layer_plan_it.last_layer());
+
+          assert(layer_plan_it.is_pickup_only());
+
+          int curr_layer = layer_plan_it.layer();
+
+          mkfndr->m_Stopped.setVal(0);
+
+          // Loop over layers, starting from after the seed.
+          // Consider inverting loop order and make layer outer, need to
+          // trade off hit prefetching with copy-out of candidates.
+          while (++layer_plan_it) {
+            prev_layer = curr_layer;
+            curr_layer = layer_plan_it.layer();
+            mkfndr->setup(prop_config,
+                          m_job->m_iter_config.m_params,
+                          m_job->m_iter_config.m_layer_configs[curr_layer],
+                          m_job->get_mask_for_layer(curr_layer));
+
+            dprint("at layer " << curr_layer);
+            const LayerOfHits &layer_of_hits = m_job->m_event_of_hits[curr_layer];
+            const LayerInfo &layer_info = trk_info.layer(curr_layer);
+            const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(layer_info.is_barrel());
+
+            // Pick up seeds that become active on current layer -- unless already fully loaded.
+            if (curr_tridx < rng.n_proc()) {
+              int prev_tridx = curr_tridx;
+
+              for (int i = rng.m_beg, ii = 0; i < rng.m_end; ++i, ++ii) {
+                if (trk_llay[ii] == prev_layer)
+                  trk_idcs[curr_tridx++] = i;  // slots are filled in pickup order, not in index order
+              }
+              if (curr_tridx > prev_tridx) {
+                dprintf("added %d seeds, started with %d\n", curr_tridx - prev_tridx, prev_tridx);
+
+                mkfndr->inputTracksAndHitIdx(cands, trk_idcs, prev_tridx, curr_tridx, false, prev_tridx);
+              }
+            }
+
+            if (layer_plan_it.is_pickup_only())
+              continue;
+
+            dcall(pre_prop_print(curr_layer, mkfndr.get()));
+
+            (mkfndr.get()->*fnd_foos.m_propagate_foo)(
+                layer_info.propagate_to(), curr_tridx, prop_config.finding_inter_layer_pflags);
+
+            dcall(post_prop_print(curr_layer, mkfndr.get()));
+
+            mkfndr->selectHitIndices(layer_of_hits, curr_tridx);
+
+            // Stop low-pT tracks that can not reach the current barrel layer.
+            if (layer_info.is_barrel()) {
+              const float r_min_sqr = layer_info.rin() * layer_info.rin();
+              for (int i = 0; i < curr_tridx; ++i) {
+                if (!mkfndr->m_Stopped[i]) {
+                  if (mkfndr->radiusSqr(i, MkBase::iP) < r_min_sqr) {
+                    if (region == TrackerInfo::Reg_Barrel) {
+                      mkfndr->m_Stopped[i] = 1;
+                      mkfndr->outputTrackAndHitIdx(cands[trk_idcs[i]], i, false);  // slot i was loaded from trk_idcs[i]
+                    }
+                    mkfndr->m_XWsrResult[i].m_wsr = WSR_Outside;
+                    mkfndr->m_XHitSize[i] = 0;
+                  }
+                } else {  // make sure we don't add extra work for AddBestHit
+                  mkfndr->m_XWsrResult[i].m_wsr = WSR_Outside;
+                  mkfndr->m_XHitSize[i] = 0;
+                }
+              }
+            }
+
+            // make candidates with best hit
+            dprint("make new candidates");
+
+            mkfndr->addBestHit(layer_of_hits, curr_tridx, fnd_foos);
+
+            // Stop tracks that have reached N_max_holes.
+            for (int i = 0; i < curr_tridx; ++i) {
+              if (!mkfndr->m_Stopped[i] && mkfndr->bestHitLastHoT(i).index == -2) {
+                mkfndr->m_Stopped[i] = 1;
+                mkfndr->outputTrackAndHitIdx(cands[trk_idcs[i]], i, false);  // same slot -> track mapping as above
+              }
+            }
+
+          }  // end of layer loop
+
+          mkfndr->outputNonStoppedTracksAndHitIdx(cands, trk_idcs, 0, curr_tridx, false);
+
+          ++rng;
+        }  // end of loop over candidates in a tbb chunk
+      });  // end parallel_for over candidates in a region
+    });    // end of parallel_for_each over regions
+  }
+
+  //------------------------------------------------------------------------------
+  // FindTracksCombinatorial: Standard TBB and CloneEngine TBB
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::find_tracks_load_seeds(const TrackVec &in_seeds) {  // combinatorial variant of seed loading
+    // This will sort seeds according to iteration configuration.
+
+    // m_tracks can be used for BkFit.
+    m_tracks.clear();
+
+    m_event_of_comb_cands.reset((int)in_seeds.size(), m_job->max_max_cands());
+
+    import_seeds(in_seeds, [&](const Track &seed, int region) { m_event_of_comb_cands.insertSeed(seed, region); });
+  }
+
+  int MkBuilder::find_tracks_unroll_candidates(std::vector<std::pair<int, int>> &seed_cand_vec,
+                                               int start_seed,
+                                               int end_seed,
+                                               int layer,
+                                               int prev_layer,
+                                               bool pickup_only,
+                                               SteeringParams::IterationType_e iteration_dir) {
+    int silly_count = 0;
+
+    seed_cand_vec.clear();  // rebuilt on every call with (seed index, candidate index) pairs
+
+    for (int iseed = start_seed; iseed < end_seed; ++iseed) {
+      CombCandidate &ccand = m_event_of_comb_cands[iseed];
+
+      if (ccand.state() == CombCandidate::Dormant && ccand.pickupLayer() == prev_layer) {
+        ccand.setState(CombCandidate::Finding);  // wake up seeds whose pickup layer was just passed
+      }
+      if (!pickup_only && ccand.state() == CombCandidate::Finding) {
+        bool active = false;
+        for (int ic = 0; ic < (int)ccand.size(); ++ic) {
+          if (ccand[ic].getLastHitIdx() != -2) {  // -2 as last hit index marks a stopped candidate
+            // Check if the candidate is close to its max_r, pi/2 - 0.2 rad (11.5 deg)
+            if (iteration_dir == SteeringParams::IT_FwdSearch && ccand[ic].pT() < 1.2) {
+              const float dphi = std::abs(ccand[ic].posPhi() - ccand[ic].momPhi());
+              if (ccand[ic].posRsq() > 625.f && dphi > 1.371f && dphi < 4.512f) {  // 25^2; pi/2 - 0.2 .. 3pi/2 - 0.2
+                // printf("Stopping cand at r=%f, posPhi=%.1f momPhi=%.2f pt=%.2f emomEta=%.2f\n",
+                //        ccand[ic].posR(), ccand[ic].posPhi(), ccand[ic].momPhi(), ccand[ic].pT(), ccand[ic].momEta());
+                ccand[ic].addHitIdx(-2, layer, 0.0f);
+                continue;
+              }
+            }
+
+            active = true;
+            seed_cand_vec.push_back(std::pair<int, int>(iseed, ic));
+            ccand[ic].resetOverlaps();
+
+            if (Const::nan_n_silly_check_cands_every_layer) {
+              if (ccand[ic].hasSillyValues(Const::nan_n_silly_print_bad_cands_every_layer,
+                                           Const::nan_n_silly_fixup_bad_cands_every_layer,
+                                           "Per layer silly check"))
+                ++silly_count;
+            }
+          }
+        }
+        if (!active) {  // no candidate of this seed was queued
+          ccand.setState(CombCandidate::Finished);
+        }
+      }
+    }
+
+    if (Const::nan_n_silly_check_cands_every_layer && silly_count > 0) {
+      m_nan_n_silly_per_layer_count += silly_count;
+    }
+
+    return seed_cand_vec.size();  // number of candidates queued for processing on this layer
+  }
+
+  void MkBuilder::find_tracks_handle_missed_layers(MkFinder *mkfndr,
+                                                   const LayerInfo &layer_info,
+                                                   std::vector<std::vector<TrackCand>> &tmp_cands,
+                                                   const std::vector<std::pair<int, int>> &seed_cand_idx,
+                                                   const int region,
+                                                   const int start_seed,
+                                                   const int itrack,
+                                                   const int end) {
+    // XXXX-1 If I miss a layer, insert the original track into tmp_cands
+    // AND do not do it in FindCandidates as the position can be badly
+    // screwed by then. See comment there, too.
+    // One could also do a pre-check ... so as not to use up a slot.
+
+    // bool debug = true;
+
+    for (int ti = itrack; ti < end; ++ti) {  // ti indexes seed_cand_idx, (ti - itrack) is the slot in mkfndr
+      TrackCand &cand = m_event_of_comb_cands[seed_cand_idx[ti].first][seed_cand_idx[ti].second];
+      WSR_Result &w = mkfndr->m_XWsrResult[ti - itrack];
+
+      // XXXX-4 Low pT tracks can miss a barrel layer ... and should be stopped
+      const float cand_r =
+          std::hypot(mkfndr->getPar(ti - itrack, MkBase::iP, 0), mkfndr->getPar(ti - itrack, MkBase::iP, 1));
+
+      dprintf("WSR Check label %d, seed %d, cand %d score %f -> wsr %d, in_gap %d\n",
+              cand.label(),
+              seed_cand_idx[ti].first,
+              seed_cand_idx[ti].second,
+              cand.score(),
+              w.m_wsr,
+              w.m_in_gap);
+
+      if (layer_info.is_barrel() && cand_r < layer_info.rin()) {
+        // Fake outside so it does not get processed in FindTracks Std/CE... and
+        // create a stopped replica in barrel and original copy if there is
+        // still chance to hit endcaps.
+        dprintf("Barrel cand propagated to r=%f ... layer is %f - %f\n", cand_r, layer_info.rin(), layer_info.rout());
+
+        mkfndr->m_XHitSize[ti - itrack] = 0;
+        w.m_wsr = WSR_Outside;
+
+        tmp_cands[seed_cand_idx[ti].first - start_seed].push_back(cand);  // tmp_cands is indexed relative to start_seed
+        if (region == TrackerInfo::Reg_Barrel) {
+          dprintf(" creating extra stopped held back candidate\n");
+          tmp_cands[seed_cand_idx[ti].first - start_seed].back().addHitIdx(-2, layer_info.layer_id(), 0);
+        }
+      } else if (w.m_wsr == WSR_Outside) {
+        dprintf(" creating extra held back candidate\n");
+        tmp_cands[seed_cand_idx[ti].first - start_seed].push_back(cand);  // carry the candidate over unchanged
+      } else if (w.m_wsr == WSR_Edge) {
+        // Do nothing special here, this case is handled also in MkFinder:findTracks()
+      }
+    }
+  }
+
+  //------------------------------------------------------------------------------
+  // FindTracksCombinatorial: Standard TBB
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::findTracksStandard(SteeringParams::IterationType_e iteration_dir) {
+    // debug = true;
+
+    EventOfCombCandidates &eoccs = m_event_of_comb_cands;
+
+    tbb::parallel_for_each(m_job->regions_begin(), m_job->regions_end(), [&](int region) {
+      if (iteration_dir == SteeringParams::IT_BkwSearch && !m_job->steering_params(region).has_bksearch_plan()) {
+        printf("No backward search plan for region %d\n", region);
+        return;
+      }
+
+      const TrackerInfo &trk_info = m_job->m_trk_info;
+      const SteeringParams &st_par = m_job->steering_params(region);
+      const IterationParams &params = m_job->params();
+      const PropagationConfig &prop_config = PropagationConfig::get_default();
+
+      const RegionOfSeedIndices rosi(m_seedEtaSeparators, region);
+
+      // adaptive seeds per task based on the total estimated amount of work to divide among all threads
+      const int adaptiveSPT = std::clamp(
+          Config::numThreadsEvents * eoccs.size() / Config::numThreadsFinder + 1, 4, Config::numSeedsPerTask);
+      dprint("adaptiveSPT " << adaptiveSPT << " fill " << rosi.count() << "/" << eoccs.size() << " region " << region);
+
+      // loop over seeds
+      tbb::parallel_for(rosi.tbb_blk_rng_std(adaptiveSPT), [&](const tbb::blocked_range<int> &seeds) {
+        auto mkfndr = g_exe_ctx.m_finders.makeOrGet();
+
+        const int start_seed = seeds.begin();
+        const int end_seed = seeds.end();
+        const int n_seeds = end_seed - start_seed;
+
+        std::vector<std::vector<TrackCand>> tmp_cands(n_seeds);
+        for (size_t iseed = 0; iseed < tmp_cands.size(); ++iseed) {
+          tmp_cands[iseed].reserve(2 * params.maxCandsPerSeed);  //factor 2 seems reasonable to start with
+        }
+
+        std::vector<std::pair<int, int>> seed_cand_idx;
+        seed_cand_idx.reserve(n_seeds * params.maxCandsPerSeed);
+
+        auto layer_plan_it = st_par.make_iterator(iteration_dir);
+
+        dprintf("Made iterator for %d, first layer=%d ... end layer=%d\n",
+                iteration_dir,
+                layer_plan_it.layer(),
+                layer_plan_it.last_layer());
+
+        assert(layer_plan_it.is_pickup_only());
+
+        int curr_layer = layer_plan_it.layer(), prev_layer;
+
+        dprintf("\nMkBuilder::FindTracksStandard region=%d, seed_pickup_layer=%d, first_layer=%d\n",
+                region,
+                curr_layer,
+                layer_plan_it.next_layer());
+
+        auto &iter_params = (iteration_dir == SteeringParams::IT_BkwSearch) ? m_job->m_iter_config.m_backward_params
+                                                                            : m_job->m_iter_config.m_params;
+
+        // Loop over layers, starting from after the seed.
+        while (++layer_plan_it) {
+          prev_layer = curr_layer;
+          curr_layer = layer_plan_it.layer();
+          mkfndr->setup(prop_config,
+                        iter_params,
+                        m_job->m_iter_config.m_layer_configs[curr_layer],
+                        m_job->get_mask_for_layer(curr_layer));
+
+          dprintf("\n* Processing layer %d\n", curr_layer);
+
+          const LayerOfHits &layer_of_hits = m_job->m_event_of_hits[curr_layer];
+          const LayerInfo &layer_info = trk_info.layer(curr_layer);
+          const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(layer_info.is_barrel());
+
+          int theEndCand = find_tracks_unroll_candidates(seed_cand_idx,
+                                                         start_seed,
+                                                         end_seed,
+                                                         curr_layer,
+                                                         prev_layer,
+                                                         layer_plan_it.is_pickup_only(),
+                                                         iteration_dir);
+
+          if (layer_plan_it.is_pickup_only() || theEndCand == 0)
+            continue;
+
+          // vectorized loop
+          for (int itrack = 0; itrack < theEndCand; itrack += NN) {
+            int end = std::min(itrack + NN, theEndCand);
+
+            dprint("processing track=" << itrack << ", label="
+                                       << eoccs[seed_cand_idx[itrack].first][seed_cand_idx[itrack].second].label());
+
+            //fixme find a way to deal only with the candidates needed in this thread
+            mkfndr->inputTracksAndHitIdx(eoccs.refCandidates(), seed_cand_idx, itrack, end, false);
+
+            //propagate to layer
+            dcall(pre_prop_print(curr_layer, mkfndr.get()));
+
+            (mkfndr.get()->*fnd_foos.m_propagate_foo)(
+                layer_info.propagate_to(), end - itrack, prop_config.finding_inter_layer_pflags);
+
+            dcall(post_prop_print(curr_layer, mkfndr.get()));
+
+            dprint("now get hit range");
+            mkfndr->selectHitIndices(layer_of_hits, end - itrack);
+
+            find_tracks_handle_missed_layers(
+                mkfndr.get(), layer_info, tmp_cands, seed_cand_idx, region, start_seed, itrack, end);
+
+            dprint("make new candidates");
+            mkfndr->findCandidates(layer_of_hits, tmp_cands, start_seed, end - itrack, fnd_foos);
+
+          }  //end of vectorized loop
+
+          // sort the input candidates
+          for (int is = 0; is < n_seeds; ++is) {
+            dprint("dump seed n " << is << " with N_input_candidates=" << tmp_cands[is].size());
+
+            std::sort(tmp_cands[is].begin(), tmp_cands[is].end(), sortCandByScore);
+          }
+
+          // now fill out the output candidates
+          for (int is = 0; is < n_seeds; ++is) {
+            if (!tmp_cands[is].empty()) {
+              eoccs[start_seed + is].clear();
+
+              // Put good candidates into eoccs, process -2 candidates.
+              int n_placed = 0;
+              bool first_short = true;
+              for (int ii = 0; ii < (int)tmp_cands[is].size() && n_placed < params.maxCandsPerSeed; ++ii) {
+                TrackCand &tc = tmp_cands[is][ii];
+
+                // See if we have an overlap hit available, but only if we have a true hit in this layer
+                // and pT is above the pTCutOverlap
+                if (tc.pT() > params.pTCutOverlap && tc.getLastHitLyr() == curr_layer && tc.getLastHitIdx() >= 0) {
+                  CombCandidate &ccand = eoccs[start_seed + is];
+
+                  HitMatch *hm = ccand[tc.originIndex()].findOverlap(
+                      tc.getLastHitIdx(), layer_of_hits.refHit(tc.getLastHitIdx()).detIDinLayer());
+
+                  if (hm) {
+                    tc.addHitIdx(hm->m_hit_idx, curr_layer, hm->m_chi2);
+                    tc.incOverlapCount();
+                  }
+                }
+
+                if (tc.getLastHitIdx() != -2) {
+                  eoccs[start_seed + is].emplace_back(tc);
+                  ++n_placed;
+                } else if (first_short) {
+                  first_short = false;
+                  if (tc.score() > eoccs[start_seed + is].refBestShortCand().score()) {
+                    eoccs[start_seed + is].setBestShortCand(tc);
+                  }
+                }
+              }
+
+              tmp_cands[is].clear();
+            }
+          }
+
+        }  // end of layer loop
+
+        // final sorting
+        for (int iseed = start_seed; iseed < end_seed; ++iseed) {
+          eoccs[iseed].mergeCandsAndBestShortOne(m_job->params(), true, true);
+        }
+      });  // end parallel-for over chunk of seeds within region
+    });    // end of parallel-for-each over eta regions
+
+    // debug = false;
+  }
+
+  //------------------------------------------------------------------------------
+  // FindTracksCombinatorial: CloneEngine TBB
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::findTracksCloneEngine(SteeringParams::IterationType_e iteration_dir) {
+    // Clone-engine finding: parallel over eta regions, then over chunks of seeds within each region.
+    // debug = true;
+    EventOfCombCandidates &cands = m_event_of_comb_cands;
+    const bool is_bkw_search = (iteration_dir == SteeringParams::IT_BkwSearch);
+
+    tbb::parallel_for_each(m_job->regions_begin(), m_job->regions_end(), [&](int region) {
+      // Backward search needs a dedicated plan for the region; the region is skipped otherwise.
+      if (is_bkw_search && !m_job->steering_params(region).has_bksearch_plan()) {
+        printf("No backward search plan for region %d\n", region);
+        return;
+      }
+
+      const RegionOfSeedIndices rosi(m_seedEtaSeparators, region);
+
+      // adaptive seeds per task based on the total estimated amount of work to divide among all threads
+      const int seedsPerTask = std::clamp(
+          Config::numThreadsEvents * cands.size() / Config::numThreadsFinder + 1, 4, Config::numSeedsPerTask);
+      dprint("adaptiveSPT " << seedsPerTask << " fill " << rosi.count() << "/" << cands.size() << " region " << region);
+
+      tbb::parallel_for(rosi.tbb_blk_rng_std(seedsPerTask), [&](const tbb::blocked_range<int> &seeds) {
+        // Per-task cloner and finder, both obtained from g_exe_ctx (cloner first, as before).
+        auto cloner = g_exe_ctx.m_cloners.makeOrGet();
+        auto mkfndr = g_exe_ctx.m_finders.makeOrGet();
+        cloner->setup(m_job->params());
+
+        // loop over layers for this chunk of seeds
+        find_tracks_in_layers(*cloner, mkfndr.get(), iteration_dir, seeds.begin(), seeds.end(), region);
+        cloner->release();
+      });
+    });
+    // debug = false;
+  }
+
+  void MkBuilder::find_tracks_in_layers(CandCloner &cloner,
+                                        MkFinder *mkfndr,
+                                        SteeringParams::IterationType_e iteration_dir,
+                                        const int start_seed,
+                                        const int end_seed,
+                                        const int region) {
+    EventOfCombCandidates &eoccs = m_event_of_comb_cands;
+    const TrackerInfo &trk_info = m_job->m_trk_info;
+    const SteeringParams &st_par = m_job->steering_params(region);
+    const IterationParams &params = m_job->params();
+    const PropagationConfig &prop_config = PropagationConfig::get_default();
+    // Runs the clone-engine layer loop for seeds [start_seed, end_seed) of the given region.
+    const int n_seeds = end_seed - start_seed;
+
+    std::vector<std::pair<int, int>> seed_cand_idx, seed_cand_update_idx;
+    seed_cand_idx.reserve(n_seeds * params.maxCandsPerSeed);
+    seed_cand_update_idx.reserve(n_seeds * params.maxCandsPerSeed);
+    // Per-seed extra candidates, handed to the cloner and to find_tracks_handle_missed_layers().
+    std::vector<std::vector<TrackCand>> extra_cands(n_seeds);
+    for (int ii = 0; ii < n_seeds; ++ii)
+      extra_cands[ii].reserve(params.maxCandsPerSeed);
+
+    cloner.begin_eta_bin(&eoccs, &seed_cand_update_idx, &extra_cands, start_seed, n_seeds);
+
+    // Loop over layers, starting from after the seed.
+    // NOTE(review): this comment used to announce a final pass with curr_layer = -1 to update
+    // parameters and output final tracks; no such pass is visible below -- confirm.
+
+    auto layer_plan_it = st_par.make_iterator(iteration_dir);
+
+    dprintf("Made iterator for %d, first layer=%d ... end layer=%d\n",
+            iteration_dir,
+            layer_plan_it.layer(),
+            layer_plan_it.last_layer());
+    // The first layer of the plan is expected to be pickup-only.
+    assert(layer_plan_it.is_pickup_only());
+
+    int curr_layer = layer_plan_it.layer(), prev_layer;
+
+    dprintf(
+        "\nMkBuilder::find_tracks_in_layers region=%d, seed_pickup_layer=%d, first_layer=%d; start_seed=%d, "
+        "end_seed=%d\n",
+        region,
+        curr_layer,
+        layer_plan_it.next_layer(),
+        start_seed,
+        end_seed);
+    // Backward search has its own iteration parameters.
+    auto &iter_params = (iteration_dir == SteeringParams::IT_BkwSearch) ? m_job->m_iter_config.m_backward_params
+                                                                        : m_job->m_iter_config.m_params;
+
+    // Loop over layers according to plan.
+    while (++layer_plan_it) {
+      prev_layer = curr_layer;
+      curr_layer = layer_plan_it.layer();
+      mkfndr->setup(prop_config,
+                    iter_params,
+                    m_job->m_iter_config.m_layer_configs[curr_layer],
+                    m_job->get_mask_for_layer(curr_layer));
+
+      const bool pickup_only = layer_plan_it.is_pickup_only();
+
+      dprintf("\n\n* Processing layer %d, %s\n\n", curr_layer, pickup_only ? "pickup only" : "full finding");
+
+      const LayerInfo &layer_info = trk_info.layer(curr_layer);
+      const LayerOfHits &layer_of_hits = m_job->m_event_of_hits[curr_layer];
+      const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(layer_info.is_barrel());
+
+      const int theEndCand = find_tracks_unroll_candidates(
+          seed_cand_idx, start_seed, end_seed, curr_layer, prev_layer, pickup_only, iteration_dir);
+
+      dprintf("  Number of candidates to process: %d\n", theEndCand);
+
+      // Don't bother messing with the clone engine if there are no candidates
+      // (actually it crashes, so this protection is needed).
+      // If there are no cands on this iteration, there won't be any later on either,
+      // by the construction of the seed_cand_idx vector.
+      // XXXXMT There might be cases in endcap where all tracks will miss the
+      // next layer, but only relevant if we do geometric selection before.
+
+      if (pickup_only || theEndCand == 0)
+        continue;
+
+      cloner.begin_layer(curr_layer);
+
+      //vectorized loop
+      for (int itrack = 0; itrack < theEndCand; itrack += NN) {
+        const int end = std::min(itrack + NN, theEndCand);
+
+#ifdef DEBUG
+        dprintf("\nProcessing track=%d, start_seed=%d, n_seeds=%d, theEndCand=%d, end=%d, nn=%d, end_eq_tec=%d\n",
+                itrack,
+                start_seed,
+                n_seeds,
+                theEndCand,
+                end,
+                end - itrack,
+                end == theEndCand);
+        dprintf("  (seed,cand): ");
+        for (int i = itrack; i < end; ++i)
+          dprintf("(%d,%d)  ", seed_cand_idx[i].first, seed_cand_idx[i].second);
+        dprintf("\n");
+#endif
+
+        mkfndr->inputTracksAndHitIdx(eoccs.refCandidates(), seed_cand_idx, itrack, end, false);
+
+#ifdef DEBUG
+        for (int i = itrack; i < end; ++i)
+          dprintf("  track %d, idx %d is from seed %d\n", i, i - itrack, mkfndr->m_Label(i - itrack, 0, 0));
+#endif
+
+        // propagate to current layer
+        (mkfndr->*fnd_foos.m_propagate_foo)(
+            layer_info.propagate_to(), end - itrack, prop_config.finding_inter_layer_pflags);
+
+        dprint("now get hit range");
+
+#ifdef DUMPHITWINDOW
+        mkfndr->m_event = m_event;
+#endif
+
+        mkfndr->selectHitIndices(layer_of_hits, end - itrack);
+
+        find_tracks_handle_missed_layers(
+            mkfndr, layer_info, extra_cands, seed_cand_idx, region, start_seed, itrack, end);
+
+        // copy_out the propagated track params, errors only.
+        // Do not, keep cands at last valid hit until actual update,
+        // this requires change to propagation flags used in MkFinder::updateWithLastHit()
+        // from intra-layer to inter-layer.
+        // mkfndr->copyOutParErr(eoccs.refCandidates_nc(), end - itrack, true);
+
+        dprint("make new candidates");
+        cloner.begin_iteration();
+
+        mkfndr->findCandidatesCloneEngine(layer_of_hits, cloner, start_seed, end - itrack, fnd_foos);
+
+        cloner.end_iteration();
+      }  //end of vectorized loop
+
+      cloner.end_layer();
+
+      // Update loop of best candidates. CandCloner prepares the list of those
+      // that need update (excluding all those with negative last hit index).
+
+      const int theEndUpdater = seed_cand_update_idx.size();
+
+      for (int itrack = 0; itrack < theEndUpdater; itrack += NN) {
+        const int end = std::min(itrack + NN, theEndUpdater);
+        // inputProp = true: the candidates listed by the cloner are loaded into the iP slot.
+        mkfndr->inputTracksAndHitIdx(eoccs.refCandidates(), seed_cand_update_idx, itrack, end, true);
+
+        mkfndr->updateWithLastHit(layer_of_hits, end - itrack, fnd_foos);
+
+        // copy_out the updated track params, errors only (hit-idcs and chi2 already set)
+        mkfndr->copyOutParErr(eoccs.refCandidates_nc(), end - itrack, false);
+      }
+
+      // Check if cands are sorted, as expected.
+#ifdef DEBUG
+      for (int iseed = start_seed; iseed < end_seed; ++iseed) {
+        auto &cc = eoccs[iseed];
+
+        for (int i = 0; i < ((int)cc.size()) - 1; ++i) {
+          if (cc[i].score() < cc[i + 1].score()) {
+            printf("CloneEngine - NOT SORTED: layer=%d, iseed=%d (size=%lu)-- %d : %f smaller than %d : %f\n",
+                   curr_layer,
+                   iseed,
+                   cc.size(),
+                   i,
+                   cc[i].score(),
+                   i + 1,
+                   cc[i + 1].score());
+          }
+        }
+      }
+#endif
+
+    }  // end of layer loop
+
+    cloner.end_eta_bin();
+
+    // final sorting
+    for (int iseed = start_seed; iseed < end_seed; ++iseed) {
+      eoccs[iseed].mergeCandsAndBestShortOne(m_job->params(), true, true);
+    }
+  }
+
+  //==============================================================================
+  // BackwardFit
+  //==============================================================================
+
+  // MT-RATS - eta separators can become inconsistent after copy out with possibly empty CombCands.
+  // I added asserts to two applicable places above (both here in MkBuilder.cc).
+  // One could also re-calculate / adjust m_seedEtaSeparators during the export itself, probably.
+  // Or use separate seed / track vectors for every region -- which would be prettier.
+
+  void MkBuilder::backwardFitBH() {
+    // Final backward fit (BH variant): parallel over regions, then over blocks of seed indices.
+    tbb::parallel_for_each(m_job->regions_begin(), m_job->regions_end(), [&](int region) {
+      const RegionOfSeedIndices region_seeds(m_seedEtaSeparators, region);
+
+      tbb::parallel_for(region_seeds.tbb_blk_rng_vec(), [&](const tbb::blocked_range<int> &blk_rng) {
+        // One finder per task, reused for every sub-range of this block.
+        auto mkfndr = g_exe_ctx.m_finders.makeOrGet();
+        MkFinder *finder = mkfndr.get();
+
+        // Step through the block one sub-range at a time.
+        for (RangeOfSeedIndices rng = region_seeds.seed_rng(blk_rng); rng.valid(); ++rng) {
+          // final backward fit
+          fit_cands_BH(finder, rng.m_beg, rng.m_end, region);
+        }
+      });
+    });
+  }
+
+  void MkBuilder::fit_cands_BH(MkFinder *mkfndr, int start_cand, int end_cand, int region) {
+    const SteeringParams &st_par = m_job->steering_params(region);
+    const PropagationConfig &prop_config = PropagationConfig::get_default();
+#if defined(DEBUG) || defined(DEBUG_BACKWARD_FIT_BH)  // both debug sections below index eoccs
+    EventOfCombCandidates &eoccs = m_event_of_comb_cands;
+#endif
+    // Backward-fits tracks [start_cand, end_cand) of m_tracks, in groups of at most NN.
+    for (int icand = start_cand; icand < end_cand; icand += NN) {
+      const int end = std::min(icand + NN, end_cand);
+      // NOTE(review): the debug dumps read eoccs while the fit itself works on m_tracks -- confirm intended.
+#ifdef DEBUG
+      printf("Pre Final fit for %d - %d\n", icand, end);
+      for (int i = icand; i < end; ++i) {
+        const TrackCand &t = eoccs[i][0];
+        printf(
+            "  %4d with q=%+d chi2=%7.3f pT=%7.3f eta=% 7.3f x=%.3f y=%.3f z=%.3f nHits=%2d  label=%4d findable=%d\n",
+            i,
+            t.charge(),
+            t.chi2(),
+            t.pT(),
+            t.momEta(),
+            t.x(),
+            t.y(),
+            t.z(),
+            t.nFoundHits(),
+            t.label(),
+            t.isFindable());
+      }
+#endif
+
+      bool chi_debug = false;
+#ifdef DEBUG_BACKWARD_FIT_BH
+    redo_fit:
+#endif
+
+      // input candidate tracks
+      mkfndr->bkFitInputTracks(m_tracks, icand, end);
+
+      // perform fit back to first layer on track
+      mkfndr->bkFitFitTracksBH(m_job->m_event_of_hits, st_par, end - icand, chi_debug);
+
+      // now move one last time to PCA
+      if (prop_config.backward_fit_to_pca) {
+        mkfndr->bkFitPropTracksToPCA(end - icand);
+      }
+
+#ifdef DEBUG_BACKWARD_FIT_BH
+      // Dump tracks with pT > 2 and chi2/dof > 20. Assumes MPT_SIZE=1.
+      if (!chi_debug && 1.0f / mkfndr->m_Par[MkBase::iP].At(0, 3, 0) > 2.0f &&
+          mkfndr->m_Chi2(0, 0, 0) / (eoccs[icand][0].nFoundHits() * 3 - 6) > 20.0f) {
+        chi_debug = true;
+#ifdef MKFIT_STANDALONE
+        printf("CHIHDR Event %d, Cand %3d, pT %f, chipdof %f ### NOTE x,y,z in cm, sigmas, deltas in mum ### !!!\n",
+               m_event->evtID(),
+#else
+        printf("CHIHDR Cand %3d, pT %f, chipdof %f ### NOTE x,y,z in cm, sigmas, deltas in mum ### !!!\n",
+#endif
+               icand,
+               1.0f / mkfndr->m_Par[MkBase::iP].At(0, 3, 0),
+               mkfndr->m_Chi2(0, 0, 0) / (eoccs[icand][0].nFoundHits() * 3 - 6));
+        printf(
+            "CHIHDR %3s %10s %10s %10s %10s %10s %11s %11s %11s %10s %10s %10s %10s %11s %11s %11s %10s %10s %10s %10s "
+            "%10s %11s %11s\n",
+            "lyr",
+            "chi2",
+            "x_h",
+            "y_h",
+            "z_h",
+            "r_h",
+            "sx_h",
+            "sy_h",
+            "sz_h",
+            "x_t",
+            "y_t",
+            "z_t",
+            "r_t",
+            "sx_t",
+            "sy_t",
+            "sz_t",
+            "pt",
+            "phi",
+            "theta",
+            "phi_h",
+            "phi_t",
+            "d_xy",
+            "d_z");
+        goto redo_fit;
+      }
+#endif
+
+      // copy out full set of info at last propagated position
+      mkfndr->bkFitOutputTracks(m_tracks, icand, end, prop_config.backward_fit_to_pca);
+
+#ifdef DEBUG
+      printf("Post Final fit for %d - %d\n", icand, end);
+      for (int i = icand; i < end; ++i) {
+        const TrackCand &t = eoccs[i][0];
+        printf(
+            "  %4d with q=%+d chi2=%7.3f pT=%7.3f eta=% 7.3f x=%.3f y=%.3f z=%.3f nHits=%2d  label=%4d findable=%d\n",
+            i,
+            t.charge(),
+            t.chi2(),
+            t.pT(),
+            t.momEta(),
+            t.x(),
+            t.y(),
+            t.z(),
+            t.nFoundHits(),
+            t.label(),
+            t.isFindable());
+      }
+#endif
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  void MkBuilder::backwardFit() {
+    // Backward fit of m_event_of_comb_cands: parallel over regions, then over chunks of candidates.
+    EventOfCombCandidates &cands = m_event_of_comb_cands;
+    tbb::parallel_for_each(m_job->regions_begin(), m_job->regions_end(), [&](int region) {
+      const RegionOfSeedIndices rosi(m_seedEtaSeparators, region);
+
+      // adaptive seeds per task based on the total estimated amount of work to divide among all threads
+      const int seedsPerTask = std::clamp(
+          Config::numThreadsEvents * cands.size() / Config::numThreadsFinder + 1, 4, Config::numSeedsPerTask);
+      dprint("adaptiveSPT " << seedsPerTask << " fill " << rosi.count() << "/" << cands.size() << " region " << region);
+
+      tbb::parallel_for(rosi.tbb_blk_rng_std(seedsPerTask), [&](const tbb::blocked_range<int> &blk) {
+        // Each task obtains a finder from g_exe_ctx and fits its own sub-range.
+        auto mkfndr = g_exe_ctx.m_finders.makeOrGet();
+        fit_cands(mkfndr.get(), blk.begin(), blk.end(), region);
+      });
+    });
+  }
+
+  void MkBuilder::fit_cands(MkFinder *mkfndr, int start_cand, int end_cand, int region) {
+    // Backward-fits candidates [start_cand, end_cand) of m_event_of_comb_cands.
+    EventOfCombCandidates &comb_cands = m_event_of_comb_cands;
+    const PropagationConfig &prop_cfg = PropagationConfig::get_default();
+    const SteeringParams &steering = m_job->steering_params(region);
+
+    // Candidates are handled in groups of at most NN.
+    for (int ibeg = start_cand; ibeg < end_cand; ibeg += NN) {
+      const int iend = std::min(ibeg + NN, end_cand);
+
+#ifdef DEBUG
+      printf("Pre Final fit for %d - %d\n", ibeg, iend);
+      for (int i = ibeg; i < iend; ++i) {
+        const TrackCand &tc = comb_cands[i][0];
+        printf(
+            "  %4d with q=%+d chi2=%7.3f pT=%7.3f eta=% 7.3f x=%.3f y=%.3f z=%.3f nHits=%2d  label=%4d findable=%d\n",
+            i,
+            tc.charge(),
+            tc.chi2(),
+            tc.pT(),
+            tc.momEta(),
+            tc.x(),
+            tc.y(),
+            tc.z(),
+            tc.nFoundHits(),
+            tc.label(),
+            tc.isFindable());
+      }
+#endif
+
+      bool chi_dbg = false;
+#ifdef DEBUG_BACKWARD_FIT
+      chi_dbg = true;
+      static bool header_pending = true;
+      if (header_pending) {
+        // ./mkFit ... | perl -ne 'if (/^BKF_OVERLAP/) { s/^BKF_OVERLAP //og; print; }' > bkf_ovlp.rtt
+        printf(
+            "BKF_OVERLAP event/I:label/I:prod_type/I:is_findable/I:layer/I:is_stereo/I:is_barrel/I:"
+            "pt/F:eta/F:phi/F:chi2/F:isnan/I:isfin/I:gtzero/I:hit_label/I:"
+            "sx_t/F:sy_t/F:sz_t/F:d_xy/F:d_z/F\n");
+        header_pending = false;
+      }
+      mkfndr->m_event = m_event;
+#endif
+
+      // load this group of candidates into the finder
+      mkfndr->bkFitInputTracks(comb_cands, ibeg, iend);
+
+      // fit tracks back to first layer
+      mkfndr->bkFitFitTracks(m_job->m_event_of_hits, steering, iend - ibeg, chi_dbg);
+
+      // optional last step: move to PCA
+      if (prop_cfg.backward_fit_to_pca) {
+        mkfndr->bkFitPropTracksToPCA(iend - ibeg);
+      }
+
+      mkfndr->bkFitOutputTracks(comb_cands, ibeg, iend, prop_cfg.backward_fit_to_pca);
+
+#ifdef DEBUG
+      printf("Post Final fit for %d - %d\n", ibeg, iend);
+      for (int i = ibeg; i < iend; ++i) {
+        const TrackCand &tc = comb_cands[i][0];
+        printf(
+            "  %4d with q=%+d chi2=%7.3f pT=%7.3f eta=% 7.3f x=%.3f y=%.3f z=%.3f nHits=%2d  label=%4d findable=%d\n",
+            i,
+            tc.charge(),
+            tc.chi2(),
+            tc.pT(),
+            tc.momEta(),
+            tc.x(),
+            tc.y(),
+            tc.z(),
+            tc.nFoundHits(),
+            tc.label(),
+            tc.isFindable());
+      }
+#endif
+    }
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/MkBuilderWrapper.cc b/RecoTracker/MkFitCore/src/MkBuilderWrapper.cc
new file mode 100644
index 0000000000000..08bd3793bf459
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkBuilderWrapper.cc
@@ -0,0 +1,10 @@
+#include "RecoTracker/MkFitCore/interface/MkBuilderWrapper.h"
+#include "RecoTracker/MkFitCore/interface/MkBuilder.h"
+
+namespace mkfit {
+  // Creates the wrapped MkBuilder through MkBuilder::make_builder().
+  MkBuilderWrapper::MkBuilderWrapper(bool silent) : builder_(MkBuilder::make_builder(silent)) {}
+  MkBuilderWrapper::~MkBuilderWrapper() = default;
+  // Forwards to MkBuilder::populate().
+  void MkBuilderWrapper::populate() { MkBuilder::populate(); }
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/MkFinder.cc b/RecoTracker/MkFitCore/src/MkFinder.cc
new file mode 100644
index 0000000000000..cb65f324690c2
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkFinder.cc
@@ -0,0 +1,1769 @@
+#include "MkFinder.h"
+
+#include "CandCloner.h"
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+#include "FindingFoos.h"
+
+#include "KalmanUtilsMPlex.h"
+
+#include "MatriplexPackers.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+#if (defined(DUMPHITWINDOW) || defined(DEBUG_BACKWARD_FIT)) && defined(MKFIT_STANDALONE)
+#include "RecoTracker/MkFitCore/standalone/Event.h"
+#endif
+
+#ifndef MKFIT_STANDALONE
+#include "FWCore/Utilities/interface/isFinite.h"
+#endif
+
+#include <algorithm>
+
+namespace {
+  bool isFinite(float x) {  // file-local finiteness check, implementation chosen by build flavour
+#ifdef MKFIT_STANDALONE
+    return std::isfinite(x);  // standalone build: standard-library check
+#else
+    return edm::isFinite(x);  // otherwise: helper from FWCore/Utilities/interface/isFinite.h
+#endif
+  }
+}  // namespace
+
+namespace mkfit {
+
+  void MkFinder::setup(const PropagationConfig &pc,
+                       const IterationParams &ip,
+                       const IterationLayerConfig &ilc,
+                       const std::vector<bool> *ihm) {  // only pointers are stored, nothing is copied
+    m_iteration_hit_mask = ihm;  // kept exactly as passed in
+    m_iteration_layer_config = &ilc;
+    m_iteration_params = &ip;
+    m_prop_config = &pc;
+  }
+
+  void MkFinder::release() {  // resets every pointer that setup() assigns
+    m_iteration_hit_mask = nullptr;
+    m_iteration_layer_config = nullptr;
+    m_iteration_params = nullptr;
+    m_prop_config = nullptr;
+  }
+
+  //==============================================================================
+  // Input / Output TracksAndHitIdx
+  //==============================================================================
+
+  void MkFinder::inputTracksAndHitIdx(const std::vector<Track> &tracks, int beg, int end, bool inputProp) {
+    // Loads tracks[beg, end) into consecutive slots 0, 1, ... via copy_in().
+    // inputProp picks the destination parameter set: iP when true, iC otherwise.
+    // The chunk may be shorter than NN (e.g. the last one), so this is not asserted:
+    // assert(end - beg == NN);
+    const int slot_kind = inputProp ? iP : iC;
+    const int n_tracks = end - beg;
+
+    for (int imp = 0; imp < n_tracks; ++imp) {
+      copy_in(tracks[beg + imp], imp, slot_kind);
+    }
+  }
+
+  void MkFinder::inputTracksAndHitIdx(
+      const std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool inputProp, int mp_offset) {
+    // Loads tracks[idxs[i]] for i in [beg, end) into slots mp_offset, mp_offset + 1, ... via copy_in().
+    // inputProp picks the destination parameter set: iP when true, iC otherwise.
+    // The chunk may be shorter than NN (e.g. the last one), so this is not asserted:
+    // assert(end - beg == NN);
+    const int slot_kind = inputProp ? iP : iC;
+
+    int imp = mp_offset;
+    for (int i = beg; i < end; ++i) {
+      copy_in(tracks[idxs[i]], imp++, slot_kind);
+    }
+  }
+
+  void MkFinder::inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
+                                      const std::vector<std::pair<int, int>> &idxs,
+                                      int beg,
+                                      int end,
+                                      bool inputProp) {
+    // Loads candidate tracks[idxs[i].first][idxs[i].second] for i in [beg, end) into slots 0, 1, ...
+    // and records the two indices of each pair in m_SeedIdx / m_CandIdx.
+    // The chunk may be shorter than NN (e.g. the last one), so this is not asserted:
+    // assert(end - beg == NN);
+    const int slot_kind = inputProp ? iP : iC;
+
+    int imp = 0;
+    for (int i = beg; i < end; ++i, ++imp) {
+      const auto seed_idx = idxs[i].first;
+      const auto cand_idx = idxs[i].second;
+      const TrackCand &trk = tracks[seed_idx][cand_idx];
+
+      copy_in(trk, imp, slot_kind);
+#ifdef DUMPHITWINDOW
+      m_SeedAlgo(imp, 0, 0) = tracks[seed_idx].seed_algo();
+      m_SeedLabel(imp, 0, 0) = tracks[seed_idx].seed_label();
+#endif
+      m_SeedIdx(imp, 0, 0) = seed_idx;
+      m_CandIdx(imp, 0, 0) = cand_idx;
+    }
+  }
+
+  void MkFinder::inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
+                                      const std::vector<std::pair<int, IdxChi2List>> &idxs,
+                                      int beg,
+                                      int end,
+                                      bool inputProp) {
+    // Loads candidate tracks[idxs[i].first][idxs[i].second.trkIdx] for i in [beg, end) into slots
+    // 0, 1, ... and records the two indices in m_SeedIdx / m_CandIdx.
+    // The chunk may be shorter than NN (e.g. the last one), so this is not asserted:
+    // assert(end - beg == NN);
+    const int slot_kind = inputProp ? iP : iC;
+
+    int imp = 0;
+    for (int i = beg; i < end; ++i, ++imp) {
+      const auto seed_idx = idxs[i].first;
+      const auto cand_idx = idxs[i].second.trkIdx;
+      const TrackCand &trk = tracks[seed_idx][cand_idx];
+
+      copy_in(trk, imp, slot_kind);
+#ifdef DUMPHITWINDOW
+      m_SeedAlgo(imp, 0, 0) = tracks[seed_idx].seed_algo();
+      m_SeedLabel(imp, 0, 0) = tracks[seed_idx].seed_label();
+#endif
+      m_SeedIdx(imp, 0, 0) = seed_idx;
+      m_CandIdx(imp, 0, 0) = cand_idx;
+    }
+  }
+
+  void MkFinder::outputTracksAndHitIdx(std::vector<Track> &tracks, int beg, int end, bool outputProp) const {
+    // Writes slots 0, 1, ... back into tracks[beg, end) via copy_out(); the source parameter
+    // set is iP when outputProp is true, iC otherwise. tracks must already be large enough.
+    const int slot_kind = outputProp ? iP : iC;
+    const int n_tracks = end - beg;
+
+    for (int imp = 0; imp < n_tracks; ++imp) {
+      copy_out(tracks[beg + imp], imp, slot_kind);
+    }
+  }
+
+  void MkFinder::outputTracksAndHitIdx(
+      std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool outputProp) const {
+    // Writes slots 0, 1, ... back into tracks[idxs[i]] for i in [beg, end) via copy_out(); the source
+    // parameter set is iP when outputProp is true, iC otherwise. tracks must already be large enough.
+    const int slot_kind = outputProp ? iP : iC;
+
+    int imp = 0;
+    for (int i = beg; i < end; ++i) {
+      copy_out(tracks[idxs[i]], imp++, slot_kind);
+    }
+  }
+
+  //==============================================================================
+  // getHitSelDynamicWindows
+  //==============================================================================
+  // From HitSelectionWindows.h: track-related config on hit selection windows
+
+  void MkFinder::getHitSelDynamicWindows(
+      const float invpt, const float theta, float &min_dq, float &max_dq, float &min_dphi, float &max_dphi) {
+    // Updates the dq and dphi window limits (in/out references) from invpt and theta, per layer config.
+    const IterationLayerConfig &cfg = *m_iteration_layer_config;
+    const float capped_invpt = std::min(invpt, 10.0f);  // => pT>0.1 GeV
+
+    // dq hit selection window
+    const float lin_dq = cfg.c_dq_0 * capped_invpt + cfg.c_dq_1 * theta + cfg.c_dq_2;
+    const auto scaled_dq = cfg.c_dq_sf * lin_dq;
+    // In case layer is missing (e.g., seeding layers, or too low stats for training), leave original limits
+    if (scaled_dq > 0.f) {
+      min_dq = scaled_dq;
+      max_dq = 2.0f * min_dq;
+    }
+
+    // dphi hit selection window
+    const float lin_dphi = cfg.c_dp_0 * capped_invpt + cfg.c_dp_1 * theta + cfg.c_dp_2;
+    const auto scaled_dphi = cfg.c_dp_sf * lin_dphi;
+    // In case layer is missing (e.g., seeding layers, or too low stats for training), leave original limits
+    if (scaled_dphi > min_dphi) {
+      min_dphi = scaled_dphi;
+      max_dphi = 2.0f * min_dphi;
+    }
+
+    //// For future optimization: for layer & iteration dependent hit chi2 cut
+    //float this_c2 = (ILC.c_c2_0)*invpt+(ILC.c_c2_1)*theta+(ILC.c_c2_2);
+    //// In case layer is missing (e.g., seeding layers, or too low stats for training), leave original limits
+    //if(this_c2>0.f){
+    //  max_c2 = (ILC.c_c2_sf)*this_c2;
+    //}
+  }
+
+  //==============================================================================
+  // getHitSelDynamicChi2Cut
+  //==============================================================================
+  // From HitSelectionWindows.h: track-related config on hit selection windows
+
+  inline float MkFinder::getHitSelDynamicChi2Cut(const int itrk, const int ipar) {
+    // Chi2 cut for track slot itrk, linear in the capped invpt and in |theta - pi/2| with the
+    // coefficients of the current IterationLayerConfig; never lower than chi2Cut_min.
+    const IterationLayerConfig &cfg = *m_iteration_layer_config;
+    const float floor_cut = m_iteration_params->chi2Cut_min;
+
+    // Parameter rows 3 and 5 of m_Par[ipar] are read as invpt and theta.
+    const float invpt = m_Par[ipar].At(itrk, 3, 0);
+    const float theta = std::abs(m_Par[ipar].At(itrk, 5, 0) - Const::PIOver2);
+    const float capped_invpt = std::min(invpt, 10.0f);
+
+    const float lin_c2 = cfg.c_c2_0 * capped_invpt + cfg.c_c2_1 * theta + cfg.c_c2_2;
+    const auto scaled_c2 = cfg.c_c2_sf * lin_c2;
+
+    // In case layer is missing (e.g., seeding layers, or too low stats for training), leave original limits
+    // (i.e. fall back to the minimum cut).
+    return (scaled_c2 > floor_cut) ? scaled_c2 : floor_cut;
+  }
+
+  //==============================================================================
+  // SelectHitIndices
+  //==============================================================================
+
+  void MkFinder::selectHitIndices(const LayerOfHits &layer_of_hits, const int N_proc) {
+    // bool debug = true;
+
+    const LayerOfHits &L = layer_of_hits;
+    const IterationLayerConfig &ILC = *m_iteration_layer_config;
+
+    const int iI = iP;
+    const float nSigmaPhi = 3;
+    const float nSigmaZ = 3;
+    const float nSigmaR = 3;
+
+    dprintf("LayerOfHits::SelectHitIndices %s layer=%d N_proc=%d\n",
+            L.is_barrel() ? "barrel" : "endcap",
+            L.layer_id(),
+            N_proc);
+
+    float dqv[NN], dphiv[NN], qv[NN], phiv[NN];
+    int qb1v[NN], qb2v[NN], qbv[NN], pb1v[NN], pb2v[NN];
+
+    const auto assignbins = [&](int itrack,
+                                float q,
+                                float dq,
+                                float phi,
+                                float dphi,
+                                float min_dq,
+                                float max_dq,
+                                float min_dphi,
+                                float max_dphi) {
+      dphi = std::clamp(std::abs(dphi), min_dphi, max_dphi);
+      dq = std::clamp(dq, min_dq, max_dq);
+      //
+      qv[itrack] = q;
+      phiv[itrack] = phi;
+      dphiv[itrack] = dphi;
+      dqv[itrack] = dq;
+      //
+      qbv[itrack] = L.qBinChecked(q);
+      qb1v[itrack] = L.qBinChecked(q - dq);
+      qb2v[itrack] = L.qBinChecked(q + dq) + 1;
+      pb1v[itrack] = L.phiBin(phi - dphi);
+      pb2v[itrack] = L.phiBin(phi + dphi) + 1;
+    };
+
+    const auto calcdphi2 = [&](int itrack, float dphidx, float dphidy) {
+      return dphidx * dphidx * m_Err[iI].constAt(itrack, 0, 0) + dphidy * dphidy * m_Err[iI].constAt(itrack, 1, 1) +
+             2 * dphidx * dphidy * m_Err[iI].constAt(itrack, 0, 1);
+    };
+
+    const auto calcdphi = [&](float dphi2, float min_dphi) {
+      return std::max(nSigmaPhi * std::sqrt(std::abs(dphi2)), min_dphi);
+    };
+
+    if (L.is_barrel()) {
+      // Pull out the part of the loop that vectorizes
+#pragma omp simd
+      for (int itrack = 0; itrack < NN; ++itrack) {
+        m_XHitSize[itrack] = 0;
+
+        float min_dq = ILC.min_dq();
+        float max_dq = ILC.max_dq();
+        float min_dphi = ILC.min_dphi();
+        float max_dphi = ILC.max_dphi();
+
+        const float invpt = m_Par[iI].At(itrack, 3, 0);
+        const float theta = std::fabs(m_Par[iI].At(itrack, 5, 0) - Const::PIOver2);
+        getHitSelDynamicWindows(invpt, theta, min_dq, max_dq, min_dphi, max_dphi);
+
+        const float x = m_Par[iI].constAt(itrack, 0, 0);
+        const float y = m_Par[iI].constAt(itrack, 1, 0);
+        const float r2 = x * x + y * y;
+        const float dphidx = -y / r2, dphidy = x / r2;
+        const float dphi2 = calcdphi2(itrack, dphidx, dphidy);
+#ifdef HARD_CHECK
+        assert(dphi2 >= 0);
+#endif
+
+        const float phi = getPhi(x, y);
+        float dphi = calcdphi(dphi2, min_dphi);
+
+        const float z = m_Par[iI].constAt(itrack, 2, 0);
+        const float dz = std::abs(nSigmaZ * std::sqrt(m_Err[iI].constAt(itrack, 2, 2)));
+        const float edgeCorr = std::abs(0.5f * (L.layer_info()->rout() - L.layer_info()->rin()) /
+                                        std::tan(m_Par[iI].constAt(itrack, 5, 0)));
+        // XXX-NUM-ERR above, m_Err(2,2) gets negative!
+
+        m_XWsrResult[itrack] = L.is_within_z_sensitive_region(z, std::sqrt(dz * dz + edgeCorr * edgeCorr));
+        assignbins(itrack, z, dz, phi, dphi, min_dq, max_dq, min_dphi, max_dphi);
+      }
+    } else  // endcap
+    {
+      // Pull out the part of the loop that vectorizes
+#pragma omp simd
+      for (int itrack = 0; itrack < NN; ++itrack) {
+        m_XHitSize[itrack] = 0;
+
+        float min_dq = ILC.min_dq();
+        float max_dq = ILC.max_dq();
+        float min_dphi = ILC.min_dphi();
+        float max_dphi = ILC.max_dphi();
+
+        const float invpt = m_Par[iI].At(itrack, 3, 0);
+        const float theta = std::fabs(m_Par[iI].At(itrack, 5, 0) - Const::PIOver2);
+        getHitSelDynamicWindows(invpt, theta, min_dq, max_dq, min_dphi, max_dphi);
+
+        const float x = m_Par[iI].constAt(itrack, 0, 0);
+        const float y = m_Par[iI].constAt(itrack, 1, 0);
+        const float r2 = x * x + y * y;
+        const float dphidx = -y / r2, dphidy = x / r2;
+        const float dphi2 = calcdphi2(itrack, dphidx, dphidy);
+#ifdef HARD_CHECK
+        assert(dphi2 >= 0);
+#endif
+
+        const float phi = getPhi(x, y);
+        float dphi = calcdphi(dphi2, min_dphi);
+
+        const float r = std::sqrt(r2);
+        const float dr = nSigmaR * std::sqrt(std::abs(x * x * m_Err[iI].constAt(itrack, 0, 0) +
+                                                      y * y * m_Err[iI].constAt(itrack, 1, 1) +
+                                                      2 * x * y * m_Err[iI].constAt(itrack, 0, 1)) /
+                                             r2);
+        const float edgeCorr = std::abs(0.5f * (L.layer_info()->zmax() - L.layer_info()->zmin()) *
+                                        std::tan(m_Par[iI].constAt(itrack, 5, 0)));
+
+        m_XWsrResult[itrack] = L.is_within_r_sensitive_region(r, std::sqrt(dr * dr + edgeCorr * edgeCorr));
+        assignbins(itrack, r, dr, phi, dphi, min_dq, max_dq, min_dphi, max_dphi);
+      }
+    }
+
+    // Vectorizing this makes it run slower!
+    //#pragma omp simd
+    for (int itrack = 0; itrack < N_proc; ++itrack) {
+      if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
+        m_XHitSize[itrack] = -1;
+        continue;
+      }
+
+      const int qb = qbv[itrack];
+      const int qb1 = qb1v[itrack];
+      const int qb2 = qb2v[itrack];
+      const int pb1 = pb1v[itrack];
+      const int pb2 = pb2v[itrack];
+
+      // Used only by usePhiQArrays
+      const float q = qv[itrack];
+      const float phi = phiv[itrack];
+      const float dphi = dphiv[itrack];
+      const float dq = dqv[itrack];
+
+      dprintf("  %2d/%2d: %6.3f %6.3f %6.6f %7.5f %3d %3d %4d %4d\n",
+              L.layer_id(),
+              itrack,
+              q,
+              phi,
+              dq,
+              dphi,
+              qb1,
+              qb2,
+              pb1,
+              pb2);
+
+      // MT: One could iterate in "spiral" order, to pick hits close to the center.
+      // http://stackoverflow.com/questions/398299/looping-in-a-spiral
+      // This would then work best with relatively small bin sizes.
+      // Or, set them up so I can always take 3x3 array around the intersection.
+
+#if defined(DUMPHITWINDOW) && defined(MKFIT_STANDALONE)
+      int thisseedmcid = -999999;
+      {
+        int seedlabel = m_SeedLabel(itrack, 0, 0);
+        TrackVec &seedtracks = m_event->seedTracks_;
+        int thisidx = -999999;
+        for (int i = 0; i < int(seedtracks.size()); ++i) {
+          auto &thisseed = seedtracks[i];
+          if (thisseed.label() == seedlabel) {
+            thisidx = i;
+            break;
+          }
+        }
+        if (thisidx > -999999) {
+          auto &seedtrack = m_event->seedTracks_[thisidx];
+          std::vector<int> thismcHitIDs;
+          seedtrack.mcHitIDsVec(m_event->layerHits_, m_event->simHitsInfo_, thismcHitIDs);
+          if (std::adjacent_find(thismcHitIDs.begin(), thismcHitIDs.end(), std::not_equal_to<>()) ==
+              thismcHitIDs.end()) {
+            thisseedmcid = thismcHitIDs.at(0);
+          }
+        }
+      }
+#endif
+
+      for (int qi = qb1; qi < qb2; ++qi) {
+        for (int pi = pb1; pi < pb2; ++pi) {
+          const int pb = L.phiMaskApply(pi);
+
+          // Limit to central Q-bin
+          if (qi == qb && L.phi_bin_dead(qi, pb) == true) {
+            dprint("dead module for track in layer=" << L.layer_id() << " qb=" << qi << " pb=" << pb << " q=" << q
+                                                     << " phi=" << phi);
+            m_XWsrResult[itrack].m_in_gap = true;
+          }
+
+          // MT: The following line is the biggest hog (4% total run time).
+          // This comes from cache misses, I presume.
+          // It might make sense to make first loop to extract bin indices
+          // and issue prefetches at the same time.
+          // Then enter vectorized loop to actually collect the hits in proper order.
+
+          //SK: ~20x1024 bin sizes give mostly 1 hit per bin. Commented out for 128 bins or less
+          // #pragma nounroll
+          auto pbi = L.phi_bin_info(qi, pb);
+          for (uint16_t hi = pbi.first; hi < pbi.second; ++hi) {
+            // MT: Access into m_hit_zs and m_hit_phis is 1% run-time each.
+
+            int hi_orig = L.getOriginalHitIndex(hi);
+
+            if (m_iteration_hit_mask && (*m_iteration_hit_mask)[hi_orig]) {
+              dprintf(
+                  "Yay, denying masked hit on layer %d, hi %d, orig idx %d\n", L.layer_info()->layer_id(), hi, hi_orig);
+              continue;
+            }
+
+            if (Config::usePhiQArrays) {
+              if (m_XHitSize[itrack] >= MPlexHitIdxMax)
+                break;
+
+              const float ddq = std::abs(q - L.hit_q(hi));
+              const float ddphi = cdist(std::abs(phi - L.hit_phi(hi)));
+
+#if defined(DUMPHITWINDOW) && defined(MKFIT_STANDALONE)
+              {
+                const MCHitInfo &mchinfo = m_event->simHitsInfo_[L.refHit(hi).mcHitID()];
+                int mchid = mchinfo.mcTrackID();
+                int st_isfindable = 0;
+                int st_label = -999999;
+                int st_prodtype = 0;
+                int st_nhits = -1;
+                int st_charge = 0;
+                float st_r = -999.;
+                float st_z = -999.;
+                float st_pt = -999.;
+                float st_eta = -999.;
+                float st_phi = -999.;
+                if (mchid >= 0) {
+                  Track simtrack = m_event->simTracks_[mchid];
+                  st_isfindable = (int)simtrack.isFindable();
+                  st_label = simtrack.label();
+                  st_prodtype = (int)simtrack.prodType();
+                  st_pt = simtrack.pT();
+                  st_eta = simtrack.momEta();
+                  st_phi = simtrack.momPhi();
+                  st_nhits = simtrack.nTotalHits();
+                  st_charge = simtrack.charge();
+                  st_r = simtrack.posR();
+                  st_z = simtrack.z();
+                }
+
+                const Hit &thishit = L.refHit(hi);
+                m_msErr.copyIn(itrack, thishit.errArray());
+                m_msPar.copyIn(itrack, thishit.posArray());
+
+                MPlexQF thisOutChi2;
+                MPlexLV tmpPropPar;
+                const FindingFoos &fnd_foos = FindingFoos::get_finding_foos(L.is_barrel());
+                (*fnd_foos.m_compute_chi2_foo)(m_Err[iI],
+                                               m_Par[iI],
+                                               m_Chg,
+                                               m_msErr,
+                                               m_msPar,
+                                               thisOutChi2,
+                                               tmpPropPar,
+                                               N_proc,
+                                               m_prop_config->finding_intra_layer_pflags,
+                                               m_prop_config->finding_requires_propagation_to_hit_pos);
+
+                float hx = thishit.x();
+                float hy = thishit.y();
+                float hz = thishit.z();
+                float hr = std::hypot(hx, hy);
+                float hphi = std::atan2(hy, hx);
+                float hex = std::sqrt(thishit.exx());
+                if (std::isnan(hex))
+                  hex = -999.;
+                float hey = std::sqrt(thishit.eyy());
+                if (std::isnan(hey))
+                  hey = -999.;
+                float hez = std::sqrt(thishit.ezz());
+                if (std::isnan(hez))
+                  hez = -999.;
+                float her = std::sqrt(
+                    (hx * hx * thishit.exx() + hy * hy * thishit.eyy() + 2.0f * hx * hy * m_msErr.At(itrack, 0, 1)) /
+                    (hr * hr));
+                if (std::isnan(her))
+                  her = -999.;
+                float hephi = std::sqrt(thishit.ephi());
+                if (std::isnan(hephi))
+                  hephi = -999.;
+                float hchi2 = thisOutChi2[itrack];
+                if (std::isnan(hchi2))
+                  hchi2 = -999.;
+                float tx = m_Par[iI].At(itrack, 0, 0);
+                float ty = m_Par[iI].At(itrack, 1, 0);
+                float tz = m_Par[iI].At(itrack, 2, 0);
+                float tr = std::hypot(tx, ty);
+                float tphi = std::atan2(ty, tx);
+                float tchi2 = m_Chi2(itrack, 0, 0);
+                if (std::isnan(tchi2))
+                  tchi2 = -999.;
+                float tex = std::sqrt(m_Err[iI].At(itrack, 0, 0));
+                if (std::isnan(tex))
+                  tex = -999.;
+                float tey = std::sqrt(m_Err[iI].At(itrack, 1, 1));
+                if (std::isnan(tey))
+                  tey = -999.;
+                float tez = std::sqrt(m_Err[iI].At(itrack, 2, 2));
+                if (std::isnan(tez))
+                  tez = -999.;
+                float ter = std::sqrt(
+                    (tx * tx * tex * tex + ty * ty * tey * tey + 2.0f * tx * ty * m_Err[iI].At(itrack, 0, 1)) /
+                    (tr * tr));
+                if (std::isnan(ter))
+                  ter = -999.;
+                float tephi = std::sqrt(
+                    (ty * ty * tex * tex + tx * tx * tey * tey - 2.0f * tx * ty * m_Err[iI].At(itrack, 0, 1)) /
+                    (tr * tr * tr * tr));
+                if (std::isnan(tephi))
+                  tephi = -999.;
+                float ht_dxy = std::hypot(hx - tx, hy - ty);
+                float ht_dz = hz - tz;
+                float ht_dphi = cdist(std::abs(hphi - tphi));
+
+                static bool first = true;
+                if (first) {
+                  printf(
+                      "HITWINDOWSEL "
+                      "evt_id/I:"
+                      "lyr_id/I:lyr_isbrl/I:hit_idx/I:"
+                      "trk_cnt/I:trk_idx/I:trk_label/I:"
+                      "trk_pt/F:trk_eta/F:trk_mphi/F:trk_chi2/F:"
+                      "nhits/I:"
+                      "seed_idx/I:seed_label/I:seed_algo/I:seed_mcid/I:"
+                      "hit_mcid/I:"
+                      "st_isfindable/I:st_prodtype/I:st_label/I:"
+                      "st_pt/F:st_eta/F:st_phi/F:"
+                      "st_nhits/I:st_charge/I:st_r/F:st_z/F:"
+                      "trk_q/F:hit_q/F:dq_trkhit/F:dq_cut/F:trk_phi/F:hit_phi/F:dphi_trkhit/F:dphi_cut/F:"
+                      "t_x/F:t_y/F:t_r/F:t_phi/F:t_z/F:"
+                      "t_ex/F:t_ey/F:t_er/F:t_ephi/F:t_ez/F:"
+                      "h_x/F:h_y/F:h_r/F:h_phi/F:h_z/F:"
+                      "h_ex/F:h_ey/F:h_er/F:h_ephi/F:h_ez/F:"
+                      "ht_dxy/F:ht_dz/F:ht_dphi/F:"
+                      "h_chi2/F"
+                      "\n");
+                  first = false;
+                }
+
+                if (!(std::isnan(phi)) && !(std::isnan(getEta(m_Par[iI].At(itrack, 5, 0))))) {
+                  //|| std::isnan(ter) || std::isnan(her) || std::isnan(m_Chi2(itrack, 0, 0)) || std::isnan(hchi2)))
+                  printf(
+                      "HITWINDOWSEL "
+                      "%d "
+                      "%d %d %d "
+                      "%d %d %d "
+                      "%6.3f %6.3f %6.3f %6.3f "
+                      "%d "
+                      "%d %d %d %d "
+                      "%d "
+                      "%d %d %d "
+                      "%6.3f %6.3f %6.3f "
+                      "%d %d %6.3f %6.3f "
+                      "%6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f "
+                      "%6.3f %6.3f %6.3f %6.3f %6.3f "
+                      "%6.6f %6.6f %6.6f %6.6f %6.6f "
+                      "%6.3f %6.3f %6.3f %6.3f %6.3f "
+                      "%6.6f %6.6f %6.6f %6.6f %6.6f "
+                      "%6.3f %6.3f %6.3f "
+                      "%6.3f"
+                      "\n",
+                      m_event->evtID(),
+                      L.layer_id(),
+                      L.is_barrel(),
+                      L.getOriginalHitIndex(hi),
+                      itrack,
+                      m_CandIdx(itrack, 0, 0),
+                      m_Label(itrack, 0, 0),
+                      1.0f / m_Par[iI].At(itrack, 3, 0),
+                      getEta(m_Par[iI].At(itrack, 5, 0)),
+                      m_Par[iI].At(itrack, 4, 0),
+                      m_Chi2(itrack, 0, 0),
+                      m_NFoundHits(itrack, 0, 0),
+                      m_SeedIdx(itrack, 0, 0),
+                      m_SeedLabel(itrack, 0, 0),
+                      m_SeedAlgo(itrack, 0, 0),
+                      thisseedmcid,
+                      mchid,
+                      st_isfindable,
+                      st_prodtype,
+                      st_label,
+                      st_pt,
+                      st_eta,
+                      st_phi,
+                      st_nhits,
+                      st_charge,
+                      st_r,
+                      st_z,
+                      q,
+                      L.hit_q(hi),
+                      ddq,
+                      dq,
+                      phi,
+                      L.hit_phi(hi),
+                      ddphi,
+                      dphi,
+                      tx,
+                      ty,
+                      tr,
+                      tphi,
+                      tz,
+                      tex,
+                      tey,
+                      ter,
+                      tephi,
+                      tez,
+                      hx,
+                      hy,
+                      hr,
+                      hphi,
+                      hz,
+                      hex,
+                      hey,
+                      her,
+                      hephi,
+                      hez,
+                      ht_dxy,
+                      ht_dz,
+                      ht_dphi,
+                      hchi2);
+                }
+              }
+#endif
+
+              if (ddq >= dq)
+                continue;
+              if (ddphi >= dphi)
+                continue;
+
+              dprintf("     SHI %3d %4d %4d %5d  %6.3f %6.3f %6.4f %7.5f   %s\n",
+                      qi,
+                      pi,
+                      pb,
+                      hi,
+                      L.hit_q(hi),
+                      L.hit_phi(hi),
+                      ddq,
+                      ddphi,
+                      (ddq < dq && ddphi < dphi) ? "PASS" : "FAIL");
+
+              // MT: Removing extra check gives full efficiency ...
+              //     and means our error estimations are wrong!
+              // Avi says we should have *minimal* search windows per layer.
+              // Also ... if bins are sufficiently small, we do not need the extra
+              // checks, see above.
+              m_XHitArr.At(itrack, m_XHitSize[itrack]++, 0) = hi_orig;
+            } else {
+              // MT: The following check alone makes more sense with spiral traversal,
+              // we'd be taking in closest hits first.
+
+              // Hmmh -- there used to be some more checks here.
+              // Or, at least, the phi binning was much smaller and no further checks were done.
+              assert(false && "this code has not been used in a while -- see comments in code");
+
+              if (m_XHitSize[itrack] < MPlexHitIdxMax) {
+                m_XHitArr.At(itrack, m_XHitSize[itrack]++, 0) = hi_orig;
+              }
+            }
+          }  //hi
+        }    //pi
+      }      //qi
+    }        //itrack
+  }
+
+  //==============================================================================
+  // addBestHit - Best Hit Track Finding
+  //==============================================================================
+
+  void MkFinder::addBestHit(const LayerOfHits &layer_of_hits, const int N_proc, const FindingFoos &fnd_foos) {
+    // For each of the first N_proc track slots: chi2-test every hit index collected in m_XHitArr against the
+    // propagated state (iP), add the lowest-chi2 hit below the dynamic cut -- or a negative fake-hit index if
+    // none qualifies (WSR_Outside slots get no entry) -- then run the parameter update from iP into iC.
+    // debug = true;
+
+    MatriplexHitPacker mhp(*layer_of_hits.hitArray());
+
+    float minChi2[NN];
+    int bestHitIdx[NN];
+    int maxHits = 0;
+
+    // Longest hit list among the processed slots; every slot starts with "no hit" and its own chi2 cut.
+    for (int i = 0; i < NN; ++i) {
+      if (i < N_proc && m_XHitSize[i] > 0) {
+        maxHits = std::max(maxHits, m_XHitSize[i]);
+      }
+
+      bestHitIdx[i] = -1;
+      minChi2[i] = getHitSelDynamicChi2Cut(i, iP);
+    }
+
+    // Stand-in measurement for a slot without a real hit: setDiagonal3x3(.., 666) on the hit error and
+    // the first three propagated parameters copied in as the hit position.
+    const auto setFakeMeasurement = [&](int itrack) {
+      m_msErr.setDiagonal3x3(itrack, 666);
+      for (int c = 0; c < 3; ++c) {
+        m_msPar(itrack, c, 0) = m_Par[iP](itrack, c, 0);
+      }
+    };
+
+    for (int ihit = 0; ihit < maxHits; ++ihit) {
+      //fixme what if size is zero???
+
+      mhp.reset();
+
+#pragma omp simd
+      for (int itrack = 0; itrack < N_proc; ++itrack) {
+        if (ihit < m_XHitSize[itrack]) {
+          mhp.addInputAt(itrack, layer_of_hits.refHit(m_XHitArr.At(itrack, ihit, 0)));
+        }
+      }
+
+      mhp.pack(m_msErr, m_msPar);
+
+      // chi2 of the propagated track state vs. the hit just packed for each slot
+      MPlexQF outChi2;
+      MPlexLV tmpPropPar;
+      (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
+                                     m_Par[iP],
+                                     m_Chg,
+                                     m_msErr,
+                                     m_msPar,
+                                     outChi2,
+                                     tmpPropPar,
+                                     N_proc,
+                                     m_prop_config->finding_intra_layer_pflags,
+                                     m_prop_config->finding_requires_propagation_to_hit_pos);
+
+      // remember this hit wherever it beats the slot's current minimum (initially the cut value)
+#pragma omp simd
+      for (int itrack = 0; itrack < N_proc; ++itrack) {
+        if (ihit < m_XHitSize[itrack]) {
+          const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
+          dprint("chi2=" << chi2 << " minChi2[itrack]=" << minChi2[itrack]);
+          if (chi2 < minChi2[itrack]) {
+            minChi2[itrack] = chi2;
+            bestHitIdx[itrack] = m_XHitArr.At(itrack, ihit, 0);
+          }
+        }
+      }
+    }  // end loop over hits
+
+    //#pragma omp simd
+    for (int itrack = 0; itrack < N_proc; ++itrack) {
+      if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
+        // Why am I doing this?
+        setFakeMeasurement(itrack);
+
+        // XXXX If not in gap, should get back the old track params. But they are gone ...
+        // Would actually have to do it right after selectHitIndices where updated params are still ok.
+        // Here they got screwed during hit matching.
+        // So, I'd store them there (into propagated params) and retrieve them here.
+        // Or we decide not to care ...
+
+        continue;
+      }
+
+      //fixme decide what to do in case no hit found
+      if (bestHitIdx[itrack] < 0) {
+        // -3 when the window result is WSR_Edge (YYYYYY Config::store_missed_layers),
+        // -2 when num_all_minus_one_hits() is non-zero, -1 otherwise.
+        const bool onEdge = m_XWsrResult[itrack].m_wsr == WSR_Edge;
+        const int fake_hit_idx = onEdge ? -3 : (num_all_minus_one_hits(itrack) ? -2 : -1);
+
+        dprint("ADD FAKE HIT FOR TRACK #" << itrack << " withinBounds=" << (fake_hit_idx != -3)
+                                          << " r=" << std::hypot(m_Par[iP](itrack, 0, 0), m_Par[iP](itrack, 1, 0)));
+
+        setFakeMeasurement(itrack);
+        // Don't update chi2
+
+        add_hit(itrack, fake_hit_idx, layer_of_hits.layer_id());
+        continue;
+      }
+
+      const Hit &hit = layer_of_hits.refHit(bestHitIdx[itrack]);
+      const float chi2 = minChi2[itrack];
+
+      dprint("ADD BEST HIT FOR TRACK #"
+             << itrack << std::endl
+             << "prop x=" << m_Par[iP].constAt(itrack, 0, 0) << " y=" << m_Par[iP].constAt(itrack, 1, 0) << std::endl
+             << "copy in hit #" << bestHitIdx[itrack] << " x=" << hit.position()[0] << " y=" << hit.position()[1]);
+
+      m_msErr.copyIn(itrack, hit.errArray());
+      m_msPar.copyIn(itrack, hit.posArray());
+      m_Chi2(itrack, 0, 0) += chi2;
+
+      add_hit(itrack, bestHitIdx[itrack], layer_of_hits.layer_id());
+    }
+
+    // Update the track parameters with this hit. (Note that some calculations
+    // are already done when computing chi2. Not sure it's worth caching them?)
+    dprint("update parameters");
+    (*fnd_foos.m_update_param_foo)(m_Err[iP],
+                                   m_Par[iP],
+                                   m_Chg,
+                                   m_msErr,
+                                   m_msPar,
+                                   m_Err[iC],
+                                   m_Par[iC],
+                                   N_proc,
+                                   m_prop_config->finding_intra_layer_pflags,
+                                   m_prop_config->finding_requires_propagation_to_hit_pos);
+
+    dprint("m_Par[iP](0,0,0)=" << m_Par[iP](0, 0, 0) << " m_Par[iC](0,0,0)=" << m_Par[iC](0, 0, 0));
+  }
+
+  //=======================================================
+  // isStripQCompatible : true if the residual along the strip is smaller than the strip
+  //   half-length plus max(3 * propagated sigma, 0.5); only z is compared in the barrel.
+  //=======================================================
+  bool isStripQCompatible(
+      int itrack, bool isBarrel, const MPlexLS &pErr, const MPlexLV &pPar, const MPlexHS &msErr, const MPlexHV &msPar) {
+    // Long strip side: hit sigma = L/sqrt(12), hence half-length = sqrt(3 * variance).
+    if (isBarrel) {  // compare along z only
+      const float dz = std::abs(msPar.constAt(itrack, 2, 0) - pPar.constAt(itrack, 2, 0));
+      const float halfLen = sqrt(msErr.constAt(itrack, 2, 2) * 3.f);
+      const float sigmaZ = sqrt(pErr.constAt(itrack, 2, 2));
+      dprint("qCompat " << halfLen << " + " << 3.f * sigmaZ << " vs " << dz);
+      return halfLen + std::max(3.f * sigmaZ, 0.5f) > dz;
+    }
+    // Not barrel: project on xy with the normalized hit error, assuming the strip Length >> Width.
+    const float dx = msPar.constAt(itrack, 0, 0) - pPar.constAt(itrack, 0, 0);
+    const float dy = msPar.constAt(itrack, 1, 0) - pPar.constAt(itrack, 1, 0);
+    const float hitT2 = msErr.constAt(itrack, 0, 0) + msErr.constAt(itrack, 1, 1);
+    const float hitT2inv = 1.f / hitT2;
+    const float pXX = msErr.constAt(itrack, 0, 0) * hitT2inv;
+    const float pXY = msErr.constAt(itrack, 0, 1) * hitT2inv;
+    const float pYY = msErr.constAt(itrack, 1, 1) * hitT2inv;
+    // abs() protects against a non-positive-definite propagated error matrix
+    const float sigmaQ = sqrt(std::abs(pErr.constAt(itrack, 0, 0) * pXX + 2.f * pErr.constAt(itrack, 0, 1) * pXY +
+                                       pErr.constAt(itrack, 1, 1) * pYY));
+    const float dProj = sqrt(dx * pXX * dx + 2.f * dy * pXY * dx + dy * pYY * dy);
+    dprint("qCompat " << sqrt(hitT2 * 3.f) << " + " << 3.f * sigmaQ << " vs " << dProj);
+    return sqrt(hitT2 * 3.f) + std::max(3.f * sigmaQ, 0.5f) > dProj;
+  }
+
+  //=======================================================
+  // passStripChargePCMfromTrack : apply the slope correction to charge per cm and cut using hit err matrix
+  //         raw:       pcm   = charge/L_normal
+  //         corrected: qCorr = charge/L_path = charge/(L_normal*p/p_zLocal) = pcm*p_zLocal/p
+  //         Returns true when qCorr > pcmMin, or when pcm >= Hit::maxChargePerCM() (overflow case).
+  //=======================================================
+  bool passStripChargePCMfromTrack(
+      int itrack, bool isBarrel, unsigned int pcm, unsigned int pcmMin, const MPlexLV &pPar, const MPlexHS &msErr) {
+    // Overflow case: skip the rescaling and accept.
+    if (pcm >= Hit::maxChargePerCM())
+      return true;
+
+    float qSF = 1.f;  // scale factor p_zLocal/p; stays 1 when the barrel xy check below fails
+    if (!isBarrel) {
+      // project on z: p_zLocal/p = p_z/p = cosT
+      qSF = std::abs(cos(pPar.constAt(itrack, 5, 0)));
+    } else {
+      // project in x,y, assuming zero-error direction is in this plane
+      const float hitT2 = msErr.constAt(itrack, 0, 0) + msErr.constAt(itrack, 1, 1);
+      const float hitT2inv = 1.f / hitT2;
+      const float pXX = msErr.constAt(itrack, 0, 0) * hitT2inv;
+      const float pXY = msErr.constAt(itrack, 0, 1) * hitT2inv;
+      const float pYY = msErr.constAt(itrack, 1, 1) * hitT2inv;
+      const float cosP = cos(pPar.constAt(itrack, 4, 0));
+      const float sinP = sin(pPar.constAt(itrack, 4, 0));
+      const float sinT = std::abs(sin(pPar.constAt(itrack, 5, 0)));
+      // NOTE(review): the stated formula qSF = sinT*sqrt[(cosP,sinP)*(1-proj)*(cosP,sinP)] expands to
+      // 1 - cosP^2*pXX - sinP^2*pYY - 2*cosP*sinP*pXY, but the diagonal terms enter with '+' below -- confirm.
+      if (std::abs(pXX * pYY - pXY * pXY) < 0.1f)  // normalized determinant small: zero-direction is close
+        qSF = sinT * std::sqrt(std::abs(1.f + cosP * cosP * pXX + sinP * sinP * pYY - 2.f * cosP * sinP * pXY));
+    }
+
+    const float qCorr = pcm * qSF;
+    dprint("pcm " << pcm << " * " << qSF << " = " << qCorr << " vs " << pcmMin);
+    return qCorr > pcmMin;
+  }
+
+  //==============================================================================
+  // findCandidates - Standard Track Finding
+  //==============================================================================
+
+  void MkFinder::findCandidates(const LayerOfHits &layer_of_hits,
+                                std::vector<std::vector<TrackCand>> &tmp_candidates,
+                                const int offset,
+                                const int N_proc,
+                                const FindingFoos &fnd_foos) {
+    // bool debug = true;
+
+    MatriplexHitPacker mhp(*layer_of_hits.hitArray());
+
+    int maxSize = 0;
+
+    // Determine maximum number of hits for tracks in the collection.
+    for (int it = 0; it < NN; ++it) {
+      if (it < N_proc) {
+        if (m_XHitSize[it] > 0) {
+          maxSize = std::max(maxSize, m_XHitSize[it]);
+        }
+      }
+    }
+
+    dprintf("FindCandidates max hits to process=%d\n", maxSize);
+    int nHitsAdded[NN]{};
+
+    for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
+      mhp.reset();
+
+      int charge_pcm[NN];
+
+#pragma omp simd
+      for (int itrack = 0; itrack < N_proc; ++itrack) {
+        if (hit_cnt < m_XHitSize[itrack]) {
+          const auto &hit = layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0));
+          mhp.addInputAt(itrack, hit);
+          charge_pcm[itrack] = hit.chargePerCM();
+        }
+      }
+
+      mhp.pack(m_msErr, m_msPar);
+
+      //now compute the chi2 of track state vs hit
+      MPlexQF outChi2;
+      MPlexLV propPar;
+      (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
+                                     m_Par[iP],
+                                     m_Chg,
+                                     m_msErr,
+                                     m_msPar,
+                                     outChi2,
+                                     propPar,
+                                     N_proc,
+                                     m_prop_config->finding_intra_layer_pflags,
+                                     m_prop_config->finding_requires_propagation_to_hit_pos);
+
+      // Now update the track parameters with this hit (note that some
+      // calculations are already done when computing chi2, to be optimized).
+      // 1. This is not needed for candidates the hit is not added to, but it's
+      // vectorized so doing it serially below should take the same time.
+      // 2. Still it's a waste of time in case the hit is not added to any of the
+      // candidates, so check beforehand that at least one cand needs update.
+      bool oneCandPassCut = false;
+      for (int itrack = 0; itrack < N_proc; ++itrack) {
+        float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
+
+        if (hit_cnt < m_XHitSize[itrack]) {
+          const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
+          dprint("chi2=" << chi2);
+          if (chi2 < max_c2) {
+            bool isCompatible = true;
+            if (!layer_of_hits.is_pix_lyr()) {
+              //check module compatibility via long strip side = L/sqrt(12)
+              isCompatible =
+                  isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
+
+              //rescale strip charge to track parameters and reapply the cut
+              isCompatible &= passStripChargePCMfromTrack(
+                  itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
+            }
+            if (isCompatible) {
+              oneCandPassCut = true;
+              break;
+            }
+          }
+        }
+      }
+
+      if (oneCandPassCut) {
+        MPlexQI tmpChg = m_Chg;
+        (*fnd_foos.m_update_param_foo)(m_Err[iP],
+                                       m_Par[iP],
+                                       tmpChg,
+                                       m_msErr,
+                                       m_msPar,
+                                       m_Err[iC],
+                                       m_Par[iC],
+                                       N_proc,
+                                       m_prop_config->finding_intra_layer_pflags,
+                                       m_prop_config->finding_requires_propagation_to_hit_pos);
+
+        dprint("update parameters" << std::endl
+                                   << "propagated track parameters x=" << m_Par[iP].constAt(0, 0, 0)
+                                   << " y=" << m_Par[iP].constAt(0, 1, 0) << std::endl
+                                   << "               hit position x=" << m_msPar.constAt(0, 0, 0)
+                                   << " y=" << m_msPar.constAt(0, 1, 0) << std::endl
+                                   << "   updated track parameters x=" << m_Par[iC].constAt(0, 0, 0)
+                                   << " y=" << m_Par[iC].constAt(0, 1, 0));
+
+        //create candidate with hit in case chi2 < m_iteration_params->chi2Cut_min
+        //fixme: please vectorize me... (not sure it's possible in this case)
+        for (int itrack = 0; itrack < N_proc; ++itrack) {
+          float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
+
+          if (hit_cnt < m_XHitSize[itrack]) {
+            const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
+            dprint("chi2=" << chi2);
+            if (chi2 < max_c2) {
+              bool isCompatible = true;
+              if (!layer_of_hits.is_pix_lyr()) {
+                //check module compatibility via long strip side = L/sqrt(12)
+                isCompatible =
+                    isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
+
+                //rescale strip charge to track parameters and reapply the cut
+                isCompatible &= passStripChargePCMfromTrack(
+                    itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
+              }
+              if (isCompatible) {
+                bool hitExists = false;
+                int maxHits = m_NFoundHits(itrack, 0, 0);
+                if (layer_of_hits.is_pix_lyr()) {
+                  for (int i = 0; i <= maxHits; ++i) {
+                    if (i > 2)
+                      break;
+                    if (m_HoTArrs[itrack][i].layer == layer_of_hits.layer_id()) {
+                      hitExists = true;
+                      break;
+                    }
+                  }
+                }
+                if (hitExists)
+                  continue;
+
+                nHitsAdded[itrack]++;
+                dprint("chi2 cut passed, creating new candidate");
+                // Create a new candidate and fill the tmp_candidates output vector.
+                // QQQ only instantiate if it will pass, be better than N_best
+
+                const int hit_idx = m_XHitArr.At(itrack, hit_cnt, 0);
+
+                TrackCand newcand;
+                copy_out(newcand, itrack, iC);
+                newcand.setCharge(tmpChg(itrack, 0, 0));
+                newcand.addHitIdx(hit_idx, layer_of_hits.layer_id(), chi2);
+                newcand.setScore(getScoreCand(newcand, true /*penalizeTailMissHits*/, true /*inFindCandidates*/));
+                newcand.setOriginIndex(m_CandIdx(itrack, 0, 0));
+
+                if (chi2 < m_iteration_params->chi2CutOverlap) {
+                  CombCandidate &ccand = *newcand.combCandidate();
+                  ccand[m_CandIdx(itrack, 0, 0)].considerHitForOverlap(
+                      hit_idx, layer_of_hits.refHit(hit_idx).detIDinLayer(), chi2);
+                }
+
+                dprint("updated track parameters x=" << newcand.parameters()[0] << " y=" << newcand.parameters()[1]
+                                                     << " z=" << newcand.parameters()[2]
+                                                     << " pt=" << 1. / newcand.parameters()[3]);
+
+                tmp_candidates[m_SeedIdx(itrack, 0, 0) - offset].emplace_back(newcand);
+              }
+            }
+          }
+        }
+      }  //end if (oneCandPassCut)
+
+    }  //end loop over hits
+
+    //now add invalid hit
+    //fixme: please vectorize me...
+    for (int itrack = 0; itrack < N_proc; ++itrack) {
+      // Cands that miss the layer are stashed away in MkBuilder(), before propagation,
+      // and then merged back afterwards.
+      if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
+        continue;
+      }
+
+      int fake_hit_idx = ((num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand) &&
+                          (m_NTailMinusOneHits(itrack, 0, 0) < m_iteration_params->maxConsecHoles))
+                             ? -1
+                             : -2;
+
+      if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
+        // YYYYYY m_iteration_params->store_missed_layers
+        fake_hit_idx = -3;
+      }
+      //now add fake hit for tracks that passed through inactive modules
+      else if (m_XWsrResult[itrack].m_in_gap == true && nHitsAdded[itrack] == 0) {
+        fake_hit_idx = -7;
+      }
+
+      dprint("ADD FAKE HIT FOR TRACK #" << itrack << " withinBounds=" << (fake_hit_idx != -3)
+                                        << " r=" << std::hypot(m_Par[iP](itrack, 0, 0), m_Par[iP](itrack, 1, 0)));
+
+      // QQQ as above, only create and add if score better
+      TrackCand newcand;
+      copy_out(newcand, itrack, iP);
+      newcand.addHitIdx(fake_hit_idx, layer_of_hits.layer_id(), 0.);
+      newcand.setScore(getScoreCand(newcand, true /*penalizeTailMissHits*/, true /*inFindCandidates*/));
+      // Only relevant when we actually add a hit
+      // newcand.setOriginIndex(m_CandIdx(itrack, 0, 0));
+      tmp_candidates[m_SeedIdx(itrack, 0, 0) - offset].emplace_back(newcand);
+    }
+  }
+
+  //==============================================================================
+  // FindCandidatesCloneEngine - Clone Engine Track Finding
+  //==============================================================================
+
+  void MkFinder::findCandidatesCloneEngine(const LayerOfHits &layer_of_hits,
+                                           CandCloner &cloner,
+                                           const int offset,
+                                           const int N_proc,
+                                           const FindingFoos &fnd_foos) {
+    // bool debug = true;
+    // Scores each slot's pre-selected hits and registers passing ones (plus one fake hit) with the cloner.
+    MatriplexHitPacker mhp(*layer_of_hits.hitArray());
+
+    int maxSize = 0;
+
+    // Determine maximum number of hits for tracks in the collection (simd max-reduction over maxSize).
+#pragma omp simd reduction(max : maxSize)
+    for (int it = 0; it < NN; ++it) {
+      if (it < N_proc) {
+        if (m_XHitSize[it] > 0) {
+          maxSize = std::max(maxSize, m_XHitSize[it]);
+        }
+      }
+    }
+
+    dprintf("FindCandidatesCloneEngine max hits to process=%d\n", maxSize);
+    int nHitsAdded[NN]{};  // hits accepted per slot; consulted for the in-gap fake hit below
+
+    for (int hit_cnt = 0; hit_cnt < maxSize; ++hit_cnt) {
+      mhp.reset();
+
+      int charge_pcm[NN];  // filled only for slots that have a hit at hit_cnt
+
+#pragma omp simd
+      for (int itrack = 0; itrack < N_proc; ++itrack) {
+        if (hit_cnt < m_XHitSize[itrack]) {
+          const auto &hit = layer_of_hits.refHit(m_XHitArr.At(itrack, hit_cnt, 0));
+          mhp.addInputAt(itrack, hit);
+          charge_pcm[itrack] = hit.chargePerCM();
+        }
+      }
+
+      mhp.pack(m_msErr, m_msPar);
+
+      //now compute the chi2 of track state vs hit
+      MPlexQF outChi2;
+      MPlexLV propPar;
+      (*fnd_foos.m_compute_chi2_foo)(m_Err[iP],
+                                     m_Par[iP],
+                                     m_Chg,
+                                     m_msErr,
+                                     m_msPar,
+                                     outChi2,
+                                     propPar,
+                                     N_proc,
+                                     m_prop_config->finding_intra_layer_pflags,
+                                     m_prop_config->finding_requires_propagation_to_hit_pos);
+
+#pragma omp simd  // DOES NOT VECTORIZE AS IT IS NOW
+      for (int itrack = 0; itrack < N_proc; ++itrack) {
+        // make sure the hit was in the compatibility window for the candidate
+
+        float max_c2 = getHitSelDynamicChi2Cut(itrack, iP);
+
+        if (hit_cnt < m_XHitSize[itrack]) {
+          // XXX-NUM-ERR assert(chi2 >= 0);
+          const float chi2 = std::abs(outChi2[itrack]);  //fixme negative chi2 sometimes...
+
+          dprint("chi2=" << chi2 << " for trkIdx=" << itrack << " hitIdx=" << m_XHitArr.At(itrack, hit_cnt, 0));
+          if (chi2 < max_c2) {
+            bool isCompatible = true;
+            if (!layer_of_hits.is_pix_lyr()) {
+              //check module compatibility via long strip side = L/sqrt(12)
+              isCompatible =
+                  isStripQCompatible(itrack, layer_of_hits.is_barrel(), m_Err[iP], propPar, m_msErr, m_msPar);
+
+              //rescale strip charge to track parameters and reapply the cut
+              isCompatible &= passStripChargePCMfromTrack(
+                  itrack, layer_of_hits.is_barrel(), charge_pcm[itrack], Hit::minChargePerCM(), propPar, m_msErr);
+            }
+
+            if (isCompatible) {
+              CombCandidate &ccand = cloner.combCandWithOriginalIndex(m_SeedIdx(itrack, 0, 0));
+              bool hitExists = false;
+              int maxHits = m_NFoundHits(itrack, 0, 0);
+              if (layer_of_hits.is_pix_lyr()) {  // skip the hit if one of ccand.hot(0..2) is already on this layer
+                for (int i = 0; i <= maxHits; ++i) {
+                  if (i > 2)
+                    break;
+                  if (ccand.hot(i).layer == layer_of_hits.layer_id()) {
+                    hitExists = true;
+                    break;
+                  }
+                }
+              }
+              if (hitExists)
+                continue;
+
+              nHitsAdded[itrack]++;
+              const int hit_idx = m_XHitArr.At(itrack, hit_cnt, 0);
+
+              // Register hit for overlap consideration, here we apply chi2 cut
+              if (chi2 < m_iteration_params->chi2CutOverlap) {
+                ccand[m_CandIdx(itrack, 0, 0)].considerHitForOverlap(
+                    hit_idx, layer_of_hits.refHit(hit_idx).detIDinLayer(), chi2);
+              }
+
+              IdxChi2List tmpList;
+              tmpList.trkIdx = m_CandIdx(itrack, 0, 0);
+              tmpList.hitIdx = hit_idx;
+              tmpList.module = layer_of_hits.refHit(hit_idx).detIDinLayer();
+              tmpList.nhits = m_NFoundHits(itrack, 0, 0) + 1;
+              tmpList.ntailholes = 0;
+              tmpList.noverlaps = m_NOverlapHits(itrack, 0, 0);
+              tmpList.nholes = num_all_minus_one_hits(itrack);
+              tmpList.pt = std::abs(1.0f / m_Par[iP].At(itrack, 3, 0));
+              tmpList.chi2 = m_Chi2(itrack, 0, 0) + chi2;
+              tmpList.chi2_hit = chi2;
+              tmpList.score = getScoreStruct(tmpList);
+              cloner.add_cand(m_SeedIdx(itrack, 0, 0) - offset, tmpList);
+
+              dprint("  adding hit with hit_cnt=" << hit_cnt << " for trkIdx=" << tmpList.trkIdx
+                                                  << " orig Seed=" << m_Label(itrack, 0, 0));
+            }
+          }
+        }
+      }
+
+    }  //end loop over hits
+
+    //now add invalid hit: every slot not flagged WSR_Outside also gets an entry without a real hit
+    for (int itrack = 0; itrack < N_proc; ++itrack) {
+      dprint("num_all_minus_one_hits(" << itrack << ")=" << num_all_minus_one_hits(itrack));
+
+      // Cands that miss the layer are stashed away in MkBuilder(), before propagation,
+      // and then merged back afterwards.
+      if (m_XWsrResult[itrack].m_wsr == WSR_Outside) {
+        continue;
+      }
+      // Fake hit index: -1 while both hole limits still allow a miss, -2 otherwise; may be overridden below.
+      // int fake_hit_idx = num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand ? -1 : -2;
+      int fake_hit_idx = ((num_all_minus_one_hits(itrack) < m_iteration_params->maxHolesPerCand) &&
+                          (m_NTailMinusOneHits(itrack, 0, 0) < m_iteration_params->maxConsecHoles))
+                             ? -1
+                             : -2;
+
+      if (m_XWsrResult[itrack].m_wsr == WSR_Edge) {
+        fake_hit_idx = -3;
+      }
+      //now add fake hit for tracks that passed through inactive modules
+      else if (m_XWsrResult[itrack].m_in_gap == true && nHitsAdded[itrack] == 0) {
+        fake_hit_idx = -7;
+      }
+
+      IdxChi2List tmpList;
+      tmpList.trkIdx = m_CandIdx(itrack, 0, 0);
+      tmpList.hitIdx = fake_hit_idx;
+      tmpList.module = -1;
+      tmpList.nhits = m_NFoundHits(itrack, 0, 0);
+      tmpList.ntailholes =
+          (fake_hit_idx == -1 ? m_NTailMinusOneHits(itrack, 0, 0) + 1 : m_NTailMinusOneHits(itrack, 0, 0));
+      tmpList.noverlaps = m_NOverlapHits(itrack, 0, 0);
+      tmpList.nholes = num_inside_minus_one_hits(itrack);
+      tmpList.pt = std::abs(1.0f / m_Par[iP].At(itrack, 3, 0));
+      tmpList.chi2 = m_Chi2(itrack, 0, 0);
+      tmpList.chi2_hit = 0;
+      tmpList.score = getScoreStruct(tmpList);
+      cloner.add_cand(m_SeedIdx(itrack, 0, 0) - offset, tmpList);
+      dprint("adding invalid hit " << fake_hit_idx);
+    }
+  }
+
+  //==============================================================================
+  // UpdateWithLastHit
+  //==============================================================================
+
+  void MkFinder::updateWithLastHit(const LayerOfHits &layer_of_hits, int N_proc, const FindingFoos &fnd_foos) {
+    // Stage the measurement (error + position) of each slot's last hit into m_msErr / m_msPar.
+    int slot = 0;
+    while (slot < N_proc) {
+      const int last_idx = m_LastHoT[slot].index;
+      const Hit &last_hit = layer_of_hits.refHit(last_idx);
+      m_msErr.copyIn(slot, last_hit.errArray());
+      m_msPar.copyIn(slot, last_hit.posArray());
+      ++slot;
+    }
+
+    // Propagation to the hit uses the inter-layer flags here; the intra / inter choice is
+    // explained in a comment in MkBuilder::find_tracks_in_layer().
+    (*fnd_foos.m_update_param_foo)(
+        // propagated state and charge
+        m_Err[iP], m_Par[iP], m_Chg,
+        // staged measurement
+        m_msErr, m_msPar,
+        // slots receiving the updated state
+        m_Err[iC], m_Par[iC],
+        N_proc,
+        m_prop_config->finding_inter_layer_pflags, m_prop_config->finding_requires_propagation_to_hit_pos);
+  }
+
+  //==============================================================================
+  // CopyOutParErr
+  //==============================================================================
+
+  void MkFinder::copyOutParErr(std::vector<CombCandidate> &seed_cand_vec, int N_proc, bool outputProp) const {
+    // Source slot: propagated (iP) when outputProp is set, otherwise updated (iC).
+    int iO = iC;
+    if (outputProp)
+      iO = iP;
+    for (int slot = 0; slot < N_proc; ++slot) {
+      CombCandidate &ccand = seed_cand_vec[m_SeedIdx(slot, 0, 0)];
+      TrackCand &cand = ccand[m_CandIdx(slot, 0, 0)];
+      m_Err[iO].copyOut(slot, cand.errors_nc().Array());
+      m_Par[iO].copyOut(slot, cand.parameters_nc().Array());
+      cand.setCharge(m_Chg(slot, 0, 0));
+      dprint((outputProp ? "propagated" : "updated")
+             << " track parameters x=" << cand.parameters()[0] << " y=" << cand.parameters()[1]
+             << " z=" << cand.parameters()[2] << " pt=" << 1. / cand.parameters()[3] << " posEta=" << cand.posEta());
+    }
+  }
+
+  //==============================================================================
+  // Backward Fit hack
+  //==============================================================================
+
+  void MkFinder::bkFitInputTracks(TrackVec &cands, int beg, int end) {
+    // Loads tracks [beg, end) into consecutive slots. Hits are accessed straight through each
+    // Track's HitOnTrack array; m_CurHit is the per-slot cursor, starting at the last hit.
+    MatriplexTrackPacker packer(cands[beg]);
+
+    const int n_slots = end - beg;
+    for (int slot = 0; slot < n_slots; ++slot) {
+      const Track &track = cands[beg + slot];
+
+      m_HoTArr[slot] = track.getHitsOnTrackArray();
+      m_CurHit[slot] = track.nTotalHits() - 1;
+      m_Chg(slot, 0, 0) = track.charge();
+
+      packer.addInput(track);
+    }
+
+    // Pack the track states into the iC slot and scale the errors with 100.0f.
+    packer.pack(m_Err[iC], m_Par[iC]);
+    m_Err[iC].scale(100.0f);
+
+    // The accumulated chi2 starts from zero.
+    m_Chi2.setVal(0);
+  }
+
+  void MkFinder::bkFitInputTracks(EventOfCombCandidates &eocss, int beg, int end) {
+    // Loads candidate 0 of every entry in [beg, end) into consecutive slots.
+    // The hits could as well be taken from HotArrays of the tracks directly, with a local cursor
+    // array to the last hit.
+    //
+    // XXXX - shall we assume only TrackCand-zero is needed and that we can freely
+    // bork the HoTNode array?
+    MatriplexTrackPacker packer(eocss[beg][0]);
+
+    const int n_slots = end - beg;
+    for (int slot = 0; slot < n_slots; ++slot) {
+      const int idx = beg + slot;
+      const TrackCand &cand = eocss[idx][0];
+
+      m_HoTNodeArr[slot] = cand.combCandidate()->hotsData();
+      m_CurNode[slot] = cand.lastCcIndex();
+      m_Chg(slot, 0, 0) = cand.charge();
+      // XXXX Need TrackCand* to update num-hits. Unless I collect info elsewhere
+      // and fix it in BkFitOutputTracks.
+      m_TrkCand[slot] = &eocss[idx][0];
+
+      packer.addInput(cand);
+    }
+
+    // Pack the states into the iC slot and scale the errors with 100.0f.
+    packer.pack(m_Err[iC], m_Par[iC]);
+    m_Err[iC].scale(100.0f);
+
+    m_Chi2.setVal(0);
+  }
+
+  //------------------------------------------------------------------------------
+
+  void MkFinder::bkFitOutputTracks(TrackVec &cands, int beg, int end, bool outputProp) {
+    // Writes back only parameters, errors and chi2 (plus the score) of tracks [beg, end);
+    // everything else in the tracks is left as it is.
+    int iO = iC;
+    if (outputProp)
+      iO = iP;
+    for (int slot = 0, idx = beg; idx < end; ++slot, ++idx) {
+      Track &out = cands[idx];
+      m_Err[iO].copyOut(slot, out.errors_nc().Array());
+      m_Par[iO].copyOut(slot, out.parameters_nc().Array());
+
+      out.setChi2(m_Chi2(slot, 0, 0));
+      // The score is recomputed only when the stored chi2 is finite.
+      if (!isFinite(out.chi2()))
+        continue;
+      out.setScore(getScoreCand(out));
+    }
+  }
+
+  void MkFinder::bkFitOutputTracks(EventOfCombCandidates &eocss, int beg, int end, bool outputProp) {
+    // Writes back only parameters, errors and chi2 (plus the score) of candidate 0 of every
+    // entry in [beg, end); everything else is left as it is.
+    // XXXX - where will rejected hits get removed?
+
+    int iO = iC;
+    if (outputProp)
+      iO = iP;
+
+    for (int slot = 0, idx = beg; idx < end; ++slot, ++idx) {
+      TrackCand &out = eocss[idx][0];
+      m_Err[iO].copyOut(slot, out.errors_nc().Array());
+      m_Par[iO].copyOut(slot, out.parameters_nc().Array());
+      out.setChi2(m_Chi2(slot, 0, 0));
+      // The score is recomputed only when the stored chi2 is finite.
+      if (!isFinite(out.chi2()))
+        continue;
+      out.setScore(getScoreCand(out));
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+#if defined(DEBUG_BACKWARD_FIT) || defined(DEBUG_BACKWARD_FIT_BH)
+  namespace {
+    float e2s(float x) { return 1e4 * std::sqrt(x); }  // sqrt(x) scaled by 1e4; applied to error-matrix diagonals in the debug printfs
+  }  // namespace
+#endif
+
+  void MkFinder::bkFitFitTracksBH(const EventOfHits &eventofhits,
+                                  const SteeringParams &st_par,
+                                  const int N_proc,
+                                  bool chiDebug) {
+    // Prototyping final backward fit.
+    // This works with track-finding indices, before remapping.
+    //
+    // Layers should be collected during track finding and list all layers that have actual hits.
+    // Then we could avoid checking which layers actually do have hits.
+    // Uses the per-track m_HoTArr / m_CurHit cursors filled by bkFitInputTracks(TrackVec &, ...).
+    MPlexQF tmp_chi2;
+    float tmp_err[6] = {666, 0, 666, 0, 0, 666};  // placeholder hit error for tracks with no hit in a layer
+    float tmp_pos[3];
+    // Visit the layers of the plan in reverse order.
+    for (auto lp_iter = st_par.m_layer_plan.rbegin(); lp_iter != st_par.m_layer_plan.rend(); ++lp_iter) {
+      const int layer = lp_iter->m_layer;
+
+      const LayerOfHits &L = eventofhits[layer];
+      const LayerInfo &LI = *L.layer_info();
+
+      int count = 0;  // tracks with an actual hit in this layer
+      for (int i = 0; i < N_proc; ++i) {
+        while (m_CurHit[i] >= 0 && m_HoTArr[i][m_CurHit[i]].index < 0)
+          --m_CurHit[i];  // step over entries with negative hit index
+
+        if (m_CurHit[i] >= 0 && m_HoTArr[i][m_CurHit[i]].layer == layer) {
+          // Skip the overlap hits -- if they exist.
+          // 1. Overlap hit gets placed *after* the original hit in TrackCand::exportTrack()
+          // which is *before* in the reverse iteration that we are doing here.
+          // 2. Seed-hit merging can result in more than two hits per layer.
+          while (m_CurHit[i] > 0 && m_HoTArr[i][m_CurHit[i] - 1].layer == layer)
+            --m_CurHit[i];
+
+          const Hit &hit = L.refHit(m_HoTArr[i][m_CurHit[i]].index);
+          m_msErr.copyIn(i, hit.errArray());
+          m_msPar.copyIn(i, hit.posArray());
+          ++count;
+          --m_CurHit[i];
+        } else {  // no hit here: use the track's own position together with the placeholder error
+          tmp_pos[0] = m_Par[iC](i, 0, 0);
+          tmp_pos[1] = m_Par[iC](i, 1, 0);
+          tmp_pos[2] = m_Par[iC](i, 2, 0);
+          m_msErr.copyIn(i, tmp_err);
+          m_msPar.copyIn(i, tmp_pos);
+        }
+      }
+
+      if (count == 0)
+        continue;  // no track has a hit in this layer
+
+      // ZZZ Could add missing hits here, only if there are any actual matches.
+
+      if (LI.is_barrel()) {
+        propagateTracksToHitR(m_msPar, N_proc, m_prop_config->backward_fit_pflags);
+
+        kalmanOperation(KFO_Calculate_Chi2 | KFO_Update_Params,
+                        m_Err[iP],
+                        m_Par[iP],
+                        m_msErr,
+                        m_msPar,
+                        m_Err[iC],
+                        m_Par[iC],
+                        tmp_chi2,
+                        N_proc);
+      } else {
+        propagateTracksToHitZ(m_msPar, N_proc, m_prop_config->backward_fit_pflags);
+
+        kalmanOperationEndcap(KFO_Calculate_Chi2 | KFO_Update_Params,
+                              m_Err[iP],
+                              m_Par[iP],
+                              m_msErr,
+                              m_msPar,
+                              m_Err[iC],
+                              m_Par[iC],
+                              tmp_chi2,
+                              N_proc);
+      }
+
+      //fixup invpt sign and charge: a negative m_Par[iC](n, 3) is negated together with the charge
+      for (int n = 0; n < N_proc; ++n) {
+        if (m_Par[iC].At(n, 3, 0) < 0) {
+          m_Chg.At(n, 0, 0) = -m_Chg.At(n, 0, 0);
+          m_Par[iC].At(n, 3, 0) = -m_Par[iC].At(n, 3, 0);
+        }
+      }
+
+#ifdef DEBUG_BACKWARD_FIT_BH
+      // Dump per hit chi2
+      for (int i = 0; i < N_proc; ++i) {
+        float r_h = std::hypot(m_msPar.At(i, 0, 0), m_msPar.At(i, 1, 0));
+        float r_t = std::hypot(m_Par[iC].At(i, 0, 0), m_Par[iC].At(i, 1, 0));
+
+        // if ((std::isnan(tmp_chi2[i]) || std::isnan(r_t)))
+        // if ( ! std::isnan(tmp_chi2[i]) && tmp_chi2[i] > 0) // && tmp_chi2[i] > 30)
+        if (chiDebug) {
+          int ti = iP;
+          printf(
+              "CHIHIT %3d %10g %10g %10g %10g %10g %11.5g %11.5g %11.5g %10g %10g %10g %10g %11.5g %11.5g %11.5g %10g "
+              "%10g %10g %10g %10g %11.5g %11.5g\n",
+              layer,
+              tmp_chi2[i],
+              m_msPar.At(i, 0, 0),
+              m_msPar.At(i, 1, 0),
+              m_msPar.At(i, 2, 0),
+              r_h,  // x_h y_h z_h r_h -- hit pos
+              e2s(m_msErr.At(i, 0, 0)),
+              e2s(m_msErr.At(i, 1, 1)),
+              e2s(m_msErr.At(i, 2, 2)),  // ex_h ey_h ez_h -- hit errors
+              m_Par[ti].At(i, 0, 0),
+              m_Par[ti].At(i, 1, 0),
+              m_Par[ti].At(i, 2, 0),
+              r_t,  // x_t y_t z_t r_t -- track pos
+              e2s(m_Err[ti].At(i, 0, 0)),
+              e2s(m_Err[ti].At(i, 1, 1)),
+              e2s(m_Err[ti].At(i, 2, 2)),  // ex_t ey_t ez_t -- track errors
+              1.0f / m_Par[ti].At(i, 3, 0),
+              m_Par[ti].At(i, 4, 0),
+              m_Par[ti].At(i, 5, 0),                                     // pt, phi, theta
+              std::atan2(m_msPar.At(i, 1, 0), m_msPar.At(i, 0, 0)),      // phi_h
+              std::atan2(m_Par[ti].At(i, 1, 0), m_Par[ti].At(i, 0, 0)),  // phi_t
+              1e4f * std::hypot(m_msPar.At(i, 0, 0) - m_Par[ti].At(i, 0, 0),
+                                m_msPar.At(i, 1, 0) - m_Par[ti].At(i, 1, 0)),  // d_xy
+              1e4f * (m_msPar.At(i, 2, 0) - m_Par[ti].At(i, 2, 0))             // d_z
+              // e2s((m_msErr.At(i,0,0) + m_msErr.At(i,1,1)) / (r_h * r_h)),     // ephi_h
+              // e2s((m_Err[ti].At(i,0,0) + m_Err[ti].At(i,1,1)) / (r_t * r_t))  // ephi_t
+          );
+        }
+      }
+#endif
+
+      // update chi2: add this layer's tmp_chi2 to m_Chi2
+      m_Chi2.add(tmp_chi2);
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  void MkFinder::bkFitFitTracks(const EventOfHits &eventofhits,
+                                const SteeringParams &st_par,
+                                const int N_proc,
+                                bool chiDebug) {
+    // Prototyping final backward fit.
+    // This works with track-finding indices, before remapping.
+    //
+    // Layers should be collected during track finding and list all layers that have actual hits.
+    // Then we could avoid checking which layers actually do have hits.
+    // Uses the m_HoTNodeArr / m_CurNode cursors filled by bkFitInputTracks(EventOfCombCandidates &, ...).
+    MPlexQF tmp_chi2;
+    MPlexQI no_mat_effs;  // 1 marks tracks without a hit in the layer (presumably: no material effects for them)
+    float tmp_err[6] = {666, 0, 666, 0, 0, 666};  // placeholder hit error for tracks with no hit in a layer
+    float tmp_pos[3];
+
+    for (auto lp_iter = st_par.make_iterator(SteeringParams::IT_BkwFit); lp_iter.is_valid(); ++lp_iter) {
+      const int layer = lp_iter.layer();
+
+      const LayerOfHits &L = eventofhits[layer];
+      const LayerInfo &LI = *L.layer_info();
+
+      // XXXX
+#if defined(DEBUG_BACKWARD_FIT)
+      const Hit *last_hit_ptr[NN];
+#endif
+
+      no_mat_effs.setVal(0);
+      int done_count = 0;  // tracks whose node chain is exhausted (m_CurNode < 0)
+      int here_count = 0;  // tracks with an actual hit in this layer
+      for (int i = 0; i < N_proc; ++i) {
+        while (m_CurNode[i] >= 0 && m_HoTNodeArr[i][m_CurNode[i]].m_hot.index < 0) {
+          m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;  // step over nodes with negative hit index
+        }
+
+        if (m_CurNode[i] < 0)
+          ++done_count;
+
+        if (m_CurNode[i] >= 0 && m_HoTNodeArr[i][m_CurNode[i]].m_hot.layer == layer) {
+          // Skip the overlap hits -- if they exist.
+          // 1. Overlap hit gets placed *after* the original hit in TrackCand::exportTrack()
+          // which is *before* in the reverse iteration that we are doing here.
+          // 2. Seed-hit merging can result in more than two hits per layer.
+          // while (m_CurHit[i] > 0 && m_HoTArr[ i ][ m_CurHit[i] - 1 ].layer == layer) --m_CurHit[i];
+          while (m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx >= 0 &&
+                 m_HoTNodeArr[i][m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx].m_hot.layer == layer)
+            m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
+
+          const Hit &hit = L.refHit(m_HoTNodeArr[i][m_CurNode[i]].m_hot.index);
+
+#ifdef DEBUG_BACKWARD_FIT
+          last_hit_ptr[i] = &hit;
+#endif
+          m_msErr.copyIn(i, hit.errArray());
+          m_msPar.copyIn(i, hit.posArray());
+          ++here_count;
+          // Move the cursor to the previous node, ready for the next layer.
+          m_CurNode[i] = m_HoTNodeArr[i][m_CurNode[i]].m_prev_idx;
+        } else {
+#ifdef DEBUG_BACKWARD_FIT
+          last_hit_ptr[i] = nullptr;
+#endif
+          no_mat_effs[i] = 1;
+          tmp_pos[0] = m_Par[iC](i, 0, 0);
+          tmp_pos[1] = m_Par[iC](i, 1, 0);
+          tmp_pos[2] = m_Par[iC](i, 2, 0);
+          m_msErr.copyIn(i, tmp_err);
+          m_msPar.copyIn(i, tmp_pos);
+        }
+      }
+
+      if (done_count == N_proc)
+        break;  // every track has run out of hits
+      if (here_count == 0)
+        continue;  // no track has a hit in this layer
+
+      // ZZZ Could add missing hits here, only if there are any actual matches.
+
+      if (LI.is_barrel()) {
+        propagateTracksToHitR(m_msPar, N_proc, m_prop_config->backward_fit_pflags, &no_mat_effs);
+
+        kalmanOperation(KFO_Calculate_Chi2 | KFO_Update_Params,
+                        m_Err[iP],
+                        m_Par[iP],
+                        m_msErr,
+                        m_msPar,
+                        m_Err[iC],
+                        m_Par[iC],
+                        tmp_chi2,
+                        N_proc);
+      } else {
+        propagateTracksToHitZ(m_msPar, N_proc, m_prop_config->backward_fit_pflags, &no_mat_effs);
+
+        kalmanOperationEndcap(KFO_Calculate_Chi2 | KFO_Update_Params,
+                              m_Err[iP],
+                              m_Par[iP],
+                              m_msErr,
+                              m_msPar,
+                              m_Err[iC],
+                              m_Par[iC],
+                              tmp_chi2,
+                              N_proc);
+      }
+
+      //fixup invpt sign and charge: a negative m_Par[iC](n, 3) is negated together with the charge
+      for (int n = 0; n < N_proc; ++n) {
+        if (m_Par[iC].At(n, 3, 0) < 0) {
+          m_Chg.At(n, 0, 0) = -m_Chg.At(n, 0, 0);
+          m_Par[iC].At(n, 3, 0) = -m_Par[iC].At(n, 3, 0);
+        }
+      }
+
+      for (int i = 0; i < N_proc; ++i) {
+#if defined(DEBUG_BACKWARD_FIT)
+        if (chiDebug && last_hit_ptr[i]) {
+          TrackCand &bb = *m_TrkCand[i];
+          int ti = iP;
+          float chi = tmp_chi2.At(i, 0, 0);
+          float chi_prnt = std::isfinite(chi) ? chi : -9;
+
+#if defined(MKFIT_STANDALONE)
+          const MCHitInfo &mchi = m_event->simHitsInfo_[last_hit_ptr[i]->mcHitID()];
+
+          printf(
+              "BKF_OVERLAP %d %d %d %d %d %d %d "
+              "%f %f %f %f %d %d %d %d "
+              "%f %f %f %f %f\n",
+              m_event->evtID(),
+#else
+          printf(
+              "BKF_OVERLAP %d %d %d %d %d %d "
+              "%f %f %f %f %d %d %d "
+              "%f %f %f %f %f\n",
+#endif
+              bb.label(),
+              (int)bb.prodType(),
+              bb.isFindable(),
+              layer,
+              L.is_stereo(),
+              L.is_barrel(),
+              bb.pT(),
+              bb.posEta(),
+              bb.posPhi(),
+              chi_prnt,
+              std::isnan(chi),
+              std::isfinite(chi),
+              chi > 0,
+#if defined(MKFIT_STANDALONE)
+              mchi.mcTrackID(),
+#endif
+              e2s(m_Err[ti].At(i, 0, 0)),
+              e2s(m_Err[ti].At(i, 1, 1)),
+              e2s(m_Err[ti].At(i, 2, 2)),  // sx_t sy_t sz_t -- track errors
+              1e4f * std::hypot(m_msPar.At(i, 0, 0) - m_Par[ti].At(i, 0, 0),
+                                m_msPar.At(i, 1, 0) - m_Par[ti].At(i, 1, 0)),  // d_xy
+              1e4f * (m_msPar.At(i, 2, 0) - m_Par[ti].At(i, 2, 0))             // d_z
+          );
+        }
+#endif
+      }
+
+      // update chi2: add this layer's tmp_chi2 to m_Chi2
+      m_Chi2.add(tmp_chi2);
+    }
+  }
+
+  //------------------------------------------------------------------------------
+
+  void MkFinder::bkFitPropTracksToPCA(const int N_proc) {
+    propagateTracksToPCAZ(N_proc, m_prop_config->pca_prop_pflags);  // forwards to propagateTracksToPCAZ() with the configured pca_prop_pflags
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/MkFinder.h b/RecoTracker/MkFitCore/src/MkFinder.h
new file mode 100644
index 0000000000000..457305c81eda8
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkFinder.h
@@ -0,0 +1,324 @@
+#ifndef RecoTracker_MkFitCore_src_MkFinder_h
+#define RecoTracker_MkFitCore_src_MkFinder_h
+
+#include "MkBase.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+
+// Define to get printouts about track and hit chi2.
+// See also MkBuilder::backwardFit().
+
+//#define DEBUG_BACKWARD_FIT_BH
+//#define DEBUG_BACKWARD_FIT
+
+namespace mkfit {
+
+  class CandCloner;
+  class CombCandidate;
+  class LayerOfHits;
+  class FindingFoos;
+  class IterationParams;
+  class IterationLayerConfig;
+  class SteeringParams;
+
+#if defined(DUMPHITWINDOW) or defined(DEBUG_BACKWARD_FIT)
+  class Event;
+#endif
+
+  class MkFinder : public MkBase {
+    friend class MkBuilder;
+    // Finder state for up to NN track candidates, one Matriplex slot per candidate.
+  public:
+    static constexpr int MPlexHitIdxMax = 16;  // row count of MPlexHitIdx (see m_XHitArr)
+
+    using MPlexHitIdx = Matriplex::Matriplex<int, MPlexHitIdxMax, 1, NN>;
+    using MPlexQHoT = Matriplex::Matriplex<HitOnTrack, 1, 1, NN>;
+
+    //----------------------------------------------------------------------------
+
+    MkFinder() {}
+    // NOTE(review): setup() presumably stores the m_prop_config / m_iteration_* pointers declared below -- confirm.
+    void setup(const PropagationConfig &pc,
+               const IterationParams &ip,
+               const IterationLayerConfig &ilc,
+               const std::vector<bool> *ihm);
+    void release();
+
+    //----------------------------------------------------------------------------
+
+    void inputTracksAndHitIdx(const std::vector<Track> &tracks, int beg, int end, bool inputProp);
+
+    void inputTracksAndHitIdx(const std::vector<Track> &tracks,
+                              const std::vector<int> &idxs,
+                              int beg,
+                              int end,
+                              bool inputProp,
+                              int mp_offset);
+
+    void inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
+                              const std::vector<std::pair<int, int>> &idxs,
+                              int beg,
+                              int end,
+                              bool inputProp);
+
+    void inputTracksAndHitIdx(const std::vector<CombCandidate> &tracks,
+                              const std::vector<std::pair<int, IdxChi2List>> &idxs,
+                              int beg,
+                              int end,
+                              bool inputProp);
+
+    void outputTracksAndHitIdx(std::vector<Track> &tracks, int beg, int end, bool outputProp) const;
+
+    void outputTracksAndHitIdx(
+        std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool outputProp) const;
+    // Copies a single slot out; outputProp picks the iP parameter set, otherwise iC is used.
+    void outputTrackAndHitIdx(Track &track, int itrack, bool outputProp) const {
+      const int iO = outputProp ? iP : iC;
+      copy_out(track, itrack, iO);
+    }
+    // Copies slots [0, end - beg) out into tracks[idxs[i]], skipping slots flagged in m_Stopped.
+    void outputNonStoppedTracksAndHitIdx(
+        std::vector<Track> &tracks, const std::vector<int> &idxs, int beg, int end, bool outputProp) const {
+      const int iO = outputProp ? iP : iC;
+      for (int i = beg, imp = 0; i < end; ++i, ++imp) {
+        if (!m_Stopped[imp])
+          copy_out(tracks[idxs[i]], imp, iO);
+      }
+    }
+    // Last stored HitOnTrack of a slot; reads entry m_NHits - 1, so the slot must hold at least one hit.
+    HitOnTrack bestHitLastHoT(int itrack) const { return m_HoTArrs[itrack][m_NHits(itrack, 0, 0) - 1]; }
+
+    //----------------------------------------------------------------------------
+
+    void getHitSelDynamicWindows(
+        const float invpt, const float theta, float &min_dq, float &max_dq, float &min_dphi, float &max_dphi);
+
+    float getHitSelDynamicChi2Cut(const int itrk, const int ipar);
+
+    void selectHitIndices(const LayerOfHits &layer_of_hits, const int N_proc);
+
+    void addBestHit(const LayerOfHits &layer_of_hits, const int N_proc, const FindingFoos &fnd_foos);
+
+    //----------------------------------------------------------------------------
+
+    void findCandidates(const LayerOfHits &layer_of_hits,
+                        std::vector<std::vector<TrackCand>> &tmp_candidates,
+                        const int offset,
+                        const int N_proc,
+                        const FindingFoos &fnd_foos);
+
+    //----------------------------------------------------------------------------
+
+    void findCandidatesCloneEngine(const LayerOfHits &layer_of_hits,
+                                   CandCloner &cloner,
+                                   const int offset,
+                                   const int N_proc,
+                                   const FindingFoos &fnd_foos);
+
+    void updateWithLastHit(const LayerOfHits &layer_of_hits, int N_proc, const FindingFoos &fnd_foos);
+
+    void copyOutParErr(std::vector<CombCandidate> &seed_cand_vec, int N_proc, bool outputProp) const;
+
+    //----------------------------------------------------------------------------
+    // Backward fit
+
+    void bkFitInputTracks(TrackVec &cands, int beg, int end);
+    void bkFitOutputTracks(TrackVec &cands, int beg, int end, bool outputProp);
+
+    void bkFitInputTracks(EventOfCombCandidates &eocss, int beg, int end);
+    void bkFitOutputTracks(EventOfCombCandidates &eocss, int beg, int end, bool outputProp);
+
+    void bkFitFitTracksBH(const EventOfHits &eventofhits,
+                          const SteeringParams &st_par,
+                          const int N_proc,
+                          bool chiDebug = false);
+
+    void bkFitFitTracks(const EventOfHits &eventofhits,
+                        const SteeringParams &st_par,
+                        const int N_proc,
+                        bool chiDebug = false);
+
+    void bkFitPropTracksToPCA(const int N_proc);
+
+    //----------------------------------------------------------------------------
+
+  private:
+    void copy_in(const Track &trk, const int mslot, const int tslot) {  // mslot: Matriplex slot, tslot: m_Err/m_Par index
+      m_Err[tslot].copyIn(mslot, trk.errors().Array());
+      m_Par[tslot].copyIn(mslot, trk.parameters().Array());
+
+      m_Chg(mslot, 0, 0) = trk.charge();
+      m_Chi2(mslot, 0, 0) = trk.chi2();
+      m_Label(mslot, 0, 0) = trk.label();
+
+      m_NHits(mslot, 0, 0) = trk.nTotalHits();
+      m_NFoundHits(mslot, 0, 0) = trk.nFoundHits();
+
+      m_NInsideMinusOneHits(mslot, 0, 0) = trk.nInsideMinusOneHits();
+      m_NTailMinusOneHits(mslot, 0, 0) = trk.nTailMinusOneHits();
+
+      std::copy(trk.beginHitsOnTrack(), trk.endHitsOnTrack(), m_HoTArrs[mslot]);
+    }
+    // Writes slot mslot of parameter set tslot back into trk and resizes its hit list to m_NHits / m_NFoundHits.
+    void copy_out(Track &trk, const int mslot, const int tslot) const {
+      m_Err[tslot].copyOut(mslot, trk.errors_nc().Array());
+      m_Par[tslot].copyOut(mslot, trk.parameters_nc().Array());
+
+      trk.setCharge(m_Chg(mslot, 0, 0));
+      trk.setChi2(m_Chi2(mslot, 0, 0));
+      trk.setLabel(m_Label(mslot, 0, 0));
+
+      trk.resizeHits(m_NHits(mslot, 0, 0), m_NFoundHits(mslot, 0, 0));
+      std::copy(m_HoTArrs[mslot], &m_HoTArrs[mslot][m_NHits(mslot, 0, 0)], trk.beginHitsOnTrack_nc());
+    }
+    // TrackCand variant: no hit array is copied, only counters, last HitOnTrack, CombCandidate pointer and status.
+    void copy_in(const TrackCand &trk, const int mslot, const int tslot) {
+      m_Err[tslot].copyIn(mslot, trk.errors().Array());
+      m_Par[tslot].copyIn(mslot, trk.parameters().Array());
+
+      m_Chg(mslot, 0, 0) = trk.charge();
+      m_Chi2(mslot, 0, 0) = trk.chi2();
+      m_Label(mslot, 0, 0) = trk.label();
+
+      m_LastHitCcIndex(mslot, 0, 0) = trk.lastCcIndex();
+      m_NFoundHits(mslot, 0, 0) = trk.nFoundHits();
+      m_NMissingHits(mslot, 0, 0) = trk.nMissingHits();
+      m_NOverlapHits(mslot, 0, 0) = trk.nOverlapHits();
+
+      m_NInsideMinusOneHits(mslot, 0, 0) = trk.nInsideMinusOneHits();
+      m_NTailMinusOneHits(mslot, 0, 0) = trk.nTailMinusOneHits();
+
+      m_LastHoT[mslot] = trk.getLastHitOnTrack();
+      m_CombCand[mslot] = trk.combCandidate();
+      m_TrkStatus[mslot] = trk.getStatus();
+    }
+    // Writes the TrackCand state of a slot back into trk; m_LastHoT is not written back here.
+    void copy_out(TrackCand &trk, const int mslot, const int tslot) const {
+      m_Err[tslot].copyOut(mslot, trk.errors_nc().Array());
+      m_Par[tslot].copyOut(mslot, trk.parameters_nc().Array());
+
+      trk.setCharge(m_Chg(mslot, 0, 0));
+      trk.setChi2(m_Chi2(mslot, 0, 0));
+      trk.setLabel(m_Label(mslot, 0, 0));
+
+      trk.setLastCcIndex(m_LastHitCcIndex(mslot, 0, 0));
+      trk.setNFoundHits(m_NFoundHits(mslot, 0, 0));
+      trk.setNMissingHits(m_NMissingHits(mslot, 0, 0));
+      trk.setNOverlapHits(m_NOverlapHits(mslot, 0, 0));
+
+      trk.setNInsideMinusOneHits(m_NInsideMinusOneHits(mslot, 0, 0));
+      trk.setNTailMinusOneHits(m_NTailMinusOneHits(mslot, 0, 0));
+
+      trk.setCombCandidate(m_CombCand[mslot]);
+      trk.setStatus(m_TrkStatus[mslot]);
+    }
+    // Appends {index, layer} to the hit list of slot mslot and keeps the per-slot hit counters in sync.
+    void add_hit(const int mslot, int index, int layer) {
+      // Only used by BestHit.
+      // m_NInsideMinusOneHits and m_NTailMinusOneHits are maintained here but are
+      // not used and are not copied out (as Track does not have these members).
+
+      int &n_tot_hits = m_NHits(mslot, 0, 0);
+      int &n_fnd_hits = m_NFoundHits(mslot, 0, 0);
+
+      if (n_tot_hits < Config::nMaxTrkHits) {
+        m_HoTArrs[mslot][n_tot_hits++] = {index, layer};
+        if (index >= 0) {
+          ++n_fnd_hits;
+          m_NInsideMinusOneHits(mslot, 0, 0) += m_NTailMinusOneHits(mslot, 0, 0);
+          m_NTailMinusOneHits(mslot, 0, 0) = 0;
+        } else if (index == -1) {
+          ++m_NTailMinusOneHits(mslot, 0, 0);
+        }
+      } else {
+        // printf("WARNING MkFinder::add_hit hit-on-track limit reached for label=%d\n", label_);
+        // Hit list is full: only the last entry (LH) is overwritten, by a found hit or by a -2 marker.
+        const int LH = Config::nMaxTrkHits - 1;
+
+        if (index >= 0) {
+          if (m_HoTArrs[mslot][LH].index < 0)
+            ++n_fnd_hits;
+          m_HoTArrs[mslot][LH] = {index, layer};
+        } else if (index == -2) {
+          if (m_HoTArrs[mslot][LH].index >= 0)
+            --n_fnd_hits;
+          m_HoTArrs[mslot][LH] = {index, layer};
+        }
+      }
+    }
+    // Sum of the inside and tail -1 hit counters of a slot.
+    int num_all_minus_one_hits(const int mslot) const {
+      return m_NInsideMinusOneHits(mslot, 0, 0) + m_NTailMinusOneHits(mslot, 0, 0);
+    }
+
+    int num_inside_minus_one_hits(const int mslot) const { return m_NInsideMinusOneHits(mslot, 0, 0); }
+
+    //----------------------------------------------------------------------------
+
+    MPlexQF m_Chi2;
+    MPlexQI m_Label;  // seed index in global seed vector (for MC truth match)
+
+    MPlexQI m_NHits;
+    MPlexQI m_NFoundHits;
+
+    HitOnTrack m_HoTArrs[NN][Config::nMaxTrkHits];  // per-slot hit list, used by Track copy_in/copy_out and add_hit
+
+#if defined(DUMPHITWINDOW) or defined(DEBUG_BACKWARD_FIT)
+    MPlexQI m_SeedAlgo;   // seed algorithm
+    MPlexQI m_SeedLabel;  // seed label
+    Event *m_event;
+#endif
+
+    MPlexQI m_SeedIdx;  // seed index in local thread (for bookkeeping at thread level)
+    MPlexQI m_CandIdx;  // candidate index for the given seed (for bookkeeping of clone engine)
+
+    MPlexQI m_Stopped;  // Flag for BestHit that a track has been stopped (and copied out already)
+
+    // Additions / substitutions for TrackCand copy_in/out()
+    // One could really access the original TrackCand for all of those, especially the ones that
+    // are STD only. This then requires access back to that TrackCand memory.
+    // So maybe one should just have flags for CopyIn methods (or several versions). Yay, etc.
+    MPlexQI m_NMissingHits;             // sub: m_NHits, sort of, STD only
+    MPlexQI m_NOverlapHits;             // add: num of overlaps registered in HitOnTrack, STD only
+    MPlexQI m_NInsideMinusOneHits;      // sub: before we copied all hit idcs and had a loop counting them only
+    MPlexQI m_NTailMinusOneHits;        // sub: before we copied all hit idcs and had a loop counting them only
+    MPlexQI m_LastHitCcIndex;           // add: index of last hit in m_CombCand hit tree, STD only
+    TrackBase::Status m_TrkStatus[NN];  // STD only, status bits
+    HitOnTrack m_LastHoT[NN];
+    CombCandidate *m_CombCand[NN];
+    // const TrackCand *m_TrkCand[NN]; // hmmh, could get all data through this guy ... but scattered
+    // storing it in now for bkfit debug printouts
+    TrackCand *m_TrkCand[NN];
+
+    // Hit indices into LayerOfHits to explore.
+    WSR_Result m_XWsrResult[NN];  // Could also merge it with m_XHitSize. Or use smaller arrays.
+    MPlexQI m_XHitSize;
+    MPlexHitIdx m_XHitArr;
+
+    // Hit errors / parameters for hit matching, update.
+    MPlexHS m_msErr;
+    MPlexHV m_msPar;
+
+    // An idea: Do propagation to hit in FindTracksXYZZ functions.
+    // Have some state / functions here that make this short to write.
+    // This would simplify KalmanUtils (remove the propagate functions).
+    // Track errors / parameters propagated to current hit.
+    // MPlexLS    candErrAtCurrHit;
+    // MPlexLV    candParAtCurrHit;
+
+    const PropagationConfig *m_prop_config = nullptr;
+    const IterationParams *m_iteration_params = nullptr;
+    const IterationLayerConfig *m_iteration_layer_config = nullptr;
+    const std::vector<bool> *m_iteration_hit_mask = nullptr;
+
+    // Backward fit
+    int m_CurHit[NN];
+    const HitOnTrack *m_HoTArr[NN];
+    int m_CurNode[NN];
+    const HoTNode *m_HoTNodeArr[NN];
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/MkFitter.cc b/RecoTracker/MkFitCore/src/MkFitter.cc
new file mode 100644
index 0000000000000..df1d9371b9bd7
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkFitter.cc
@@ -0,0 +1,385 @@
+#include "MkFitter.h"
+
+#include "KalmanUtilsMPlex.h"
+#include "MatriplexPackers.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+#include <sstream>
+
+namespace mkfit {
+
+  void MkFitter::checkAlignment() {  // reports the alignment of the leading track and hit Matriplex arrays
+    const char* const err_tags[] = {"  m_Err[0]   =", "  m_Err[1]   ="};
+    const char* const par_tags[] = {"  m_Par[0]   =", "  m_Par[1]   ="};
+    printf("MkFitter alignment check:\n");
+    for (int k = 0; k < 2; ++k) Matriplex::align_check(err_tags[k], &m_Err[k].fArray[0]);
+    for (int k = 0; k < 2; ++k) Matriplex::align_check(par_tags[k], &m_Par[k].fArray[0]);
+    Matriplex::align_check("  m_msErr[0] =", &m_msErr[0].fArray[0]);
+    Matriplex::align_check("  m_msPar[0] =", &m_msPar[0].fArray[0]);
+  }
+
+  void MkFitter::printPt(int idx) {
+    auto& par = m_Par[idx];  // prints hypot(par 3, par 4) for each of the NN slots, without a newline
+    for (int slot = 0; slot < NN; ++slot)
+      printf("%5.2f  ", std::hypot(par.At(slot, 3, 0), par.At(slot, 4, 0)));
+  }
+
+  int MkFitter::countValidHits(int itrack, int end_hit) const {
+    // Number of hit positions in [0, end_hit) of slot itrack that hold a non-negative hit index.
+    int n_valid = 0;
+    for (int h = 0; h < end_hit; ++h)
+      n_valid += (m_HoTArr[h](itrack, 0, 0).index >= 0) ? 1 : 0;
+
+    return n_valid;
+  }
+
+  int MkFitter::countInvalidHits(int itrack, int end_hit) const {
+    // Number of hit positions in [0, end_hit) of slot itrack whose hit index is exactly -1.
+    // Other negative markers (e.g. -2) are not counted; whether they should be is an open question.
+    int n_missing = 0;
+    for (int h = 0; h < end_hit; ++h)
+      n_missing += (m_HoTArr[h](itrack, 0, 0).index == -1) ? 1 : 0;
+
+    return n_missing;
+  }
+
+  //==============================================================================
+
+  void MkFitter::inputTracksAndHits(const std::vector<Track>& tracks,
+                                    const std::vector<HitVec>& layerHits,
+                                    int beg,
+                                    int end) {
+    // Loads tracks[beg, end) into consecutive Matriplex slots (parameter set iC) together
+    // with the measurement data of their first m_Nhits hits.
+    // Hit number hi is fetched from layerHits[hi]; for a negative hit index only the
+    // HitOnTrack entry is stored and m_msErr / m_msPar keep their previous content.
+    // The last chunk may be shorter than NN, hence no assert(end - beg == NN).
+
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      const Track& track = tracks[i];
+
+      m_Label(slot, 0, 0) = track.label();
+      m_Chi2(slot, 0, 0) = track.chi2();
+      m_Chg(slot, 0, 0) = track.charge();
+
+      m_Par[iC].copyIn(slot, track.parameters().Array());
+      m_Err[iC].copyIn(slot, track.errors().Array());
+
+      // The hit-on-track record is stored even when the hit itself is missing.
+      for (int hi = 0; hi < m_Nhits; ++hi) {
+        m_HoTArr[hi](slot, 0, 0) = track.getHitOnTrack(hi);
+
+        const int hit_idx = track.getHitIdx(hi);
+        if (hit_idx >= 0) {
+          // Only real hits carry measurement data.
+          const Hit& meas = layerHits[hi][hit_idx];
+          m_msErr[hi].copyIn(slot, meas.errArray());
+          m_msPar[hi].copyIn(slot, meas.posArray());
+        }
+      }
+    }
+  }
+
+  void MkFitter::inputTracksAndHits(const std::vector<Track>& tracks,
+                                    const std::vector<LayerOfHits>& layerHits,
+                                    int beg,
+                                    int end) {
+    // Loads tracks[beg, end) into consecutive Matriplex slots (parameter set iC) and copies in
+    // the measurements of their first m_Nhits hits, looked up by (layer, index) in layerHits.
+    // A hit with a negative index or layer has no measurement: only its HitOnTrack entry is
+    // stored and m_msErr / m_msPar stay untouched, as refHit() must not see a negative index.
+    // The last chunk may be shorter than NN, hence no assert(end - beg == NN).
+
+    for (int i = beg; i < end; ++i) {
+      const int itrack = i - beg;
+      const Track& trk = tracks[i];
+
+      m_Label(itrack, 0, 0) = trk.label();
+      m_Err[iC].copyIn(itrack, trk.errors().Array());
+      m_Par[iC].copyIn(itrack, trk.parameters().Array());
+
+      m_Chg(itrack, 0, 0) = trk.charge();
+      m_Chi2(itrack, 0, 0) = trk.chi2();
+
+      // CopyIn seems fast enough, but indirections are quite slow.
+      for (int hi = 0; hi < m_Nhits; ++hi) {
+        m_HoTArr[hi](itrack, 0, 0) = trk.getHitOnTrack(hi);
+
+        const int hidx = trk.getHitIdx(hi);
+        const int hlyr = trk.getHitLyr(hi);
+        if (hidx < 0 || hlyr < 0)
+          continue;  // missing / invalid hit, there is nothing to look up
+
+        const Hit& hit = layerHits[hlyr].refHit(hidx);
+        m_msErr[hi].copyIn(itrack, hit.errArray());
+        m_msPar[hi].copyIn(itrack, hit.posArray());
+      }
+    }
+  }
+
+  void MkFitter::slurpInTracksAndHits(const std::vector<Track>& tracks,
+                                      const std::vector<HitVec>& layerHits,
+                                      int beg,
+                                      int end) {
+    // Loads tracks[beg, end) into parameter set iC and the measurements of their first m_Nhits
+    // hits into m_msErr / m_msPar, gathering the data through the Matriplex packers.
+    // Hit hi of every track is taken from layerHits[hi]; that vector must not be empty as its
+    // first element is handed to the hit packer constructor.
+    // A negative hit index is packed as a null input, as done in fitTracksWithInterSlurp(),
+    // because it cannot be used to index the hit vector.
+
+    MatriplexTrackPacker mtp(tracks[beg]);
+
+    for (int i = beg; i < end; ++i) {
+      const int itrack = i - beg;
+      const Track& trk = tracks[i];
+      m_Label(itrack, 0, 0) = trk.label();
+      m_Chg(itrack, 0, 0) = trk.charge();
+      m_Chi2(itrack, 0, 0) = trk.chi2();
+
+      mtp.addInput(trk);
+    }
+
+    mtp.pack(m_Err[iC], m_Par[iC]);
+
+    for (int hi = 0; hi < m_Nhits; ++hi) {
+      MatriplexHitPacker mhp(layerHits[hi][0]);
+
+      for (int i = beg; i < end; ++i) {
+        const int hidx = tracks[i].getHitIdx(hi);
+        m_HoTArr[hi](i - beg, 0, 0) = tracks[i].getHitOnTrack(hi);
+        if (hidx < 0) {
+          mhp.addNullInput();  // keeps the packer slots aligned with the track slots
+        } else {
+          mhp.addInput(layerHits[hi][hidx]);
+        }
+      }
+
+      mhp.pack(m_msErr[hi], m_msPar[hi]);
+    }
+  }
+
+  void MkFitter::inputTracksAndHitIdx(const std::vector<Track>& tracks, int beg, int end, bool inputProp) {
+    // Loads tracks[beg, end) and the HitOnTrack entries of their first m_Nhits hits into
+    // consecutive Matriplex slots; no hit measurements are read here.
+    // inputProp selects the iP parameter set as destination, otherwise iC is filled.
+    // The last chunk may be shorter than NN, hence no assert(end - beg == NN).
+
+    auto& err_dst = inputProp ? m_Err[iP] : m_Err[iC];
+    auto& par_dst = inputProp ? m_Par[iP] : m_Par[iC];
+
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      const Track& track = tracks[i];
+
+      m_Label(slot, 0, 0) = track.label();
+      m_Chi2(slot, 0, 0) = track.chi2();
+      m_Chg(slot, 0, 0) = track.charge();
+
+      par_dst.copyIn(slot, track.parameters().Array());
+      err_dst.copyIn(slot, track.errors().Array());
+
+      for (int hi = 0; hi < m_Nhits; ++hi)
+        m_HoTArr[hi](slot, 0, 0) = track.getHitOnTrack(hi);
+    }
+  }
+
+  void MkFitter::inputTracksAndHitIdx(const std::vector<std::vector<Track> >& tracks,
+                                      const std::vector<std::pair<int, int> >& idxs,
+                                      int beg,
+                                      int end,
+                                      bool inputProp) {
+    // Loads the tracks addressed by idxs[beg, end) into consecutive Matriplex slots; each
+    // pair is used as tracks[first][second] and is also recorded in m_SeedIdx / m_CandIdx.
+    // Only HitOnTrack entries (first m_Nhits) are copied, no hit measurements.
+    // inputProp selects the iP parameter set as destination, otherwise iC is filled.
+
+    const int dst = inputProp ? iP : iC;
+
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      const std::pair<int, int>& where = idxs[i];
+      const Track& track = tracks[where.first][where.second];
+
+      m_SeedIdx(slot, 0, 0) = where.first;
+      m_CandIdx(slot, 0, 0) = where.second;
+      m_Label(slot, 0, 0) = track.label();
+
+      m_Chi2(slot, 0, 0) = track.chi2();
+      m_Chg(slot, 0, 0) = track.charge();
+
+      m_Par[dst].copyIn(slot, track.parameters().Array());
+      m_Err[dst].copyIn(slot, track.errors().Array());
+
+      for (int hi = 0; hi < m_Nhits; ++hi)
+        m_HoTArr[hi](slot, 0, 0) = track.getHitOnTrack(hi);
+    }
+  }
+
+  void MkFitter::inputSeedsTracksAndHits(const std::vector<Track>& seeds,
+                                         const std::vector<Track>& tracks,
+                                         const std::vector<HitVec>& layerHits,
+                                         int beg,
+                                         int end) {
+    // For every seed in seeds[beg, end): the seed label is stored in the slot and, unless it
+    // is negative, the seed state goes into parameter set iC while the hits are taken from
+    // tracks[label] (hit number hi is fetched from layerHits[hi]).
+    // Slots of negatively labelled seeds keep whatever state and hits they held before.
+    // The last chunk may be shorter than NN, hence no assert(end - beg == NN).
+
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      const Track& seed = seeds[i];
+      const int label = seed.label();
+
+      m_Label(slot, 0, 0) = label;
+      if (label < 0)
+        continue;
+
+      m_Chi2(slot, 0, 0) = seed.chi2();
+      m_Chg(slot, 0, 0) = seed.charge();
+
+      m_Par[iC].copyIn(slot, seed.parameters().Array());
+      m_Err[iC].copyIn(slot, seed.errors().Array());
+
+      const Track& track = tracks[label];
+
+      // The hit-on-track record is stored even when the hit itself is missing.
+      for (int hi = 0; hi < m_Nhits; ++hi) {
+        m_HoTArr[hi](slot, 0, 0) = track.getHitOnTrack(hi);
+
+        const int hit_idx = track.getHitIdx(hi);
+        if (hit_idx >= 0) {
+          // fixme: it is unchecked whether leaving old measurement data for missing hits is harmless
+          const Hit& meas = layerHits[hi][hit_idx];
+          m_msErr[hi].copyIn(slot, meas.errArray());
+          m_msPar[hi].copyIn(slot, meas.posArray());
+        }
+      }
+    }
+  }
+
+  //------------------------------------------------------------------------------
+  // Fitting with interleaved hit loading
+  //------------------------------------------------------------------------------
+
+  void MkFitter::inputTracksForFit(const std::vector<Track>& tracks, int beg, int end) {
+    // Packs the state of tracks[beg, end) into parameter set iC and hands their hit-on-track
+    // arrays to the HoT packer, which is then asked to fill m_HoTArr[ll] for every ll in
+    // [0, Config::nLayers). Both packers are constructed from tracks[beg].
+    // NOTE(review): m_HoTArr is sized by Config::nMaxTrkHits while the pack loop runs up to
+    // Config::nLayers -- confirm that nLayers never exceeds nMaxTrkHits.
+    // Open questions kept from the original: how many hits should be read in, and how
+    // should invalid hits be treated?
+
+    const Track& first = tracks[beg];
+    MatriplexTrackPacker mtp(first);
+    MatriplexHoTPacker mhotp(*first.getHitsOnTrackArray());
+
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      const Track& track = tracks[i];
+
+      m_Label(slot, 0, 0) = track.label();
+      m_Chi2(slot, 0, 0) = track.chi2();
+      m_Chg(slot, 0, 0) = track.charge();
+
+      mtp.addInput(track);
+      mhotp.addInput(*track.getHitsOnTrackArray());
+    }
+
+    // Transfer the gathered data into the Matriplexes.
+    mtp.pack(m_Err[iC], m_Par[iC]);
+
+    for (int layer = 0; layer < Config::nLayers; ++layer) {
+      mhotp.pack(m_HoTArr[layer], layer);
+    }
+  }
+
+  void MkFitter::fitTracksWithInterSlurp(const std::vector<HitVec>& layersohits, const int N_proc) {
+    // For each of the m_Nhits hit positions: gathers the hits of the first N_proc slots,
+    // propagates the tracks to them (propagateTracksToHitR) and calls kalmanUpdate() with the
+    // iP state, the measurement and the iC state (presumably the result) as arguments.
+    // A slot whose hit index or layer is negative contributes a null input to the packer.
+    // Caveats kept from the original: hit position ii is assumed to correspond to layer ii
+    // (layersohits[ii] must not be empty), so this is only expected to work reliably for
+    // consecutive barrel layers with a hit present on every layer for every track.
+    // How missing hits should really be handled (unbalanced fit) is still an open question.
+
+    for (int ii = 0; ii < m_Nhits; ++ii) {
+      MatriplexHitPacker mhp(layersohits[ii][0]);
+
+      // a) slurp in the hit parameters
+      for (int slot = 0; slot < N_proc; ++slot) {
+        const HitOnTrack& hot = m_HoTArr[ii](slot, 0, 0);
+
+        if (hot.index >= 0 && hot.layer >= 0) {
+          mhp.addInput(layersohits[hot.layer][hot.index]);
+        } else {
+          // Missing hit: keep the packer slot, but without a real measurement.
+          mhp.addNullInput();
+        }
+      }
+
+      // Element 0 of m_msErr / m_msPar is reused as the target for every hit position.
+      mhp.pack(m_msErr[0], m_msPar[0]);
+
+      // b) propagate to the hits and update the tracks
+      auto& meas_par = m_msPar[0];
+      auto& meas_err = m_msErr[0];
+
+      propagateTracksToHitR(meas_par, N_proc, PropagationConfig::get_default().forward_fit_pflags);
+      kalmanUpdate(m_Err[iP], m_Par[iP], meas_err, meas_par, m_Err[iC], m_Par[iC], N_proc);
+    }
+  }
+
+  //==============================================================================
+  // Fitting functions
+  //==============================================================================
+
+  void MkFitter::outputTracks(std::vector<Track>& tracks, int beg, int end, int iCP) const {
+    // Writes the state held in parameter set iCP (callers pass iC or iP) back into
+    // tracks[beg, end), together with charge, chi2 and label; hit lists are not touched.
+    // The tracks vector must already be large enough, it is not resized here.
+
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      Track& out = tracks[i];
+      out.setLabel(m_Label(slot, 0, 0));
+      out.setChi2(m_Chi2(slot, 0, 0));
+      out.setCharge(m_Chg(slot, 0, 0));
+
+      m_Par[iCP].copyOut(slot, out.parameters_nc().Array());
+      m_Err[iCP].copyOut(slot, out.errors_nc().Array());
+    }
+  }
+
+  void MkFitter::outputFittedTracksAndHitIdx(std::vector<Track>& tracks, int beg, int end, bool outputProp) const {
+    // Writes slots back into tracks[beg, end): state from parameter set iP when outputProp
+    // is set (iC otherwise), charge, chi2, label and a hit list rebuilt from the first
+    // m_Nhits HitOnTrack entries of the slot. The tracks vector is not resized here.
+    // Hits are re-added one by one since the number of found hits is not known at this point.
+
+    const int src = outputProp ? iP : iC;
+    for (int i = beg; i < end; ++i) {
+      const int slot = i - beg;
+      Track& out = tracks[i];
+
+      m_Par[src].copyOut(slot, out.parameters_nc().Array());
+      m_Err[src].copyOut(slot, out.errors_nc().Array());
+
+      out.setLabel(m_Label(slot, 0, 0));
+      out.setChi2(m_Chi2(slot, 0, 0));
+      out.setCharge(m_Chg(slot, 0, 0));
+
+      out.resetHits();
+      out.reserveHits(m_Nhits);
+      for (int hi = 0; hi < m_Nhits; ++hi)
+        out.addHitIdx(m_HoTArr[hi](slot, 0, 0), 0.);
+    }
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/MkFitter.h b/RecoTracker/MkFitCore/src/MkFitter.h
new file mode 100644
index 0000000000000..19bed8937ada7
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MkFitter.h
@@ -0,0 +1,93 @@
+#ifndef RecoTracker_MkFitCore_src_MkFitter_h
+#define RecoTracker_MkFitCore_src_MkFitter_h
+
+#include "MkBase.h"
+
+#include "RecoTracker/MkFitCore/interface/HitStructures.h"
+
+namespace mkfit {
+
+  class CandCloner;
+
+  static constexpr int MPlexHitIdxMax = 16;  // row count of the MPlexHitIdx alias below
+  using MPlexHitIdx = Matriplex::Matriplex<int, MPlexHitIdxMax, 1, NN>;  // MPlexHitIdxMax x 1 ints, NN-wide
+  using MPlexQHoT = Matriplex::Matriplex<HitOnTrack, 1, 1, NN>;  // a single HitOnTrack, NN-wide
+
+  class MkFitter : public MkBase {  // Matriplex-based fitter state: tracks plus up to Config::nMaxTrkHits hits each
+  public:
+    MkFitter() : m_Nhits(0) {}
+
+    // Copy-in timing tests.
+    MPlexLS& refErr0() { return m_Err[0]; }
+    MPlexLV& refPar0() { return m_Par[0]; }
+
+    //----------------------------------------------------------------------------
+
+    void checkAlignment();
+
+    void printPt(int idx);
+    // Number of hits handled per track, capped at Config::nMaxTrkHits - 1; no lower bound is applied.
+    void setNhits(int newnhits) { m_Nhits = std::min(newnhits, Config::nMaxTrkHits - 1); }
+    // Count hits with index >= 0 (valid) or index == -1 (invalid) among the first end_hit (or m_Nhits) hits.
+    int countValidHits(int itrack, int end_hit) const;
+    int countInvalidHits(int itrack, int end_hit) const;
+    int countValidHits(int itrack) const { return countValidHits(itrack, m_Nhits); }
+    int countInvalidHits(int itrack) const { return countInvalidHits(itrack, m_Nhits); }
+    // Loaders: fill consecutive Matriplex slots from the entries [beg, end) of the input vector.
+    void inputTracksAndHits(const std::vector<Track>& tracks, const std::vector<HitVec>& layerHits, int beg, int end);
+    void inputTracksAndHits(const std::vector<Track>& tracks,
+                            const std::vector<LayerOfHits>& layerHits,
+                            int beg,
+                            int end);
+    void slurpInTracksAndHits(const std::vector<Track>& tracks, const std::vector<HitVec>& layerHits, int beg, int end);
+    void inputTracksAndHitIdx(const std::vector<Track>& tracks, int beg, int end, bool inputProp);
+    void inputTracksAndHitIdx(const std::vector<std::vector<Track> >& tracks,
+                              const std::vector<std::pair<int, int> >& idxs,
+                              int beg,
+                              int end,
+                              bool inputProp);
+    void inputSeedsTracksAndHits(const std::vector<Track>& seeds,
+                                 const std::vector<Track>& tracks,
+                                 const std::vector<HitVec>& layerHits,
+                                 int beg,
+                                 int end);
+    // Fitting with interleaved hit loading: pack the tracks first, then load hits and update per hit position.
+    void inputTracksForFit(const std::vector<Track>& tracks, int beg, int end);
+    void fitTracksWithInterSlurp(const std::vector<HitVec>& layersohits, int N_proc);
+    // Copy-out; iCP is the index of the parameter set (iC or iP) that is written to the tracks.
+    void outputTracks(std::vector<Track>& tracks, int beg, int end, int iCP) const;
+
+    void outputFittedTracks(std::vector<Track>& tracks, int beg, int end) const {
+      return outputTracks(tracks, beg, end, iC);
+    }
+
+    void outputPropagatedTracks(std::vector<Track>& tracks, int beg, int end) const {
+      return outputTracks(tracks, beg, end, iP);
+    }
+
+    void outputFittedTracksAndHitIdx(std::vector<Track>& tracks, int beg, int end, bool outputProp) const;
+
+    //----------------------------------------------------------------------------
+
+  private:
+    MPlexQF m_Chi2;
+
+    MPlexHS m_msErr[Config::nMaxTrkHits];  // hit errors, one Matriplex per hit position
+    MPlexHV m_msPar[Config::nMaxTrkHits];  // hit positions, one Matriplex per hit position
+
+    MPlexQI m_Label;    //this is the seed index in global seed vector (for MC truth match)
+    MPlexQI m_SeedIdx;  //this is the seed index in local thread (for bookkeeping at thread level)
+    MPlexQI m_CandIdx;  //this is the candidate index for the given seed (for bookkeeping of clone engine)
+
+    MPlexQHoT m_HoTArr[Config::nMaxTrkHits];  // HitOnTrack entries, one Matriplex per hit position
+
+    // Hold hit indices to explore at current layer.
+    MPlexQI m_XHitSize;
+    MPlexHitIdx m_XHitArr;
+
+    int m_Nhits;  // set through setNhits(), 0 after construction
+  };
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/src/MultHelixProp.ah b/RecoTracker/MkFitCore/src/MultHelixProp.ah
new file mode 100644
index 0000000000000..f97fd070eff97
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MultHelixProp.ah
@@ -0,0 +1,300 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector branch: n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per pass. a, b, c, N, T, IntrVec_t, LD, ST, MUL, FMA and ADD come from the including code (presumably FMA(x, y, z) = x*y + z -- confirm).
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = MUL(a_0, b_6);
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_4 = MUL(a_0, b_10);
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_5 = MUL(a_0, b_15);
+      // c_0..c_5 next receive the a_1, a_3 and a_4 contributions; a_2 and a_5 are never loaded.
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_1 = FMA(a_1, b_2, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_1, b_4, c_2);
+      IntrVec_t b_7 = LD(b, 7);
+      c_3 = FMA(a_1, b_7, c_3);
+      IntrVec_t b_11 = LD(b, 11);
+      c_4 = FMA(a_1, b_11, c_4);
+      IntrVec_t b_16 = LD(b, 16);
+      c_5 = FMA(a_1, b_16, c_5);
+
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(a_3, b_6, c_0);
+      c_1 = FMA(a_3, b_7, c_1);
+      IntrVec_t b_8 = LD(b, 8);
+      c_2 = FMA(a_3, b_8, c_2);
+      IntrVec_t b_9 = LD(b, 9);
+      c_3 = FMA(a_3, b_9, c_3);
+      IntrVec_t b_13 = LD(b, 13);
+      c_4 = FMA(a_3, b_13, c_4);
+      IntrVec_t b_18 = LD(b, 18);
+      c_5 = FMA(a_3, b_18, c_5);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_0 = FMA(a_4, b_10, c_0);
+      c_1 = FMA(a_4, b_11, c_1);
+      IntrVec_t b_12 = LD(b, 12);
+      c_2 = FMA(a_4, b_12, c_2);
+      c_3 = FMA(a_4, b_13, c_3);
+      IntrVec_t b_14 = LD(b, 14);
+      c_4 = FMA(a_4, b_14, c_4);
+      IntrVec_t b_19 = LD(b, 19);
+      c_5 = FMA(a_4, b_19, c_5);
+
+      // c_6..c_11 reuse the b values already loaded, now with a_6, a_7, a_9 and a_10; c_0..c_5 are stored on the way.
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_6 = MUL(a_6, b_0);
+      IntrVec_t c_7 = MUL(a_6, b_1);
+      IntrVec_t c_8 = MUL(a_6, b_3);
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_9 = MUL(a_6, b_6);
+      IntrVec_t c_10 = MUL(a_6, b_10);
+      IntrVec_t c_11 = MUL(a_6, b_15);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_6 = FMA(a_7, b_1, c_6);
+      c_7 = FMA(a_7, b_2, c_7);
+      c_8 = FMA(a_7, b_4, c_8);
+      c_9 = FMA(a_7, b_7, c_9);
+      c_10 = FMA(a_7, b_11, c_10);
+      c_11 = FMA(a_7, b_16, c_11);
+
+
+      IntrVec_t a_9 = LD(a, 9);
+      c_6 = FMA(a_9, b_6, c_6);
+      c_7 = FMA(a_9, b_7, c_7);
+      c_8 = FMA(a_9, b_8, c_8);
+      c_9 = FMA(a_9, b_9, c_9);
+      c_10 = FMA(a_9, b_13, c_10);
+      c_11 = FMA(a_9, b_18, c_11);
+
+      IntrVec_t a_10 = LD(a, 10);
+      c_6 = FMA(a_10, b_10, c_6);
+      c_7 = FMA(a_10, b_11, c_7);
+      c_8 = FMA(a_10, b_12, c_8);
+      c_9 = FMA(a_10, b_13, c_9);
+      c_10 = FMA(a_10, b_14, c_10);
+      c_11 = FMA(a_10, b_19, c_11);
+
+      // c_12..c_17 use a_12, a_13, a_15, a_16 and a_17, plus one unscaled b value each (the ADD lines); a_14 is never loaded.
+      IntrVec_t a_12 = LD(a, 12);
+      IntrVec_t c_12 = MUL(a_12, b_0);
+      IntrVec_t c_13 = MUL(a_12, b_1);
+      IntrVec_t c_14 = MUL(a_12, b_3);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      IntrVec_t c_15 = MUL(a_12, b_6);
+      IntrVec_t c_16 = MUL(a_12, b_10);
+      IntrVec_t c_17 = MUL(a_12, b_15);
+
+      IntrVec_t a_13 = LD(a, 13);
+      c_12 = FMA(a_13, b_1, c_12);
+      c_13 = FMA(a_13, b_2, c_13);
+      c_14 = FMA(a_13, b_4, c_14);
+      c_15 = FMA(a_13, b_7, c_15);
+      c_16 = FMA(a_13, b_11, c_16);
+      c_17 = FMA(a_13, b_16, c_17);
+
+      c_12 = ADD(b_3, c_12);
+      c_13 = ADD(b_4, c_13);
+      IntrVec_t b_5 = LD(b, 5);
+      c_14 = ADD(b_5, c_14);
+      c_15 = ADD(b_8, c_15);
+      c_16 = ADD(b_12, c_16);
+      IntrVec_t b_17 = LD(b, 17);
+      c_17 = ADD(b_17, c_17);
+
+      IntrVec_t a_15 = LD(a, 15);
+      c_12 = FMA(a_15, b_6, c_12);
+      c_13 = FMA(a_15, b_7, c_13);
+      c_14 = FMA(a_15, b_8, c_14);
+      c_15 = FMA(a_15, b_9, c_15);
+      c_16 = FMA(a_15, b_13, c_16);
+      c_17 = FMA(a_15, b_18, c_17);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_12 = FMA(a_16, b_10, c_12);
+      c_13 = FMA(a_16, b_11, c_13);
+      c_14 = FMA(a_16, b_12, c_14);
+      c_15 = FMA(a_16, b_13, c_15);
+      c_16 = FMA(a_16, b_14, c_16);
+      c_17 = FMA(a_16, b_19, c_17);
+
+      IntrVec_t a_17 = LD(a, 17);
+      c_12 = FMA(a_17, b_15, c_12);
+      c_13 = FMA(a_17, b_16, c_13);
+      c_14 = FMA(a_17, b_17, c_14);
+      c_15 = FMA(a_17, b_18, c_15);
+      c_16 = FMA(a_17, b_19, c_16);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      IntrVec_t b_20 = LD(b, 20);
+      c_17 = FMA(a_17, b_20, c_17);
+      // c_18..c_23 use a_18, a_19, a_21 and a_22 only.
+      IntrVec_t a_18 = LD(a, 18);
+      IntrVec_t c_18 = MUL(a_18, b_0);
+      IntrVec_t c_19 = MUL(a_18, b_1);
+      IntrVec_t c_20 = MUL(a_18, b_3);
+      ST(c, 17, c_17);
+      IntrVec_t c_21 = MUL(a_18, b_6);
+      IntrVec_t c_22 = MUL(a_18, b_10);
+      IntrVec_t c_23 = MUL(a_18, b_15);
+
+      IntrVec_t a_19 = LD(a, 19);
+      c_18 = FMA(a_19, b_1, c_18);
+      c_19 = FMA(a_19, b_2, c_19);
+      c_20 = FMA(a_19, b_4, c_20);
+      c_21 = FMA(a_19, b_7, c_21);
+      c_22 = FMA(a_19, b_11, c_22);
+      c_23 = FMA(a_19, b_16, c_23);
+
+
+      IntrVec_t a_21 = LD(a, 21);
+      c_18 = FMA(a_21, b_6, c_18);
+      c_19 = FMA(a_21, b_7, c_19);
+      c_20 = FMA(a_21, b_8, c_20);
+      c_21 = FMA(a_21, b_9, c_21);
+      c_22 = FMA(a_21, b_13, c_22);
+      c_23 = FMA(a_21, b_18, c_23);
+
+      IntrVec_t a_22 = LD(a, 22);
+      c_18 = FMA(a_22, b_10, c_18);
+      c_19 = FMA(a_22, b_11, c_19);
+      c_20 = FMA(a_22, b_12, c_20);
+      c_21 = FMA(a_22, b_13, c_21);
+      c_22 = FMA(a_22, b_14, c_22);
+      c_23 = FMA(a_22, b_19, c_23);
+
+      // c_24..c_29 use a_24, a_25, a_27 and a_28 only.
+      IntrVec_t a_24 = LD(a, 24);
+      IntrVec_t c_24 = MUL(a_24, b_0);
+      IntrVec_t c_25 = MUL(a_24, b_1);
+      IntrVec_t c_26 = MUL(a_24, b_3);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+      ST(c, 21, c_21);
+      ST(c, 22, c_22);
+      ST(c, 23, c_23);
+      IntrVec_t c_27 = MUL(a_24, b_6);
+      IntrVec_t c_28 = MUL(a_24, b_10);
+      IntrVec_t c_29 = MUL(a_24, b_15);
+
+      IntrVec_t a_25 = LD(a, 25);
+      c_24 = FMA(a_25, b_1, c_24);
+      c_25 = FMA(a_25, b_2, c_25);
+      c_26 = FMA(a_25, b_4, c_26);
+      c_27 = FMA(a_25, b_7, c_27);
+      c_28 = FMA(a_25, b_11, c_28);
+      c_29 = FMA(a_25, b_16, c_29);
+
+
+      IntrVec_t a_27 = LD(a, 27);
+      c_24 = FMA(a_27, b_6, c_24);
+      c_25 = FMA(a_27, b_7, c_25);
+      c_26 = FMA(a_27, b_8, c_26);
+      c_27 = FMA(a_27, b_9, c_27);
+      c_28 = FMA(a_27, b_13, c_28);
+      c_29 = FMA(a_27, b_18, c_29);
+
+      IntrVec_t a_28 = LD(a, 28);
+      c_24 = FMA(a_28, b_10, c_24);
+      c_25 = FMA(a_28, b_11, c_25);
+      c_26 = FMA(a_28, b_12, c_26);
+      c_27 = FMA(a_28, b_13, c_27);
+      c_28 = FMA(a_28, b_14, c_28);
+      c_29 = FMA(a_28, b_19, c_29);
+
+
+
+
+
+
+      // c_30..c_35 are copies of b_15..b_20; no a value is involved.
+      IntrVec_t c_30 = b_15;
+      IntrVec_t c_31 = b_16;
+      IntrVec_t c_32 = b_17;
+      IntrVec_t c_33 = b_18;
+      ST(c, 24, c_24);
+      ST(c, 25, c_25);
+      ST(c, 26, c_26);
+      ST(c, 27, c_27);
+      ST(c, 28, c_28);
+      ST(c, 29, c_29);
+      ST(c, 30, c_30);
+      ST(c, 31, c_31);
+      ST(c, 32, c_32);
+      ST(c, 33, c_33);
+      IntrVec_t c_34 = b_19;
+      IntrVec_t c_35 = b_20;
+      ST(c, 34, c_34);
+      ST(c, 35, c_35);
+   }
+
+#else
+   // Scalar branch: element i of the n-th matrix is at [i*N + n]. The index pattern is that of a 6x6 product c = a * b, with a and c indexed as 6*row + col and b holding the 21 values of a packed symmetric matrix.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {  // a[2], a[5], a[8], a[11], a[20], a[23], a[26] and a[29..34] are never read (they act as 0); a[14] and a[35] act as 1
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 6*N+n] + a[ 4*N+n]*b[10*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n] + a[ 3*N+n]*b[ 7*N+n] + a[ 4*N+n]*b[11*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 8*N+n] + a[ 4*N+n]*b[12*N+n];
+      c[ 3*N+n] = a[ 0*N+n]*b[ 6*N+n] + a[ 1*N+n]*b[ 7*N+n] + a[ 3*N+n]*b[ 9*N+n] + a[ 4*N+n]*b[13*N+n];
+      c[ 4*N+n] = a[ 0*N+n]*b[10*N+n] + a[ 1*N+n]*b[11*N+n] + a[ 3*N+n]*b[13*N+n] + a[ 4*N+n]*b[14*N+n];
+      c[ 5*N+n] = a[ 0*N+n]*b[15*N+n] + a[ 1*N+n]*b[16*N+n] + a[ 3*N+n]*b[18*N+n] + a[ 4*N+n]*b[19*N+n];
+      c[ 6*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 9*N+n]*b[ 6*N+n] + a[10*N+n]*b[10*N+n];
+      c[ 7*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 9*N+n]*b[ 7*N+n] + a[10*N+n]*b[11*N+n];
+      c[ 8*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 9*N+n]*b[ 8*N+n] + a[10*N+n]*b[12*N+n];
+      c[ 9*N+n] = a[ 6*N+n]*b[ 6*N+n] + a[ 7*N+n]*b[ 7*N+n] + a[ 9*N+n]*b[ 9*N+n] + a[10*N+n]*b[13*N+n];
+      c[10*N+n] = a[ 6*N+n]*b[10*N+n] + a[ 7*N+n]*b[11*N+n] + a[ 9*N+n]*b[13*N+n] + a[10*N+n]*b[14*N+n];
+      c[11*N+n] = a[ 6*N+n]*b[15*N+n] + a[ 7*N+n]*b[16*N+n] + a[ 9*N+n]*b[18*N+n] + a[10*N+n]*b[19*N+n];
+      c[12*N+n] = a[12*N+n]*b[ 0*N+n] + a[13*N+n]*b[ 1*N+n] + b[ 3*N+n] + a[15*N+n]*b[ 6*N+n] + a[16*N+n]*b[10*N+n] + a[17*N+n]*b[15*N+n];
+      c[13*N+n] = a[12*N+n]*b[ 1*N+n] + a[13*N+n]*b[ 2*N+n] + b[ 4*N+n] + a[15*N+n]*b[ 7*N+n] + a[16*N+n]*b[11*N+n] + a[17*N+n]*b[16*N+n];
+      c[14*N+n] = a[12*N+n]*b[ 3*N+n] + a[13*N+n]*b[ 4*N+n] + b[ 5*N+n] + a[15*N+n]*b[ 8*N+n] + a[16*N+n]*b[12*N+n] + a[17*N+n]*b[17*N+n];
+      c[15*N+n] = a[12*N+n]*b[ 6*N+n] + a[13*N+n]*b[ 7*N+n] + b[ 8*N+n] + a[15*N+n]*b[ 9*N+n] + a[16*N+n]*b[13*N+n] + a[17*N+n]*b[18*N+n];
+      c[16*N+n] = a[12*N+n]*b[10*N+n] + a[13*N+n]*b[11*N+n] + b[12*N+n] + a[15*N+n]*b[13*N+n] + a[16*N+n]*b[14*N+n] + a[17*N+n]*b[19*N+n];
+      c[17*N+n] = a[12*N+n]*b[15*N+n] + a[13*N+n]*b[16*N+n] + b[17*N+n] + a[15*N+n]*b[18*N+n] + a[16*N+n]*b[19*N+n] + a[17*N+n]*b[20*N+n];
+      c[18*N+n] = a[18*N+n]*b[ 0*N+n] + a[19*N+n]*b[ 1*N+n] + a[21*N+n]*b[ 6*N+n] + a[22*N+n]*b[10*N+n];
+      c[19*N+n] = a[18*N+n]*b[ 1*N+n] + a[19*N+n]*b[ 2*N+n] + a[21*N+n]*b[ 7*N+n] + a[22*N+n]*b[11*N+n];
+      c[20*N+n] = a[18*N+n]*b[ 3*N+n] + a[19*N+n]*b[ 4*N+n] + a[21*N+n]*b[ 8*N+n] + a[22*N+n]*b[12*N+n];
+      c[21*N+n] = a[18*N+n]*b[ 6*N+n] + a[19*N+n]*b[ 7*N+n] + a[21*N+n]*b[ 9*N+n] + a[22*N+n]*b[13*N+n];
+      c[22*N+n] = a[18*N+n]*b[10*N+n] + a[19*N+n]*b[11*N+n] + a[21*N+n]*b[13*N+n] + a[22*N+n]*b[14*N+n];
+      c[23*N+n] = a[18*N+n]*b[15*N+n] + a[19*N+n]*b[16*N+n] + a[21*N+n]*b[18*N+n] + a[22*N+n]*b[19*N+n];
+      c[24*N+n] = a[24*N+n]*b[ 0*N+n] + a[25*N+n]*b[ 1*N+n] + a[27*N+n]*b[ 6*N+n] + a[28*N+n]*b[10*N+n];
+      c[25*N+n] = a[24*N+n]*b[ 1*N+n] + a[25*N+n]*b[ 2*N+n] + a[27*N+n]*b[ 7*N+n] + a[28*N+n]*b[11*N+n];
+      c[26*N+n] = a[24*N+n]*b[ 3*N+n] + a[25*N+n]*b[ 4*N+n] + a[27*N+n]*b[ 8*N+n] + a[28*N+n]*b[12*N+n];
+      c[27*N+n] = a[24*N+n]*b[ 6*N+n] + a[25*N+n]*b[ 7*N+n] + a[27*N+n]*b[ 9*N+n] + a[28*N+n]*b[13*N+n];
+      c[28*N+n] = a[24*N+n]*b[10*N+n] + a[25*N+n]*b[11*N+n] + a[27*N+n]*b[13*N+n] + a[28*N+n]*b[14*N+n];
+      c[29*N+n] = a[24*N+n]*b[15*N+n] + a[25*N+n]*b[16*N+n] + a[27*N+n]*b[18*N+n] + a[28*N+n]*b[19*N+n];
+      c[30*N+n] = b[15*N+n];
+      c[31*N+n] = b[16*N+n];
+      c[32*N+n] = b[17*N+n];
+      c[33*N+n] = b[18*N+n];
+      c[34*N+n] = b[19*N+n];
+      c[35*N+n] = b[20*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/MultHelixPropEndcap.ah b/RecoTracker/MkFitCore/src/MultHelixPropEndcap.ah
new file mode 100644
index 0000000000000..e119f424d37c6
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MultHelixPropEndcap.ah
@@ -0,0 +1,254 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector branch: n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per pass. a, b, c, N, T, IntrVec_t, LD, ST, MUL, FMA and ADD come from the including code (presumably FMA(x, y, z) = x*y + z -- confirm).
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      #ifdef AVX512_INTRINSICS
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      #else
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };
+      #endif
+      // NOTE(review): the initializers above have 16 / 8 entries, presumably one per float lane of a 512- / 256-bit IntrVec_t -- confirm.
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = b_0;
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = b_1;
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = b_3;
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = b_6;
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_4 = b_10;
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_5 = b_15;
+
+      // c_0..c_5 start from a plain b value and accumulate the a_2..a_5 terms; a_0 and a_1 are never loaded.
+      IntrVec_t a_2 = LD(a, 2);
+      c_0 = FMA(a_2, b_3, c_0);
+      IntrVec_t b_4 = LD(b, 4);
+      c_1 = FMA(a_2, b_4, c_1);
+      IntrVec_t b_5 = LD(b, 5);
+      c_2 = FMA(a_2, b_5, c_2);
+      IntrVec_t b_8 = LD(b, 8);
+      c_3 = FMA(a_2, b_8, c_3);
+      IntrVec_t b_12 = LD(b, 12);
+      c_4 = FMA(a_2, b_12, c_4);
+      IntrVec_t b_17 = LD(b, 17);
+      c_5 = FMA(a_2, b_17, c_5);
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(a_3, b_6, c_0);
+      IntrVec_t b_7 = LD(b, 7);
+      c_1 = FMA(a_3, b_7, c_1);
+      c_2 = FMA(a_3, b_8, c_2);
+      IntrVec_t b_9 = LD(b, 9);
+      c_3 = FMA(a_3, b_9, c_3);
+      IntrVec_t b_13 = LD(b, 13);
+      c_4 = FMA(a_3, b_13, c_4);
+      IntrVec_t b_18 = LD(b, 18);
+      c_5 = FMA(a_3, b_18, c_5);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_0 = FMA(a_4, b_10, c_0);
+      IntrVec_t b_11 = LD(b, 11);
+      c_1 = FMA(a_4, b_11, c_1);
+      c_2 = FMA(a_4, b_12, c_2);
+      c_3 = FMA(a_4, b_13, c_3);
+      IntrVec_t b_14 = LD(b, 14);
+      c_4 = FMA(a_4, b_14, c_4);
+      IntrVec_t b_19 = LD(b, 19);
+      c_5 = FMA(a_4, b_19, c_5);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_0 = FMA(a_5, b_15, c_0);
+      IntrVec_t b_16 = LD(b, 16);
+      c_1 = FMA(a_5, b_16, c_1);
+      c_2 = FMA(a_5, b_17, c_2);
+      c_3 = FMA(a_5, b_18, c_3);
+      ST(c, 0, c_0);
+      c_4 = FMA(a_5, b_19, c_4);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      IntrVec_t b_20 = LD(b, 20);
+      c_5 = FMA(a_5, b_20, c_5);
+      ST(c, 4, c_4);
+
+      // c_6..c_11 start from b_1, b_2, b_4, b_7, b_11, b_16 and accumulate the a_8..a_11 terms.
+      IntrVec_t c_6 = b_1;
+      IntrVec_t b_2 = LD(b, 2);
+      IntrVec_t c_7 = b_2;
+      ST(c, 5, c_5);
+      IntrVec_t c_8 = b_4;
+      IntrVec_t c_9 = b_7;
+      IntrVec_t c_10 = b_11;
+      IntrVec_t c_11 = b_16;
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_6 = FMA(a_8, b_3, c_6);
+      c_7 = FMA(a_8, b_4, c_7);
+      c_8 = FMA(a_8, b_5, c_8);
+      c_9 = FMA(a_8, b_8, c_9);
+      c_10 = FMA(a_8, b_12, c_10);
+      c_11 = FMA(a_8, b_17, c_11);
+
+      IntrVec_t a_9 = LD(a, 9);
+      c_6 = FMA(a_9, b_6, c_6);
+      c_7 = FMA(a_9, b_7, c_7);
+      c_8 = FMA(a_9, b_8, c_8);
+      c_9 = FMA(a_9, b_9, c_9);
+      c_10 = FMA(a_9, b_13, c_10);
+      c_11 = FMA(a_9, b_18, c_11);
+
+      IntrVec_t a_10 = LD(a, 10);
+      c_6 = FMA(a_10, b_10, c_6);
+      c_7 = FMA(a_10, b_11, c_7);
+      c_8 = FMA(a_10, b_12, c_8);
+      c_9 = FMA(a_10, b_13, c_9);
+      c_10 = FMA(a_10, b_14, c_10);
+      c_11 = FMA(a_10, b_19, c_11);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_6 = FMA(a_11, b_15, c_6);
+      c_7 = FMA(a_11, b_16, c_7);
+      c_8 = FMA(a_11, b_17, c_8);
+      c_9 = FMA(a_11, b_18, c_9);
+      c_10 = FMA(a_11, b_19, c_10);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      c_11 = FMA(a_11, b_20, c_11);
+
+
+
+
+
+      // c_12..c_17 are written as zeros.
+      ST(c, 12, all_zeros);
+      ST(c, 13, all_zeros);
+      ST(c, 14, all_zeros);
+      ST(c, 15, all_zeros);
+      ST(c, 16, all_zeros);
+      ST(c, 17, all_zeros);
+
+
+
+      // c_18..c_23 are copies of b values; no a value is involved.
+      IntrVec_t c_18 = b_6;
+      IntrVec_t c_19 = b_7;
+      IntrVec_t c_20 = b_8;
+      IntrVec_t c_21 = b_9;
+      ST(c, 11, c_11);
+      IntrVec_t c_22 = b_13;
+      IntrVec_t c_23 = b_18;
+
+
+
+
+      // c_24..c_29 use a_26, a_27 and a_29 plus one unscaled b value each (the ADD lines); a_28 is never loaded.
+      IntrVec_t a_26 = LD(a, 26);
+      IntrVec_t c_24 = MUL(a_26, b_3);
+      IntrVec_t c_25 = MUL(a_26, b_4);
+      IntrVec_t c_26 = MUL(a_26, b_5);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+      ST(c, 21, c_21);
+      ST(c, 22, c_22);
+      ST(c, 23, c_23);
+      IntrVec_t c_27 = MUL(a_26, b_8);
+      IntrVec_t c_28 = MUL(a_26, b_12);
+      IntrVec_t c_29 = MUL(a_26, b_17);
+
+      IntrVec_t a_27 = LD(a, 27);
+      c_24 = FMA(a_27, b_6, c_24);
+      c_25 = FMA(a_27, b_7, c_25);
+      c_26 = FMA(a_27, b_8, c_26);
+      c_27 = FMA(a_27, b_9, c_27);
+      c_28 = FMA(a_27, b_13, c_28);
+      c_29 = FMA(a_27, b_18, c_29);
+
+      c_24 = ADD(b_10, c_24);
+      c_25 = ADD(b_11, c_25);
+      c_26 = ADD(b_12, c_26);
+      c_27 = ADD(b_13, c_27);
+      c_28 = ADD(b_14, c_28);
+      c_29 = ADD(b_19, c_29);
+
+      IntrVec_t a_29 = LD(a, 29);
+      c_24 = FMA(a_29, b_15, c_24);
+      c_25 = FMA(a_29, b_16, c_25);
+      c_26 = FMA(a_29, b_17, c_26);
+      c_27 = FMA(a_29, b_18, c_27);
+      c_28 = FMA(a_29, b_19, c_28);
+      ST(c, 24, c_24);
+      ST(c, 25, c_25);
+      ST(c, 26, c_26);
+      ST(c, 27, c_27);
+      ST(c, 28, c_28);
+      c_29 = FMA(a_29, b_20, c_29);
+
+
+
+
+
+      // c_30..c_35 are copies of b_15..b_20.
+      IntrVec_t c_30 = b_15;
+      IntrVec_t c_31 = b_16;
+      IntrVec_t c_32 = b_17;
+      IntrVec_t c_33 = b_18;
+      ST(c, 29, c_29);
+      ST(c, 30, c_30);
+      ST(c, 31, c_31);
+      ST(c, 32, c_32);
+      ST(c, 33, c_33);
+      IntrVec_t c_34 = b_19;
+      IntrVec_t c_35 = b_20;
+      ST(c, 34, c_34);
+      ST(c, 35, c_35);
+   }
+
+#else
+   // Scalar branch: element i of the n-th matrix is at [i*N + n]. The index pattern is that of a 6x6 product c = a * b, with a and c indexed as 6*row + col and b holding the 21 values of a packed symmetric matrix.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {  // only a[2..5], a[8..11], a[26], a[27] and a[29] are read; a[0], a[7], a[21], a[28] and a[35] act as 1, every other a entry as 0
+      c[ 0*N+n] = b[ 0*N+n] + a[ 2*N+n]*b[ 3*N+n] + a[ 3*N+n]*b[ 6*N+n] + a[ 4*N+n]*b[10*N+n] + a[ 5*N+n]*b[15*N+n];
+      c[ 1*N+n] = b[ 1*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 7*N+n] + a[ 4*N+n]*b[11*N+n] + a[ 5*N+n]*b[16*N+n];
+      c[ 2*N+n] = b[ 3*N+n] + a[ 2*N+n]*b[ 5*N+n] + a[ 3*N+n]*b[ 8*N+n] + a[ 4*N+n]*b[12*N+n] + a[ 5*N+n]*b[17*N+n];
+      c[ 3*N+n] = b[ 6*N+n] + a[ 2*N+n]*b[ 8*N+n] + a[ 3*N+n]*b[ 9*N+n] + a[ 4*N+n]*b[13*N+n] + a[ 5*N+n]*b[18*N+n];
+      c[ 4*N+n] = b[10*N+n] + a[ 2*N+n]*b[12*N+n] + a[ 3*N+n]*b[13*N+n] + a[ 4*N+n]*b[14*N+n] + a[ 5*N+n]*b[19*N+n];
+      c[ 5*N+n] = b[15*N+n] + a[ 2*N+n]*b[17*N+n] + a[ 3*N+n]*b[18*N+n] + a[ 4*N+n]*b[19*N+n] + a[ 5*N+n]*b[20*N+n];
+      c[ 6*N+n] = b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n] + a[ 9*N+n]*b[ 6*N+n] + a[10*N+n]*b[10*N+n] + a[11*N+n]*b[15*N+n];
+      c[ 7*N+n] = b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n] + a[ 9*N+n]*b[ 7*N+n] + a[10*N+n]*b[11*N+n] + a[11*N+n]*b[16*N+n];
+      c[ 8*N+n] = b[ 4*N+n] + a[ 8*N+n]*b[ 5*N+n] + a[ 9*N+n]*b[ 8*N+n] + a[10*N+n]*b[12*N+n] + a[11*N+n]*b[17*N+n];
+      c[ 9*N+n] = b[ 7*N+n] + a[ 8*N+n]*b[ 8*N+n] + a[ 9*N+n]*b[ 9*N+n] + a[10*N+n]*b[13*N+n] + a[11*N+n]*b[18*N+n];
+      c[10*N+n] = b[11*N+n] + a[ 8*N+n]*b[12*N+n] + a[ 9*N+n]*b[13*N+n] + a[10*N+n]*b[14*N+n] + a[11*N+n]*b[19*N+n];
+      c[11*N+n] = b[16*N+n] + a[ 8*N+n]*b[17*N+n] + a[ 9*N+n]*b[18*N+n] + a[10*N+n]*b[19*N+n] + a[11*N+n]*b[20*N+n];
+      c[12*N+n] = 0;
+      c[13*N+n] = 0;
+      c[14*N+n] = 0;
+      c[15*N+n] = 0;
+      c[16*N+n] = 0;
+      c[17*N+n] = 0;
+      c[18*N+n] = b[ 6*N+n];
+      c[19*N+n] = b[ 7*N+n];
+      c[20*N+n] = b[ 8*N+n];
+      c[21*N+n] = b[ 9*N+n];
+      c[22*N+n] = b[13*N+n];
+      c[23*N+n] = b[18*N+n];
+      c[24*N+n] = a[26*N+n]*b[ 3*N+n] + a[27*N+n]*b[ 6*N+n] + b[10*N+n] + a[29*N+n]*b[15*N+n];
+      c[25*N+n] = a[26*N+n]*b[ 4*N+n] + a[27*N+n]*b[ 7*N+n] + b[11*N+n] + a[29*N+n]*b[16*N+n];
+      c[26*N+n] = a[26*N+n]*b[ 5*N+n] + a[27*N+n]*b[ 8*N+n] + b[12*N+n] + a[29*N+n]*b[17*N+n];
+      c[27*N+n] = a[26*N+n]*b[ 8*N+n] + a[27*N+n]*b[ 9*N+n] + b[13*N+n] + a[29*N+n]*b[18*N+n];
+      c[28*N+n] = a[26*N+n]*b[12*N+n] + a[27*N+n]*b[13*N+n] + b[14*N+n] + a[29*N+n]*b[19*N+n];
+      c[29*N+n] = a[26*N+n]*b[17*N+n] + a[27*N+n]*b[18*N+n] + b[19*N+n] + a[29*N+n]*b[20*N+n];
+      c[30*N+n] = b[15*N+n];
+      c[31*N+n] = b[16*N+n];
+      c[32*N+n] = b[17*N+n];
+      c[33*N+n] = b[18*N+n];
+      c[34*N+n] = b[19*N+n];
+      c[35*N+n] = b[20*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/MultHelixPropTransp.ah b/RecoTracker/MkFitCore/src/MultHelixPropTransp.ah
new file mode 100644
index 0000000000000..9741d47497205
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MultHelixPropTransp.ah
@@ -0,0 +1,232 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector branch: n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per pass. a, b, c, N, T, IntrVec_t, LD, ST, MUL, FMA and ADD come from the including code (presumably FMA(x, y, z) = x*y + z -- confirm). 21 results c_0..c_20 are stored.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t c_0 = MUL(b_0, a_0);
+
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(b_1, a_1, c_0);
+
+      // c_0 has no index-2 or index-5 term: b_2, a_2, b_5 and a_5 are never loaded.
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(b_3, a_3, c_0);
+
+      IntrVec_t b_4 = LD(b, 4);
+      IntrVec_t a_4 = LD(a, 4);
+      c_0 = FMA(b_4, a_4, c_0);
+
+      // c_1 and c_2 are driven by b_6, b_7, b_9 and b_10.
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_1 = MUL(b_6, a_0);
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_2 = MUL(b_6, a_6);
+      ST(c, 0, c_0);
+
+      IntrVec_t b_7 = LD(b, 7);
+      c_1 = FMA(b_7, a_1, c_1);
+      IntrVec_t a_7 = LD(a, 7);
+      c_2 = FMA(b_7, a_7, c_2);
+
+
+      IntrVec_t b_9 = LD(b, 9);
+      c_1 = FMA(b_9, a_3, c_1);
+      IntrVec_t a_9 = LD(a, 9);
+      c_2 = FMA(b_9, a_9, c_2);
+
+      IntrVec_t b_10 = LD(b, 10);
+      c_1 = FMA(b_10, a_4, c_1);
+      IntrVec_t a_10 = LD(a, 10);
+      c_2 = FMA(b_10, a_10, c_2);
+
+      // c_3..c_5 are driven by b_12..b_17; b_14 enters c_5 unscaled (ADD) and b_17 only multiplies a_17.
+      IntrVec_t b_12 = LD(b, 12);
+      IntrVec_t c_3 = MUL(b_12, a_0);
+      IntrVec_t c_4 = MUL(b_12, a_6);
+      IntrVec_t a_12 = LD(a, 12);
+      IntrVec_t c_5 = MUL(b_12, a_12);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+
+      IntrVec_t b_13 = LD(b, 13);
+      c_3 = FMA(b_13, a_1, c_3);
+      c_4 = FMA(b_13, a_7, c_4);
+      IntrVec_t a_13 = LD(a, 13);
+      c_5 = FMA(b_13, a_13, c_5);
+
+      IntrVec_t b_14 = LD(b, 14);
+      c_5 = ADD(b_14, c_5);
+
+      IntrVec_t b_15 = LD(b, 15);
+      c_3 = FMA(b_15, a_3, c_3);
+      c_4 = FMA(b_15, a_9, c_4);
+      IntrVec_t a_15 = LD(a, 15);
+      c_5 = FMA(b_15, a_15, c_5);
+
+      IntrVec_t b_16 = LD(b, 16);
+      c_3 = FMA(b_16, a_4, c_3);
+      c_4 = FMA(b_16, a_10, c_4);
+      IntrVec_t a_16 = LD(a, 16);
+      c_5 = FMA(b_16, a_16, c_5);
+
+      IntrVec_t b_17 = LD(b, 17);
+      IntrVec_t a_17 = LD(a, 17);
+      c_5 = FMA(b_17, a_17, c_5);
+      // c_6..c_9 are driven by b_18..b_23; b_20 enters c_8 unscaled (ADD) and b_23 only multiplies a_17.
+      IntrVec_t b_18 = LD(b, 18);
+      IntrVec_t c_6 = MUL(b_18, a_0);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_7 = MUL(b_18, a_6);
+      IntrVec_t c_8 = MUL(b_18, a_12);
+      IntrVec_t a_18 = LD(a, 18);
+      IntrVec_t c_9 = MUL(b_18, a_18);
+
+      IntrVec_t b_19 = LD(b, 19);
+      c_6 = FMA(b_19, a_1, c_6);
+      c_7 = FMA(b_19, a_7, c_7);
+      c_8 = FMA(b_19, a_13, c_8);
+      IntrVec_t a_19 = LD(a, 19);
+      c_9 = FMA(b_19, a_19, c_9);
+
+      IntrVec_t b_20 = LD(b, 20);
+      c_8 = ADD(b_20, c_8);
+
+      IntrVec_t b_21 = LD(b, 21);
+      c_6 = FMA(b_21, a_3, c_6);
+      c_7 = FMA(b_21, a_9, c_7);
+      c_8 = FMA(b_21, a_15, c_8);
+      IntrVec_t a_21 = LD(a, 21);
+      c_9 = FMA(b_21, a_21, c_9);
+
+      IntrVec_t b_22 = LD(b, 22);
+      c_6 = FMA(b_22, a_4, c_6);
+      c_7 = FMA(b_22, a_10, c_7);
+      c_8 = FMA(b_22, a_16, c_8);
+      IntrVec_t a_22 = LD(a, 22);
+      c_9 = FMA(b_22, a_22, c_9);
+
+      IntrVec_t b_23 = LD(b, 23);
+      c_8 = FMA(b_23, a_17, c_8);
+      // c_10..c_14 are driven by b_24..b_29; b_26 enters c_12 unscaled (ADD) and b_29 only multiplies a_17.
+      IntrVec_t b_24 = LD(b, 24);
+      IntrVec_t c_10 = MUL(b_24, a_0);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      IntrVec_t c_11 = MUL(b_24, a_6);
+      IntrVec_t c_12 = MUL(b_24, a_12);
+      IntrVec_t c_13 = MUL(b_24, a_18);
+      IntrVec_t a_24 = LD(a, 24);
+      IntrVec_t c_14 = MUL(b_24, a_24);
+
+      IntrVec_t b_25 = LD(b, 25);
+      c_10 = FMA(b_25, a_1, c_10);
+      c_11 = FMA(b_25, a_7, c_11);
+      c_12 = FMA(b_25, a_13, c_12);
+      c_13 = FMA(b_25, a_19, c_13);
+      IntrVec_t a_25 = LD(a, 25);
+      c_14 = FMA(b_25, a_25, c_14);
+
+      IntrVec_t b_26 = LD(b, 26);
+      c_12 = ADD(b_26, c_12);
+
+      IntrVec_t b_27 = LD(b, 27);
+      c_10 = FMA(b_27, a_3, c_10);
+      c_11 = FMA(b_27, a_9, c_11);
+      c_12 = FMA(b_27, a_15, c_12);
+      c_13 = FMA(b_27, a_21, c_13);
+      IntrVec_t a_27 = LD(a, 27);
+      c_14 = FMA(b_27, a_27, c_14);
+
+      IntrVec_t b_28 = LD(b, 28);
+      c_10 = FMA(b_28, a_4, c_10);
+      c_11 = FMA(b_28, a_10, c_11);
+      c_12 = FMA(b_28, a_16, c_12);
+      c_13 = FMA(b_28, a_22, c_13);
+      IntrVec_t a_28 = LD(a, 28);
+      c_14 = FMA(b_28, a_28, c_14);
+
+      IntrVec_t b_29 = LD(b, 29);
+      c_12 = FMA(b_29, a_17, c_12);
+      // c_15..c_19 are driven by b_30..b_35; b_32 enters c_17 unscaled (ADD), b_35 multiplies a_17 and is also copied to c_20.
+      IntrVec_t b_30 = LD(b, 30);
+      IntrVec_t c_15 = MUL(b_30, a_0);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      IntrVec_t c_16 = MUL(b_30, a_6);
+      IntrVec_t c_17 = MUL(b_30, a_12);
+      IntrVec_t c_18 = MUL(b_30, a_18);
+      IntrVec_t c_19 = MUL(b_30, a_24);
+
+      IntrVec_t b_31 = LD(b, 31);
+      c_15 = FMA(b_31, a_1, c_15);
+      c_16 = FMA(b_31, a_7, c_16);
+      c_17 = FMA(b_31, a_13, c_17);
+      c_18 = FMA(b_31, a_19, c_18);
+      c_19 = FMA(b_31, a_25, c_19);
+
+      IntrVec_t b_32 = LD(b, 32);
+      c_17 = ADD(b_32, c_17);
+
+      IntrVec_t b_33 = LD(b, 33);
+      c_15 = FMA(b_33, a_3, c_15);
+      c_16 = FMA(b_33, a_9, c_16);
+      c_17 = FMA(b_33, a_15, c_17);
+      c_18 = FMA(b_33, a_21, c_18);
+      c_19 = FMA(b_33, a_27, c_19);
+
+      IntrVec_t b_34 = LD(b, 34);
+      c_15 = FMA(b_34, a_4, c_15);
+      c_16 = FMA(b_34, a_10, c_16);
+      c_17 = FMA(b_34, a_16, c_17);
+      c_18 = FMA(b_34, a_22, c_18);
+      c_19 = FMA(b_34, a_28, c_19);
+
+      IntrVec_t b_35 = LD(b, 35);
+      c_17 = FMA(b_35, a_17, c_17);
+      IntrVec_t c_20 = b_35;
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+   }
+
+#else
+   // Scalar branch: element i of the n-th matrix is at [i*N + n]. The index pattern is that of c = b * transpose(a) for 6x6 a and b indexed as 6*row + col; only the 21 lower-triangle entries of c are written, in packed order.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {  // a[2], a[5], a[8], a[11], a[20], a[23], a[26] and a[29..34] are never read (they act as 0); a[14] and a[35] act as 1
+      c[ 0*N+n] = b[ 0*N+n]*a[ 0*N+n] + b[ 1*N+n]*a[ 1*N+n] + b[ 3*N+n]*a[ 3*N+n] + b[ 4*N+n]*a[ 4*N+n];
+      c[ 1*N+n] = b[ 6*N+n]*a[ 0*N+n] + b[ 7*N+n]*a[ 1*N+n] + b[ 9*N+n]*a[ 3*N+n] + b[10*N+n]*a[ 4*N+n];
+      c[ 2*N+n] = b[ 6*N+n]*a[ 6*N+n] + b[ 7*N+n]*a[ 7*N+n] + b[ 9*N+n]*a[ 9*N+n] + b[10*N+n]*a[10*N+n];
+      c[ 3*N+n] = b[12*N+n]*a[ 0*N+n] + b[13*N+n]*a[ 1*N+n] + b[15*N+n]*a[ 3*N+n] + b[16*N+n]*a[ 4*N+n];
+      c[ 4*N+n] = b[12*N+n]*a[ 6*N+n] + b[13*N+n]*a[ 7*N+n] + b[15*N+n]*a[ 9*N+n] + b[16*N+n]*a[10*N+n];
+      c[ 5*N+n] = b[12*N+n]*a[12*N+n] + b[13*N+n]*a[13*N+n] + b[14*N+n] + b[15*N+n]*a[15*N+n] + b[16*N+n]*a[16*N+n] + b[17*N+n]*a[17*N+n];
+      c[ 6*N+n] = b[18*N+n]*a[ 0*N+n] + b[19*N+n]*a[ 1*N+n] + b[21*N+n]*a[ 3*N+n] + b[22*N+n]*a[ 4*N+n];
+      c[ 7*N+n] = b[18*N+n]*a[ 6*N+n] + b[19*N+n]*a[ 7*N+n] + b[21*N+n]*a[ 9*N+n] + b[22*N+n]*a[10*N+n];
+      c[ 8*N+n] = b[18*N+n]*a[12*N+n] + b[19*N+n]*a[13*N+n] + b[20*N+n] + b[21*N+n]*a[15*N+n] + b[22*N+n]*a[16*N+n] + b[23*N+n]*a[17*N+n];
+      c[ 9*N+n] = b[18*N+n]*a[18*N+n] + b[19*N+n]*a[19*N+n] + b[21*N+n]*a[21*N+n] + b[22*N+n]*a[22*N+n];
+      c[10*N+n] = b[24*N+n]*a[ 0*N+n] + b[25*N+n]*a[ 1*N+n] + b[27*N+n]*a[ 3*N+n] + b[28*N+n]*a[ 4*N+n];
+      c[11*N+n] = b[24*N+n]*a[ 6*N+n] + b[25*N+n]*a[ 7*N+n] + b[27*N+n]*a[ 9*N+n] + b[28*N+n]*a[10*N+n];
+      c[12*N+n] = b[24*N+n]*a[12*N+n] + b[25*N+n]*a[13*N+n] + b[26*N+n] + b[27*N+n]*a[15*N+n] + b[28*N+n]*a[16*N+n] + b[29*N+n]*a[17*N+n];
+      c[13*N+n] = b[24*N+n]*a[18*N+n] + b[25*N+n]*a[19*N+n] + b[27*N+n]*a[21*N+n] + b[28*N+n]*a[22*N+n];
+      c[14*N+n] = b[24*N+n]*a[24*N+n] + b[25*N+n]*a[25*N+n] + b[27*N+n]*a[27*N+n] + b[28*N+n]*a[28*N+n];
+      c[15*N+n] = b[30*N+n]*a[ 0*N+n] + b[31*N+n]*a[ 1*N+n] + b[33*N+n]*a[ 3*N+n] + b[34*N+n]*a[ 4*N+n];
+      c[16*N+n] = b[30*N+n]*a[ 6*N+n] + b[31*N+n]*a[ 7*N+n] + b[33*N+n]*a[ 9*N+n] + b[34*N+n]*a[10*N+n];
+      c[17*N+n] = b[30*N+n]*a[12*N+n] + b[31*N+n]*a[13*N+n] + b[32*N+n] + b[33*N+n]*a[15*N+n] + b[34*N+n]*a[16*N+n] + b[35*N+n]*a[17*N+n];
+      c[18*N+n] = b[30*N+n]*a[18*N+n] + b[31*N+n]*a[19*N+n] + b[33*N+n]*a[21*N+n] + b[34*N+n]*a[22*N+n];
+      c[19*N+n] = b[30*N+n]*a[24*N+n] + b[31*N+n]*a[25*N+n] + b[33*N+n]*a[27*N+n] + b[34*N+n]*a[28*N+n];
+      c[20*N+n] = b[35*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/MultHelixPropTranspEndcap.ah b/RecoTracker/MkFitCore/src/MultHelixPropTranspEndcap.ah
new file mode 100644
index 0000000000000..41745b730c4fb
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/MultHelixPropTranspEndcap.ah
@@ -0,0 +1,209 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector branch: n advances by MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) per pass. a, b, c, N, T, IntrVec_t, LD, ST, MUL, FMA and ADD come from the including code (presumably FMA(x, y, z) = x*y + z -- confirm). 21 results c_0..c_20 are stored.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      #ifdef AVX512_INTRINSICS
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      #else
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };
+      #endif
+      // NOTE(review): the initializers above have 16 / 8 entries, presumably one per float lane of a 512- / 256-bit IntrVec_t -- confirm.
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = b_0;
+
+      // c_0 starts from b_0 and accumulates b_2..b_5 times a_2..a_5; b_1 is never loaded.
+      IntrVec_t b_2 = LD(b, 2);
+      IntrVec_t a_2 = LD(a, 2);
+      c_0 = FMA(b_2, a_2, c_0);
+
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(b_3, a_3, c_0);
+
+      IntrVec_t b_4 = LD(b, 4);
+      IntrVec_t a_4 = LD(a, 4);
+      c_0 = FMA(b_4, a_4, c_0);
+
+      IntrVec_t b_5 = LD(b, 5);
+      IntrVec_t a_5 = LD(a, 5);
+      c_0 = FMA(b_5, a_5, c_0);
+      // c_1 and c_2 start from b_6 and b_7 and accumulate the b_8..b_11 terms.
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_1 = b_6;
+
+      IntrVec_t b_7 = LD(b, 7);
+      IntrVec_t c_2 = b_7;
+      ST(c, 0, c_0);
+
+      IntrVec_t b_8 = LD(b, 8);
+      c_1 = FMA(b_8, a_2, c_1);
+      IntrVec_t a_8 = LD(a, 8);
+      c_2 = FMA(b_8, a_8, c_2);
+
+      IntrVec_t b_9 = LD(b, 9);
+      c_1 = FMA(b_9, a_3, c_1);
+      IntrVec_t a_9 = LD(a, 9);
+      c_2 = FMA(b_9, a_9, c_2);
+
+      IntrVec_t b_10 = LD(b, 10);
+      c_1 = FMA(b_10, a_4, c_1);
+      IntrVec_t a_10 = LD(a, 10);
+      c_2 = FMA(b_10, a_10, c_2);
+
+      IntrVec_t b_11 = LD(b, 11);
+      c_1 = FMA(b_11, a_5, c_1);
+      IntrVec_t a_11 = LD(a, 11);
+      c_2 = FMA(b_11, a_11, c_2);
+      // c_3 and c_4 start from b_12 and b_13 and accumulate the b_14..b_17 terms; c_5 is written as zero.
+      IntrVec_t b_12 = LD(b, 12);
+      IntrVec_t c_3 = b_12;
+      ST(c, 1, c_1);
+
+      IntrVec_t b_13 = LD(b, 13);
+      IntrVec_t c_4 = b_13;
+      ST(c, 2, c_2);
+
+      IntrVec_t b_14 = LD(b, 14);
+      c_3 = FMA(b_14, a_2, c_3);
+      c_4 = FMA(b_14, a_8, c_4);
+
+      IntrVec_t b_15 = LD(b, 15);
+      c_3 = FMA(b_15, a_3, c_3);
+      c_4 = FMA(b_15, a_9, c_4);
+
+      IntrVec_t b_16 = LD(b, 16);
+      c_3 = FMA(b_16, a_4, c_3);
+      c_4 = FMA(b_16, a_10, c_4);
+
+      IntrVec_t b_17 = LD(b, 17);
+      c_3 = FMA(b_17, a_5, c_3);
+      c_4 = FMA(b_17, a_11, c_4);
+      ST(c, 5, all_zeros);
+      // c_6 and c_7 start from b_18 and b_19 and accumulate the b_20..b_23 terms; c_8 is zero and c_9 is a copy of b_21.
+      IntrVec_t b_18 = LD(b, 18);
+      IntrVec_t c_6 = b_18;
+
+      IntrVec_t b_19 = LD(b, 19);
+      IntrVec_t c_7 = b_19;
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+
+      IntrVec_t b_20 = LD(b, 20);
+      c_6 = FMA(b_20, a_2, c_6);
+      c_7 = FMA(b_20, a_8, c_7);
+
+      IntrVec_t b_21 = LD(b, 21);
+      c_6 = FMA(b_21, a_3, c_6);
+      c_7 = FMA(b_21, a_9, c_7);
+      IntrVec_t c_9 = b_21;
+
+      IntrVec_t b_22 = LD(b, 22);
+      c_6 = FMA(b_22, a_4, c_6);
+      c_7 = FMA(b_22, a_10, c_7);
+
+      IntrVec_t b_23 = LD(b, 23);
+      c_6 = FMA(b_23, a_5, c_6);
+      c_7 = FMA(b_23, a_11, c_7);
+      ST(c, 8, all_zeros);
+      // c_10 and c_11 start from b_24 and b_25; c_12 is zero, c_13 copies b_27, c_14 combines b_26..b_29 with a_26, a_27 and a_29.
+      IntrVec_t b_24 = LD(b, 24);
+      IntrVec_t c_10 = b_24;
+
+      IntrVec_t b_25 = LD(b, 25);
+      IntrVec_t c_11 = b_25;
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 9, c_9);
+
+      IntrVec_t b_26 = LD(b, 26);
+      c_10 = FMA(b_26, a_2, c_10);
+      c_11 = FMA(b_26, a_8, c_11);
+      IntrVec_t a_26 = LD(a, 26);
+      IntrVec_t c_14 = MUL(b_26, a_26);
+
+      IntrVec_t b_27 = LD(b, 27);
+      c_10 = FMA(b_27, a_3, c_10);
+      c_11 = FMA(b_27, a_9, c_11);
+      IntrVec_t c_13 = b_27;
+      IntrVec_t a_27 = LD(a, 27);
+      c_14 = FMA(b_27, a_27, c_14);
+
+      IntrVec_t b_28 = LD(b, 28);
+      c_10 = FMA(b_28, a_4, c_10);
+      c_11 = FMA(b_28, a_10, c_11);
+      c_14 = ADD(b_28, c_14);
+
+      IntrVec_t b_29 = LD(b, 29);
+      c_10 = FMA(b_29, a_5, c_10);
+      c_11 = FMA(b_29, a_11, c_11);
+      ST(c, 12, all_zeros);
+      IntrVec_t a_29 = LD(a, 29);
+      c_14 = FMA(b_29, a_29, c_14);
+      // c_15 and c_16 start from b_30 and b_31; c_17 is zero, c_18 copies b_33, c_19 combines b_32..b_35 with a_26, a_27 and a_29; c_20 copies b_35.
+      IntrVec_t b_30 = LD(b, 30);
+      IntrVec_t c_15 = b_30;
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+
+      IntrVec_t b_31 = LD(b, 31);
+      IntrVec_t c_16 = b_31;
+
+      IntrVec_t b_32 = LD(b, 32);
+      c_15 = FMA(b_32, a_2, c_15);
+      c_16 = FMA(b_32, a_8, c_16);
+      IntrVec_t c_19 = MUL(b_32, a_26);
+
+      IntrVec_t b_33 = LD(b, 33);
+      c_15 = FMA(b_33, a_3, c_15);
+      c_16 = FMA(b_33, a_9, c_16);
+      IntrVec_t c_18 = b_33;
+      c_19 = FMA(b_33, a_27, c_19);
+
+      IntrVec_t b_34 = LD(b, 34);
+      c_15 = FMA(b_34, a_4, c_15);
+      c_16 = FMA(b_34, a_10, c_16);
+      c_19 = ADD(b_34, c_19);
+
+      IntrVec_t b_35 = LD(b, 35);
+      c_15 = FMA(b_35, a_5, c_15);
+      c_16 = FMA(b_35, a_11, c_16);
+      ST(c, 17, all_zeros);
+      c_19 = FMA(b_35, a_29, c_19);
+      IntrVec_t c_20 = b_35;
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+   }
+
+#else
+   // Scalar branch: element i of the n-th matrix is at [i*N + n]. The index pattern is that of c = b * transpose(a) for 6x6 a and b indexed as 6*row + col; only the 21 lower-triangle entries of c are written, in packed order.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {  // only a[2..5], a[8..11], a[26], a[27] and a[29] are read; a[0], a[7], a[21], a[28] and a[35] act as 1, every other a entry as 0
+      c[ 0*N+n] = b[ 0*N+n] + b[ 2*N+n]*a[ 2*N+n] + b[ 3*N+n]*a[ 3*N+n] + b[ 4*N+n]*a[ 4*N+n] + b[ 5*N+n]*a[ 5*N+n];
+      c[ 1*N+n] = b[ 6*N+n] + b[ 8*N+n]*a[ 2*N+n] + b[ 9*N+n]*a[ 3*N+n] + b[10*N+n]*a[ 4*N+n] + b[11*N+n]*a[ 5*N+n];
+      c[ 2*N+n] = b[ 7*N+n] + b[ 8*N+n]*a[ 8*N+n] + b[ 9*N+n]*a[ 9*N+n] + b[10*N+n]*a[10*N+n] + b[11*N+n]*a[11*N+n];
+      c[ 3*N+n] = b[12*N+n] + b[14*N+n]*a[ 2*N+n] + b[15*N+n]*a[ 3*N+n] + b[16*N+n]*a[ 4*N+n] + b[17*N+n]*a[ 5*N+n];
+      c[ 4*N+n] = b[13*N+n] + b[14*N+n]*a[ 8*N+n] + b[15*N+n]*a[ 9*N+n] + b[16*N+n]*a[10*N+n] + b[17*N+n]*a[11*N+n];
+      c[ 5*N+n] = 0;
+      c[ 6*N+n] = b[18*N+n] + b[20*N+n]*a[ 2*N+n] + b[21*N+n]*a[ 3*N+n] + b[22*N+n]*a[ 4*N+n] + b[23*N+n]*a[ 5*N+n];
+      c[ 7*N+n] = b[19*N+n] + b[20*N+n]*a[ 8*N+n] + b[21*N+n]*a[ 9*N+n] + b[22*N+n]*a[10*N+n] + b[23*N+n]*a[11*N+n];
+      c[ 8*N+n] = 0;
+      c[ 9*N+n] = b[21*N+n];
+      c[10*N+n] = b[24*N+n] + b[26*N+n]*a[ 2*N+n] + b[27*N+n]*a[ 3*N+n] + b[28*N+n]*a[ 4*N+n] + b[29*N+n]*a[ 5*N+n];
+      c[11*N+n] = b[25*N+n] + b[26*N+n]*a[ 8*N+n] + b[27*N+n]*a[ 9*N+n] + b[28*N+n]*a[10*N+n] + b[29*N+n]*a[11*N+n];
+      c[12*N+n] = 0;
+      c[13*N+n] = b[27*N+n];
+      c[14*N+n] = b[26*N+n]*a[26*N+n] + b[27*N+n]*a[27*N+n] + b[28*N+n] + b[29*N+n]*a[29*N+n];
+      c[15*N+n] = b[30*N+n] + b[32*N+n]*a[ 2*N+n] + b[33*N+n]*a[ 3*N+n] + b[34*N+n]*a[ 4*N+n] + b[35*N+n]*a[ 5*N+n];
+      c[16*N+n] = b[31*N+n] + b[32*N+n]*a[ 8*N+n] + b[33*N+n]*a[ 9*N+n] + b[34*N+n]*a[10*N+n] + b[35*N+n]*a[11*N+n];
+      c[17*N+n] = 0;
+      c[18*N+n] = b[33*N+n];
+      c[19*N+n] = b[32*N+n]*a[26*N+n] + b[33*N+n]*a[27*N+n] + b[34*N+n] + b[35*N+n]*a[29*N+n];
+      c[20*N+n] = b[35*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/Pool.h b/RecoTracker/MkFitCore/src/Pool.h
new file mode 100644
index 0000000000000..7ca6a880f7539
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Pool.h
@@ -0,0 +1,54 @@
+#ifndef RecoTracker_MkFitCore_src_Pool_h
+#define RecoTracker_MkFitCore_src_Pool_h
+
+#include <cstddef>
+#include <cstdlib>
+#include <memory>
+#include <new>
+
+#include "oneapi/tbb/concurrent_queue.h"
+
+namespace mkfit {
+
+  /**
+   * Pool of reusable, heap-allocated helper objects of type TT.
+   *
+   * Objects are handed out through makeOrGet() as a std::unique_ptr whose
+   * deleter pushes the object back into the pool instead of destroying it.
+   * Objects still held by the pool are destroyed in the destructor.
+   *
+   * Storage goes through a TBB concurrent queue, so populate() and
+   * makeOrGet() may be called from several threads. size() relies on
+   * unsafe_size(), whose result is only a snapshot while other threads
+   * push or pop concurrently.
+   *
+   * NOTE(review): the deleter returned by makeOrGet() captures `this`, so the
+   * pool has to outlive (and must not be relocated under) every handle it
+   * has given out.
+   */
+  template <typename TT>
+  class Pool {
+  public:
+    Pool() = default;
+
+    // Destroys and frees every object that is currently parked in the pool.
+    ~Pool() {
+      TT *x = nullptr;
+      while (m_stack.try_pop(x)) {
+        destroy(x);
+      }
+    }
+
+    // Number of objects currently parked in the pool (see class note on concurrency).
+    std::size_t size() const { return m_stack.unsafe_size(); }
+
+    // Pre-creates `threads` objects and parks them in the pool.
+    void populate(int threads = Config::numThreadsFinder) {
+      for (int i = 0; i < threads; ++i) {
+        m_stack.push(create());
+      }
+    }
+
+    // Returns a parked object if one is available, otherwise a freshly created one.
+    // The returned unique_ptr gives the object back to the pool when it goes out of scope.
+    auto makeOrGet() {
+      TT *x = nullptr;
+      if (not m_stack.try_pop(x)) {
+        x = create();
+      }
+      auto deleter = [this](TT *ptr) { this->addBack(ptr); };
+      return std::unique_ptr<TT, decltype(deleter)>(x, std::move(deleter));
+    }
+
+  private:
+    // Allocates 64-byte (or stricter, if TT requires it) aligned storage and
+    // default-constructs a TT in it. Throws std::bad_alloc if allocation fails.
+    TT *create() {
+      // Both values are powers of two, so the larger one is a valid alignment for both.
+      constexpr std::size_t alignment = alignof(TT) > 64 ? alignof(TT) : 64;
+      // std::aligned_alloc wants the size to be an integral multiple of the
+      // alignment; some C libraries return nullptr otherwise.
+      constexpr std::size_t bytes = (sizeof(TT) + alignment - 1) / alignment * alignment;
+
+      void *mem = std::aligned_alloc(alignment, bytes);
+      if (mem == nullptr) {
+        throw std::bad_alloc();
+      }
+      try {
+        return new (mem) TT;
+      } catch (...) {
+        // Do not leak the raw storage if TT's constructor throws.
+        std::free(mem);
+        throw;
+      }
+    }
+
+    // Counterpart of create(): runs the destructor and releases the storage.
+    void destroy(TT *x) {
+      x->~TT();
+      std::free(x);
+    }
+
+    // Parks an object in the pool; used by the deleter of makeOrGet().
+    void addBack(TT *x) { m_stack.push(x); }
+
+    tbb::concurrent_queue<TT *> m_stack;
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/ProjectResErr.ah b/RecoTracker/MkFitCore/src/ProjectResErr.ah
new file mode 100644
index 0000000000000..4d4cfa9d8dd51
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/ProjectResErr.ah
@@ -0,0 +1,64 @@
+// Code fragment without declarations of its own: it uses `a`, `b`, `c` and `N`
+// (plus `T`, IntrVec_t, LD/ST/MUL/FMA and MPLEX_INTRINSICS_WIDTH_BYTES in the
+// intrinsics branch) and therefore has to be #include-d in a scope that
+// provides them.
+//
+// For every slot n, element e of an operand lives at index e*N+n (see the
+// scalar branch). Nine outputs c[0..8] are computed from a[0], a[1], a[6],
+// a[7] and b[0..5]:
+//   c0 = a0*b0 + a1*b1   c1 = a0*b1 + a1*b2   c2 = a0*b3 + a1*b4
+//   c3 = b3              c4 = b4              c5 = b5
+//   c6 = a6*b0 + a7*b1   c7 = a6*b1 + a7*b2   c8 = a6*b3 + a7*b4
+// NOTE(review): this reads like a 3x3 product C = A*B with B a packed
+// symmetric matrix and the rows of A taken as (a0,a1,0), (0,0,1), (a6,a7,0);
+// confirm against the script that generated this file.
+#ifdef MPLEX_INTRINSICS
+
+   // Vectorised variant: each pass handles MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) slots.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_1 = FMA(a_1, b_2, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_1, b_4, c_2);
+
+
+
+
+      // Outputs 3..5 are plain copies of b[3..5].
+      IntrVec_t c_3 = b_3;
+      IntrVec_t c_4 = b_4;
+      IntrVec_t b_5 = LD(b, 5);
+      IntrVec_t c_5 = b_5;
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_6 = MUL(a_6, b_0);
+      IntrVec_t c_7 = MUL(a_6, b_1);
+      IntrVec_t c_8 = MUL(a_6, b_3);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_6 = FMA(a_7, b_1, c_6);
+      c_7 = FMA(a_7, b_2, c_7);
+      c_8 = FMA(a_7, b_4, c_8);
+
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+   }
+
+#else
+
+   // Scalar variant: one slot per iteration, vectorisation left to the compiler.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n];
+      c[ 3*N+n] = b[ 3*N+n];
+      c[ 4*N+n] = b[ 4*N+n];
+      c[ 5*N+n] = b[ 5*N+n];
+      c[ 6*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n];
+      c[ 7*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n];
+      c[ 8*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/ProjectResErrTransp.ah b/RecoTracker/MkFitCore/src/ProjectResErrTransp.ah
new file mode 100644
index 0000000000000..82ff7a41fd613
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/ProjectResErrTransp.ah
@@ -0,0 +1,55 @@
+// Code fragment without declarations of its own: it uses `a`, `b`, `c` and `N`
+// (plus `T`, IntrVec_t, LD/ST/MUL/FMA and MPLEX_INTRINSICS_WIDTH_BYTES in the
+// intrinsics branch) and therefore has to be #include-d in a scope that
+// provides them.
+//
+// For every slot n, element e of an operand lives at index e*N+n (see the
+// scalar branch). Six outputs c[0..5] are computed from a[0], a[1], a[6],
+// a[7] and b[0], b[1], b[3..8]:
+//   c0 = b0*a0 + b1*a1   c1 = b3*a0 + b4*a1   c2 = b5
+//   c3 = b6*a0 + b7*a1   c4 = b8              c5 = b6*a6 + b7*a7
+// NOTE(review): this reads like the packed symmetric result of C = B * A^T
+// with B a full 3x3 matrix and the rows of A taken as (a0,a1,0), (0,0,1),
+// (a6,a7,0); confirm against the script that generated this file.
+#ifdef MPLEX_INTRINSICS
+
+   // Vectorised variant: each pass handles MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T) slots.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t c_0 = MUL(b_0, a_0);
+
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(b_1, a_1, c_0);
+
+
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_1 = MUL(b_3, a_0);
+
+      IntrVec_t b_4 = LD(b, 4);
+      c_1 = FMA(b_4, a_1, c_1);
+      ST(c, 0, c_0);
+
+      // Output 2 is a plain copy of b[5].
+      IntrVec_t b_5 = LD(b, 5);
+      IntrVec_t c_2 = b_5;
+
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = MUL(b_6, a_0);
+      ST(c, 1, c_1);
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_5 = MUL(b_6, a_6);
+      ST(c, 2, c_2);
+
+      IntrVec_t b_7 = LD(b, 7);
+      c_3 = FMA(b_7, a_1, c_3);
+      IntrVec_t a_7 = LD(a, 7);
+      c_5 = FMA(b_7, a_7, c_5);
+
+      // Output 4 is a plain copy of b[8].
+      IntrVec_t b_8 = LD(b, 8);
+      IntrVec_t c_4 = b_8;
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+   }
+
+#else
+
+   // Scalar variant: one slot per iteration, vectorisation left to the compiler.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = b[ 0*N+n]*a[ 0*N+n] + b[ 1*N+n]*a[ 1*N+n];
+      c[ 1*N+n] = b[ 3*N+n]*a[ 0*N+n] + b[ 4*N+n]*a[ 1*N+n];
+      c[ 2*N+n] = b[ 5*N+n];
+      c[ 3*N+n] = b[ 6*N+n]*a[ 0*N+n] + b[ 7*N+n]*a[ 1*N+n];
+      c[ 4*N+n] = b[ 8*N+n];
+      c[ 5*N+n] = b[ 6*N+n]*a[ 6*N+n] + b[ 7*N+n]*a[ 7*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.cc b/RecoTracker/MkFitCore/src/PropagationMPlex.cc
new file mode 100644
index 0000000000000..806c4767402b1
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.cc
@@ -0,0 +1,920 @@
+#include "MaterialEffects.h"
+#include "PropagationMPlex.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+//==============================================================================
+// propagateLineToRMPlex
+//==============================================================================
+
+using namespace Matriplex;
+
+namespace mkfit {
+
+  // Straight-line propagation of all NN slots towards the transverse radius of
+  // the hit position in msPar.
+  //
+  // Reads psPar elements 0..2 (used as position) and 3..5 (used as momentum),
+  // msPar elements 0 and 1, and psErr; writes outPar and outErr.
+  // msErr is not referenced in this body. The loop always runs over all NN
+  // slots; N_proc does not appear in the visible code (NOTE(review): the
+  // dprint_np macro may reference it -- confirm in Debug.h).
+  //
+  // NOTE(review): outErr entries are written before psErr entries with the
+  // same index are read again further down, so psErr and outErr are presumably
+  // required to be distinct objects -- confirm with callers.
+  void propagateLineToRMPlex(const MPlexLS& psErr,
+                             const MPlexLV& psPar,
+                             const MPlexHS& msErr,
+                             const MPlexHV& msPar,
+                             MPlexLS& outErr,
+                             MPlexLV& outPar,
+                             const int N_proc) {
+    // XXX Regenerate parts below with a script.
+
+    const idx_t N = NN;
+
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {
+      // Cosine of the angle between the transverse position (0,1) and the transverse momentum (3,4).
+      const float cosA = (psPar[0 * N + n] * psPar[3 * N + n] + psPar[1 * N + n] * psPar[4 * N + n]) /
+                         (std::sqrt((psPar[0 * N + n] * psPar[0 * N + n] + psPar[1 * N + n] * psPar[1 * N + n]) *
+                                    (psPar[3 * N + n] * psPar[3 * N + n] + psPar[4 * N + n] * psPar[4 * N + n])));
+      // Radial distance between hit and current position, divided by cosA.
+      const float dr = (hipo(msPar[0 * N + n], msPar[1 * N + n]) - hipo(psPar[0 * N + n], psPar[1 * N + n])) / cosA;
+
+      dprint_np(n, "propagateLineToRMPlex dr=" << dr);
+
+      const float pt = hipo(psPar[3 * N + n], psPar[4 * N + n]);
+      const float p = dr / pt;  // path
+      const float psq = p * p;
+
+      // Position advances by p times the momentum; momentum is copied unchanged.
+      outPar[0 * N + n] = psPar[0 * N + n] + p * psPar[3 * N + n];
+      outPar[1 * N + n] = psPar[1 * N + n] + p * psPar[4 * N + n];
+      outPar[2 * N + n] = psPar[2 * N + n] + p * psPar[5 * N + n];
+      outPar[3 * N + n] = psPar[3 * N + n];
+      outPar[4 * N + n] = psPar[4 * N + n];
+      outPar[5 * N + n] = psPar[5 * N + n];
+
+      {
+        // Error propagation: entries 0..5 are copied, the remaining 15 entries
+        // pick up terms linear and quadratic in the path p.
+        const MPlexLS& A = psErr;
+        MPlexLS& B = outErr;
+
+        B.fArray[0 * N + n] = A.fArray[0 * N + n];
+        B.fArray[1 * N + n] = A.fArray[1 * N + n];
+        B.fArray[2 * N + n] = A.fArray[2 * N + n];
+        B.fArray[3 * N + n] = A.fArray[3 * N + n];
+        B.fArray[4 * N + n] = A.fArray[4 * N + n];
+        B.fArray[5 * N + n] = A.fArray[5 * N + n];
+        B.fArray[6 * N + n] = A.fArray[6 * N + n] + p * A.fArray[0 * N + n];
+        B.fArray[7 * N + n] = A.fArray[7 * N + n] + p * A.fArray[1 * N + n];
+        B.fArray[8 * N + n] = A.fArray[8 * N + n] + p * A.fArray[3 * N + n];
+        B.fArray[9 * N + n] =
+            A.fArray[9 * N + n] + p * (A.fArray[6 * N + n] + A.fArray[6 * N + n]) + psq * A.fArray[0 * N + n];
+        B.fArray[10 * N + n] = A.fArray[10 * N + n] + p * A.fArray[1 * N + n];
+        B.fArray[11 * N + n] = A.fArray[11 * N + n] + p * A.fArray[2 * N + n];
+        B.fArray[12 * N + n] = A.fArray[12 * N + n] + p * A.fArray[4 * N + n];
+        B.fArray[13 * N + n] =
+            A.fArray[13 * N + n] + p * (A.fArray[7 * N + n] + A.fArray[10 * N + n]) + psq * A.fArray[1 * N + n];
+        B.fArray[14 * N + n] =
+            A.fArray[14 * N + n] + p * (A.fArray[11 * N + n] + A.fArray[11 * N + n]) + psq * A.fArray[2 * N + n];
+        B.fArray[15 * N + n] = A.fArray[15 * N + n] + p * A.fArray[3 * N + n];
+        B.fArray[16 * N + n] = A.fArray[16 * N + n] + p * A.fArray[4 * N + n];
+        B.fArray[17 * N + n] = A.fArray[17 * N + n] + p * A.fArray[5 * N + n];
+        B.fArray[18 * N + n] =
+            A.fArray[18 * N + n] + p * (A.fArray[8 * N + n] + A.fArray[15 * N + n]) + psq * A.fArray[3 * N + n];
+        B.fArray[19 * N + n] =
+            A.fArray[19 * N + n] + p * (A.fArray[12 * N + n] + A.fArray[16 * N + n]) + psq * A.fArray[4 * N + n];
+        B.fArray[20 * N + n] =
+            A.fArray[20 * N + n] + p * (A.fArray[17 * N + n] + A.fArray[17 * N + n]) + psq * A.fArray[5 * N + n];
+      }
+
+      dprint_np(n, "propagateLineToRMPlex arrive at r=" << hipo(outPar[0 * N + n], outPar[1 * N + n]));
+    }
+  }
+
+}  // end namespace mkfit
+
+//==============================================================================
+// propagateHelixToRMPlex
+//==============================================================================
+
+namespace {
+  using namespace mkfit;
+
+  // Wrapper around the kernel in MultHelixProp.ah; intended result: C = A * B.
+  // The locals T, N, a, b and c are presumably the names the included
+  // fragment refers to, so their spelling must not change.
+  void MultHelixProp(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
+    using T = float;
+    const idx_t N = NN;
+
+    // Raw element arrays of the two inputs and of the output.
+    const T* a = A.fArray;
+    const T* b = B.fArray;
+    T* c = C.fArray;
+
+    // Alignment hints for all three arrays.
+    ASSUME_ALIGNED(a, 64);
+    ASSUME_ALIGNED(b, 64);
+    ASSUME_ALIGNED(c, 64);
+
+#include "MultHelixProp.ah"
+  }
+
+  // Wrapper around the kernel in MultHelixPropTransp.ah; intended result: C = B * AT.
+  // The locals T, N, a, b and c are presumably the names the included
+  // fragment refers to, so their spelling must not change.
+  void MultHelixPropTransp(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
+    using T = float;
+    const idx_t N = NN;
+
+    // Raw element arrays of the two inputs and of the output.
+    const T* a = A.fArray;
+    const T* b = B.fArray;
+    T* c = C.fArray;
+
+    // Alignment hints for all three arrays.
+    ASSUME_ALIGNED(a, 64);
+    ASSUME_ALIGNED(b, 64);
+    ASSUME_ALIGNED(c, 64);
+
+#include "MultHelixPropTransp.ah"
+  }
+
+  // Wrapper around the kernel in MultHelixPropEndcap.ah; intended result: C = A * B.
+  // The locals T, N, a, b and c are presumably the names the included
+  // fragment refers to, so their spelling must not change.
+  void MultHelixPropEndcap(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
+    using T = float;
+    const idx_t N = NN;
+
+    // Raw element arrays of the two inputs and of the output.
+    const T* a = A.fArray;
+    const T* b = B.fArray;
+    T* c = C.fArray;
+
+    // Alignment hints for all three arrays.
+    ASSUME_ALIGNED(a, 64);
+    ASSUME_ALIGNED(b, 64);
+    ASSUME_ALIGNED(c, 64);
+
+#include "MultHelixPropEndcap.ah"
+  }
+
+  // Wrapper around the kernel in MultHelixPropTranspEndcap.ah; intended result: C = B * AT.
+  // The locals T, N, a, b and c are presumably the names the included
+  // fragment refers to, so their spelling must not change.
+  void MultHelixPropTranspEndcap(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
+    using T = float;
+    const idx_t N = NN;
+
+    // Raw element arrays of the two inputs and of the output.
+    const T* a = A.fArray;
+    const T* b = B.fArray;
+    T* c = C.fArray;
+
+    // Alignment hints for all three arrays.
+    ASSUME_ALIGNED(a, 64);
+    ASSUME_ALIGNED(b, 64);
+    ASSUME_ALIGNED(c, 64);
+
+#include "MultHelixPropTranspEndcap.ah"
+  }
+
+  // Single-slot product C = A * B for slot n only; other slots of C are left untouched.
+  //
+  // A is used in full. Of B only the elements (row,col)
+  //   (0,0) (0,1) (0,3) (0,4)  (1,0) (1,1) (1,3) (1,4)
+  //   (2,0) (2,1) (2,3) (2,5)  (4,0) (4,1) (4,3)
+  // are read (flat index row*6+col). B(2,2), B(3,3), B(4,4) and B(5,5) are
+  // treated as 1 and every remaining element of B as 0, which is why e.g.
+  // column 2 of C is a straight copy of column 2 of A.
+  // Callers must therefore only pass a B with exactly that structure.
+  inline void MultHelixPropTemp(const MPlexLL& A, const MPlexLL& B, MPlexLL& C, int n) {
+    // C = A * B
+
+    typedef float T;
+    const idx_t N = NN;
+
+    const T* a = A.fArray;
+    ASSUME_ALIGNED(a, 64);
+    const T* b = B.fArray;
+    ASSUME_ALIGNED(b, 64);
+    T* c = C.fArray;
+    ASSUME_ALIGNED(c, 64);
+
+    // Row 0 of C.
+    c[0 * N + n] = a[0 * N + n] * b[0 * N + n] + a[1 * N + n] * b[6 * N + n] + a[2 * N + n] * b[12 * N + n] +
+                   a[4 * N + n] * b[24 * N + n];
+    c[1 * N + n] = a[0 * N + n] * b[1 * N + n] + a[1 * N + n] * b[7 * N + n] + a[2 * N + n] * b[13 * N + n] +
+                   a[4 * N + n] * b[25 * N + n];
+    c[2 * N + n] = a[2 * N + n];
+    c[3 * N + n] = a[0 * N + n] * b[3 * N + n] + a[1 * N + n] * b[9 * N + n] + a[2 * N + n] * b[15 * N + n] +
+                   a[3 * N + n] + a[4 * N + n] * b[27 * N + n];
+    c[4 * N + n] = a[0 * N + n] * b[4 * N + n] + a[1 * N + n] * b[10 * N + n] + a[4 * N + n];
+    c[5 * N + n] = a[2 * N + n] * b[17 * N + n] + a[5 * N + n];
+    // Row 1 of C.
+    c[6 * N + n] = a[6 * N + n] * b[0 * N + n] + a[7 * N + n] * b[6 * N + n] + a[8 * N + n] * b[12 * N + n] +
+                   a[10 * N + n] * b[24 * N + n];
+    c[7 * N + n] = a[6 * N + n] * b[1 * N + n] + a[7 * N + n] * b[7 * N + n] + a[8 * N + n] * b[13 * N + n] +
+                   a[10 * N + n] * b[25 * N + n];
+    c[8 * N + n] = a[8 * N + n];
+    c[9 * N + n] = a[6 * N + n] * b[3 * N + n] + a[7 * N + n] * b[9 * N + n] + a[8 * N + n] * b[15 * N + n] +
+                   a[9 * N + n] + a[10 * N + n] * b[27 * N + n];
+    c[10 * N + n] = a[6 * N + n] * b[4 * N + n] + a[7 * N + n] * b[10 * N + n] + a[10 * N + n];
+    c[11 * N + n] = a[8 * N + n] * b[17 * N + n] + a[11 * N + n];
+    // Row 2 of C.
+    c[12 * N + n] = a[12 * N + n] * b[0 * N + n] + a[13 * N + n] * b[6 * N + n] + a[14 * N + n] * b[12 * N + n] +
+                    a[16 * N + n] * b[24 * N + n];
+    c[13 * N + n] = a[12 * N + n] * b[1 * N + n] + a[13 * N + n] * b[7 * N + n] + a[14 * N + n] * b[13 * N + n] +
+                    a[16 * N + n] * b[25 * N + n];
+    c[14 * N + n] = a[14 * N + n];
+    c[15 * N + n] = a[12 * N + n] * b[3 * N + n] + a[13 * N + n] * b[9 * N + n] + a[14 * N + n] * b[15 * N + n] +
+                    a[15 * N + n] + a[16 * N + n] * b[27 * N + n];
+    c[16 * N + n] = a[12 * N + n] * b[4 * N + n] + a[13 * N + n] * b[10 * N + n] + a[16 * N + n];
+    c[17 * N + n] = a[14 * N + n] * b[17 * N + n] + a[17 * N + n];
+    // Row 3 of C.
+    c[18 * N + n] = a[18 * N + n] * b[0 * N + n] + a[19 * N + n] * b[6 * N + n] + a[20 * N + n] * b[12 * N + n] +
+                    a[22 * N + n] * b[24 * N + n];
+    c[19 * N + n] = a[18 * N + n] * b[1 * N + n] + a[19 * N + n] * b[7 * N + n] + a[20 * N + n] * b[13 * N + n] +
+                    a[22 * N + n] * b[25 * N + n];
+    c[20 * N + n] = a[20 * N + n];
+    c[21 * N + n] = a[18 * N + n] * b[3 * N + n] + a[19 * N + n] * b[9 * N + n] + a[20 * N + n] * b[15 * N + n] +
+                    a[21 * N + n] + a[22 * N + n] * b[27 * N + n];
+    c[22 * N + n] = a[18 * N + n] * b[4 * N + n] + a[19 * N + n] * b[10 * N + n] + a[22 * N + n];
+    c[23 * N + n] = a[20 * N + n] * b[17 * N + n] + a[23 * N + n];
+    // Row 4 of C.
+    c[24 * N + n] = a[24 * N + n] * b[0 * N + n] + a[25 * N + n] * b[6 * N + n] + a[26 * N + n] * b[12 * N + n] +
+                    a[28 * N + n] * b[24 * N + n];
+    c[25 * N + n] = a[24 * N + n] * b[1 * N + n] + a[25 * N + n] * b[7 * N + n] + a[26 * N + n] * b[13 * N + n] +
+                    a[28 * N + n] * b[25 * N + n];
+    c[26 * N + n] = a[26 * N + n];
+    c[27 * N + n] = a[24 * N + n] * b[3 * N + n] + a[25 * N + n] * b[9 * N + n] + a[26 * N + n] * b[15 * N + n] +
+                    a[27 * N + n] + a[28 * N + n] * b[27 * N + n];
+    c[28 * N + n] = a[24 * N + n] * b[4 * N + n] + a[25 * N + n] * b[10 * N + n] + a[28 * N + n];
+    c[29 * N + n] = a[26 * N + n] * b[17 * N + n] + a[29 * N + n];
+    // Row 5 of C.
+    c[30 * N + n] = a[30 * N + n] * b[0 * N + n] + a[31 * N + n] * b[6 * N + n] + a[32 * N + n] * b[12 * N + n] +
+                    a[34 * N + n] * b[24 * N + n];
+    c[31 * N + n] = a[30 * N + n] * b[1 * N + n] + a[31 * N + n] * b[7 * N + n] + a[32 * N + n] * b[13 * N + n] +
+                    a[34 * N + n] * b[25 * N + n];
+    c[32 * N + n] = a[32 * N + n];
+    c[33 * N + n] = a[30 * N + n] * b[3 * N + n] + a[31 * N + n] * b[9 * N + n] + a[32 * N + n] * b[15 * N + n] +
+                    a[33 * N + n] + a[34 * N + n] * b[27 * N + n];
+    c[34 * N + n] = a[30 * N + n] * b[4 * N + n] + a[31 * N + n] * b[10 * N + n] + a[34 * N + n];
+    c[35 * N + n] = a[32 * N + n] * b[17 * N + n] + a[35 * N + n];
+  }
+
+#ifdef UNUSED
+  // Reference variant of C = A * B (B symmetric): evaluates the complete 6x6 product
+  // for every slot without assuming that any element is known to be 0 or 1.
+  void MultHelixPropFull(const MPlexLL& A, const MPlexLS& B, MPlexLL& C) {
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {
+      for (int row = 0; row < 6; ++row) {
+        for (int col = 0; col < 6; ++col) {
+          // Clear the target element, then accumulate the dot product of A's row and B's column.
+          C(n, row, col) = 0.;
+          for (int m = 0; m < 6; ++m) {
+            C(n, row, col) += A.constAt(n, row, m) * B.constAt(n, m, col);
+          }
+        }
+      }
+    }
+  }
+
+  // Reference variant of C = A * B (all operands general 6x6): evaluates the complete
+  // product for every slot without assuming that any element is known to be 0 or 1.
+  void MultHelixPropFull(const MPlexLL& A, const MPlexLL& B, MPlexLL& C) {
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {
+      for (int row = 0; row < 6; ++row) {
+        for (int col = 0; col < 6; ++col) {
+          // Clear the target element, then accumulate the dot product of A's row and B's column.
+          C(n, row, col) = 0.;
+          for (int m = 0; m < 6; ++m) {
+            C(n, row, col) += A.constAt(n, row, m) * B.constAt(n, m, col);
+          }
+        }
+      }
+    }
+  }
+
+  // Reference variant of C = B * AT (C symmetric): evaluates the complete product for
+  // every slot without assuming that any element is known to be 0 or 1.
+  void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLS& C) {
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {
+      for (int row = 0; row < 6; ++row) {
+        for (int col = 0; col < 6; ++col) {
+          // Clear the target element, then accumulate the dot product of B's row and A's row.
+          C(n, row, col) = 0.;
+          for (int m = 0; m < 6; ++m) {
+            C(n, row, col) += B.constAt(n, row, m) * A.constAt(n, col, m);
+          }
+        }
+      }
+    }
+  }
+
+  // Reference variant of C = B * AT (all operands general 6x6): evaluates the complete
+  // product for every slot without assuming that any element is known to be 0 or 1.
+  void MultHelixPropTranspFull(const MPlexLL& A, const MPlexLL& B, MPlexLL& C) {
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {
+      for (int row = 0; row < 6; ++row) {
+        for (int col = 0; col < 6; ++col) {
+          // Clear the target element, then accumulate the dot product of B's row and A's row.
+          C(n, row, col) = 0.;
+          for (int m = 0; m < 6; ++m) {
+            C(n, row, col) += B.constAt(n, row, m) * A.constAt(n, col, m);
+          }
+        }
+      }
+    }
+  }
+#endif
+}  // end unnamed namespace
+
+//==============================================================================
+
+namespace mkfit {
+
+  // Iterative helix propagation to the radius msRad that builds the full
+  // Jacobian as the product of the per-iteration Jacobians.
+  //
+  //   inPar     - input parameters; elements 0,1 are used as x,y, element 3 as
+  //               inverse pT, element 4 as phi and element 5 as theta.
+  //   inChg     - charge per slot.
+  //   msRad     - target radius per slot.
+  //   outPar    - in/out: it is read before it is written, so the caller has
+  //               to pre-load it with the starting parameters; on return it
+  //               holds the propagated parameters.
+  //   errorProp - output Jacobian. Slots whose start radius is within 0.0001
+  //               of the target keep the identity matrix.
+  //   N_proc    - only referenced by the DEBUG dump in the visible code.
+  //
+  // All NN slots are processed regardless of N_proc.
+  void helixAtRFromIterativeCCSFullJac(const MPlexLV& inPar,
+                                       const MPlexQI& inChg,
+                                       const MPlexQF& msRad,
+                                       MPlexLV& outPar,
+                                       MPlexLL& errorProp,
+                                       const int N_proc) {
+    errorProp.setVal(0.f);
+    MPlexLL errorPropTmp(0.f);   //initialize to zero
+    MPlexLL errorPropSwap(0.f);  //initialize to zero
+
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {
+      //initialize erroProp to identity matrix
+      errorProp(n, 0, 0) = 1.f;
+      errorProp(n, 1, 1) = 1.f;
+      errorProp(n, 2, 2) = 1.f;
+      errorProp(n, 3, 3) = 1.f;
+      errorProp(n, 4, 4) = 1.f;
+      errorProp(n, 5, 5) = 1.f;
+
+      const float k = inChg.constAt(n, 0, 0) * 100.f / (-Const::sol * Config::Bfield);
+      const float r = msRad.constAt(n, 0, 0);
+      float r0 = hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0));
+
+      if (std::abs(r - r0) < 0.0001f) {
+        dprint_np(n, "distance less than 1mum, skip");
+        continue;
+      }
+
+      const float ipt = inPar.constAt(n, 3, 0);
+      const float phiin = inPar.constAt(n, 4, 0);
+      const float theta = inPar.constAt(n, 5, 0);
+
+      //set those that are 1. before iterations
+      errorPropTmp(n, 2, 2) = 1.f;
+      errorPropTmp(n, 3, 3) = 1.f;
+      errorPropTmp(n, 4, 4) = 1.f;
+      errorPropTmp(n, 5, 5) = 1.f;
+
+      float cosah = 0., sinah = 0.;
+      //no trig approx here, phi and theta can be large
+      float cosP = std::cos(phiin), sinP = std::sin(phiin);
+      const float cosT = std::cos(theta), sinT = std::sin(theta);
+      float pxin = cosP / ipt;
+      float pyin = sinP / ipt;
+
+      for (int i = 0; i < Config::Niter; ++i) {
+        dprint_np(n,
+                  std::endl
+                      << "attempt propagation from r=" << r0 << " to r=" << r << std::endl
+                      << "x=" << outPar.At(n, 0, 0) << " y=" << outPar.At(n, 1, 0) << " z=" << outPar.At(n, 2, 0)
+                      << " px=" << std::cos(phiin) / ipt << " py=" << std::sin(phiin) / ipt
+                      << " pz=" << 1.f / (ipt * tan(theta)) << " q=" << inChg.constAt(n, 0, 0) << std::endl);
+
+        // Radius reached so far and the turning angle still needed to get to r.
+        r0 = hipo(outPar.constAt(n, 0, 0), outPar.constAt(n, 1, 0));
+        const float ialpha = (r - r0) * ipt / k;
+        //alpha+=ialpha;
+
+        if (Config::useTrigApprox) {
+          sincos4(ialpha * 0.5f, sinah, cosah);
+        } else {
+          cosah = std::cos(ialpha * 0.5f);
+          sinah = std::sin(ialpha * 0.5f);
+        }
+        // Full-angle values from the half-angle ones.
+        const float cosa = 1.f - 2.f * sinah * sinah;
+        const float sina = 2.f * sinah * cosah;
+
+        //derivatives of alpha
+        const float dadx = -outPar.At(n, 0, 0) * ipt / (k * r0);
+        const float dady = -outPar.At(n, 1, 0) * ipt / (k * r0);
+        const float dadipt = (r - r0) / k;
+
+        outPar.At(n, 0, 0) = outPar.constAt(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
+        outPar.At(n, 1, 0) = outPar.constAt(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
+        const float pxinold = pxin;  //copy before overwriting
+        pxin = pxin * cosa - pyin * sina;
+        pyin = pyin * cosa + pxinold * sina;
+
+        //need phi at origin, so this goes before redefining phi
+        //no trig approx here, phi can be large
+        cosP = std::cos(outPar.At(n, 4, 0));
+        sinP = std::sin(outPar.At(n, 4, 0));
+
+        outPar.At(n, 2, 0) = outPar.constAt(n, 2, 0) + k * ialpha * cosT / (ipt * sinT);
+        outPar.At(n, 3, 0) = ipt;
+        outPar.At(n, 4, 0) = outPar.constAt(n, 4, 0) + ialpha;
+        outPar.At(n, 5, 0) = theta;
+
+        // Jacobian of this single iteration (only the non-constant elements are refreshed).
+        errorPropTmp(n, 0, 0) = 1.f + k * (cosP * dadx * cosa - sinP * dadx * sina) / ipt;
+        errorPropTmp(n, 0, 1) = k * (cosP * dady * cosa - sinP * dady * sina) / ipt;
+        errorPropTmp(n, 0, 3) =
+            k * (cosP * (ipt * dadipt * cosa - sina) + sinP * ((1.f - cosa) - ipt * dadipt * sina)) / (ipt * ipt);
+        errorPropTmp(n, 0, 4) = -k * (sinP * sina + cosP * (1.f - cosa)) / ipt;
+
+        errorPropTmp(n, 1, 0) = k * (sinP * dadx * cosa + cosP * dadx * sina) / ipt;
+        errorPropTmp(n, 1, 1) = 1.f + k * (sinP * dady * cosa + cosP * dady * sina) / ipt;
+        errorPropTmp(n, 1, 3) =
+            k * (sinP * (ipt * dadipt * cosa - sina) + cosP * (ipt * dadipt * sina - (1.f - cosa))) / (ipt * ipt);
+        errorPropTmp(n, 1, 4) = k * (cosP * sina - sinP * (1.f - cosa)) / ipt;
+
+        errorPropTmp(n, 2, 0) = k * cosT * dadx / (ipt * sinT);
+        errorPropTmp(n, 2, 1) = k * cosT * dady / (ipt * sinT);
+        errorPropTmp(n, 2, 3) = k * cosT * (ipt * dadipt - ialpha) / (ipt * ipt * sinT);
+        errorPropTmp(n, 2, 5) = -k * ialpha / (ipt * sinT * sinT);
+
+        errorPropTmp(n, 4, 0) = dadx;
+        errorPropTmp(n, 4, 1) = dady;
+        errorPropTmp(n, 4, 3) = dadipt;
+
+        // Accumulate: errorProp(n) <- errorProp(n) * errorPropTmp(n).
+        // Only slot n of errorPropSwap is filled by the product, so only slot n
+        // may be copied back. Copying the whole Matriplex would overwrite the
+        // identity Jacobian of slots that were skipped above with the zeros
+        // still stored in errorPropSwap, and would touch other lanes from
+        // inside the simd loop.
+        MultHelixPropTemp(errorProp, errorPropTmp, errorPropSwap, n);
+        errorProp.copySlot(n, errorPropSwap);
+      }
+
+      dprint_np(
+          n,
+          "propagation end, dump parameters"
+              << std::endl
+              << "pos = " << outPar.At(n, 0, 0) << " " << outPar.At(n, 1, 0) << " " << outPar.At(n, 2, 0) << std::endl
+              << "mom = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
+              << std::sin(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
+              << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0)))
+              << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0))
+              << " pT=" << 1. / std::abs(outPar.At(n, 3, 0)) << std::endl);
+
+#ifdef DEBUG
+      // Dump the accumulated 6x6 Jacobian of this slot, one row per line.
+      if (n < N_proc) {
+        dmutex_guard;
+        std::cout << n << " jacobian" << std::endl;
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 0, 0),
+               errorProp(n, 0, 1),
+               errorProp(n, 0, 2),
+               errorProp(n, 0, 3),
+               errorProp(n, 0, 4),
+               errorProp(n, 0, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 1, 0),
+               errorProp(n, 1, 1),
+               errorProp(n, 1, 2),
+               errorProp(n, 1, 3),
+               errorProp(n, 1, 4),
+               errorProp(n, 1, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 2, 0),
+               errorProp(n, 2, 1),
+               errorProp(n, 2, 2),
+               errorProp(n, 2, 3),
+               errorProp(n, 2, 4),
+               errorProp(n, 2, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 3, 0),
+               errorProp(n, 3, 1),
+               errorProp(n, 3, 2),
+               errorProp(n, 3, 3),
+               errorProp(n, 3, 4),
+               errorProp(n, 3, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 4, 0),
+               errorProp(n, 4, 1),
+               errorProp(n, 4, 2),
+               errorProp(n, 4, 3),
+               errorProp(n, 4, 4),
+               errorProp(n, 4, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 5, 0),
+               errorProp(n, 5, 1),
+               errorProp(n, 5, 2),
+               errorProp(n, 5, 3),
+               errorProp(n, 5, 4),
+               errorProp(n, 5, 5));
+      }
+#endif
+    }
+  }
+
+}  // end namespace mkfit
+
+//#pragma omp declare simd simdlen(NN) notinbranch linear(n)
+#include "PropagationMPlex.icc"
+
+namespace mkfit {
+
+  // Entry point for the iterative helix propagation to radius msRad.
+  // Clears errorProp and outFailFlag, then delegates to
+  // helixAtRFromIterativeCCS_impl (included from PropagationMPlex.icc) with the
+  // arguments 0 and NN (presumably the slot range to process -- confirm in the .icc).
+  // As noted at the call site in propagateHelixToRMPlex, outPar is expected to
+  // already hold the input parameters.
+  void helixAtRFromIterativeCCS(const MPlexLV& inPar,
+                                const MPlexQI& inChg,
+                                const MPlexQF& msRad,
+                                MPlexLV& outPar,
+                                MPlexLL& errorProp,
+                                MPlexQI& outFailFlag,
+                                const int N_proc,
+                                const PropagationFlags pflags) {
+    errorProp.setVal(0.f);
+    // outFailFlag stores integers, so reset it with an integer literal rather than a float one.
+    outFailFlag.setVal(0);
+
+    helixAtRFromIterativeCCS_impl(inPar, inChg, msRad, outPar, errorProp, outFailFlag, 0, NN, N_proc, pflags);
+  }
+
+  // Propagates parameters and errors of the first N_proc slots along a helix to
+  // the radius msRad.
+  //
+  // Steps: helix propagation (parameters + Jacobian), optional material effects
+  // (pflags.apply_material), phi normalisation, similarity transform of the
+  // errors with the Jacobian, and finally a reset of failed slots to their input.
+  // noMatEffPtr may be null; where it is non-zero for a slot, no material is applied there.
+  void propagateHelixToRMPlex(const MPlexLS& inErr,
+                              const MPlexLV& inPar,
+                              const MPlexQI& inChg,
+                              const MPlexQF& msRad,
+                              MPlexLS& outErr,
+                              MPlexLV& outPar,
+                              const int N_proc,
+                              const PropagationFlags pflags,
+                              const MPlexQI* noMatEffPtr) {
+    // bool debug = true;
+
+    // outErr is the operand of the similarity transform with errorProp further down
+    // (and is printed in DEBUG before that).
+    // MT: I don't think this is really needed if we use inErr where required.
+    outErr = inErr;
+    // helixAtRFromIterativeCCS_impl() and helixAtRFromIterativeCCSFullJac() require
+    // the input parameters to be present in the output object.
+    // MT: This should be properly handled in both functions (expecting input in output parameters sucks).
+    outPar = inPar;
+
+    MPlexLL errorProp;
+    MPlexQI failFlag;
+
+    helixAtRFromIterativeCCS(inPar, inChg, msRad, outPar, errorProp, failFlag, N_proc, pflags);
+
+#ifdef DEBUG
+    {
+      for (int kk = 0; kk < N_proc; ++kk) {
+        dprintf("outErr before prop %d\n", kk);
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            dprintf("%8f ", outErr.At(kk, i, j));
+          printf("\n");
+        }
+        dprintf("\n");
+
+        dprintf("errorProp %d\n", kk);
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            dprintf("%8f ", errorProp.At(kk, i, j));
+          printf("\n");
+        }
+        dprintf("\n");
+      }
+    }
+#endif
+
+    if (pflags.apply_material) {
+      MPlexQF hitsRl;
+      MPlexQF hitsXi;
+      MPlexQF propSign;
+#pragma omp simd
+      for (int n = 0; n < N_proc; ++n) {
+        // Default: no material. Overwritten only for slots with a valid lookup.
+        float radLen = 0.f;
+        float xi = 0.f;
+
+        const bool skipMaterial = failFlag(n, 0, 0) || (noMatEffPtr && noMatEffPtr->constAt(n, 0, 0));
+        if (!skipMaterial) {
+          const int zbin = Config::materialEff.getZbin(outPar(n, 2, 0));
+          const int rbin = Config::materialEff.getRbin(msRad(n, 0, 0));
+          // protect against crazy propagations
+          const bool binsInRange = zbin >= 0 && zbin < Config::nBinsZME && rbin >= 0 && rbin < Config::nBinsRME;
+          if (binsInRange) {
+            radLen = Config::materialEff.getRlVal(zbin, rbin);
+            xi = Config::materialEff.getXiVal(zbin, rbin);
+          }
+        }
+        hitsRl(n, 0, 0) = radLen;
+        hitsXi(n, 0, 0) = xi;
+
+        // +1 when the target radius lies outside the starting radius, -1 otherwise.
+        const float rStart = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
+        const float rTarget = msRad(n, 0, 0);
+        propSign(n, 0, 0) = (rTarget > rStart ? 1. : -1.);
+      }
+      applyMaterialEffects(hitsRl, hitsXi, propSign, outErr, outPar, N_proc, true);
+    }
+
+    squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
+
+    // Matriplex version of:
+    // result.errors = ROOT::Math::Similarity(errorProp, outErr);
+
+    // MultHelixProp can be optimized for CCS coordinates, see GenMPlexOps.pl
+    MPlexLL temp;
+    MultHelixProp(errorProp, outErr, temp);
+    MultHelixPropTransp(errorProp, temp, outErr);
+
+    /*
+     // To be used with: MPT_DIM = 1
+     if (fabs(sqrt(outPar[0]*outPar[0]+outPar[1]*outPar[1]) - msRad[0]) > 0.0001)
+     {
+       std::cout << "DID NOT GET TO R, FailFlag=" << failFlag[0]
+                 << " dR=" << msRad[0] - std::hypot(outPar[0],outPar[1])
+                 << " r="  << msRad[0] << " rin=" << std::hypot(inPar[0],inPar[1]) << " rout=" << std::hypot(outPar[0],outPar[1])
+                 << std::endl;
+       // std::cout << "    pt=" << pt << " pz=" << inPar.At(n, 2) << std::endl;
+     }
+   */
+
+    // FIXUP BOTCHED (low pT) propagations.
+    // For now let's enforce resetting output to input for failed cases. But:
+    // - perhaps this should be optional;
+    // - alternatively, it could also be an extra output parameter;
+    // - if we pass fail outwards, we might *not* need to also reset botched output.
+    for (int slot = 0; slot < N_proc; ++slot) {
+      if (!failFlag(slot, 0, 0)) {
+        continue;
+      }
+      outPar.copySlot(slot, inPar);
+      outErr.copySlot(slot, inErr);
+    }
+  }
+
+  //==============================================================================
+
+  // Propagates parameters and errors of the first N_proc slots along a helix to
+  // the z position msZ.
+  //
+  // Steps: helix propagation via helixAtZ (parameters + Jacobian), optional
+  // material effects (pflags.apply_material), phi normalisation and the
+  // similarity transform of the errors with the Jacobian.
+  // noMatEffPtr may be null; where it is non-zero for a slot, no material is applied there.
+  void propagateHelixToZMPlex(const MPlexLS& inErr,
+                              const MPlexLV& inPar,
+                              const MPlexQI& inChg,
+                              const MPlexQF& msZ,
+                              MPlexLS& outErr,
+                              MPlexLV& outPar,
+                              const int N_proc,
+                              const PropagationFlags pflags,
+                              const MPlexQI* noMatEffPtr) {
+    // debug = true;
+
+    // Start the outputs off as copies of the inputs.
+    outErr = inErr;
+    outPar = inPar;
+
+    MPlexLL errorProp;
+
+    helixAtZ(inPar, inChg, msZ, outPar, errorProp, N_proc, pflags);
+
+#ifdef DEBUG
+    {
+      for (int kk = 0; kk < N_proc; ++kk) {
+        dprintf("inErr %d\n", kk);
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            dprintf("%8f ", inErr.constAt(kk, i, j));
+          printf("\n");
+        }
+        dprintf("\n");
+
+        dprintf("errorProp %d\n", kk);
+        for (int i = 0; i < 6; ++i) {
+          for (int j = 0; j < 6; ++j)
+            dprintf("%8f ", errorProp.At(kk, i, j));
+          printf("\n");
+        }
+        dprintf("\n");
+      }
+    }
+#endif
+
+    if (pflags.apply_material) {
+      MPlexQF hitsRl;
+      MPlexQF hitsXi;
+      MPlexQF propSign;
+#pragma omp simd
+      for (int n = 0; n < N_proc; ++n) {
+        // Default: no material. Overwritten only for slots with a valid lookup.
+        float radLen = 0.f;
+        float xi = 0.f;
+
+        const bool skipMaterial = noMatEffPtr && noMatEffPtr->constAt(n, 0, 0);
+        if (!skipMaterial) {
+          const int zbin = Config::materialEff.getZbin(msZ(n, 0, 0));
+          const int rbin = Config::materialEff.getRbin(std::hypot(outPar(n, 0, 0), outPar(n, 1, 0)));
+          // protect against crazy propagations
+          const bool binsInRange = zbin >= 0 && zbin < Config::nBinsZME && rbin >= 0 && rbin < Config::nBinsRME;
+          if (binsInRange) {
+            radLen = Config::materialEff.getRlVal(zbin, rbin);
+            xi = Config::materialEff.getXiVal(zbin, rbin);
+          }
+        }
+        hitsRl(n, 0, 0) = radLen;
+        hitsXi(n, 0, 0) = xi;
+
+        // +1 when |z| grows during the propagation, -1 otherwise.
+        const float zTarget = msZ.constAt(n, 0, 0);
+        const float zStart = inPar.constAt(n, 2, 0);
+        propSign(n, 0, 0) = (std::abs(zTarget) > std::abs(zStart) ? 1. : -1.);
+      }
+      applyMaterialEffects(hitsRl, hitsXi, propSign, outErr, outPar, N_proc, false);
+    }
+
+    squashPhiMPlex(outPar, N_proc);  // ensure phi is between |pi|
+
+    // Matriplex version of:
+    // result.errors = ROOT::Math::Similarity(errorProp, outErr);
+    MPlexLL temp;
+    MultHelixPropEndcap(errorProp, outErr, temp);
+    MultHelixPropTranspEndcap(errorProp, temp, outErr);
+
+    // This dump is now out of its place as similarity is done with matriplex ops.
+    /*
+#ifdef DEBUG
+   {
+     dmutex_guard;
+     for (int kk = 0; kk < N_proc; ++kk)
+     {
+       dprintf("outErr %d\n", kk);
+       for (int i = 0; i < 6; ++i) { for (int j = 0; j < 6; ++j)
+           dprintf("%8f ", outErr.At(kk,i,j)); printf("\n");
+       } dprintf("\n");
+
+       dprintf("outPar %d\n", kk);
+       for (int i = 0; i < 6; ++i) {
+           dprintf("%8f ", outPar.At(kk,i,0)); printf("\n");
+       } dprintf("\n");
+       if (std::abs(outPar.At(kk,2,0) - msZ.constAt(kk, 0, 0)) > 0.0001) {
+         float pt = 1.0f / inPar.constAt(kk,3,0);
+	 dprint_np(kk, "DID NOT GET TO Z, dZ=" << std::abs(outPar.At(kk,2,0) - msZ.constAt(kk, 0, 0))
+		   << " z=" << msZ.constAt(kk, 0, 0) << " zin=" << inPar.constAt(kk,2,0) << " zout=" << outPar.At(kk,2,0) << std::endl
+		   << "pt=" << pt << " pz=" << pt/std::tan(inPar.constAt(kk,5,0)));
+       }
+     }
+   }
+#endif
+   */
+  }
+
+  void helixAtZ(const MPlexLV& inPar,
+                const MPlexQI& inChg,
+                const MPlexQF& msZ,
+                MPlexLV& outPar,
+                MPlexLL& errorProp,
+                const int N_proc,
+                const PropagationFlags pflags) {
+    errorProp.setVal(0.f);
+    // Helix step to z = msZ per slot. outPar x,y are updated in place (must be preloaded); rows 3 and 5 are not written.
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {  // runs over all NN slots; N_proc only gates the DEBUG dump at the end
+      //initialize errorProp to identity matrix, except element 2,2 which is zero
+      errorProp(n, 0, 0) = 1.f;
+      errorProp(n, 1, 1) = 1.f;
+      errorProp(n, 3, 3) = 1.f;
+      errorProp(n, 4, 4) = 1.f;
+      errorProp(n, 5, 5) = 1.f;
+
+      const float zout = msZ.constAt(n, 0, 0);
+
+      const float zin = inPar.constAt(n, 2, 0);
+      const float ipt = inPar.constAt(n, 3, 0);
+      const float phiin = inPar.constAt(n, 4, 0);
+      const float theta = inPar.constAt(n, 5, 0);
+      // k = q * 100 / (-sol * B), with B taken from the parametrized field at the input (z, r) or from Config::Bfield
+      const float k =
+          inChg.constAt(n, 0, 0) * 100.f /
+          (-Const::sol * (pflags.use_param_b_field
+                              ? Config::bFieldFromZR(zin, hipo(inPar.constAt(n, 0, 0), inPar.constAt(n, 1, 0)))
+                              : Config::Bfield));
+      const float kinv = 1.f / k;
+
+      dprint_np(n,
+                std::endl
+                    << "input parameters"
+                    << " inPar.constAt(n, 0, 0)=" << std::setprecision(9) << inPar.constAt(n, 0, 0)
+                    << " inPar.constAt(n, 1, 0)=" << std::setprecision(9) << inPar.constAt(n, 1, 0)
+                    << " inPar.constAt(n, 2, 0)=" << std::setprecision(9) << inPar.constAt(n, 2, 0)
+                    << " inPar.constAt(n, 3, 0)=" << std::setprecision(9) << inPar.constAt(n, 3, 0)
+                    << " inPar.constAt(n, 4, 0)=" << std::setprecision(9) << inPar.constAt(n, 4, 0)
+                    << " inPar.constAt(n, 5, 0)=" << std::setprecision(9) << inPar.constAt(n, 5, 0));
+
+      const float pt = 1.f / ipt;
+
+      float cosahTmp = 0., sinahTmp = 0.;
+      //no trig approx here, phi can be large
+      const float cosP = std::cos(phiin), sinP = std::sin(phiin);
+      const float cosT = std::cos(theta), sinT = std::sin(theta);
+      const float tanT = sinT / cosT;
+      const float icos2T = 1.f / (cosT * cosT);
+      const float pxin = cosP * pt;
+      const float pyin = sinP * pt;
+
+      //fixme, make this printout useful for propagation to z
+      dprint_np(n,
+                std::endl
+                    << "k=" << std::setprecision(9) << k << " pxin=" << std::setprecision(9) << pxin
+                    << " pyin=" << std::setprecision(9) << pyin << " cosP=" << std::setprecision(9) << cosP
+                    << " sinP=" << std::setprecision(9) << sinP << " pt=" << std::setprecision(9) << pt);
+      // alpha is the change in phi over this step (outPar phi is set to phiin + alpha below)
+      const float deltaZ = zout - zin;
+      const float alpha = deltaZ * tanT * ipt * kinv;
+      // evaluate sin/cos at alpha/2, then use double-angle identities to get cos(alpha) and sin(alpha)
+      if (Config::useTrigApprox) {
+        sincos4(alpha * 0.5f, sinahTmp, cosahTmp);
+      } else {
+        cosahTmp = std::cos(alpha * 0.5f);
+        sinahTmp = std::sin(alpha * 0.5f);
+      }
+      const float cosah = cosahTmp;
+      const float sinah = sinahTmp;
+      const float cosa = 1.f - 2.f * sinah * sinah;
+      const float sina = 2.f * sinah * cosah;
+
+      //update parameters
+      outPar.At(n, 0, 0) = outPar.At(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
+      outPar.At(n, 1, 0) = outPar.At(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
+      outPar.At(n, 2, 0) = zout;
+      outPar.At(n, 4, 0) = phiin + alpha;
+
+      dprint_np(n,
+                std::endl
+                    << "outPar.At(n, 0, 0)=" << outPar.At(n, 0, 0) << " outPar.At(n, 1, 0)=" << outPar.At(n, 1, 0)
+                    << " pxin=" << pxin << " pyin=" << pyin);
+      // Jacobian rows 0 (x), 1 (y) and 4 (phi); all other entries keep the values assigned at the top of the loop
+      const float pxcaMpysa = pxin * cosa - pyin * sina;
+      errorProp(n, 0, 2) = -tanT * ipt * pxcaMpysa;
+      errorProp(n, 0, 3) = k * pt * pt * (cosP * (alpha * cosa - sina) + sinP * 2.f * sinah * (sinah - alpha * cosah));
+      errorProp(n, 0, 4) = -2 * k * pt * sinah * (sinP * cosah + cosP * sinah);
+      errorProp(n, 0, 5) = deltaZ * ipt * pxcaMpysa * icos2T;
+
+      const float pycaPpxsa = pyin * cosa + pxin * sina;
+      errorProp(n, 1, 2) = -tanT * ipt * pycaPpxsa;
+      errorProp(n, 1, 3) = k * pt * pt * (sinP * (alpha * cosa - sina) - cosP * 2.f * sinah * (sinah - alpha * cosah));
+      errorProp(n, 1, 4) = 2 * k * pt * sinah * (cosP * cosah - sinP * sinah);
+      errorProp(n, 1, 5) = deltaZ * ipt * pycaPpxsa * icos2T;
+
+      errorProp(n, 4, 2) = -ipt * tanT * kinv;
+      errorProp(n, 4, 3) = tanT * deltaZ * kinv;
+      errorProp(n, 4, 5) = ipt * deltaZ * kinv * icos2T;
+
+      dprint_np(
+          n,
+          "propagation end, dump parameters"
+              << std::endl
+              << "pos = " << outPar.At(n, 0, 0) << " " << outPar.At(n, 1, 0) << " " << outPar.At(n, 2, 0) << std::endl
+              << "mom = " << std::cos(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
+              << std::sin(outPar.At(n, 4, 0)) / outPar.At(n, 3, 0) << " "
+              << 1. / (outPar.At(n, 3, 0) * tan(outPar.At(n, 5, 0)))
+              << " r=" << std::sqrt(outPar.At(n, 0, 0) * outPar.At(n, 0, 0) + outPar.At(n, 1, 0) * outPar.At(n, 1, 0))
+              << " pT=" << 1. / std::abs(outPar.At(n, 3, 0)) << std::endl);
+
+#ifdef DEBUG
+      if (n < N_proc) {
+        dmutex_guard;
+        std::cout << n << ": jacobian" << std::endl;
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 0, 0),
+               errorProp(n, 0, 1),
+               errorProp(n, 0, 2),
+               errorProp(n, 0, 3),
+               errorProp(n, 0, 4),
+               errorProp(n, 0, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 1, 0),
+               errorProp(n, 1, 1),
+               errorProp(n, 1, 2),
+               errorProp(n, 1, 3),
+               errorProp(n, 1, 4),
+               errorProp(n, 1, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 2, 0),
+               errorProp(n, 2, 1),
+               errorProp(n, 2, 2),
+               errorProp(n, 2, 3),
+               errorProp(n, 2, 4),
+               errorProp(n, 2, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 3, 0),
+               errorProp(n, 3, 1),
+               errorProp(n, 3, 2),
+               errorProp(n, 3, 3),
+               errorProp(n, 3, 4),
+               errorProp(n, 3, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 4, 0),
+               errorProp(n, 4, 1),
+               errorProp(n, 4, 2),
+               errorProp(n, 4, 3),
+               errorProp(n, 4, 4),
+               errorProp(n, 4, 5));
+        printf("%5f %5f %5f %5f %5f %5f\n",
+               errorProp(n, 5, 0),
+               errorProp(n, 5, 1),
+               errorProp(n, 5, 2),
+               errorProp(n, 5, 3),
+               errorProp(n, 5, 4),
+               errorProp(n, 5, 5));
+      }
+#endif
+    }
+  }
+
+  //==============================================================================
+
+  void applyMaterialEffects(const MPlexQF& hitsRl,
+                            const MPlexQF& hitsXi,
+                            const MPlexQF& propSign,
+                            MPlexLS& outErr,
+                            MPlexLV& outPar,
+                            const int N_proc,
+                            const bool isBarrel) {  // adds multiple-scattering and energy-loss terms to outErr/outPar
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {  // runs over all NN slots; N_proc is not referenced in this function
+      float radL = hitsRl.constAt(n, 0, 0);
+      if (radL < 1e-13f)
+        continue;  //ugly, please fixme
+      const float theta = outPar.constAt(n, 5, 0);
+      const float pt = 1.f / outPar.constAt(n, 3, 0);  //fixme, make sure it is positive?
+      const float p = pt / std::sin(theta);
+      const float p2 = p * p;
+      constexpr float mpi = 0.140;       // m=140 MeV, pion (so the value is presumably expressed in GeV)
+      constexpr float mpi2 = mpi * mpi;  // pion mass squared
+      const float beta2 = p2 / (p2 + mpi2);
+      const float beta = std::sqrt(beta2);
+      //radiation length, corrected for the crossing angle (cos alpha from dot product of radius vector and momentum)
+      const float invCos = (isBarrel ? p / pt : 1.f / std::abs(std::cos(theta)));
+      radL = radL * invCos;  //fixme works only for barrel geom -- NOTE(review): invCos branches on isBarrel; stale?
+      // multiple scattering
+      //vary independently phi and theta by the rms of the planar multiple scattering angle
+      // XXX-KMD radL < 0, see your fixme above! Repeating bailout
+      if (radL < 1e-13f)
+        continue;
+      // const float thetaMSC = 0.0136f*std::sqrt(radL)*(1.f+0.038f*std::log(radL))/(beta*p);// eq 32.15
+      // const float thetaMSC2 = thetaMSC*thetaMSC;
+      const float thetaMSC = 0.0136f * (1.f + 0.038f * std::log(radL)) / (beta * p);  // eq 32.15
+      const float thetaMSC2 = thetaMSC * thetaMSC * radL;  // sqrt(radL) of the commented form is applied here, squared
+      outErr.At(n, 4, 4) += thetaMSC2;
+      // outErr.At(n, 4, 5) += thetaMSC2;
+      outErr.At(n, 5, 5) += thetaMSC2;
+      //std::cout << "beta=" << beta << " p=" << p << std::endl;
+      //std::cout << "multiple scattering thetaMSC=" << thetaMSC << " thetaMSC2=" << thetaMSC2 << " radL=" << radL << std::endl;
+      // energy loss
+      // XXX-KMD beta2 = 1 => 1 / sqrt(0)
+      // const float gamma = 1.f/std::sqrt(1.f - std::min(beta2, 0.999999f));
+      // const float gamma2 = gamma*gamma;
+      const float gamma2 = (p2 + mpi2) / mpi2;
+      const float gamma = std::sqrt(gamma2);  //1.f/std::sqrt(1.f - std::min(beta2, 0.999999f));
+      constexpr float me = 0.0005;            // m=0.5 MeV, electron
+      const float wmax = 2.f * me * beta2 * gamma2 / (1.f + 2.f * gamma * me / mpi + me * me / (mpi * mpi));
+      constexpr float I = 16.0e-9 * 10.75;
+      const float deltahalf = std::log(28.816e-9f * std::sqrt(2.33f * 0.498f) / I) + std::log(beta * gamma) - 0.5f;
+      const float dEdx =
+          beta < 1.f
+              ? (2.f * (hitsXi.constAt(n, 0, 0) * invCos *
+                        (0.5f * std::log(2.f * me * beta2 * gamma2 * wmax / (I * I)) - beta2 - deltahalf) / beta2))
+              : 0.f;  //protect against infs and nans
+      // dEdx = dEdx*2.;//xi in cmssw is defined with an extra factor 0.5 with respect to formula 27.1 in pdg
+      //std::cout << "dEdx=" << dEdx << " delta=" << deltahalf << " wmax=" << wmax << " Xi=" << hitsXi.constAt(n,0,0) << std::endl;
+      const float dP = propSign.constAt(n, 0, 0) * dEdx / beta;  // propSign supplies the sign of the momentum change
+      outPar.At(n, 3, 0) = p / (std::max(p + dP, 0.001f) * pt);  //stay above 1MeV
+      //assume 100% uncertainty
+      outErr.At(n, 3, 3) += dP * dP / (p2 * pt * pt);
+    }
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.h b/RecoTracker/MkFitCore/src/PropagationMPlex.h
new file mode 100644
index 0000000000000..ca6cd2e125ec6
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.h
@@ -0,0 +1,86 @@
+#ifndef RecoTracker_MkFitCore_src_PropagationMPlex_h
+#define RecoTracker_MkFitCore_src_PropagationMPlex_h
+
+#include "Matrix.h"
+
+namespace mkfit {
+
+  inline void squashPhiMPlex(MPlexLV& par, const int N_proc) {  // shifts par row 4 (phi) by at most one 2*pi turn
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {  // runs over all NN slots; N_proc is not used
+      if (par(n, 4, 0) >= Const::PI)
+        par(n, 4, 0) -= Const::TwoPI;
+      if (par(n, 4, 0) < -Const::PI)  // tested on the possibly already shifted value
+        par(n, 4, 0) += Const::TwoPI;
+    }
+  }
+
+  inline void squashPhiMPlexGeneral(MPlexLV& par, const int N_proc) {  // removes whole 2*pi turns from par row 4 (phi)
+#pragma omp simd
+    for (int n = 0; n < NN; ++n) {  // runs over all NN slots; N_proc is not used
+      par(n, 4, 0) -= std::floor(0.5f * Const::InvPI * (par(n, 4, 0) + Const::PI)) * Const::TwoPI;
+    }
+  }
+
+  void propagateLineToRMPlex(const MPlexLS& psErr,
+                             const MPlexLV& psPar,
+                             const MPlexHS& msErr,
+                             const MPlexHV& msPar,
+                             MPlexLS& outErr,
+                             MPlexLV& outPar,
+                             const int N_proc);
+
+  void propagateHelixToRMPlex(const MPlexLS& inErr,
+                              const MPlexLV& inPar,
+                              const MPlexQI& inChg,
+                              const MPlexQF& msRad,
+                              MPlexLS& outErr,
+                              MPlexLV& outPar,
+                              const int N_proc,
+                              const PropagationFlags pflags,
+                              const MPlexQI* noMatEffPtr = nullptr);
+
+  void helixAtRFromIterativeCCSFullJac(const MPlexLV& inPar,
+                                       const MPlexQI& inChg,
+                                       const MPlexQF& msRad,
+                                       MPlexLV& outPar,
+                                       MPlexLL& errorProp,
+                                       const int N_proc);
+
+  void helixAtRFromIterativeCCS(const MPlexLV& inPar,
+                                const MPlexQI& inChg,
+                                const MPlexQF& msRad,
+                                MPlexLV& outPar,
+                                MPlexLL& errorProp,
+                                MPlexQI& outFailFlag,
+                                const int N_proc,
+                                const PropagationFlags pflags);
+
+  void propagateHelixToZMPlex(const MPlexLS& inErr,
+                              const MPlexLV& inPar,
+                              const MPlexQI& inChg,
+                              const MPlexQF& msZ,
+                              MPlexLS& outErr,
+                              MPlexLV& outPar,
+                              const int N_proc,
+                              const PropagationFlags pflags,
+                              const MPlexQI* noMatEffPtr = nullptr);
+
+  void helixAtZ(const MPlexLV& inPar,
+                const MPlexQI& inChg,
+                const MPlexQF& msZ,
+                MPlexLV& outPar,
+                MPlexLL& errorProp,
+                const int N_proc,
+                const PropagationFlags pflags);
+
+  void applyMaterialEffects(const MPlexQF& hitsRl,
+                            const MPlexQF& hitsXi,
+                            const MPlexQF& propSign,
+                            MPlexLS& outErr,
+                            MPlexLV& outPar,
+                            const int N_proc,
+                            const bool isBarrel);
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/src/PropagationMPlex.icc b/RecoTracker/MkFitCore/src/PropagationMPlex.icc
new file mode 100644
index 0000000000000..a66083319f61c
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/PropagationMPlex.icc
@@ -0,0 +1,291 @@
+///////////////////////////////////////////////////////////////////////////////
+/// helixAtRFromIterativeCCS_impl
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename Tf, typename Ti, typename TfLL1, typename Tf11, typename TfLLL>
+static inline void helixAtRFromIterativeCCS_impl(const Tf& __restrict__ inPar,
+                                                 const Ti& __restrict__ inChg,
+                                                 const Tf11& __restrict__ msRad,
+                                                 TfLL1& __restrict__ outPar,
+                                                 TfLLL& __restrict__ errorProp,
+                                                 Ti& __restrict__ outFailFlag,  // expected to be initialized to 0
+                                                 const int nmin,
+                                                 const int nmax,
+                                                 const int N_proc,
+                                                 const PropagationFlags pf) {
+  // bool debug = true;
+  // Steps slots [nmin, nmax) toward radius msRad in Config::Niter iterations; outPar x,y are read first (preload them).
+#pragma omp simd
+  for (int n = nmin; n < nmax; ++n) {
+    //initialize errorProp to identity matrix
+    errorProp(n, 0, 0) = 1.f;
+    errorProp(n, 1, 1) = 1.f;
+    errorProp(n, 2, 2) = 1.f;
+    errorProp(n, 3, 3) = 1.f;
+    errorProp(n, 4, 4) = 1.f;
+    errorProp(n, 5, 5) = 1.f;
+
+    float r0 = hipo(inPar(n, 0, 0), inPar(n, 1, 0));
+    const float k = inChg(n, 0, 0) * 100.f /
+                    (-Const::sol * (pf.use_param_b_field ? Config::bFieldFromZR(inPar(n, 2, 0), r0) : Config::Bfield));
+    const float r = msRad(n, 0, 0);
+
+    // if (std::abs(r-r0)<0.0001f) {
+    // 	dprint("distance less than 1mum, skip");
+    // 	continue;
+    // }
+
+    const float xin = inPar(n, 0, 0);
+    const float yin = inPar(n, 1, 0);
+    const float ipt = inPar(n, 3, 0);
+    const float phiin = inPar(n, 4, 0);
+    const float theta = inPar(n, 5, 0);
+
+    dprint(std::endl);
+    dprint_np(n,
+              "input parameters"
+                  << " inPar(n, 0, 0)=" << std::setprecision(9) << inPar(n, 0, 0) << " inPar(n, 1, 0)="
+                  << std::setprecision(9) << inPar(n, 1, 0) << " inPar(n, 2, 0)=" << std::setprecision(9)
+                  << inPar(n, 2, 0) << " inPar(n, 3, 0)=" << std::setprecision(9) << inPar(n, 3, 0)
+                  << " inPar(n, 4, 0)=" << std::setprecision(9) << inPar(n, 4, 0)
+                  << " inPar(n, 5, 0)=" << std::setprecision(9) << inPar(n, 5, 0));
+
+    const float kinv = 1.f / k;
+    const float pt = 1.f / ipt;
+
+    float D = 0., cosa = 0., sina = 0., cosah = 0., sinah = 0., id = 0.;
+    //no trig approx here, phi can be large
+    float cosPorT = std::cos(phiin), sinPorT = std::sin(phiin);
+    float pxin = cosPorT * pt;
+    float pyin = sinPorT * pt;
+
+    dprint_np(n,
+              "k=" << std::setprecision(9) << k << " pxin=" << std::setprecision(9) << pxin
+                   << " pyin=" << std::setprecision(9) << pyin << " cosPorT=" << std::setprecision(9) << cosPorT
+                   << " sinPorT=" << std::setprecision(9) << sinPorT << " pt=" << std::setprecision(9) << pt);
+
+    //derivatives initialized to value for first iteration, i.e. distance = r-r0in
+    float dDdx = r0 > 0.f ? -xin / r0 : 0.f;
+    float dDdy = r0 > 0.f ? -yin / r0 : 0.f;
+    float dDdipt = 0.;
+    float dDdphi = 0.;
+
+    for (int i = 0; i < Config::Niter; ++i) {
+      //compute distance and path for the current iteration
+      r0 = hipo(outPar(n, 0, 0), outPar(n, 1, 0));
+
+      // Use one over dot product of transverse momentum and radial
+      // direction to scale the step. Propagation is prevented from reaching
+      // too close to the apex (dotp > 0.2).
+      // - Can / should we come up with a better approximation?
+      // - Can / should take +/- curvature into account?
+
+      const float oodotp = r0 * pt / (pxin * outPar(n, 0, 0) + pyin * outPar(n, 1, 0));
+
+      if (oodotp > 5.0f || oodotp < 0)  // 0.2 is 78.5 deg
+      {
+        id = 0.0f;
+        outFailFlag(n, 0, 0) = 1;  // flag the slot; the step length stays 0 for this iteration
+      } else {
+        // Can we come up with a better approximation?
+        // Should take +/- curvature into account.
+        id = (r - r0) * oodotp;
+      }
+      D += id;
+
+      if (Config::useTrigApprox) {
+        sincos4(id * ipt * kinv * 0.5f, sinah, cosah);
+      } else {
+        cosah = std::cos(id * ipt * kinv * 0.5f);
+        sinah = std::sin(id * ipt * kinv * 0.5f);
+      }
+      cosa = 1.f - 2.f * sinah * sinah;
+      sina = 2.f * sinah * cosah;
+      // debug dump; pz is pt / tan(theta), the same convention as the "mom =" dump at the end of this function
+      dprint_np(n,
+                "Attempt propagation from r="
+                    << r0 << " to r=" << r << std::endl
+                    << "   x=" << xin << " y=" << yin << " z=" << inPar(n, 2, 0) << " px=" << pxin << " py=" << pyin
+                    << " pz=" << pt / std::tan(theta) << " q=" << inChg(n, 0, 0) << std::endl
+                    << "   r=" << std::setprecision(9) << r << " r0=" << std::setprecision(9) << r0
+                    << " id=" << std::setprecision(9) << id << " dr=" << std::setprecision(9) << r - r0
+                    << " cosa=" << cosa << " sina=" << sina);
+
+      //update derivatives on total distance
+      if (i + 1 != Config::Niter) {
+        const float x = outPar(n, 0, 0);
+        const float y = outPar(n, 1, 0);
+        const float oor0 = (r0 > 0.f && std::abs(r - r0) < 0.0001f) ? 1.f / r0 : 0.f;
+
+        const float dadipt = id * kinv;
+
+        const float dadx = -x * ipt * kinv * oor0;
+        const float dady = -y * ipt * kinv * oor0;
+
+        const float pxca = pxin * cosa;
+        const float pxsa = pxin * sina;
+        const float pyca = pyin * cosa;
+        const float pysa = pyin * sina;
+
+        float tmp;
+
+        tmp = k * dadx;
+        dDdx -= (x * (1.f + tmp * (pxca - pysa)) + y * tmp * (pyca + pxsa)) * oor0;
+
+        tmp = k * dady;
+        dDdy -= (x * tmp * (pxca - pysa) + y * (1.f + tmp * (pyca + pxsa))) * oor0;
+
+        //now r0 depends on ipt and phi as well
+        tmp = dadipt * ipt;
+        dDdipt -=
+            k *
+            (x * (pxca * tmp - pysa * tmp - pyca - pxsa + pyin) + y * (pyca * tmp + pxsa * tmp - pysa + pxca - pxin)) *
+            pt * oor0;
+        dDdphi += k * (x * (pysa - pxin + pxca) - y * (pxsa - pyin + pyca)) * oor0;
+      }
+
+      //update parameters
+      outPar(n, 0, 0) = outPar(n, 0, 0) + 2.f * k * sinah * (pxin * cosah - pyin * sinah);
+      outPar(n, 1, 0) = outPar(n, 1, 0) + 2.f * k * sinah * (pyin * cosah + pxin * sinah);
+      const float pxinold = pxin;  //copy before overwriting
+      pxin = pxin * cosa - pyin * sina;
+      pyin = pyin * cosa + pxinold * sina;
+
+      dprint_np(n,
+                "outPar(n, 0, 0)=" << outPar(n, 0, 0) << " outPar(n, 1, 0)=" << outPar(n, 1, 0) << " pxin=" << pxin
+                                   << " pyin=" << pyin);
+    }
+    // D is the sum of the per-iteration steps; convert it and its derivatives into the total angle alpha
+    const float alpha = D * ipt * kinv;
+    const float dadx = dDdx * ipt * kinv;
+    const float dady = dDdy * ipt * kinv;
+    const float dadipt = (ipt * dDdipt + D) * kinv;
+    const float dadphi = dDdphi * ipt * kinv;
+
+    if (Config::useTrigApprox) {
+      sincos4(alpha, sina, cosa);
+    } else {
+      cosa = std::cos(alpha);
+      sina = std::sin(alpha);
+    }
+
+    errorProp(n, 0, 0) = 1.f + k * dadx * (cosPorT * cosa - sinPorT * sina) * pt;
+    errorProp(n, 0, 1) = k * dady * (cosPorT * cosa - sinPorT * sina) * pt;
+    errorProp(n, 0, 2) = 0.f;
+    errorProp(n, 0, 3) =
+        k * (cosPorT * (ipt * dadipt * cosa - sina) + sinPorT * ((1.f - cosa) - ipt * dadipt * sina)) * pt * pt;
+    errorProp(n, 0, 4) =
+        k * (cosPorT * dadphi * cosa - sinPorT * dadphi * sina - sinPorT * sina + cosPorT * cosa - cosPorT) * pt;
+    errorProp(n, 0, 5) = 0.f;
+
+    errorProp(n, 1, 0) = k * dadx * (sinPorT * cosa + cosPorT * sina) * pt;
+    errorProp(n, 1, 1) = 1.f + k * dady * (sinPorT * cosa + cosPorT * sina) * pt;
+    errorProp(n, 1, 2) = 0.f;
+    errorProp(n, 1, 3) =
+        k * (sinPorT * (ipt * dadipt * cosa - sina) + cosPorT * (ipt * dadipt * sina - (1.f - cosa))) * pt * pt;
+    errorProp(n, 1, 4) =
+        k * (sinPorT * dadphi * cosa + cosPorT * dadphi * sina + sinPorT * cosa + cosPorT * sina - sinPorT) * pt;
+    errorProp(n, 1, 5) = 0.f;
+
+    //no trig approx here, theta can be large
+    cosPorT = std::cos(theta);
+    sinPorT = std::sin(theta);
+    //redefine sinPorT as 1./sinPorT to reduce the number of temporaries
+    sinPorT = 1.f / sinPorT;
+
+    outPar(n, 2, 0) = inPar(n, 2, 0) + k * alpha * cosPorT * pt * sinPorT;
+
+    errorProp(n, 2, 0) = k * cosPorT * dadx * pt * sinPorT;
+    errorProp(n, 2, 1) = k * cosPorT * dady * pt * sinPorT;
+    errorProp(n, 2, 2) = 1.f;
+    errorProp(n, 2, 3) = k * cosPorT * (ipt * dadipt - alpha) * pt * pt * sinPorT;
+    errorProp(n, 2, 4) = k * dadphi * cosPorT * pt * sinPorT;
+    errorProp(n, 2, 5) = -k * alpha * pt * sinPorT * sinPorT;
+
+    outPar(n, 3, 0) = ipt;
+
+    errorProp(n, 3, 0) = 0.f;
+    errorProp(n, 3, 1) = 0.f;
+    errorProp(n, 3, 2) = 0.f;
+    errorProp(n, 3, 3) = 1.f;
+    errorProp(n, 3, 4) = 0.f;
+    errorProp(n, 3, 5) = 0.f;
+
+    outPar(n, 4, 0) = inPar(n, 4, 0) + alpha;
+
+    errorProp(n, 4, 0) = dadx;
+    errorProp(n, 4, 1) = dady;
+    errorProp(n, 4, 2) = 0.f;
+    errorProp(n, 4, 3) = dadipt;
+    errorProp(n, 4, 4) = 1.f + dadphi;
+    errorProp(n, 4, 5) = 0.f;
+
+    outPar(n, 5, 0) = theta;
+
+    errorProp(n, 5, 0) = 0.f;
+    errorProp(n, 5, 1) = 0.f;
+    errorProp(n, 5, 2) = 0.f;
+    errorProp(n, 5, 3) = 0.f;
+    errorProp(n, 5, 4) = 0.f;
+    errorProp(n, 5, 5) = 1.f;
+
+    dprint_np(n,
+              "propagation end, dump parameters"
+                  << std::endl
+                  << "   pos = " << outPar(n, 0, 0) << " " << outPar(n, 1, 0) << " " << outPar(n, 2, 0) << "\t\t r="
+                  << std::sqrt(outPar(n, 0, 0) * outPar(n, 0, 0) + outPar(n, 1, 0) * outPar(n, 1, 0)) << std::endl
+                  << "   mom = " << std::cos(outPar(n, 4, 0)) / outPar(n, 3, 0) << " "
+                  << std::sin(outPar(n, 4, 0)) / outPar(n, 3, 0) << " " << 1. / (outPar(n, 3, 0) * tan(outPar(n, 5, 0)))
+                  << "\t\tpT=" << 1. / std::abs(outPar(n, 3, 0)) << std::endl);
+
+#ifdef DEBUG
+    if (n < N_proc) {
+      dmutex_guard;
+      std::cout << n << ": jacobian" << std::endl;
+      printf("%5f %5f %5f %5f %5f %5f\n",
+             errorProp(n, 0, 0),
+             errorProp(n, 0, 1),
+             errorProp(n, 0, 2),
+             errorProp(n, 0, 3),
+             errorProp(n, 0, 4),
+             errorProp(n, 0, 5));
+      printf("%5f %5f %5f %5f %5f %5f\n",
+             errorProp(n, 1, 0),
+             errorProp(n, 1, 1),
+             errorProp(n, 1, 2),
+             errorProp(n, 1, 3),
+             errorProp(n, 1, 4),
+             errorProp(n, 1, 5));
+      printf("%5f %5f %5f %5f %5f %5f\n",
+             errorProp(n, 2, 0),
+             errorProp(n, 2, 1),
+             errorProp(n, 2, 2),
+             errorProp(n, 2, 3),
+             errorProp(n, 2, 4),
+             errorProp(n, 2, 5));
+      printf("%5f %5f %5f %5f %5f %5f\n",
+             errorProp(n, 3, 0),
+             errorProp(n, 3, 1),
+             errorProp(n, 3, 2),
+             errorProp(n, 3, 3),
+             errorProp(n, 3, 4),
+             errorProp(n, 3, 5));
+      printf("%5f %5f %5f %5f %5f %5f\n",
+             errorProp(n, 4, 0),
+             errorProp(n, 4, 1),
+             errorProp(n, 4, 2),
+             errorProp(n, 4, 3),
+             errorProp(n, 4, 4),
+             errorProp(n, 4, 5));
+      printf("%5f %5f %5f %5f %5f %5f\n",
+             errorProp(n, 5, 0),
+             errorProp(n, 5, 1),
+             errorProp(n, 5, 2),
+             errorProp(n, 5, 3),
+             errorProp(n, 5, 4),
+             errorProp(n, 5, 5));
+      printf("\n");
+    }
+#endif
+  }
+}
diff --git a/RecoTracker/MkFitCore/src/Track.cc b/RecoTracker/MkFitCore/src/Track.cc
new file mode 100644
index 0000000000000..4e85f7cd613fb
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/Track.cc
@@ -0,0 +1,424 @@
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "Matrix.h"
+
+//#define DEBUG
+#include "Debug.h"
+
+namespace mkfit {
+
+  //==============================================================================
+  // TrackState
+  //==============================================================================
+
+  void TrackState::convertFromCartesianToCCS() {
+    // In place: parameter slots 3..5 go from Cartesian (px, py, pz) to CCS (1/pT, phi, theta), errors follow.
+    const float momX = parameters.At(3);
+    const float momY = parameters.At(4);
+    const float momZ = parameters.At(5);
+    const float transMom = std::sqrt(momX * momX + momY * momY);
+    const float azimuth = getPhi(momX, momY);
+    const float polar = getTheta(transMom, momZ);
+    const SMatrix66 toCCS = jacobianCartesianToCCS(momX, momY, momZ);
+    parameters.At(3) = 1.f / transMom;
+    parameters.At(4) = azimuth;
+    parameters.At(5) = polar;
+    errors = ROOT::Math::Similarity(toCCS, errors);
+  }
+
+  void TrackState::convertFromCCSToCartesian() {
+    // In-place conversion of slots 3..5 from CCS (1/pT, phi, theta) to Cartesian (px, py, pz);
+    // the error matrix is transformed with the matching Jacobian.
+    const float curvInvPt = parameters.At(3);
+    const float azimuth = parameters.At(4);
+    const float polar = parameters.At(5);
+    // The Jacobian is a function of the CCS values only, so build it before they are overwritten.
+    const SMatrix66 toCartesian = jacobianCCSToCartesian(curvInvPt, azimuth, polar);
+    const float transMom = 1.f / curvInvPt;
+    // px = pT cos(phi), py = pT sin(phi)
+    parameters.At(3) = std::cos(azimuth) * transMom;
+    parameters.At(4) = std::sin(azimuth) * transMom;
+    // pz = pT cos(theta) / sin(theta)
+    parameters.At(5) = std::cos(polar) * transMom / std::sin(polar);
+    errors = ROOT::Math::Similarity(toCartesian, errors);
+  }
+
+  SMatrix66 TrackState::jacobianCCSToCartesian(float invpt, float phi, float theta) const {
+    // Works purely from its arguments (no member reads); only the entries set below differ from identity.
+    const float transMom = 1.f / invpt;
+    const float cosPhi = std::cos(phi);
+    const float sinPhi = std::sin(phi);
+    const float cosTheta = std::cos(theta);
+    const float sinTheta = std::sin(theta);
+    SMatrix66 result = ROOT::Math::SMatrixIdentity();
+    result(3, 3) = -cosPhi * transMom * transMom;               // d(px)/d(1/pT)
+    result(3, 4) = -sinPhi * transMom;                          // d(px)/d(phi)
+    result(4, 3) = -sinPhi * transMom * transMom;               // d(py)/d(1/pT)
+    result(4, 4) = cosPhi * transMom;                           // d(py)/d(phi)
+    result(5, 3) = -cosTheta * transMom * transMom / sinTheta;  // d(pz)/d(1/pT)
+    result(5, 5) = -transMom / (sinTheta * sinTheta);           // d(pz)/d(theta)
+    return result;
+  }
+
+  SMatrix66 TrackState::jacobianCartesianToCCS(float px, float py, float pz) const {
+    // Works purely from its arguments (no member reads); only the entries set below differ from identity.
+    const float transMom = std::sqrt(px * px + py * py);
+    const float momSq = px * px + py * py + pz * pz;
+    SMatrix66 result = ROOT::Math::SMatrixIdentity();
+    result(3, 3) = -px / (transMom * transMom * transMom);  // d(1/pT)/d(px)
+    result(3, 4) = -py / (transMom * transMom * transMom);  // d(1/pT)/d(py)
+    result(4, 3) = -py / (transMom * transMom);             // d(phi)/d(px)
+    result(4, 4) = px / (transMom * transMom);              // d(phi)/d(py)
+    result(5, 3) = px * pz / (transMom * momSq);            // d(theta)/d(px)
+    result(5, 4) = py * pz / (transMom * momSq);            // d(theta)/d(py)
+    result(5, 5) = -transMom / momSq;                       // d(theta)/d(pz)
+    return result;
+  }
+
+  void TrackState::convertFromGlbCurvilinearToCCS() {
+    // In place: global (px, py, pz) parameters with curvilinear errors -> CCS (1/pT, phi, theta) and errors.
+    const float momX = parameters.At(3);
+    const float momY = parameters.At(4);
+    const float momZ = parameters.At(5);
+    const float transMom = std::sqrt(momX * momX + momY * momY);
+    const float azimuth = getPhi(momX, momY);
+    const float polar = getTheta(transMom, momZ);
+    const SMatrix66 toCCS = jacobianCurvilinearToCCS(momX, momY, momZ, charge);
+    parameters.At(3) = 1.f / transMom;
+    parameters.At(4) = azimuth;
+    parameters.At(5) = polar;
+    errors = ROOT::Math::Similarity(toCCS, errors);
+  }
+
+  void TrackState::convertFromCCSToGlbCurvilinear() {
+    // In place: CCS (1/pT, phi, theta) -> global (px, py, pz) parameters with curvilinear errors.
+    const float invPt = parameters.At(3);
+    const float azimuth = parameters.At(4);
+    const float polar = parameters.At(5);
+    const float cosPhi = std::cos(azimuth);
+    const float sinPhi = std::sin(azimuth);
+    const float cosTheta = std::cos(polar);
+    const float sinTheta = std::sin(polar);
+    const SMatrix66 toCurvilinear = jacobianCCSToCurvilinear(invPt, cosPhi, sinPhi, cosTheta, sinTheta, charge);
+    const float transMom = 1.f / invPt;
+    parameters.At(3) = cosPhi * transMom;
+    parameters.At(4) = sinPhi * transMom;
+    parameters.At(5) = cosTheta * transMom / sinTheta;
+    errors = ROOT::Math::Similarity(toCurvilinear, errors);
+  }
+
+  SMatrix66 TrackState::jacobianCCSToCurvilinear(
+      float invpt, float cosP, float sinP, float cosT, float sinT, short charge) const {
+    // Starts from a default-constructed matrix (not identity): entries not assigned below stay as constructed.
+    SMatrix66 result;
+    result(0, 3) = charge * sinT;
+    result(0, 5) = charge * cosT * invpt;
+    result(1, 5) = -1.f;
+    result(2, 4) = 1.f;
+    result(3, 0) = -sinP;
+    result(3, 1) = cosP;
+    result(4, 0) = -cosP * cosT;
+    result(4, 1) = -sinP * cosT;
+    result(4, 2) = sinT;
+    return result;
+  }
+
+  SMatrix66 TrackState::jacobianCurvilinearToCCS(float px, float py, float pz, short charge) const {
+    // Arguments only, no member reads. std::sqrt (not bare sqrt) pins the float overload, as elsewhere here.
+    const float pt2 = px * px + py * py;
+    const float pt = std::sqrt(pt2);
+    const float invpt2 = 1.f / pt2;
+    const float invpt = 1.f / pt;
+    const float invp = 1.f / std::sqrt(pt2 + pz * pz);
+    const float sinPhi = py * invpt;
+    const float cosPhi = px * invpt;
+    const float sinLam = pz * invp;  // pz / |p|
+    const float cosLam = pt * invp;  // pT / |p|
+    // Default-constructed (not identity): entries not assigned below stay as constructed.
+    SMatrix66 jac;
+    jac(0, 3) = -sinPhi;
+    jac(0, 4) = -sinLam * cosPhi;
+    jac(1, 3) = cosPhi;
+    jac(1, 4) = -sinLam * sinPhi;
+    jac(2, 4) = cosLam;
+    jac(3, 0) = charge / cosLam;  //assumes |charge|==1 ; else 1.f/charge here
+    jac(3, 1) = pz * invpt2;
+    jac(4, 2) = 1.f;
+    jac(5, 1) = -1.f;
+    return jac;
+  }
+
+  //==============================================================================
+  // TrackBase
+  //==============================================================================
+
+  bool TrackBase::hasSillyValues(bool dump, bool fix, const char* pref) {
+    // Scans the lower triangle (diagonal included) of state_.errors; can report and/or patch bad entries.
+    bool found_bad = false;
+    for (int row = 0; row < LL; ++row) {
+      for (int col = 0; col <= row; ++col) {
+        const bool negative_diag = (row == col && state_.errors.At(row, col) < 0);
+        if (!negative_diag && std::isfinite(state_.errors.At(row, col)))
+          continue;
+        if (dump && !found_bad)  // header goes out once, just before the first offending entry
+          printf("%s (label=%d, pT=%f):", pref, label(), pT());
+        found_bad = true;
+        if (dump)
+          printf(" (%d,%d)=%e", row, col, state_.errors.At(row, col));
+        if (fix)
+          state_.errors.At(row, col) = 0.00001;  // replaced by a small positive placeholder
+      }
+    }
+    if (found_bad && dump)
+      printf("\n");
+    return found_bad;
+  }
+
+  bool TrackBase::hasNanNSillyValues() const {
+    // Read-only check of the lower triangle (diagonal included) of state_.errors; stops at the first bad entry.
+    for (int row = 0; row < LL; ++row) {
+      for (int col = 0; col <= row; ++col) {
+        const bool negative_diag = (row == col && state_.errors.At(row, col) < 0);
+        // Bad means: a negative diagonal element, or a non-finite value (NaN/inf) anywhere.
+        if (negative_diag || !std::isfinite(state_.errors.At(row, col)))
+          return true;
+      }
+    }
+    return false;
+  }
+
+  // Transverse distance of the track to the beam spot (x_bs, y_bs). With linearize=true a linear estimate
+  // is used: suitable at pT>~10 GeV (--> 10 micron error), per the original authors.
+  float TrackBase::d0BeamSpot(const float x_bs, const float y_bs, bool linearize) const {
+    if (linearize)
+      return std::abs(std::cos(momPhi()) * (y() - y_bs) - std::sin(momPhi()) * (x() - x_bs));
+    // Otherwise: distance from the beam spot to the helix centre in the x,y plane, minus |k * pT|.
+    const float kappa = ((charge() < 0) ? 100.0f : -100.0f) / (Const::sol * Config::Bfield);
+    const float abs_k_pt = std::abs(kappa * pT());
+    const float centre_x = x() - kappa * py();
+    const float centre_y = y() + kappa * px();
+    // Unlike the linear branch, this result is not wrapped in abs().
+    return std::hypot(centre_x - x_bs, centre_y - y_bs) - abs_k_pt;
+  }
+
+  const char* TrackBase::algoint_to_cstr(int algo) {
+    // Name table indexed by the integer algorithm code; its last entry is the "algoSize" sentinel.
+    static const char* const names[] = {"undefAlgorithm",
+                                        "ctf",
+                                        "duplicateMerge",
+                                        "cosmics",
+                                        "initialStep",
+                                        "lowPtTripletStep",
+                                        "pixelPairStep",
+                                        "detachedTripletStep",
+                                        "mixedTripletStep",
+                                        "pixelLessStep",
+                                        "tobTecStep",
+                                        "jetCoreRegionalStep",
+                                        "conversionStep",
+                                        "muonSeededStepInOut",
+                                        "muonSeededStepOutIn",
+                                        "outInEcalSeededConv",
+                                        "inOutEcalSeededConv",
+                                        "nuclInter",
+                                        "standAloneMuon",
+                                        "globalMuon",
+                                        "cosmicStandAloneMuon",
+                                        "cosmicGlobalMuon",
+                                        "highPtTripletStep",
+                                        "lowPtQuadStep",
+                                        "detachedQuadStep",
+                                        "reservedForUpgrades1",
+                                        "reservedForUpgrades2",
+                                        "bTagGhostTracks",
+                                        "beamhalo",
+                                        "gsf",
+                                        "hltPixel",
+                                        "hltIter0",
+                                        "hltIter1",
+                                        "hltIter2",
+                                        "hltIter3",
+                                        "hltIter4",
+                                        "hltIterX",
+                                        "hiRegitMuInitialStep",
+                                        "hiRegitMuLowPtTripletStep",
+                                        "hiRegitMuPixelPairStep",
+                                        "hiRegitMuDetachedTripletStep",
+                                        "hiRegitMuMixedTripletStep",
+                                        "hiRegitMuPixelLessStep",
+                                        "hiRegitMuTobTecStep",
+                                        "hiRegitMuMuonSeededStepInOut",
+                                        "hiRegitMuMuonSeededStepOutIn",
+                                        "algoSize"};
+    // Codes outside [0, algoSize) - the sentinel value included - map to entry 0.
+    const bool in_range = algo >= 0 && algo < (int)TrackAlgorithm::algoSize;
+    return in_range ? names[algo] : names[0];
+  }
+
+  //==============================================================================
+  // Track
+  //==============================================================================
+
+  void Track::resizeHitsForInput() {  // NOTE(review): presumably for tracks loaded as raw bytes - confirm
+    bzero(&hitsOnTrk_, sizeof(hitsOnTrk_));  // zeroes the container object itself; no destructor is run
+    hitsOnTrk_.resize(lastHitIdx_ + 1);      // then sized to hold hit slots 0..lastHitIdx_
+  }
+
+  void Track::sortHitsByLayer() {
+    // Stable-sort hit slots [0, lastHitIdx_] by layer; iterators avoid operator[] one past the end (UB).
+    const auto first = hitsOnTrk_.begin();
+    std::stable_sort(first, first + (lastHitIdx_ + 1), [](const auto& a, const auto& b) { return a.layer < b.layer; });
+  }
+
+  float Track::swimPhiToR(const float x0, const float y0) const {
+    // Momentum phi after turning through an angle derived from the transverse distance to (x0, y0).
+    const float dist = getHypot(x() - x0, y() - y0);
+    const float sin_half = dist / 176.f / pT() * charge();  // NOTE(review): 176 is an unexplained constant
+    // XXX-ASSUMPTION-ERROR (kept from the original): R cannot always be reached; callers' expectations are
+    // unchecked. When |sin_half| > 1 the turn is set to PI, signalling the apex on the other side of the helix.
+    const float turn = std::abs(sin_half) <= 1.0f ? 2.f * std::asin(sin_half) : Const::PI;
+    return squashPhiGeneral(momPhi() - turn);
+  }
+
+  bool Track::canReachRadius(float R) const {
+    // True when |2 * k * pT| exceeds the gap between R and the current transverse radius hypot(x, y).
+    const float kappa = ((charge() < 0) ? 100.0f : -100.0f) / (Const::sol * Config::Bfield);
+    return std::abs(2.0f * kappa * pT()) > R - std::hypot(x(), y());
+  }
+
+  float Track::maxReachRadius() const {
+    // Distance of the helix centre (x,y plane) from the origin plus |k * pT|.
+    const float kappa = ((charge() < 0) ? 100.0f : -100.0f) / (Const::sol * Config::Bfield);
+    const float abs_k_pt = std::abs(kappa * pT());
+    const float centre_x = x() - kappa * py();
+    const float centre_y = y() + kappa * px();
+    return std::hypot(centre_x, centre_y) + abs_k_pt;
+  }
+
+  float Track::zAtR(float R, float* r_reached) const {
+    float xc = x();
+    float yc = y();
+    float pxc = px();
+    float pyc = py();
+    // Estimates z at transverse radius R by Config::Niter helix steps on the local copies xc, yc, pxc, pyc.
+    const float ipt = invpT();
+    const float kinv = ((charge() < 0) ? 0.01f : -0.01f) * Const::sol * Config::Bfield;
+    const float k = 1.0f / kinv;
+
+    const float c = 0.5f * kinv * ipt;
+    const float ooc = 1.0f / c;  // 2 * radius of curvature
+    const float lambda = pz() * ipt;
+
+    //printf("Track::zAtR to R=%f: k=%e, ipt=%e, c=%e, ooc=%e  -- can hit = %f (if > 1 can)\n",
+    //       R, k, ipt, c, ooc, ooc / (R - std::hypot(xc,yc)));
+
+    float D = 0;
+    // D accumulates the per-iteration step lengths 'id'; the returned value is z() + lambda * D.
+    for (int i = 0; i < Config::Niter; ++i) {
+      // compute tangential and ideal distance for the current iteration.
+      // 3-rd order asin for symmetric incidence (shortest arc length).
+      float r0 = std::hypot(xc, yc);
+      float td = (R - r0) * c;
+      float id = ooc * td * (1.0f + 0.16666666f * td * td);
+      // This would be for line approximation:
+      // float id = R - r0;
+      D += id;
+
+      //printf("%-3d r0=%f R-r0=%f td=%f id=%f id_line=%f delta_id=%g\n",
+      //       i, r0, R-r0, td, id, R - r0, id - (R-r0));
+
+      float cosa = std::cos(id * ipt * kinv);
+      float sina = std::sin(id * ipt * kinv);
+
+      //update parameters
+      xc += k * (pxc * sina - pyc * (1.0f - cosa));
+      yc += k * (pyc * sina + pxc * (1.0f - cosa));
+
+      const float pxo = pxc;  //copy before overwriting
+      pxc = pxc * cosa - pyc * sina;
+      pyc = pyc * cosa + pxo * sina;
+    }
+    // Optional output: the transverse radius of the local position after the last iteration.
+    if (r_reached)
+      *r_reached = std::hypot(xc, yc);
+
+    return z() + lambda * D;
+
+    // ----------------------------------------------------------------
+    // Exact solution from Avery's notes ... loses precision somewhere
+    // {
+    //   const float a = kinv;
+    //   float pT      = S.pT();
+
+    //   float ax2y2  = a*(x*x + y*y);
+    //   float T      = std::sqrt(pT*pT - 2.0f*a*(x*py - y*px) + a*ax2y2);
+    //   float D0     = (T - pT) / a;
+    //   float D      = (-2.0f * (x*py - y*px) + a * (x*x + y*y)) / (T + pT);
+
+    //   float B      = c * std::sqrt((R*R - D*D) / (1.0f + 2.0f*c*D));
+    //   float s1     = std::asin(B) / c;
+    //   float s2     = (Const::PI - std::asin(B)) / c;
+
+    //   printf("pt %f, invpT %f\n", pT, S.invpT());
+    //   printf("lambda %f, a %f, c %f, T %f, D0 %f, D %f, B %f, s1 %f, s2 %f\n",
+    //          lambda, a, c, T, D0, D, B, s1, s2);
+    //   printf("%f = %f / %f\n", (R*R - D*D) / (1.0f + 2.0f*c*D), (R*R - D*D), (1.0f + 2.0f*c*D));
+
+    //   z1 = S.z() + lambda * s1;
+    //   z2 = S.z() + lambda * s2;
+
+    //   printf("z1=%f z2=%f\n", z1, z2);
+    // }
+    // ----------------------------------------------------------------
+  }
+
+  float Track::rAtZ(float Z) const {
+    // Transverse radius of the track position after a single helix step from z() to Z.
+    const float x_start = x();
+    const float y_start = y();
+    const float mom_x = px();
+    const float mom_y = py();
+
+    const float inv_pt = invpT();
+    const float kinv = ((charge() < 0) ? 0.01f : -0.01f) * Const::sol * Config::Bfield;
+    const float k = 1.0f / kinv;
+
+    // Turning angle for the requested z displacement.
+    const float dz = Z - z();
+    const float turn = dz * inv_pt * kinv * std::tan(theta());
+    const float cos_turn = std::cos(turn);
+    const float sin_turn = std::sin(turn);
+    const float one_minus_cos = 1.0f - cos_turn;
+
+    // Only the position is stepped; the momentum rotation is left out because the
+    // direction at the end point does not enter the returned radius.
+    const float x_end = x_start + k * (mom_x * sin_turn - mom_y * one_minus_cos);
+    const float y_end = y_start + k * (mom_y * sin_turn + mom_x * one_minus_cos);
+
+    return std::hypot(x_end, y_end);
+  }
+
+  //==============================================================================
+
+  void print(const TrackState& s) {  // six parameters and the valid flag to std::cout, then dumpMatrix(errors)
+    std::cout << " x:  " << s.parameters[0] << " y:  " << s.parameters[1] << " z:  " << s.parameters[2] << std::endl;
+    std::cout << " px: " << s.parameters[3] << " py: " << s.parameters[4] << " pz: " << s.parameters[5] << std::endl;
+    std::cout << "valid: " << s.valid << " errors: " << std::endl;
+    dumpMatrix(s.errors);
+    std::cout << std::endl;
+  }
+
+  void print(std::string label, int itrack, const Track& trk, bool print_hits) {
+    // Header line plus the track state; with print_hits, also one "slot: layer, hit index" row per hit slot.
+    std::cout << std::endl << label << ": " << itrack << " hits: " << trk.nFoundHits() << " State" << std::endl;
+    print(trk.state());
+    if (print_hits)
+      for (int slot = 0; slot < trk.nTotalHits(); ++slot)
+        printf("  %2d: lyr %2d idx %d\n", slot, trk.getHitLyr(slot), trk.getHitIdx(slot));
+  }
+
+  void print(std::string label, const TrackState& s) {  // label on its own line, then the state dump
+    std::cout << label << std::endl;
+    print(s);  // forwards to print(const TrackState&)
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/TrackerInfo.cc b/RecoTracker/MkFitCore/src/TrackerInfo.cc
new file mode 100644
index 0000000000000..501381b3d040e
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/TrackerInfo.cc
@@ -0,0 +1,62 @@
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+#include <cassert>
+
+namespace mkfit {
+
+  void LayerInfo::set_limits(float r1, float r2, float z1, float z2) {  // plain stores; no ordering check
+    m_rin = r1;   // r1 -> m_rin
+    m_rout = r2;  // r2 -> m_rout
+    m_zmin = z1;  // z1 -> m_zmin
+    m_zmax = z2;  // z2 -> m_zmax
+  }
+
+  void LayerInfo::set_r_hole_range(float rh1, float rh2) {  // marks the r-range hole as present
+    m_has_r_range_hole = true;
+    m_hole_r_min = rh1;  // bounds are stored as given, without validation
+    m_hole_r_max = rh2;
+  }
+
+  //==============================================================================
+  // TrackerInfo
+  //==============================================================================
+
+  void TrackerInfo::reserve_layers(int n_brl, int n_ec_pos, int n_ec_neg) {  // capacity only; creates no layers
+    m_layers.reserve(n_brl + n_ec_pos + n_ec_neg);  // room for every layer of all three kinds
+    m_barrel.reserve(n_brl);
+    m_ecap_pos.reserve(n_ec_pos);
+    m_ecap_neg.reserve(n_ec_neg);
+  }
+
+  void TrackerInfo::create_layers(int n_brl, int n_ec_pos, int n_ec_neg) {
+    reserve_layers(n_brl, n_ec_pos, n_ec_neg);  // capacity first, then creation in a fixed order
+    for (int remaining = n_brl; remaining > 0; --remaining)
+      new_barrel_layer();  // barrel layers come first
+    for (int remaining = n_ec_pos; remaining > 0; --remaining)
+      new_ecap_pos_layer();  // then positive end-cap layers
+    for (int remaining = n_ec_neg; remaining > 0; --remaining)
+      new_ecap_neg_layer();  // then negative end-cap layers
+  }
+
+  int TrackerInfo::new_layer(LayerInfo::LayerType_e type) {
+    const int layer_id = (int)m_layers.size();  // the id of a layer is its index in m_layers
+    m_layers.emplace_back(layer_id, type);      // built in place; no temporary LayerInfo to move from
+    return layer_id;
+  }
+
+  LayerInfo &TrackerInfo::new_barrel_layer() {
+    m_barrel.push_back(new_layer(LayerInfo::Barrel));  // record the new layer's index in m_barrel
+    return m_layers.back();                            // the layer just appended by new_layer()
+  }
+
+  LayerInfo &TrackerInfo::new_ecap_pos_layer() {
+    m_ecap_pos.push_back(new_layer(LayerInfo::EndCapPos));  // record the new layer's index in m_ecap_pos
+    return m_layers.back();                                 // the layer just appended by new_layer()
+  }
+
+  LayerInfo &TrackerInfo::new_ecap_neg_layer() {
+    m_ecap_neg.push_back(new_layer(LayerInfo::EndCapNeg));  // record the new layer's index in m_ecap_neg
+    return m_layers.back();                                 // the layer just appended by new_layer()
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/src/upParam_MultKalmanGain.ah b/RecoTracker/MkFitCore/src/upParam_MultKalmanGain.ah
new file mode 100644
index 0000000000000..ec2a8b3ac5413
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/upParam_MultKalmanGain.ah
@@ -0,0 +1,142 @@
+#ifdef MPLEX_INTRINSICS
+// Intrinsics branch: same 18 outputs c[0..17] as the scalar branch below, assuming FMA(x, y, z) = x*y + z.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_1 = FMA(a_1, b_2, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_1, b_4, c_2);
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(a_3, b_3, c_0);
+      c_1 = FMA(a_3, b_4, c_1);
+      IntrVec_t b_5 = LD(b, 5);
+      c_2 = FMA(a_3, b_5, c_2);
+
+      IntrVec_t c_3 = MUL(a_1, b_0);
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      IntrVec_t c_4 = MUL(a_1, b_1);
+      ST(c, 2, c_2);
+      IntrVec_t c_5 = MUL(a_1, b_3);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_3 = FMA(a_2, b_1, c_3);
+      c_4 = FMA(a_2, b_2, c_4);
+      c_5 = FMA(a_2, b_4, c_5);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_3 = FMA(a_4, b_3, c_3);
+      c_4 = FMA(a_4, b_4, c_4);
+      c_5 = FMA(a_4, b_5, c_5);
+
+      IntrVec_t c_6 = MUL(a_3, b_0);
+      IntrVec_t c_7 = MUL(a_3, b_1);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_8 = MUL(a_3, b_3);
+
+      c_6 = FMA(a_4, b_1, c_6);
+      c_7 = FMA(a_4, b_2, c_7);
+      c_8 = FMA(a_4, b_4, c_8);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_6 = FMA(a_5, b_3, c_6);
+      c_7 = FMA(a_5, b_4, c_7);
+      c_8 = FMA(a_5, b_5, c_8);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_9 = MUL(a_6, b_0);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      IntrVec_t c_10 = MUL(a_6, b_1);
+      IntrVec_t c_11 = MUL(a_6, b_3);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_9 = FMA(a_7, b_1, c_9);
+      c_10 = FMA(a_7, b_2, c_10);
+      c_11 = FMA(a_7, b_4, c_11);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_9 = FMA(a_8, b_3, c_9);
+      c_10 = FMA(a_8, b_4, c_10);
+      c_11 = FMA(a_8, b_5, c_11);
+
+      IntrVec_t a_10 = LD(a, 10);
+      IntrVec_t c_12 = MUL(a_10, b_0);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      IntrVec_t c_13 = MUL(a_10, b_1);
+      IntrVec_t c_14 = MUL(a_10, b_3);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_12 = FMA(a_11, b_1, c_12);
+      c_13 = FMA(a_11, b_2, c_13);
+      c_14 = FMA(a_11, b_4, c_14);
+
+      IntrVec_t a_12 = LD(a, 12);
+      c_12 = FMA(a_12, b_3, c_12);
+      c_13 = FMA(a_12, b_4, c_13);
+      c_14 = FMA(a_12, b_5, c_14);
+
+      IntrVec_t a_15 = LD(a, 15);
+      IntrVec_t c_15 = MUL(a_15, b_0);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      IntrVec_t c_16 = MUL(a_15, b_1);
+      IntrVec_t c_17 = MUL(a_15, b_3);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_15 = FMA(a_16, b_1, c_15);
+      c_16 = FMA(a_16, b_2, c_16);
+      c_17 = FMA(a_16, b_4, c_17);
+
+      IntrVec_t a_17 = LD(a, 17);
+      c_15 = FMA(a_17, b_3, c_15);
+      c_16 = FMA(a_17, b_4, c_16);
+      c_17 = FMA(a_17, b_5, c_17);
+
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+   }
+
+#else
+// Scalar branch: for every n < N, each c[k*N+n] (k = 0..17) is a three-term sum of a[i*N+n]*b[j*N+n] products.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 3*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n] + a[ 3*N+n]*b[ 4*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 5*N+n];
+      c[ 3*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 3*N+n];
+      c[ 4*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n] + a[ 4*N+n]*b[ 4*N+n];
+      c[ 5*N+n] = a[ 1*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 4*N+n]*b[ 5*N+n];
+      c[ 6*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n];
+      c[ 7*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n];
+      c[ 8*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 5*N+n]*b[ 5*N+n];
+      c[ 9*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n];
+      c[10*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n];
+      c[11*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 5*N+n];
+      c[12*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 1*N+n] + a[12*N+n]*b[ 3*N+n];
+      c[13*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 2*N+n] + a[12*N+n]*b[ 4*N+n];
+      c[14*N+n] = a[10*N+n]*b[ 3*N+n] + a[11*N+n]*b[ 4*N+n] + a[12*N+n]*b[ 5*N+n];
+      c[15*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 1*N+n] + a[17*N+n]*b[ 3*N+n];
+      c[16*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 2*N+n] + a[17*N+n]*b[ 4*N+n];
+      c[17*N+n] = a[15*N+n]*b[ 3*N+n] + a[16*N+n]*b[ 4*N+n] + a[17*N+n]*b[ 5*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/upParam_kalmanGain_x_propErr.ah b/RecoTracker/MkFitCore/src/upParam_kalmanGain_x_propErr.ah
new file mode 100644
index 0000000000000..3f5cd68e5121a
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/upParam_kalmanGain_x_propErr.ah
@@ -0,0 +1,168 @@
+#ifdef MPLEX_INTRINSICS
+// Intrinsics branch: same 21 outputs c[0..20] as the scalar branch below, assuming FMA(x, y, z) = x*y + z.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t b_1 = LD(b, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+
+      IntrVec_t a_2 = LD(a, 2);
+      IntrVec_t b_3 = LD(b, 3);
+      c_0 = FMA(a_2, b_3, c_0);
+
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t c_1 = MUL(a_3, b_0);
+      IntrVec_t c_2 = MUL(a_3, b_1);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_1 = FMA(a_4, b_1, c_1);
+      ST(c, 0, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_2 = FMA(a_4, b_2, c_2);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_1 = FMA(a_5, b_3, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_5, b_4, c_2);
+
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_3 = MUL(a_6, b_0);
+      ST(c, 1, c_1);
+      IntrVec_t c_4 = MUL(a_6, b_1);
+      ST(c, 2, c_2);
+      IntrVec_t c_5 = MUL(a_6, b_3);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_3 = FMA(a_7, b_1, c_3);
+      c_4 = FMA(a_7, b_2, c_4);
+      c_5 = FMA(a_7, b_4, c_5);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_3 = FMA(a_8, b_3, c_3);
+      c_4 = FMA(a_8, b_4, c_4);
+      IntrVec_t b_5 = LD(b, 5);
+      c_5 = FMA(a_8, b_5, c_5);
+
+      IntrVec_t a_9 = LD(a, 9);
+      IntrVec_t c_6 = MUL(a_9, b_0);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_7 = MUL(a_9, b_1);
+      IntrVec_t c_8 = MUL(a_9, b_3);
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_9 = MUL(a_9, b_6);
+
+      IntrVec_t a_10 = LD(a, 10);
+      c_6 = FMA(a_10, b_1, c_6);
+      c_7 = FMA(a_10, b_2, c_7);
+      c_8 = FMA(a_10, b_4, c_8);
+      IntrVec_t b_7 = LD(b, 7);
+      c_9 = FMA(a_10, b_7, c_9);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_6 = FMA(a_11, b_3, c_6);
+      c_7 = FMA(a_11, b_4, c_7);
+      c_8 = FMA(a_11, b_5, c_8);
+      IntrVec_t b_8 = LD(b, 8);
+      c_9 = FMA(a_11, b_8, c_9);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+
+      IntrVec_t a_12 = LD(a, 12);
+      IntrVec_t c_10 = MUL(a_12, b_0);
+      ST(c, 9, c_9);
+      IntrVec_t c_11 = MUL(a_12, b_1);
+      IntrVec_t c_12 = MUL(a_12, b_3);
+      IntrVec_t c_13 = MUL(a_12, b_6);
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_14 = MUL(a_12, b_10);
+
+      IntrVec_t a_13 = LD(a, 13);
+      c_10 = FMA(a_13, b_1, c_10);
+      c_11 = FMA(a_13, b_2, c_11);
+      c_12 = FMA(a_13, b_4, c_12);
+      c_13 = FMA(a_13, b_7, c_13);
+      IntrVec_t b_11 = LD(b, 11);
+      c_14 = FMA(a_13, b_11, c_14);
+
+      IntrVec_t a_14 = LD(a, 14);
+      c_10 = FMA(a_14, b_3, c_10);
+      c_11 = FMA(a_14, b_4, c_11);
+      c_12 = FMA(a_14, b_5, c_12);
+      c_13 = FMA(a_14, b_8, c_13);
+      IntrVec_t b_12 = LD(b, 12);
+      c_14 = FMA(a_14, b_12, c_14);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+
+      IntrVec_t a_15 = LD(a, 15);
+      IntrVec_t c_15 = MUL(a_15, b_0);
+      IntrVec_t c_16 = MUL(a_15, b_1);
+      IntrVec_t c_17 = MUL(a_15, b_3);
+      IntrVec_t c_18 = MUL(a_15, b_6);
+      IntrVec_t c_19 = MUL(a_15, b_10);
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_20 = MUL(a_15, b_15);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_15 = FMA(a_16, b_1, c_15);
+      c_16 = FMA(a_16, b_2, c_16);
+      c_17 = FMA(a_16, b_4, c_17);
+      c_18 = FMA(a_16, b_7, c_18);
+      c_19 = FMA(a_16, b_11, c_19);
+      IntrVec_t b_16 = LD(b, 16);
+      c_20 = FMA(a_16, b_16, c_20);
+
+      IntrVec_t a_17 = LD(a, 17);
+      c_15 = FMA(a_17, b_3, c_15);
+      c_16 = FMA(a_17, b_4, c_16);
+      c_17 = FMA(a_17, b_5, c_17);
+      c_18 = FMA(a_17, b_8, c_18);
+      c_19 = FMA(a_17, b_12, c_19);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      IntrVec_t b_17 = LD(b, 17);
+      c_20 = FMA(a_17, b_17, c_20);
+      ST(c, 20, c_20);
+   }
+
+#else
+// Scalar branch: for every n < N, each c[k*N+n] (k = 0..20) is a three-term sum of a[i*N+n]*b[j*N+n] products.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 3*N+n];
+      c[ 1*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n];
+      c[ 2*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n];
+      c[ 3*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 3*N+n];
+      c[ 4*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 2*N+n] + a[ 8*N+n]*b[ 4*N+n];
+      c[ 5*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 4*N+n] + a[ 8*N+n]*b[ 5*N+n];
+      c[ 6*N+n] = a[ 9*N+n]*b[ 0*N+n] + a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 3*N+n];
+      c[ 7*N+n] = a[ 9*N+n]*b[ 1*N+n] + a[10*N+n]*b[ 2*N+n] + a[11*N+n]*b[ 4*N+n];
+      c[ 8*N+n] = a[ 9*N+n]*b[ 3*N+n] + a[10*N+n]*b[ 4*N+n] + a[11*N+n]*b[ 5*N+n];
+      c[ 9*N+n] = a[ 9*N+n]*b[ 6*N+n] + a[10*N+n]*b[ 7*N+n] + a[11*N+n]*b[ 8*N+n];
+      c[10*N+n] = a[12*N+n]*b[ 0*N+n] + a[13*N+n]*b[ 1*N+n] + a[14*N+n]*b[ 3*N+n];
+      c[11*N+n] = a[12*N+n]*b[ 1*N+n] + a[13*N+n]*b[ 2*N+n] + a[14*N+n]*b[ 4*N+n];
+      c[12*N+n] = a[12*N+n]*b[ 3*N+n] + a[13*N+n]*b[ 4*N+n] + a[14*N+n]*b[ 5*N+n];
+      c[13*N+n] = a[12*N+n]*b[ 6*N+n] + a[13*N+n]*b[ 7*N+n] + a[14*N+n]*b[ 8*N+n];
+      c[14*N+n] = a[12*N+n]*b[10*N+n] + a[13*N+n]*b[11*N+n] + a[14*N+n]*b[12*N+n];
+      c[15*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 1*N+n] + a[17*N+n]*b[ 3*N+n];
+      c[16*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 2*N+n] + a[17*N+n]*b[ 4*N+n];
+      c[17*N+n] = a[15*N+n]*b[ 3*N+n] + a[16*N+n]*b[ 4*N+n] + a[17*N+n]*b[ 5*N+n];
+      c[18*N+n] = a[15*N+n]*b[ 6*N+n] + a[16*N+n]*b[ 7*N+n] + a[17*N+n]*b[ 8*N+n];
+      c[19*N+n] = a[15*N+n]*b[10*N+n] + a[16*N+n]*b[11*N+n] + a[17*N+n]*b[12*N+n];
+      c[20*N+n] = a[15*N+n]*b[15*N+n] + a[16*N+n]*b[16*N+n] + a[17*N+n]*b[17*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/upParam_propErrT_x_simil_propErr.ah b/RecoTracker/MkFitCore/src/upParam_propErrT_x_simil_propErr.ah
new file mode 100644
index 0000000000000..b24a86e57b98c
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/upParam_propErrT_x_simil_propErr.ah
@@ -0,0 +1,186 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector path: evaluates the same 21 three-term sums as the scalar loop in the #else branch, using LD/MUL/FMA/ST.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      // NOTE(review): FMA(x, y, acc) is presumably x*y + acc, which matches the scalar formulas -- confirm in the macro.
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t b_6 = LD(b, 6);
+      c_0 = FMA(a_1, b_6, c_0);
+
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t b_12 = LD(b, 12);
+      c_0 = FMA(a_3, b_12, c_0);
+
+
+
+      // Outputs c_1 and c_2 (a_1, a_2, a_4 terms); c_0 is stored along the way.
+      IntrVec_t c_1 = MUL(a_1, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_2 = MUL(a_1, b_1);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_1 = FMA(a_2, b_6, c_1);
+      ST(c, 0, c_0);
+      IntrVec_t b_7 = LD(b, 7);
+      c_2 = FMA(a_2, b_7, c_2);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_1 = FMA(a_4, b_12, c_1);
+      IntrVec_t b_13 = LD(b, 13);
+      c_2 = FMA(a_4, b_13, c_2);
+
+
+
+      // Outputs c_3..c_5 (a_3, a_4, a_5 terms); c_1 and c_2 are stored along the way.
+      IntrVec_t c_3 = MUL(a_3, b_0);
+      IntrVec_t c_4 = MUL(a_3, b_1);
+      IntrVec_t b_2 = LD(b, 2);
+      IntrVec_t c_5 = MUL(a_3, b_2);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+
+      c_3 = FMA(a_4, b_6, c_3);
+      c_4 = FMA(a_4, b_7, c_4);
+      IntrVec_t b_8 = LD(b, 8);
+      c_5 = FMA(a_4, b_8, c_5);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_3 = FMA(a_5, b_12, c_3);
+      c_4 = FMA(a_5, b_13, c_4);
+      IntrVec_t b_14 = LD(b, 14);
+      c_5 = FMA(a_5, b_14, c_5);
+
+
+
+      // Outputs c_6..c_9 (a_6, a_7, a_8 terms); c_3..c_5 are stored along the way.
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_6 = MUL(a_6, b_0);
+      IntrVec_t c_7 = MUL(a_6, b_1);
+      IntrVec_t c_8 = MUL(a_6, b_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_9 = MUL(a_6, b_3);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_6 = FMA(a_7, b_6, c_6);
+      c_7 = FMA(a_7, b_7, c_7);
+      c_8 = FMA(a_7, b_8, c_8);
+      IntrVec_t b_9 = LD(b, 9);
+      c_9 = FMA(a_7, b_9, c_9);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_6 = FMA(a_8, b_12, c_6);
+      c_7 = FMA(a_8, b_13, c_7);
+      c_8 = FMA(a_8, b_14, c_8);
+      IntrVec_t b_15 = LD(b, 15);
+      c_9 = FMA(a_8, b_15, c_9);
+
+
+
+      // Outputs c_10..c_14 (a_10, a_11, a_12 terms); c_6..c_9 are stored along the way.
+      IntrVec_t a_10 = LD(a, 10);
+      IntrVec_t c_10 = MUL(a_10, b_0);
+      IntrVec_t c_11 = MUL(a_10, b_1);
+      IntrVec_t c_12 = MUL(a_10, b_2);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      IntrVec_t c_13 = MUL(a_10, b_3);
+      IntrVec_t b_4 = LD(b, 4);
+      IntrVec_t c_14 = MUL(a_10, b_4);
+
+      IntrVec_t a_11 = LD(a, 11);
+      c_10 = FMA(a_11, b_6, c_10);
+      c_11 = FMA(a_11, b_7, c_11);
+      c_12 = FMA(a_11, b_8, c_12);
+      c_13 = FMA(a_11, b_9, c_13);
+      IntrVec_t b_10 = LD(b, 10);
+      c_14 = FMA(a_11, b_10, c_14);
+
+      IntrVec_t a_12 = LD(a, 12);
+      c_10 = FMA(a_12, b_12, c_10);
+      c_11 = FMA(a_12, b_13, c_11);
+      c_12 = FMA(a_12, b_14, c_12);
+      c_13 = FMA(a_12, b_15, c_13);
+      IntrVec_t b_16 = LD(b, 16);
+      c_14 = FMA(a_12, b_16, c_14);
+
+
+
+      // Outputs c_15..c_20 (a_15, a_16, a_17 terms); c_10..c_14 are stored along the way.
+      IntrVec_t a_15 = LD(a, 15);
+      IntrVec_t c_15 = MUL(a_15, b_0);
+      IntrVec_t c_16 = MUL(a_15, b_1);
+      IntrVec_t c_17 = MUL(a_15, b_2);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      IntrVec_t c_18 = MUL(a_15, b_3);
+      IntrVec_t c_19 = MUL(a_15, b_4);
+      IntrVec_t b_5 = LD(b, 5);
+      IntrVec_t c_20 = MUL(a_15, b_5);
+
+      IntrVec_t a_16 = LD(a, 16);
+      c_15 = FMA(a_16, b_6, c_15);
+      c_16 = FMA(a_16, b_7, c_16);
+      c_17 = FMA(a_16, b_8, c_17);
+      c_18 = FMA(a_16, b_9, c_18);
+      c_19 = FMA(a_16, b_10, c_19);
+      IntrVec_t b_11 = LD(b, 11);
+      c_20 = FMA(a_16, b_11, c_20);
+
+      IntrVec_t a_17 = LD(a, 17);
+      c_15 = FMA(a_17, b_12, c_15);
+      c_16 = FMA(a_17, b_13, c_16);
+      c_17 = FMA(a_17, b_14, c_17);
+      c_18 = FMA(a_17, b_15, c_18);
+      c_19 = FMA(a_17, b_16, c_19);
+      IntrVec_t b_17 = LD(b, 17);
+      c_20 = FMA(a_17, b_17, c_20);
+
+
+      // Store the last group of outputs.
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+      ST(c, 18, c_18);
+      ST(c, 19, c_19);
+      ST(c, 20, c_20);
+   }
+
+#else
+   // Scalar fallback: entry k for lane n sits at index k*N + n; a[9], a[13] and a[14] are never read.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 6*N+n] + a[ 3*N+n]*b[12*N+n];
+      c[ 1*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 6*N+n] + a[ 4*N+n]*b[12*N+n];
+      c[ 2*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 7*N+n] + a[ 4*N+n]*b[13*N+n];
+      c[ 3*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 6*N+n] + a[ 5*N+n]*b[12*N+n];
+      c[ 4*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 7*N+n] + a[ 5*N+n]*b[13*N+n];
+      c[ 5*N+n] = a[ 3*N+n]*b[ 2*N+n] + a[ 4*N+n]*b[ 8*N+n] + a[ 5*N+n]*b[14*N+n];
+      c[ 6*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 6*N+n] + a[ 8*N+n]*b[12*N+n];
+      c[ 7*N+n] = a[ 6*N+n]*b[ 1*N+n] + a[ 7*N+n]*b[ 7*N+n] + a[ 8*N+n]*b[13*N+n];
+      c[ 8*N+n] = a[ 6*N+n]*b[ 2*N+n] + a[ 7*N+n]*b[ 8*N+n] + a[ 8*N+n]*b[14*N+n];
+      c[ 9*N+n] = a[ 6*N+n]*b[ 3*N+n] + a[ 7*N+n]*b[ 9*N+n] + a[ 8*N+n]*b[15*N+n];
+      c[10*N+n] = a[10*N+n]*b[ 0*N+n] + a[11*N+n]*b[ 6*N+n] + a[12*N+n]*b[12*N+n];
+      c[11*N+n] = a[10*N+n]*b[ 1*N+n] + a[11*N+n]*b[ 7*N+n] + a[12*N+n]*b[13*N+n];
+      c[12*N+n] = a[10*N+n]*b[ 2*N+n] + a[11*N+n]*b[ 8*N+n] + a[12*N+n]*b[14*N+n];
+      c[13*N+n] = a[10*N+n]*b[ 3*N+n] + a[11*N+n]*b[ 9*N+n] + a[12*N+n]*b[15*N+n];
+      c[14*N+n] = a[10*N+n]*b[ 4*N+n] + a[11*N+n]*b[10*N+n] + a[12*N+n]*b[16*N+n];
+      c[15*N+n] = a[15*N+n]*b[ 0*N+n] + a[16*N+n]*b[ 6*N+n] + a[17*N+n]*b[12*N+n];
+      c[16*N+n] = a[15*N+n]*b[ 1*N+n] + a[16*N+n]*b[ 7*N+n] + a[17*N+n]*b[13*N+n];
+      c[17*N+n] = a[15*N+n]*b[ 2*N+n] + a[16*N+n]*b[ 8*N+n] + a[17*N+n]*b[14*N+n];
+      c[18*N+n] = a[15*N+n]*b[ 3*N+n] + a[16*N+n]*b[ 9*N+n] + a[17*N+n]*b[15*N+n];
+      c[19*N+n] = a[15*N+n]*b[ 4*N+n] + a[16*N+n]*b[10*N+n] + a[17*N+n]*b[16*N+n];
+      c[20*N+n] = a[15*N+n]*b[ 5*N+n] + a[16*N+n]*b[11*N+n] + a[17*N+n]*b[17*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/src/upParam_simil_x_propErr.ah b/RecoTracker/MkFitCore/src/upParam_simil_x_propErr.ah
new file mode 100644
index 0000000000000..6c8494ff3f4bc
--- /dev/null
+++ b/RecoTracker/MkFitCore/src/upParam_simil_x_propErr.ah
@@ -0,0 +1,201 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector path: c_0..c_17 are three-term sums and c_18..c_35 are written as zeros, as in the scalar #else branch.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      #ifdef AVX512_INTRINSICS
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+      #else
+      IntrVec_t all_zeros = { 0, 0, 0, 0, 0, 0, 0, 0 };
+      #endif
+      // all_zeros above gets 16 initialisers under AVX512_INTRINSICS and 8 otherwise; it is stored into c_18..c_35.
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+      IntrVec_t b_1 = LD(b, 1);
+      IntrVec_t c_1 = MUL(a_0, b_1);
+      IntrVec_t b_3 = LD(b, 3);
+      IntrVec_t c_2 = MUL(a_0, b_3);
+      IntrVec_t b_6 = LD(b, 6);
+      IntrVec_t c_3 = MUL(a_0, b_6);
+      IntrVec_t b_10 = LD(b, 10);
+      IntrVec_t c_4 = MUL(a_0, b_10);
+      IntrVec_t b_15 = LD(b, 15);
+      IntrVec_t c_5 = MUL(a_0, b_15);
+
+      IntrVec_t a_1 = LD(a, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+      IntrVec_t b_2 = LD(b, 2);
+      c_1 = FMA(a_1, b_2, c_1);
+      IntrVec_t b_4 = LD(b, 4);
+      c_2 = FMA(a_1, b_4, c_2);
+      IntrVec_t b_7 = LD(b, 7);
+      c_3 = FMA(a_1, b_7, c_3);
+      IntrVec_t b_11 = LD(b, 11);
+      c_4 = FMA(a_1, b_11, c_4);
+      IntrVec_t b_16 = LD(b, 16);
+      c_5 = FMA(a_1, b_16, c_5);
+
+      IntrVec_t a_3 = LD(a, 3);
+      c_0 = FMA(a_3, b_3, c_0);
+      c_1 = FMA(a_3, b_4, c_1);
+      IntrVec_t b_5 = LD(b, 5);
+      c_2 = FMA(a_3, b_5, c_2);
+      IntrVec_t b_8 = LD(b, 8);
+      c_3 = FMA(a_3, b_8, c_3);
+      IntrVec_t b_12 = LD(b, 12);
+      c_4 = FMA(a_3, b_12, c_4);
+      IntrVec_t b_17 = LD(b, 17);
+      c_5 = FMA(a_3, b_17, c_5);
+
+
+
+      // Outputs c_6..c_11 (a_1, a_2, a_4 terms); c_0..c_5 are stored along the way.
+      IntrVec_t c_6 = MUL(a_1, b_0);
+      IntrVec_t c_7 = MUL(a_1, b_1);
+      IntrVec_t c_8 = MUL(a_1, b_3);
+      IntrVec_t c_9 = MUL(a_1, b_6);
+      ST(c, 0, c_0);
+      ST(c, 1, c_1);
+      ST(c, 2, c_2);
+      ST(c, 3, c_3);
+      ST(c, 4, c_4);
+      ST(c, 5, c_5);
+      IntrVec_t c_10 = MUL(a_1, b_10);
+      IntrVec_t c_11 = MUL(a_1, b_15);
+
+      IntrVec_t a_2 = LD(a, 2);
+      c_6 = FMA(a_2, b_1, c_6);
+      c_7 = FMA(a_2, b_2, c_7);
+      c_8 = FMA(a_2, b_4, c_8);
+      c_9 = FMA(a_2, b_7, c_9);
+      c_10 = FMA(a_2, b_11, c_10);
+      c_11 = FMA(a_2, b_16, c_11);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_6 = FMA(a_4, b_3, c_6);
+      c_7 = FMA(a_4, b_4, c_7);
+      c_8 = FMA(a_4, b_5, c_8);
+      c_9 = FMA(a_4, b_8, c_9);
+      c_10 = FMA(a_4, b_12, c_10);
+      c_11 = FMA(a_4, b_17, c_11);
+
+
+
+      // Outputs c_12..c_17 (a_3, a_4, a_5 terms); c_6..c_11 are stored along the way.
+      IntrVec_t c_12 = MUL(a_3, b_0);
+      IntrVec_t c_13 = MUL(a_3, b_1);
+      IntrVec_t c_14 = MUL(a_3, b_3);
+      IntrVec_t c_15 = MUL(a_3, b_6);
+      ST(c, 6, c_6);
+      ST(c, 7, c_7);
+      ST(c, 8, c_8);
+      ST(c, 9, c_9);
+      ST(c, 10, c_10);
+      ST(c, 11, c_11);
+      IntrVec_t c_16 = MUL(a_3, b_10);
+      IntrVec_t c_17 = MUL(a_3, b_15);
+
+      c_12 = FMA(a_4, b_1, c_12);
+      c_13 = FMA(a_4, b_2, c_13);
+      c_14 = FMA(a_4, b_4, c_14);
+      c_15 = FMA(a_4, b_7, c_15);
+      c_16 = FMA(a_4, b_11, c_16);
+      c_17 = FMA(a_4, b_16, c_17);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_12 = FMA(a_5, b_3, c_12);
+      c_13 = FMA(a_5, b_4, c_13);
+      c_14 = FMA(a_5, b_5, c_14);
+      c_15 = FMA(a_5, b_8, c_15);
+      c_16 = FMA(a_5, b_12, c_16);
+      c_17 = FMA(a_5, b_17, c_17);
+
+
+
+
+
+
+
+
+      // Outputs 18..35 are filled with zeros; c_12..c_17 are stored last.
+      ST(c, 18, all_zeros);
+      ST(c, 19, all_zeros);
+      ST(c, 20, all_zeros);
+      ST(c, 21, all_zeros);
+      ST(c, 22, all_zeros);
+      ST(c, 23, all_zeros);
+
+
+
+
+
+
+      ST(c, 24, all_zeros);
+      ST(c, 25, all_zeros);
+      ST(c, 26, all_zeros);
+      ST(c, 27, all_zeros);
+      ST(c, 28, all_zeros);
+      ST(c, 29, all_zeros);
+
+
+
+
+
+
+      ST(c, 30, all_zeros);
+      ST(c, 31, all_zeros);
+      ST(c, 32, all_zeros);
+      ST(c, 33, all_zeros);
+      ST(c, 34, all_zeros);
+      ST(c, 35, all_zeros);
+      ST(c, 12, c_12);
+      ST(c, 13, c_13);
+      ST(c, 14, c_14);
+      ST(c, 15, c_15);
+      ST(c, 16, c_16);
+      ST(c, 17, c_17);
+   }
+
+#else
+   // Scalar fallback: entry k for lane n sits at index k*N + n; only a[0..5] are read, c[18..35] are set to 0.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 3*N+n]*b[ 3*N+n];
+      c[ 1*N+n] = a[ 0*N+n]*b[ 1*N+n] + a[ 1*N+n]*b[ 2*N+n] + a[ 3*N+n]*b[ 4*N+n];
+      c[ 2*N+n] = a[ 0*N+n]*b[ 3*N+n] + a[ 1*N+n]*b[ 4*N+n] + a[ 3*N+n]*b[ 5*N+n];
+      c[ 3*N+n] = a[ 0*N+n]*b[ 6*N+n] + a[ 1*N+n]*b[ 7*N+n] + a[ 3*N+n]*b[ 8*N+n];
+      c[ 4*N+n] = a[ 0*N+n]*b[10*N+n] + a[ 1*N+n]*b[11*N+n] + a[ 3*N+n]*b[12*N+n];
+      c[ 5*N+n] = a[ 0*N+n]*b[15*N+n] + a[ 1*N+n]*b[16*N+n] + a[ 3*N+n]*b[17*N+n];
+      c[ 6*N+n] = a[ 1*N+n]*b[ 0*N+n] + a[ 2*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 3*N+n];
+      c[ 7*N+n] = a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n] + a[ 4*N+n]*b[ 4*N+n];
+      c[ 8*N+n] = a[ 1*N+n]*b[ 3*N+n] + a[ 2*N+n]*b[ 4*N+n] + a[ 4*N+n]*b[ 5*N+n];
+      c[ 9*N+n] = a[ 1*N+n]*b[ 6*N+n] + a[ 2*N+n]*b[ 7*N+n] + a[ 4*N+n]*b[ 8*N+n];
+      c[10*N+n] = a[ 1*N+n]*b[10*N+n] + a[ 2*N+n]*b[11*N+n] + a[ 4*N+n]*b[12*N+n];
+      c[11*N+n] = a[ 1*N+n]*b[15*N+n] + a[ 2*N+n]*b[16*N+n] + a[ 4*N+n]*b[17*N+n];
+      c[12*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 3*N+n];
+      c[13*N+n] = a[ 3*N+n]*b[ 1*N+n] + a[ 4*N+n]*b[ 2*N+n] + a[ 5*N+n]*b[ 4*N+n];
+      c[14*N+n] = a[ 3*N+n]*b[ 3*N+n] + a[ 4*N+n]*b[ 4*N+n] + a[ 5*N+n]*b[ 5*N+n];
+      c[15*N+n] = a[ 3*N+n]*b[ 6*N+n] + a[ 4*N+n]*b[ 7*N+n] + a[ 5*N+n]*b[ 8*N+n];
+      c[16*N+n] = a[ 3*N+n]*b[10*N+n] + a[ 4*N+n]*b[11*N+n] + a[ 5*N+n]*b[12*N+n];
+      c[17*N+n] = a[ 3*N+n]*b[15*N+n] + a[ 4*N+n]*b[16*N+n] + a[ 5*N+n]*b[17*N+n];
+      c[18*N+n] = 0;
+      c[19*N+n] = 0;
+      c[20*N+n] = 0;
+      c[21*N+n] = 0;
+      c[22*N+n] = 0;
+      c[23*N+n] = 0;
+      c[24*N+n] = 0;
+      c[25*N+n] = 0;
+      c[26*N+n] = 0;
+      c[27*N+n] = 0;
+      c[28*N+n] = 0;
+      c[29*N+n] = 0;
+      c[30*N+n] = 0;
+      c[31*N+n] = 0;
+      c[32*N+n] = 0;
+      c[33*N+n] = 0;
+      c[34*N+n] = 0;
+      c[35*N+n] = 0;
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/CFMatrix33Vector3.ah b/RecoTracker/MkFitCore/standalone/CFMatrix33Vector3.ah
new file mode 100644
index 0000000000000..8a74940976238
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/CFMatrix33Vector3.ah
@@ -0,0 +1,48 @@
+#ifdef MPLEX_INTRINSICS
+   // Vector path: c[i] = a[3*i]*b[0] + a[3*i+1]*b[1] + a[3*i+2]*b[2] for i = 0..2, same as the scalar branch.
+   for (int n = 0; n < N; n += MPLEX_INTRINSICS_WIDTH_BYTES / sizeof(T))
+   {
+      IntrVec_t a_0 = LD(a, 0);
+      IntrVec_t b_0 = LD(b, 0);
+      IntrVec_t c_0 = MUL(a_0, b_0);
+
+      IntrVec_t a_1 = LD(a, 1);
+      IntrVec_t b_1 = LD(b, 1);
+      c_0 = FMA(a_1, b_1, c_0);
+
+      IntrVec_t a_2 = LD(a, 2);
+      IntrVec_t b_2 = LD(b, 2);
+      c_0 = FMA(a_2, b_2, c_0);
+      // Output c_1 from a_3, a_4, a_5; the b vectors loaded above are reused.
+      IntrVec_t a_3 = LD(a, 3);
+      IntrVec_t c_1 = MUL(a_3, b_0);
+
+      IntrVec_t a_4 = LD(a, 4);
+      c_1 = FMA(a_4, b_1, c_1);
+      ST(c, 0, c_0);
+
+      IntrVec_t a_5 = LD(a, 5);
+      c_1 = FMA(a_5, b_2, c_1);
+      // Output c_2 from a_6, a_7, a_8.
+      IntrVec_t a_6 = LD(a, 6);
+      IntrVec_t c_2 = MUL(a_6, b_0);
+
+      IntrVec_t a_7 = LD(a, 7);
+      c_2 = FMA(a_7, b_1, c_2);
+      ST(c, 1, c_1);
+
+      IntrVec_t a_8 = LD(a, 8);
+      c_2 = FMA(a_8, b_2, c_2);
+      ST(c, 2, c_2);
+   }
+
+#else
+   // Scalar fallback: entry k for lane n sits at index k*N + n; a has 9 entries, b and c have 3 each.
+#pragma omp simd
+   for (int n = 0; n < N; ++n)
+   {
+      c[ 0*N+n] = a[ 0*N+n]*b[ 0*N+n] + a[ 1*N+n]*b[ 1*N+n] + a[ 2*N+n]*b[ 2*N+n];
+      c[ 1*N+n] = a[ 3*N+n]*b[ 0*N+n] + a[ 4*N+n]*b[ 1*N+n] + a[ 5*N+n]*b[ 2*N+n];
+      c[ 2*N+n] = a[ 6*N+n]*b[ 0*N+n] + a[ 7*N+n]*b[ 1*N+n] + a[ 8*N+n]*b[ 2*N+n];
+   }
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/ConfigStandalone.cc b/RecoTracker/MkFitCore/standalone/ConfigStandalone.cc
new file mode 100644
index 0000000000000..64f18ff2df8b3
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/ConfigStandalone.cc
@@ -0,0 +1,135 @@
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+
+// For plugin loader
+#include <dlfcn.h>
+#include <sys/stat.h>
+#include <cstdlib>
+
+namespace mkfit {
+
+  namespace Config {
+    // Definitions, with their start-up defaults, of the globals declared extern in ConfigStandalone.h.
+    TrackerInfo TrkInfo;
+    IterationsInfo ItrInfo;
+
+    std::string geomPlugin = "CylCowWLids";
+
+    int nTracks = 10000;
+    int nEvents = 20;
+    int nItersCMSSW = 0;
+    bool loopOverFile = false;
+    // seed options
+    seedOpts seedInput = simSeeds;
+    cleanOpts seedCleaning = noCleaning;
+
+    bool readCmsswTracks = false;
+
+    bool dumpForPlots = false;
+
+    bool cf_seeding = false;
+    bool cf_fitting = false;
+    // config on validation
+    bool quality_val = false;
+    bool sim_val_for_cmssw = false;
+    bool sim_val = false;
+    bool cmssw_val = false;
+    bool fit_val = false;
+    bool readSimTrackStates = false;
+    bool inclusiveShorts = false;
+    bool keepHitInfo = false;
+    bool tryToSaveSimInfo = false;
+    matchOpts cmsswMatchingFW = hitBased;
+    matchOpts cmsswMatchingBK = trkParamBased;
+    // config on dead modules
+    bool useDeadModules = false;
+
+    // number of hits per task for finding seeds
+    int numHitsPerTask = 32;
+
+    bool mtvLikeValidation = false;
+    bool mtvRequireSeeds = false;
+    int cmsSelMinLayers = 12;
+    int nMinFoundHits = 10;
+
+    bool kludgeCmsHitErrors = false;
+    bool backwardFit = false;
+    bool backwardSearch = true;
+
+    int numThreadsSimulation = 12;
+
+    int finderReportBestOutOfN = 1;
+
+    bool includePCA = false;
+
+    // ================================================================
+    // Output verbosity and JSON configuration handling; the containers and the format string start out empty.
+    bool silent = false;
+    bool json_verbose = false;
+    bool json_dump_before = false;
+    bool json_dump_after = false;
+    std::vector<std::string> json_patch_filenames;
+    std::vector<std::string> json_load_filenames;
+    std::string json_save_iters_fname_fmt;
+    bool json_save_iters_include_iter_info_preamble = false;
+
+    // ================================================================
+    // Empty body: this function currently recomputes nothing.
+    void recalculateDependentConstants() {}
+
+  }  // namespace Config
+
+  //==============================================================================
+  // Geometry / Configuration Plugin Loader
+  //==============================================================================
+
+  namespace {
+    // Directories probed in order ("" = no extra directory); each is put in front of "<base>.so". nullptr ends it.
+    const char *search_path[] = {"", "../Geoms/", "Geoms/", "../", nullptr};
+    typedef void (*TrackerInfoCreator_foo)(TrackerInfo &, IterationsInfo &, bool verbose);
+  }  // namespace
+
+  // Locates "<base>.so" by trying every search_path entry (prefixed with "$MKFIT_BASE/" when that variable is
+  // set), dlopen()s the first candidate that stat() can see and calls the creator function whose address is
+  // stored in the exported variable "TrackerInfoCrator_ptr". Every failure ends the process with exit(2).
+  void execTrackerInfoCreatorPlugin(const std::string &base, TrackerInfo &ti, IterationsInfo &ii, bool verbose) {
+    const std::string soname = base + ".so";
+    const char *envpath = std::getenv("MKFIT_BASE");  // does not change between candidates: look it up once
+    const std::string prefix = envpath != nullptr ? std::string(envpath) + "/" : std::string();
+
+    struct stat st;
+    for (int si = 0; search_path[si]; ++si) {
+      const std::string path = prefix + search_path[si] + soname;
+      if (stat(path.c_str(), &st) != 0)
+        continue;
+
+      printf("mkfit::execTrackerInfoCreatorPlugin processing '%s'\n", path.c_str());
+
+      // dlopen()/dlsym() report problems through dlerror(), not errno, so perror() would print an unrelated reason.
+      void *h = dlopen(path.c_str(), RTLD_LAZY);
+      if (!h) {
+        const char *why = dlerror();
+        fprintf(stderr, "dlopen failed: %s\n", why ? why : "unknown error");
+        exit(2);
+      }
+
+      // The handle is never passed to dlclose() in this function.
+      long long *p2f = (long long *)dlsym(h, "TrackerInfoCrator_ptr");
+      if (!p2f) {
+        const char *why = dlerror();
+        fprintf(stderr, "dlsym failed: %s\n", why ? why : "unknown error");
+        exit(2);
+      }
+
+      // The symbol is a variable that holds the creator's address: read the value, then convert it to a callable.
+      TrackerInfoCreator_foo foo = (TrackerInfoCreator_foo)(*p2f);
+      foo(ti, ii, verbose);
+      return;
+    }
+
+    fprintf(stderr, "TrackerInfo plugin '%s' not found in search path.\n", soname.c_str());
+    exit(2);
+  }
+
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/ConfigStandalone.h b/RecoTracker/MkFitCore/standalone/ConfigStandalone.h
new file mode 100644
index 0000000000000..1d49a8bb8c897
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/ConfigStandalone.h
@@ -0,0 +1,188 @@
+#ifndef RecoTracker_MkFitCore_standalone_ConfigStandalone_h
+#define RecoTracker_MkFitCore_standalone_ConfigStandalone_h
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+
+#include <string>
+#include <map>
+#include <vector>
+
+namespace mkfit {
+
+  class TrackerInfo;
+  class IterationsInfo;
+  // Loads the plugin "<base>.so" and runs its creator on ti / ii; defined in ConfigStandalone.cc, exits on failure.
+  void execTrackerInfoCreatorPlugin(const std::string& base, TrackerInfo& ti, IterationsInfo& ii, bool verbose = false);
+
+  //------------------------------------------------------------------------------
+
+  // Enum for input seed options
+  enum seedOpts { simSeeds, cmsswSeeds, findSeeds };
+  typedef std::map<std::string, std::pair<seedOpts, std::string> > seedOptsMap;
+
+  // Enum for seed cleaning options
+  enum cleanOpts { noCleaning, cleanSeedsN2, cleanSeedsPure, cleanSeedsBadLabel };
+  typedef std::map<std::string, std::pair<cleanOpts, std::string> > cleanOptsMap;
+
+  // Enum for cmssw matching options
+  enum matchOpts { trkParamBased, hitBased, labelBased };
+  typedef std::map<std::string, std::pair<matchOpts, std::string> > matchOptsMap;
+
+  //------------------------------------------------------------------------------
+
+  namespace Config {
+    // Standalone-only settings: constexpr values are fixed here, extern ones are defined in ConfigStandalone.cc.
+    extern TrackerInfo TrkInfo;
+    extern IterationsInfo ItrInfo;
+
+    extern std::string geomPlugin;
+
+    // default file version
+    constexpr int FileVersion = 1;
+
+    // config on main + mkFit
+    extern int nTracks;  //defined in ConfigStandalone.cc by default or when reading events from file
+    extern int nEvents;
+    extern int nItersCMSSW;
+    extern bool loopOverFile;
+    // XXXXMT: nTracks should be thrown out ... SMatrix and Event allocate some arrays on this
+    // which can be wrong for real data or in multi-event environment
+
+    // the following are only used in SMatrix version
+    constexpr float nSigma = 3.;
+    constexpr float minDPhi = 0.01;  // default: 0.;  cmssw tests: 0.01;
+    constexpr float maxDPhi = Const::PI;
+    constexpr float minDEta = 0.;
+    constexpr float maxDEta = 1.0;
+
+    // Configuration for simulation info
+    constexpr int NiterSim = 10;  // Can make more steps due to near volume misses.
+    // CMS beam spot width 25um in xy and 5cm in z. NOTE(review): the values below are 0.1 / 0.1 / 1.0 -- confirm units.
+    constexpr float beamspotX = 0.1;
+    constexpr float beamspotY = 0.1;
+    constexpr float beamspotZ = 1.0;
+
+    // XXMT4K minPt was 0.5. Figure out what is the new limit for 90cm or be
+    // more flexible about finding fewer hits. Or postprocess looper candidates.
+    constexpr float minSimPt = 1;
+    constexpr float maxSimPt = 10.;
+
+    // XXMT Hardhack -- transition region excluded in Simulation::setupTrackByToyMC()
+    constexpr float minSimEta = -2.4;
+    constexpr float maxSimEta = 2.4;
+    // For testing separate EC-/BRL/EC+; -2.3--1.5 / -0.9-0.9 / 1.5-2.3
+    //constexpr float minSimEta =  -0.9;
+    //constexpr float maxSimEta =   0.9;
+
+    constexpr float hitposerrXY = 0.01;  // resolution is 100um in xy --> more realistic scenario is 0.003
+    constexpr float hitposerrZ = 0.1;    // resolution is 1mm in z
+    constexpr float hitposerrR = Config::hitposerrXY / 10.0f;  // XXMT4K ??? I don't get this ...
+    constexpr float varXY = Config::hitposerrXY * Config::hitposerrXY;
+    constexpr float varZ = Config::hitposerrZ * Config::hitposerrZ;
+    constexpr float varR = Config::hitposerrR * Config::hitposerrR;
+
+    // scattering simulation
+    constexpr float X0 =
+        9.370;  // cm, from http://pdg.lbl.gov/2014/AtomicNuclearProperties/HTML/silicon_Si.html // Pb = 0.5612 cm
+    constexpr float xr =
+        0.1;  //  -assumes radial impact. This is bigger than what we have in main --> shouldn't it be the parameter below??? if radial impact??
+    //const     float xr = std::sqrt(Config::beamspotX*Config::beamspotX + Config::beamspotY*Config::beamspotY);
+
+    // Config for seeding
+    constexpr int nlayers_per_seed_max = 4;  // Needed for allocation of arrays on stack.
+    constexpr float chi2seedcut = 9.0;
+    constexpr float lay01angdiff =
+        0.0634888;  // analytically derived... depends on geometry of detector --> from mathematica ... d0 set to one sigma of getHypot(bsX,bsY)
+    constexpr float lay02angdiff = 0.11537;
+    constexpr float dEtaSeedTrip =
+        0.06;  // for almost max efficiency --> empirically derived... depends on geometry of detector
+    constexpr float dPhiSeedTrip =
+        0.0458712;  // numerically+semianalytically derived... depends on geometry of detector
+    // Recalculated in seedTest as it depends on nlayers_per_seed
+    // static const float seed_z2cut= (nlayers_per_seed * fRadialSpacing) / std::tan(2.0f*std::atan(std::exp(-1.0f*dEtaSeedTrip)));
+    constexpr float seed_z0cut = beamspotZ * 3.0f;   // 3cm
+    constexpr float seed_z1cut = hitposerrZ * 3.6f;  // 3.6 mm --> to match efficiency from chi2cut
+    constexpr float seed_d0cut = 0.5f;               // 5mm
+    extern bool cf_seeding;
+
+    // config for seeding as well... needed bfield
+    constexpr float maxCurvR = (100 * minSimPt) / (Const::sol * Bfield);  // in cm
+
+    // Config for Conformal fitter --> these change depending on inward/outward, which tracks used (MC vs reco), geometry, layers used, track params generated...
+    // parameters for layers 0,4,9
+    constexpr float blowupfit = 10.0;
+    constexpr float ptinverr049 =
+        0.0078;  // 0.0075; // errors used for MC only fit, straight from sim tracks, outward with simple geometry
+    constexpr float phierr049 = 0.0017;    // 0.0017;
+    constexpr float thetaerr049 = 0.0033;  // 0.0031;
+    // parameters for layers 0,1,2 // --> ENDTOEND with "real seeding", fit is outward by definition, with poly geo
+    constexpr float ptinverr012 = 0.12007;  // 0.1789;  -->old values from only MC seeds
+    constexpr float phierr012 = 1.0;        // found empirically 0.00646; // 0.0071
+    constexpr float thetaerr012 = 0.2;      // also found empirically 0.01366; // 0.0130;
+
+    // config on fitting
+    extern bool cf_fitting;
+
+    extern bool mtvLikeValidation;
+    extern bool mtvRequireSeeds;
+    // Selection of simtracks from CMSSW. Used in Event::clean_cms_simtracks() and MkBuilder::prep_cmsswtracks()
+    extern int cmsSelMinLayers;
+
+    // config on validation
+    extern int nMinFoundHits;
+    constexpr float minCMSSWMatchChi2[6] = {100, 100, 50, 50, 30, 20};
+    constexpr float minCMSSWMatchdPhi[6] = {0.2, 0.2, 0.1, 0.05, 0.01, 0.005};
+    extern bool quality_val;
+    extern bool sim_val_for_cmssw;
+    extern bool sim_val;
+    extern bool cmssw_val;
+    extern bool fit_val;
+    extern bool readSimTrackStates;  // need this to fill pulls
+    extern bool inclusiveShorts;
+    extern bool keepHitInfo;
+    extern bool tryToSaveSimInfo;
+    extern matchOpts cmsswMatchingFW;
+    extern matchOpts cmsswMatchingBK;
+
+    // config on dead modules
+    extern bool useDeadModules;
+
+    // number of layer1 hits for finding seeds per task
+    extern int numHitsPerTask;
+
+    // seed options
+    extern seedOpts seedInput;
+    extern cleanOpts seedCleaning;
+    extern bool readCmsswTracks;
+
+    extern bool dumpForPlots;
+
+    extern bool kludgeCmsHitErrors;
+    extern bool backwardFit;
+    extern bool backwardSearch;
+
+    extern int numThreadsSimulation;
+    extern int finderReportBestOutOfN;
+
+    extern bool includePCA;
+
+    // ================================================================
+
+    extern bool silent;
+    extern bool json_verbose;
+    extern bool json_dump_before;
+    extern bool json_dump_after;
+    extern std::vector<std::string> json_patch_filenames;
+    extern std::vector<std::string> json_load_filenames;
+    extern std::string json_save_iters_fname_fmt;
+    extern bool json_save_iters_include_iter_info_preamble;
+
+    // ================================================================
+
+    void recalculateDependentConstants();
+
+  }  // end namespace Config
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.cc b/RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.cc
new file mode 100644
index 0000000000000..44c326416513d
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.cc
@@ -0,0 +1,274 @@
+#include "ConformalUtilsMPlex.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/interface/Hit.h"
+
+//#define DEBUG
+#include "RecoTracker/MkFitCore/src/Debug.h"
+
+/* From MkFitter.h/.cc
+// ----------------
+  void ConformalFitTracks(bool fitting, int beg, int end);
+// ----------------
+  void MkFitter::ConformalFitTracks(bool fitting, int beg, int end) {
+    // bool fitting to determine to use fitting CF error widths
+    // in reality, this is dependent on hits used to make pulls
+    // could consider writing an array for widths for a given hit combo
+    // to give precise widths --> then would drop boolean
+    // also used to determine which hits to use
+
+    int front, middle, back;
+
+    // FIXME FITTING HITS --> assume one hit per layer and all layers found! BAD! Need vector of indices to do this right instead...
+    // can always assume 0,1,2 for seeding --> triplets in forward direction
+#ifdef INWARDFIT
+    front = (fitting ? Config::nLayers - 1
+                     : 0);  // i.e. would rather have true option not hardcoded... but set by ACTUAL last hit found
+    middle =
+        (fitting ? (Config::nLayers - 1) / 2 : 1);  // same with this one... would rather middle hit be in the middle!
+    back = (fitting ? 0 : 2);
+#else
+    front = (fitting ? 0 : 0);
+    middle = (fitting ? (Config::nLayers - 1) / 2 : 1);  // ditto above
+    back = (fitting ? Config::nLayers - 1 : 2);          // yup...
+#endif
+
+    // write to iC --> next step will be a propagation no matter what
+    conformalFitMPlex(fitting, Label, Err[iC], Par[iC], msPar[front], msPar[middle], msPar[back]);
+
+    // need to set most off-diagonal elements in unc. to zero, inflate all other elements;
+    if (fitting) {
+      using idx_t = Matriplex::idx_t;
+      const idx_t N = NN;
+#pragma omp simd
+      for (int n = 0; n < N; ++n) {
+        Err[iC].At(n, 0, 0) = Err[iC].constAt(n, 0, 0) * Config::blowupfit;
+        Err[iC].At(n, 0, 1) = Err[iC].constAt(n, 0, 1) * Config::blowupfit;
+        Err[iC].At(n, 1, 0) = Err[iC].constAt(n, 1, 0) * Config::blowupfit;
+        Err[iC].At(n, 1, 1) = Err[iC].constAt(n, 1, 1) * Config::blowupfit;
+        Err[iC].At(n, 2, 2) = Err[iC].constAt(n, 2, 2) * Config::blowupfit;
+        Err[iC].At(n, 3, 3) = Err[iC].constAt(n, 3, 3) * Config::blowupfit;
+        Err[iC].At(n, 4, 4) = Err[iC].constAt(n, 4, 4) * Config::blowupfit;
+        Err[iC].At(n, 5, 5) = Err[iC].constAt(n, 5, 5) * Config::blowupfit;
+
+        Err[iC].At(n, 0, 2) = 0.0f;
+        Err[iC].At(n, 0, 3) = 0.0f;
+        Err[iC].At(n, 0, 4) = 0.0f;
+        Err[iC].At(n, 0, 5) = 0.0f;
+        Err[iC].At(n, 1, 2) = 0.0f;
+        Err[iC].At(n, 1, 3) = 0.0f;
+        Err[iC].At(n, 1, 4) = 0.0f;
+        Err[iC].At(n, 1, 5) = 0.0f;
+        Err[iC].At(n, 2, 0) = 0.0f;
+        Err[iC].At(n, 2, 1) = 0.0f;
+        Err[iC].At(n, 2, 3) = 0.0f;
+        Err[iC].At(n, 2, 4) = 0.0f;
+        Err[iC].At(n, 2, 5) = 0.0f;
+        Err[iC].At(n, 3, 0) = 0.0f;
+        Err[iC].At(n, 3, 1) = 0.0f;
+        Err[iC].At(n, 3, 2) = 0.0f;
+        Err[iC].At(n, 3, 4) = 0.0f;
+        Err[iC].At(n, 3, 5) = 0.0f;
+        Err[iC].At(n, 4, 0) = 0.0f;
+        Err[iC].At(n, 4, 1) = 0.0f;
+        Err[iC].At(n, 4, 2) = 0.0f;
+        Err[iC].At(n, 4, 3) = 0.0f;
+        Err[iC].At(n, 4, 5) = 0.0f;
+        Err[iC].At(n, 5, 0) = 0.0f;
+        Err[iC].At(n, 5, 1) = 0.0f;
+        Err[iC].At(n, 5, 2) = 0.0f;
+        Err[iC].At(n, 5, 3) = 0.0f;
+        Err[iC].At(n, 5, 4) = 0.0f;
+      }
+    }
+  }
+*/
+
+namespace mkfit {
+
+  inline void CFMap(const MPlexHH& A, const MPlexHV& B, MPlexHV& C) {
+    using idx_t = Matriplex::idx_t;
+    // Computes the product below for all lanes 0..NN-1 and writes the result into C.
+    // C = A * B, C is 3x1, A is 3x3 , B is 3x1
+    // The names T, N, a, b and c are the ones the included .ah fragment refers to; keep them unchanged.
+    typedef float T;
+    typedef float Tv;
+    const idx_t N = NN;
+
+    const T* a = A.fArray;
+    ASSUME_ALIGNED(a, 64);
+    const Tv* b = B.fArray;
+    ASSUME_ALIGNED(b, 64);
+    Tv* c = C.fArray;
+    ASSUME_ALIGNED(c, 64);
+    // The loop over n is pasted in textually from the fragment included below.
+#include "RecoTracker/MkFitCore/standalone/CFMatrix33Vector3.ah"
+  }
+
+  //M. Hansroul, H. Jeremie and D. Savard, NIM A 270 (1988) 498
+  //http://www.sciencedirect.com/science/article/pii/016890028890722X
+  // Conformal fit: fills outPar / outErr for each of the NN slots from three hit positions (msPar0, msPar1, msPar2).
+  void conformalFitMPlex(bool fitting,
+                         MPlexQI seedID,
+                         MPlexLS& outErr,
+                         MPlexLV& outPar,
+                         const MPlexHV& msPar0,
+                         const MPlexHV& msPar1,
+                         const MPlexHV& msPar2) {
+    bool debug(false);  // gates the per-track dump at the end of this function
+
+    using idx_t = Matriplex::idx_t;
+    const idx_t N = NN;
+
+    // Store positions in mplex vectors... could consider storing in a 3x3 matrix, too
+    MPlexHV x, y, z, r2;
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      x.At(n, 0, 0) = msPar0.constAt(n, 0, 0);
+      x.At(n, 1, 0) = msPar1.constAt(n, 0, 0);
+      x.At(n, 2, 0) = msPar2.constAt(n, 0, 0);
+
+      y.At(n, 0, 0) = msPar0.constAt(n, 1, 0);
+      y.At(n, 1, 0) = msPar1.constAt(n, 1, 0);
+      y.At(n, 2, 0) = msPar2.constAt(n, 1, 0);
+
+      z.At(n, 0, 0) = msPar0.constAt(n, 2, 0);
+      z.At(n, 1, 0) = msPar1.constAt(n, 2, 0);
+      z.At(n, 2, 0) = msPar2.constAt(n, 2, 0);
+
+      for (int i = 0; i < 3; ++i) {
+        r2.At(n, i, 0) = getRad2(x.constAt(n, i, 0), y.constAt(n, i, 0));
+      }
+    }
+
+    // Start setting the output parameters: rows 0-2 are the x, y, z of the first hit (msPar0)
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      outPar.At(n, 0, 0) = x.constAt(n, 0, 0);
+      outPar.At(n, 1, 0) = y.constAt(n, 0, 0);
+      outPar.At(n, 2, 0) = z.constAt(n, 0, 0);
+    }
+
+    // Use r-phi smearing to set initial error estimation for positions
+    // trackStates already initialized to identity for seeding ... don't store off-diag 0's, zero's for fitting set outside CF
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      const float varPhi = Config::varXY / r2.constAt(n, 0, 0);
+      const float invvarR2 = Config::varR / r2.constAt(n, 0, 0);
+
+      outErr.At(n, 0, 0) =
+          x.constAt(n, 0, 0) * x.constAt(n, 0, 0) * invvarR2 + y.constAt(n, 0, 0) * y.constAt(n, 0, 0) * varPhi;
+      outErr.At(n, 0, 1) = x.constAt(n, 0, 0) * y.constAt(n, 0, 0) * (invvarR2 - varPhi);
+
+      outErr.At(n, 1, 0) = outErr.constAt(n, 0, 1);
+      outErr.At(n, 1, 1) =
+          y.constAt(n, 0, 0) * y.constAt(n, 0, 0) * invvarR2 + x.constAt(n, 0, 0) * x.constAt(n, 0, 0) * varPhi;
+
+      outErr.At(n, 2, 2) = Config::varZ;
+    }
+
+    MPlexQF initPhi;
+    MPlexQI xtou;  // bool to determine "split space", i.e. map x to u or v
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      initPhi.At(n, 0, 0) = std::abs(getPhi(x.constAt(n, 0, 0), y.constAt(n, 0, 0)));
+      xtou.At(n, 0, 0) =
+          ((initPhi.constAt(n, 0, 0) < Const::PIOver4 || initPhi.constAt(n, 0, 0) > Const::PI3Over4) ? 1 : 0);
+    }
+    // Conformal coordinates: (u, v) = (x, y) / r2 when xtou is set, (y, x) / r2 otherwise
+    MPlexHV u, v;
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      if (xtou.At(n, 0, 0))  // x mapped to u
+      {
+        for (int i = 0; i < 3; ++i) {
+          u.At(n, i, 0) = x.constAt(n, i, 0) / r2.constAt(n, i, 0);
+          v.At(n, i, 0) = y.constAt(n, i, 0) / r2.constAt(n, i, 0);
+        }
+      } else  // x mapped to v
+      {
+        for (int i = 0; i < 3; ++i) {
+          u.At(n, i, 0) = y.constAt(n, i, 0) / r2.constAt(n, i, 0);
+          v.At(n, i, 0) = x.constAt(n, i, 0) / r2.constAt(n, i, 0);
+        }
+      }
+    }
+    // Row i of A is (1, -u_i, -u_i^2); Matriplex::invertCramer(A) is called before CFMap(A, v, C) fills C
+    MPlexHH A;
+    //#pragma omp simd // triggers an internal compiler error with icc 18.0.2!
+    for (int n = 0; n < N; ++n) {
+      for (int i = 0; i < 3; ++i) {
+        A.At(n, i, 0) = 1.0f;
+        A.At(n, i, 1) = -u.constAt(n, i, 0);
+        A.At(n, i, 2) = -u.constAt(n, i, 0) * u.constAt(n, i, 0);
+      }
+    }
+    Matriplex::invertCramer(A);
+    MPlexHV C;
+    CFMap(A, v, C);
+
+    MPlexQF a, b;
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      b.At(n, 0, 0) = 1.0f / (2.0f * C.constAt(n, 0, 0));
+      a.At(n, 0, 0) = b.constAt(n, 0, 0) * C.constAt(n, 1, 0);
+    }
+
+    // constant used throughout
+    const float k = (Const::sol * Config::Bfield) / 100.0f;
+
+    MPlexQF vrx, vry, pT, px, py, pz;
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      vrx.At(n, 0, 0) =
+          (xtou.constAt(n, 0, 0) ? x.constAt(n, 0, 0) - a.constAt(n, 0, 0) : x.constAt(n, 0, 0) - b.constAt(n, 0, 0));
+      vry.At(n, 0, 0) =
+          (xtou.constAt(n, 0, 0) ? y.constAt(n, 0, 0) - b.constAt(n, 0, 0) : y.constAt(n, 0, 0) - a.constAt(n, 0, 0));
+      pT.At(n, 0, 0) = k * hipo(vrx.constAt(n, 0, 0), vry.constAt(n, 0, 0));
+      px.At(n, 0, 0) = std::copysign(k * vry.constAt(n, 0, 0), x.constAt(n, 2, 0) - x.constAt(n, 0, 0));
+      py.At(n, 0, 0) = std::copysign(k * vrx.constAt(n, 0, 0), y.constAt(n, 2, 0) - y.constAt(n, 0, 0));
+      pz.At(n, 0, 0) = (pT.constAt(n, 0, 0) * (z.constAt(n, 2, 0) - z.constAt(n, 0, 0))) /
+                       hipo((x.constAt(n, 2, 0) - x.constAt(n, 0, 0)), (y.constAt(n, 2, 0) - y.constAt(n, 0, 0)));
+    }
+    // Momentum part of outPar: rows 3, 4, 5 = 1 / pT, getPhi(px, py), getTheta(pT, pz)
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      outPar.At(n, 3, 0) = 1.0f / pT.constAt(n, 0, 0);
+      outPar.At(n, 4, 0) = getPhi(px.constAt(n, 0, 0), py.constAt(n, 0, 0));
+      outPar.At(n, 5, 0) = getTheta(pT.constAt(n, 0, 0), pz.constAt(n, 0, 0));
+#ifdef INWARDFIT  // arctan is odd, so pz -> -pz means theta -> -theta
+      if (fitting)
+        outPar.At(n, 5, 0) *= -1.0f;
+#endif
+    }
+    // Diagonal errors for rows 3-5 are squared Config constants: the *049 set when fitting, the *012 set otherwise
+#pragma omp simd
+    for (int n = 0; n < N; ++n) {
+      outErr.At(n, 3, 3) =
+          (fitting ? Config::ptinverr049 * Config::ptinverr049 : Config::ptinverr012 * Config::ptinverr012);
+      outErr.At(n, 4, 4) = (fitting ? Config::phierr049 * Config::phierr049 : Config::phierr012 * Config::phierr012);
+      outErr.At(n, 5, 5) =
+          (fitting ? Config::thetaerr049 * Config::thetaerr049 : Config::thetaerr012 * Config::thetaerr012);
+    }
+
+    if (debug) {
+      for (int n = 0; n < N; ++n) {
+        dprintf("afterCF seedID: %1u \n", seedID.constAt(n, 0, 0));
+        // do a dumb copy out
+        TrackState updatedState;
+        for (int i = 0; i < 6; i++) {
+          updatedState.parameters[i] = outPar.constAt(n, i, 0);
+          for (int j = 0; j < 6; j++) {
+            updatedState.errors[i][j] = outErr.constAt(n, i, j);
+          }
+        }
+
+        dcall(print("CCS", updatedState));
+        updatedState.convertFromCCSToCartesian();
+        dcall(print("Pol", updatedState));
+        dprint("--------------------------------");
+      }
+    }
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.h b/RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.h
new file mode 100644
index 0000000000000..efa17519d2aeb
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/ConformalUtilsMPlex.h
@@ -0,0 +1,19 @@
+#ifndef RecoTracker_MkFitCore_standalone_ConformalUtilsMPlex_h
+#define RecoTracker_MkFitCore_standalone_ConformalUtilsMPlex_h
+
+#include "RecoTracker/MkFitCore/src/Matrix.h"
+
+namespace mkfit {
+
+  // write to iC --> next step will be a propagation no matter what
+  void conformalFitMPlex(bool fitting,            // true: use the *049 Config errors, false: the *012 ones
+                         const MPlexQI seedID,    // only read by the debug printout of the implementation
+                         MPlexLS& outErr,         // output: error matrix entries
+                         MPlexLV& outPar,         // output: track parameters
+                         const MPlexHV& msPar0,   // hit positions: rows 0-2 are read as x, y, z
+                         const MPlexHV& msPar1,
+                         const MPlexHV& msPar2);
+
+}  // end namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/Event.cc b/RecoTracker/MkFitCore/standalone/Event.cc
new file mode 100644
index 0000000000000..16722d02134ab
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Event.cc
@@ -0,0 +1,987 @@
+#include "Event.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+//#define DEBUG
+#include "RecoTracker/MkFitCore/src/Debug.h"
+
+#ifdef TBB
+#include "oneapi/tbb/parallel_for.h"
+#endif
+
+#include <memory>
+
+namespace {  // file-local Validation instance; Event(int, int) binds validation_ to it
+  std::unique_ptr<mkfit::Validation> dummyValidation(mkfit::Validation::make_validation("dummy", nullptr));
+}  // namespace
+
+namespace mkfit {
+
+  std::mutex Event::printmutex;  // out-of-class definition of Event's static mutex
+
+  Event::Event(int evtID, int nLayers) : validation_(*dummyValidation), evtID_(evtID) {
+    layerHits_.resize(nLayers);      // one (initially empty) hit vector per layer
+    layerHitMasks_.resize(nLayers);  // one (initially empty) mask vector per layer
+  }  // no Validation supplied: validation_ refers to the file-local dummyValidation
+
+  Event::Event(Validation &v, int evtID, int nLayers) : validation_(v), evtID_(evtID) {
+    layerHits_.resize(nLayers);      // one (initially empty) hit vector per layer
+    layerHitMasks_.resize(nLayers);  // one (initially empty) mask vector per layer
+    validation_.resetValidationMaps();  // need to reset maps for every event.
+  }
+
+  void Event::reset(int evtID) {
+    evtID_ = evtID;
+    // Empty every per-layer container; the number of layers (outer vector size) is left unchanged.
+    for (auto &&l : layerHits_) {
+      l.clear();
+    }
+    for (auto &&l : layerHitMasks_) {
+      l.clear();
+    }
+    // Drop all per-event hit-info / track collections and value-initialize the beam spot.
+    simHitsInfo_.clear();
+    simTrackStates_.clear();
+    simTracks_.clear();
+    simTracksExtra_.clear();
+    seedTracks_.clear();
+    seedTracksExtra_.clear();
+    candidateTracks_.clear();
+    candidateTracksExtra_.clear();
+    fitTracks_.clear();
+    fitTracksExtra_.clear();
+    cmsswTracks_.clear();
+    cmsswTracksExtra_.clear();
+    beamSpot_ = {};
+
+    validation_.resetValidationMaps();  // need to reset maps for every event.
+  }
+
+  void Event::validate() {  // runs the validation_ steps selected by the Config flags tested below
+    // special map needed for sim_val_for_cmssw + set the track scores
+    if (Config::sim_val_for_cmssw) {
+      validation_.makeRecoTkToSeedTkMapsDumbCMSSW(*this);
+      validation_.setTrackScoresDumbCMSSW(*this);
+    }
+
+    // standard eff/fr/dr validation
+    if (Config::sim_val || Config::sim_val_for_cmssw) {
+      validation_.setTrackExtras(*this);
+      validation_.makeSimTkToRecoTksMaps(*this);
+      validation_.makeSeedTkToRecoTkMaps(*this);
+      validation_.fillEfficiencyTree(*this);
+      validation_.fillFakeRateTree(*this);
+    }
+
+    // special cmssw to mkfit validation
+    if (Config::cmssw_val) {
+      validation_.makeCMSSWTkToSeedTkMap(*this);
+      validation_.makeRecoTkToRecoTkMaps(*this);
+      validation_.setTrackExtras(*this);  // also called in the sim_val branch above when both are enabled
+      validation_.makeCMSSWTkToRecoTksMaps(*this);
+      validation_.fillCMSSWEfficiencyTree(*this);
+      validation_.fillCMSSWFakeRateTree(*this);
+    }
+
+    if (Config::fit_val) {  // fit val for z-phi tuning
+      validation_.fillFitTree(*this);
+    }
+  }
+
+  void Event::printStats(const TrackVec &trks, TrackExtraVec &trkextras) {
+    int miss(0), found(0), fp_10(0), fp_20(0), hit8(0), h8_10(0), h8_20(0);
+    // Per track: call setMCTrackIDInfo() on its extra (indexed by label), then tally by mcTrackID, pT ratio, hits.
+    for (auto &&trk : trks) {
+      auto &&extra = trkextras[trk.label()];  // NOTE(review): label() is used as an index without a range check
+      extra.setMCTrackIDInfo(trk, layerHits_, simHitsInfo_, simTracks_, false, true);
+      if (extra.mcTrackID() < 0) {
+        ++miss;
+      } else {
+        auto &&mctrk = simTracks_[extra.mcTrackID()];
+        auto pr = trk.pT() / mctrk.pT();  // pT of the track divided by pT of the associated simTracks_ entry
+        found++;
+        bool h8 = trk.nFoundHits() >= 8;
+        bool pt10 = pr > 0.9 && pr < 1.1;
+        bool pt20 = pr > 0.8 && pr < 1.2;
+        fp_10 += pt10;
+        fp_20 += pt20;
+        hit8 += h8;
+        h8_10 += h8 && pt10;
+        h8_20 += h8 && pt20;
+      }
+    }
+    std::cout << "found tracks=" << found << "  in pT 10%=" << fp_10 << "  in pT 20%=" << fp_20
+              << "     no_mc_assoc=" << miss << std::endl
+              << "  nH >= 8   =" << hit8 << "  in pT 10%=" << h8_10 << "  in pT 20%=" << h8_20 << std::endl;
+  }
+
+  void Event::write_out(DataFile &data_file) {
+    FILE *fp = data_file.f_fp;
+    // Appends this event's record to data_file. Payloads are passed via data() so that empty vectors are safe.
+    static std::mutex writemutex;
+    std::lock_guard<std::mutex> writelock(writemutex);
+    // The record starts with its own byte size; a placeholder is written now and patched once the size is known.
+    auto start = ftell(fp);
+    int evsize = sizeof(int);
+    fwrite(&evsize, sizeof(int), 1, fp);  // this will be overwritten at the end
+
+    evsize += write_tracks(fp, simTracks_);
+
+    if (data_file.hasSimTrackStates()) {
+      int nts = simTrackStates_.size();
+      fwrite(&nts, sizeof(int), 1, fp);
+      fwrite(simTrackStates_.data(), sizeof(TrackState), nts, fp);
+      evsize += sizeof(int) + nts * sizeof(TrackState);
+    }
+
+    int nl = layerHits_.size();
+    fwrite(&nl, sizeof(int), 1, fp);
+    evsize += sizeof(int);
+    for (int il = 0; il < nl; ++il) {
+      int nh = layerHits_[il].size();
+      fwrite(&nh, sizeof(int), 1, fp);
+      fwrite(layerHits_[il].data(), sizeof(Hit), nh, fp);
+      evsize += sizeof(int) + nh * sizeof(Hit);
+    }
+
+    if (data_file.hasHitIterMasks()) {
+      //sizes are the same as in layerHits_
+      for (int il = 0; il < nl; ++il) {
+        int nh = layerHitMasks_[il].size();
+        assert(nh == (int)layerHits_[il].size());
+        fwrite(layerHitMasks_[il].data(), sizeof(uint64_t), nh, fp);
+        evsize += nh * sizeof(uint64_t);
+      }
+    }
+
+    int nm = simHitsInfo_.size();
+    fwrite(&nm, sizeof(int), 1, fp);
+    fwrite(simHitsInfo_.data(), sizeof(MCHitInfo), nm, fp);
+    evsize += sizeof(int) + nm * sizeof(MCHitInfo);
+
+    if (data_file.hasSeeds()) {
+      evsize += write_tracks(fp, seedTracks_);
+    }
+
+    if (data_file.hasCmsswTracks()) {
+      evsize += write_tracks(fp, cmsswTracks_);
+    }
+
+    if (data_file.hasBeamSpot()) {
+      fwrite(&beamSpot_, sizeof(BeamSpot), 1, fp);
+      evsize += sizeof(BeamSpot);
+    }
+    // Go back and replace the size placeholder, then return to the end of the file.
+    fseek(fp, start, SEEK_SET);
+    fwrite(&evsize, sizeof(int), 1, fp);
+    fseek(fp, 0, SEEK_END);
+
+    //layerHitMap_ is recreated afterwards
+
+    /*
+  printf("write %i tracks\n",nt);
+  for (int it = 0; it<nt; it++) {
+    printf("track with pT=%5.3f\n",simTracks_[it].pT());
+    for (int ih=0; ih<simTracks_[it].nTotalHits(); ++ih) {
+      printf("hit lyr:%2d idx=%i\n", simTracks_[it].getHitLyr(ih), simTracks_[it].getHitIdx(ih));
+    }
+  }
+  printf("write %i layers\n",nl);
+  for (int il = 0; il<nl; il++) {
+    printf("write %i hits in layer %i\n",layerHits_[il].size(),il);
+    for (int ih = 0; ih<layerHits_[il].size(); ih++) {
+      printf("hit with r=%5.3f x=%5.3f y=%5.3f z=%5.3f\n",layerHits_[il][ih].r(),layerHits_[il][ih].x(),layerHits_[il][ih].y(),layerHits_[il][ih].z());
+    }
+  }
+  */
+  }
+
+  // #define DUMP_SEEDS
+  // #define DUMP_SEED_HITS
+  // #define DUMP_TRACKS
+  // #define DUMP_TRACK_HITS
+  // #define DUMP_LAYER_HITS
+  // #define DUMP_REC_TRACKS
+  // #define DUMP_REC_TRACK_HITS
+
+  void Event::read_in(DataFile &data_file, FILE *in_fp) {
+    FILE *fp = in_fp ? in_fp : data_file.f_fp;
+    // Reads one event record from fp; optional sections are read according to data_file's has*() flags.
+    data_file.advancePosToNextEvent(fp);
+    // NOTE(review): fread() results are not checked, so a truncated or corrupt file is not detected here.
+    int nt = read_tracks(fp, simTracks_);
+    Config::nTracks = nt;
+
+    if (data_file.hasSimTrackStates()) {
+      int nts;
+      fread(&nts, sizeof(int), 1, fp);
+      simTrackStates_.resize(nts);
+      fread(simTrackStates_.data(), sizeof(TrackState), nts, fp);  // data(): valid also when nts == 0
+    }
+
+    int nl;
+    fread(&nl, sizeof(int), 1, fp);
+    layerHits_.resize(nl);
+    layerHitMasks_.resize(nl);
+    for (int il = 0; il < nl; ++il) {
+      int nh;
+      fread(&nh, sizeof(int), 1, fp);
+      layerHits_[il].resize(nh);
+      layerHitMasks_[il].resize(nh, 0);  //init to 0 by default
+      fread(layerHits_[il].data(), sizeof(Hit), nh, fp);
+    }
+
+    if (data_file.hasHitIterMasks()) {
+      for (int il = 0; il < nl; ++il) {
+        int nh = layerHits_[il].size();
+        fread(layerHitMasks_[il].data(), sizeof(uint64_t), nh, fp);
+      }
+    }
+
+    int nm;
+    fread(&nm, sizeof(int), 1, fp);
+    simHitsInfo_.resize(nm);
+    fread(simHitsInfo_.data(), sizeof(MCHitInfo), nm, fp);
+
+    if (data_file.hasSeeds()) {
+      int ns = read_tracks(fp, seedTracks_, Config::seedInput != cmsswSeeds);
+      (void)ns;
+
+#ifdef DUMP_SEEDS
+      printf("Read %i seedtracks (neg value means actual reading was skipped)\n", ns);
+      for (int it = 0; it < ns; it++) {
+        const Track &ss = seedTracks_[it];
+        printf("  %3i q=%+i pT=%7.3f eta=% 7.3f nHits=%i label=%4i algo=%2i\n",
+               it,
+               ss.charge(),
+               ss.pT(),
+               ss.momEta(),
+               ss.nFoundHits(),
+               ss.label(),
+               (int)ss.algorithm());
+#ifdef DUMP_SEED_HITS
+        for (int ih = 0; ih < seedTracks_[it].nTotalHits(); ++ih) {
+          int lyr = seedTracks_[it].getHitLyr(ih);
+          int idx = seedTracks_[it].getHitIdx(ih);
+          if (idx >= 0) {
+            const Hit &hit = layerHits_[lyr][idx];
+            printf("    hit %2d lyr=%3d idx=%4d pos r=%7.3f z=% 8.3f   mc_hit=%3d mc_trk=%3d\n",
+                   ih,
+                   lyr,
+                   idx,
+                   layerHits_[lyr][idx].r(),
+                   layerHits_[lyr][idx].z(),
+                   hit.mcHitID(),
+                   hit.mcTrackID(simHitsInfo_));
+          } else
+            printf("    hit %2d idx=%i\n", ih, seedTracks_[it].getHitIdx(ih));
+        }
+#endif
+      }
+#endif
+    }
+
+    int nert = -99999;
+    if (data_file.hasCmsswTracks()) {
+      nert = read_tracks(fp, cmsswTracks_, !Config::readCmsswTracks);
+      (void)nert;
+    }
+
+    /*
+    // HACK TO ONLY SELECT ONE PROBLEMATIC TRACK.
+    // Note that MC matching gets screwed.
+    // Works for MC seeding.
+    //
+    printf("************** SIM SELECTION HACK IN FORCE ********************\n");
+    TrackVec x;
+    x.push_back(simTracks_[3]);
+    simTracks_.swap(x);
+    nt = 1;
+  */
+
+#ifdef DUMP_TRACKS
+    printf("Read %i simtracks\n", nt);
+    for (int it = 0; it < nt; it++) {
+      const Track &t = simTracks_[it];
+      printf("  %3i q=%+i pT=%7.3f eta=% 7.3f nHits=%2d  label=%4d\n",
+             it,
+             t.charge(),
+             t.pT(),
+             t.momEta(),
+             t.nFoundHits(),
+             t.label());
+#ifdef DUMP_TRACK_HITS
+      for (int ih = 0; ih < t.nTotalHits(); ++ih) {
+        int lyr = t.getHitLyr(ih);
+        int idx = t.getHitIdx(ih);
+        if (idx >= 0) {
+          const Hit &hit = layerHits_[lyr][idx];
+          printf("    hit %2d lyr=%2d idx=%3d pos r=%7.3f x=% 8.3f y=% 8.3f z=% 8.3f   mc_hit=%3d mc_trk=%3d\n",
+                 ih,
+                 lyr,
+                 idx,
+                 layerHits_[lyr][idx].r(),
+                 layerHits_[lyr][idx].x(),
+                 layerHits_[lyr][idx].y(),
+                 layerHits_[lyr][idx].z(),
+                 hit.mcHitID(),
+                 hit.mcTrackID(simHitsInfo_));
+        } else
+          printf("    hit %2d idx=%i\n", ih, t.getHitIdx(ih));
+      }
+#endif
+    }
+#endif
+#ifdef DUMP_LAYER_HITS
+    printf("Read %i layers\n", nl);
+    int total_hits = 0;
+    for (int il = 0; il < nl; il++) {
+      if (layerHits_[il].empty())
+        continue;
+
+      printf("Read %i hits in layer %i\n", (int)layerHits_[il].size(), il);
+      total_hits += layerHits_[il].size();
+      for (int ih = 0; ih < (int)layerHits_[il].size(); ih++) {
+        const Hit &hit = layerHits_[il][ih];
+        printf("  mcHitID=%5d r=%10g x=%10g y=%10g z=%10g  sx=%10.4g sy=%10.4e sz=%10.4e\n",
+               hit.mcHitID(),
+               hit.r(),
+               hit.x(),
+               hit.y(),
+               hit.z(),
+               std::sqrt(hit.exx()),
+               std::sqrt(hit.eyy()),
+               std::sqrt(hit.ezz()));
+      }
+    }
+    printf("Total hits in all layers = %d\n", total_hits);
+#endif
+#ifdef DUMP_REC_TRACKS
+    printf("Read %i rectracks\n", nert);
+    for (int it = 0; it < nert; it++) {
+      const Track &t = cmsswTracks_[it];
+      printf("  %i with q=%+i pT=%7.3f eta=% 7.3f nHits=%2d  label=%4d algo=%2d\n",
+             it,
+             t.charge(),
+             t.pT(),
+             t.momEta(),
+             t.nFoundHits(),
+             t.label(),
+             (int)t.algorithm());
+#ifdef DUMP_REC_TRACK_HITS
+      for (int ih = 0; ih < t.nTotalHits(); ++ih) {
+        int lyr = t.getHitLyr(ih);
+        int idx = t.getHitIdx(ih);
+        if (idx >= 0) {
+          const Hit &hit = layerHits_[lyr][idx];
+          printf("    hit %2d lyr=%2d idx=%3d pos r=%7.3f z=% 8.3f   mc_hit=%3d mc_trk=%3d\n",
+                 ih,
+                 lyr,
+                 idx,
+                 hit.r(),
+                 hit.z(),
+                 hit.mcHitID(),
+                 hit.mcTrackID(simHitsInfo_));
+        } else
+          printf("    hit %2d        idx=%i\n", ih, t.getHitIdx(ih));
+      }
+#endif
+    }
+#endif
+
+    if (data_file.hasBeamSpot()) {
+      fread(&beamSpot_, sizeof(BeamSpot), 1, fp);
+    }
+
+    if (Config::kludgeCmsHitErrors) {
+      kludge_cms_hit_errors();
+    }
+
+    if (!Config::silent)
+      printf("Read complete, %d simtracks on file.\n", nt);
+  }
+
+  //------------------------------------------------------------------------------
+
+  int Event::write_tracks(FILE *fp, const TrackVec &tracks) {
+    // Returns total number of bytes written.
+    // Layout: n_tracks (int), data_size (int), n_tracks Track objects, then each track's HitOnTrack array.
+    int n_tracks = tracks.size();
+    fwrite(&n_tracks, sizeof(int), 1, fp);
+
+    auto start = ftell(fp);  // position of the data_size field, patched below
+    int data_size = 2 * sizeof(int) + n_tracks * sizeof(Track);  // includes the n_tracks and data_size ints
+    fwrite(&data_size, sizeof(int), 1, fp);
+
+    fwrite(tracks.data(), sizeof(Track), n_tracks, fp);
+
+    for (int i = 0; i < n_tracks; ++i) {
+      fwrite(tracks[i].beginHitsOnTrack(), sizeof(HitOnTrack), tracks[i].nTotalHits(), fp);
+      data_size += tracks[i].nTotalHits() * sizeof(HitOnTrack);
+    }
+    // Overwrite the provisional data_size with the final value, then return to the end of the file.
+    fseek(fp, start, SEEK_SET);
+    fwrite(&data_size, sizeof(int), 1, fp);
+    fseek(fp, 0, SEEK_END);
+
+    return data_size;
+  }
+
+  int Event::read_tracks(FILE *fp, TrackVec &tracks, bool skip_reading) {
+    // Returns number of read tracks (negative if actual reading was skipped).
+    // NOTE(review): fread() results are not checked; n_tracks / data_size are used as read.
+    int n_tracks, data_size;
+    fread(&n_tracks, sizeof(int), 1, fp);
+    fread(&data_size, sizeof(int), 1, fp);
+
+    if (skip_reading) {
+      fseek(fp, data_size - 2 * sizeof(int), SEEK_CUR);  // -2 because data_size counts itself and n_tracks too
+      n_tracks = -n_tracks;  // tracks is left untouched in this branch
+    } else {
+      tracks.resize(n_tracks);
+
+      fread(tracks.data(), sizeof(Track), n_tracks, fp);
+      // The per-track HitOnTrack arrays follow the Track block, in track order.
+      for (int i = 0; i < n_tracks; ++i) {
+        tracks[i].resizeHitsForInput();
+        fread(tracks[i].beginHitsOnTrack_nc(), sizeof(HitOnTrack), tracks[i].nTotalHits(), fp);
+      }
+    }
+
+    return n_tracks;
+  }
+
+  //------------------------------------------------------------------------------
+
+  void Event::setInputFromCMSSW(std::vector<HitVec> hits, TrackVec seeds) {
+    layerHits_ = std::move(hits);    // by-value arguments are moved into the members
+    seedTracks_ = std::move(seeds);  // layerHitMasks_ is not modified by this function
+  }
+
+  //------------------------------------------------------------------------------
+
+  void Event::kludge_cms_hit_errors() {
+    // Enforce Vxy on all layers, Vz on pixb only (applied to layer indices 0-3 below).
+
+    const float Exy = 15 * 1e-4, Vxy = Exy * Exy;  // minimum for c[0] + c[2]
+    const float Ez = 30 * 1e-4, Vz = Ez * Ez;      // minimum for c[5]
+
+    int nl = layerHits_.size();
+
+    int cnt = 0;  // number of modifications; a single hit can be counted twice
+
+    for (int il = 0; il < nl; il++) {
+      if (layerHits_[il].empty())
+        continue;
+
+      for (Hit &h : layerHits_[il]) {
+        SVector6 &c = h.error_nc();
+
+        float vxy = c[0] + c[2];
+        if (vxy < Vxy) {  // NOTE(review): vxy == 0 would divide by zero below -- confirm it cannot occur
+          c[0] *= Vxy / vxy;  // scale both terms by the same factor so that their sum becomes Vxy
+          c[2] *= Vxy / vxy;
+          ++cnt;
+        }
+        if (il < 4 && c[5] < Vz) {
+          c[5] = Vz;
+          ++cnt;
+        }
+      }
+    }
+
+    printf("Event::kludge_cms_hit_errors processed %d layers, kludged %d entries.\n", nl, cnt);
+  }
+
+  //------------------------------------------------------------------------------
+
+  int Event::clean_cms_simtracks() {
+    // Sim tracks from cmssw have the following issues:
+    // - hits are not sorted by layer;
+    // - there are tracks with too low number of hits, even 0;
+    // - even with enough hits, there can be too few layers (esp. in endcap);
+    // - tracks from secondaries can have extremely low pT.
+    // Possible further checks:
+    // - make sure enough hits exist in seeding layers.
+    //
+    // What is done:
+    // 1. Hits are sorted by layer;
+    // 2. Non-findable tracks are marked with Track::Status::not_findable flag.
+    //
+    // Returns number of passed simtracks.
+
+    dprintf("Event::clean_cms_simtracks processing %lu simtracks.\n", simTracks_.size());
+
+    int n_acc = 0;
+    int i = -1;  //wrap in ifdef DEBUG? (i is only passed to the dprintf calls below)
+    for (Track &t : simTracks_) {
+      i++;
+
+      t.sortHitsByLayer();
+
+      const int lyr_cnt = t.nUniqueLayers();
+
+      //const int lasthit = t.getLastFoundHitPos();
+      //const float eta = layerHits_[t.getHitLyr(lasthit)][t.getHitIdx(lasthit)].eta();
+
+      if (lyr_cnt < Config::cmsSelMinLayers)  // || Config::TrkInfo.is_transition(eta))
+      {
+        dprintf("Rejecting simtrack %d, n_hits=%d, n_layers=%d, pT=%f\n", i, t.nFoundHits(), lyr_cnt, t.pT());
+        t.setNotFindable();  // the track stays in simTracks_, it is only flagged
+      } else {
+        dprintf("Accepting simtrack %d, n_hits=%d, n_layers=%d, pT=%f\n", i, t.nFoundHits(), lyr_cnt, t.pT());
+        ++n_acc;
+      }
+    }
+
+    return n_acc;
+  }
+
+  void Event::print_tracks(const TrackVec &tracks, bool print_hits) const {
+    const int nt = tracks.size();
+    // Prints one summary line per track and, if print_hits is set, one line per hit slot of that track.
+    //WARNING: Printouts for hits will not make any sense if mkFit is not run with a validation flag such as --quality-val
+    printf("Event::print_tracks printing %d tracks %s hits:\n", nt, (print_hits ? "with" : "without"));
+    for (int it = 0; it < nt; it++) {
+      const Track &t = tracks[it];
+      printf("  %i with q=%+i pT=%7.3f eta=% 7.3f nHits=%2d  label=%4d findable=%d score=%7.3f chi2=%7.3f\n",
+             it,
+             t.charge(),
+             t.pT(),
+             t.momEta(),
+             t.nFoundHits(),
+             t.label(),
+             t.isFindable(),
+             getScoreCand(t),
+             t.chi2());
+
+      if (print_hits) {
+        for (int ih = 0; ih < t.nTotalHits(); ++ih) {
+          int lyr = t.getHitLyr(ih);
+          int idx = t.getHitIdx(ih);
+          if (idx >= 0) {  // non-negative index: look the hit up in layerHits_ and print its position / MC ids
+            const Hit &hit = layerHits_[lyr][idx];
+            printf("    hit %2d lyr=%2d idx=%3d pos r=%7.3f z=% 8.3f   mc_hit=%3d mc_trk=%3d\n",
+                   ih,
+                   lyr,
+                   idx,
+                   layerHits_[lyr][idx].r(),
+                   layerHits_[lyr][idx].z(),
+                   hit.mcHitID(),
+                   hit.mcTrackID(simHitsInfo_));
+          } else  // negative index: only layer and index are printed
+            printf("    hit %2d lyr=%2d idx=%3d\n", ih, t.getHitLyr(ih), t.getHitIdx(ih));
+        }
+      }
+    }
+  }
+
+  int Event::clean_cms_seedtracks(TrackVec *seed_ptr) {
+    // Drops near-duplicate seeds from *seed_ptr (seedTracks_ when null) in place; returns the number of seeds kept.
+    const float etamax_brl = Config::c_etamax_brl;
+    const float dpt_common = Config::c_dpt_common;
+    const float dzmax_brl = Config::c_dzmax_brl;
+    const float drmax_brl = Config::c_drmax_brl;
+    const float ptmin_hpt = Config::c_ptmin_hpt;
+    const float dzmax_hpt = Config::c_dzmax_hpt;
+    const float drmax_hpt = Config::c_drmax_hpt;
+    const float dzmax_els = Config::c_dzmax_els;
+    const float drmax_els = Config::c_drmax_els;
+
+    const float dzmax2_inv_brl = 1.f / (dzmax_brl * dzmax_brl);
+    const float drmax2_inv_brl = 1.f / (drmax_brl * drmax_brl);
+    const float dzmax2_inv_hpt = 1.f / (dzmax_hpt * dzmax_hpt);
+    const float drmax2_inv_hpt = 1.f / (drmax_hpt * drmax_hpt);
+    const float dzmax2_inv_els = 1.f / (dzmax_els * dzmax_els);
+    const float drmax2_inv_els = 1.f / (drmax_els * drmax_els);
+
+    TrackVec &seeds = (seed_ptr != nullptr) ? *seed_ptr : seedTracks_;
+    const int ns = seeds.size();
+
+    TrackVec cleanSeedTracks;
+    cleanSeedTracks.reserve(ns);
+    std::vector<bool> writetrack(ns, true);
+
+    const float invR1GeV = 1.f / Config::track1GeVradius;
+
+    // Per-seed quantities are copied into flat arrays once; the pair loop below reads only these arrays.
+    std::vector<int> charge(ns);
+    std::vector<float> oldPhi(ns);
+    std::vector<float> pos2(ns);
+    std::vector<float> eta(ns);
+    std::vector<float> ctheta(ns);
+    std::vector<float> invptq(ns);
+    std::vector<float> pt(ns);
+    std::vector<float> x(ns);
+    std::vector<float> y(ns);
+    std::vector<float> z(ns);
+
+    for (int ts = 0; ts < ns; ts++) {
+      const Track &tk = seeds[ts];
+      charge[ts] = tk.charge();
+      oldPhi[ts] = tk.momPhi();
+      pos2[ts] = std::pow(tk.x(), 2) + std::pow(tk.y(), 2);
+      eta[ts] = tk.momEta();
+      ctheta[ts] = 1.f / std::tan(tk.theta());
+      invptq[ts] = tk.charge() * tk.invpT();
+      pt[ts] = tk.pT();
+      x[ts] = tk.x();
+      y[ts] = tk.y();
+      z[ts] = tk.z();
+    }
+
+    for (int ts = 0; ts < ns; ts++) {
+      if (not writetrack[ts])
+        continue;  //FIXME: this speed up prevents transitive masking; check build cost!
+
+      const float oldPhi1 = oldPhi[ts];
+      const float pos2_first = pos2[ts];
+      const float Eta1 = eta[ts];
+      const float Pt1 = pt[ts];
+      const float invptq_first = invptq[ts];
+
+      //#pragma simd /* Vectorization via simd had issues with icc */
+      for (int tss = ts + 1; tss < ns; tss++) {
+        const float Pt2 = pt[tss];
+
+        ////// Always require charge consistency. If different charge is assigned, do not remove seed-track
+        if (charge[tss] != charge[ts])
+          continue;
+
+        const float thisDPt = std::abs(Pt2 - Pt1);
+        ////// Require pT consistency between seeds. If dpT is large, do not remove seed-track.
+        if (thisDPt > dpt_common * (Pt1))
+          continue;
+
+        const float Eta2 = eta[tss];
+        const float deta2 = std::pow(Eta1 - Eta2, 2);
+
+        const float oldPhi2 = oldPhi[tss];
+
+        const float pos2_second = pos2[tss];
+        const float thisDXYSign05 = pos2_second > pos2_first ? -0.5f : 0.5f;
+
+        const float thisDXY = thisDXYSign05 * std::sqrt(std::pow(x[ts] - x[tss], 2) + std::pow(y[ts] - y[tss], 2));
+
+        const float invptq_second = invptq[tss];
+
+        const float newPhi1 = oldPhi1 - thisDXY * invR1GeV * invptq_first;
+        const float newPhi2 = oldPhi2 + thisDXY * invR1GeV * invptq_second;
+
+        const float dphi = cdist(std::abs(newPhi1 - newPhi2));
+
+        const float dr2 = deta2 + dphi * dphi;
+
+        const float thisDZ = z[ts] - z[tss] - thisDXY * (ctheta[ts] + ctheta[tss]);
+        const float dz2 = thisDZ * thisDZ;
+
+        ////// Reject tracks within dR-dz elliptical window.
+        ////// Adaptive thresholds, based on observation that duplicates are more abundant at large pseudo-rapidity and low track pT
+        if (std::abs(Eta1) < etamax_brl) {
+          if (dz2 * dzmax2_inv_brl + dr2 * drmax2_inv_brl < 1.0f)
+            writetrack[tss] = false;
+        } else if (Pt1 > ptmin_hpt) {
+          if (dz2 * dzmax2_inv_hpt + dr2 * drmax2_inv_hpt < 1.0f)
+            writetrack[tss] = false;
+        } else {
+          if (dz2 * dzmax2_inv_els + dr2 * drmax2_inv_els < 1.0f)
+            writetrack[tss] = false;
+        }
+      }
+
+      // writetrack[ts] is still true here: it was checked at loop entry and only indices > ts are cleared above.
+      cleanSeedTracks.emplace_back(seeds[ts]);
+    }
+
+    seeds.swap(cleanSeedTracks);
+
+#ifdef DEBUG
+    {
+      const int ns2 = seeds.size();
+      printf("Number of CMS seeds before %d --> after %d cleaning\n", ns, ns2);
+
+      for (int it = 0; it < ns2; it++) {
+        const Track &ss = seeds[it];
+        printf("  %3i q=%+i pT=%7.3f eta=% 7.3f nHits=%i label=% i\n",
+               it,
+               ss.charge(),
+               ss.pT(),
+               ss.momEta(),
+               ss.nFoundHits(),
+               ss.label());
+      }
+    }
+#endif
+
+    return seeds.size();
+  }
+
+  int Event::select_tracks_iter(unsigned int n) {
+    // Keeps only the seedTracks_ / cmsswTracks_ whose algorithm is among the first n entries of algorithms[].
+    // n == 0 leaves both collections untouched and returns 1; otherwise returns the number of kept tracks + seeds.
+    if (n == 0)
+      return 1;
+
+    const unsigned int algorithms[] = {4, 22, 23, 5, 24, 7, 8, 9, 10, 6};  //to be stored somewhere common
+    // Clamp n to the array length so that an n larger than the list cannot make std::find read past its end.
+    const unsigned int *algo_end = algorithms + std::min<size_t>(n, sizeof(algorithms) / sizeof(algorithms[0]));
+
+    //saving seeds by algorithm
+    const int ns = seedTracks_.size();
+    TrackVec cleanSeedTracks;
+    cleanSeedTracks.reserve(ns);
+    for (int ts = 0; ts < ns; ts++) {
+      const Track &tk = seedTracks_[ts];
+      unsigned int algo = (unsigned int)tk.algorithm();
+      if (std::find(algorithms, algo_end, algo) != algo_end)
+        cleanSeedTracks.emplace_back(seedTracks_[ts]);
+    }
+    seedTracks_.swap(cleanSeedTracks);
+
+    //saving tracks by algorithm
+    const int nt = cmsswTracks_.size();
+    TrackVec cleanTracks;
+    cleanTracks.reserve(nt);
+    for (int ts = 0; ts < nt; ts++) {
+      const Track &tk = cmsswTracks_[ts];
+      unsigned int algo = (unsigned int)tk.algorithm();
+      if (std::find(algorithms, algo_end, algo) != algo_end)
+        cleanTracks.emplace_back(cmsswTracks_[ts]);
+    }
+    cmsswTracks_.swap(cleanTracks);
+    return cmsswTracks_.size() + seedTracks_.size();
+  }
+
+  int Event::clean_cms_seedtracks_badlabel() {
+    printf("***\n*** REMOVING SEEDS WITH BAD LABEL. This is a development hack. ***\n***\n");
+    TrackVec buf;
+    seedTracks_.swap(buf);  // buf now holds the old seeds, seedTracks_ holds buf's (empty) content
+    std::copy_if(
+        buf.begin(), buf.end(), std::back_inserter(seedTracks_), [](const Track &t) { return t.label() >= 0; });
+    return seedTracks_.size();  // number of seeds kept, i.e. those with label() >= 0
+  }
+
+  int Event::use_seeds_from_cmsswtracks() {
+    // Replaces seedTracks_ by the seeds that cmsswTracks_ refer to via label(), in cmsswTracks_ order.
+    // NOTE(review): label() indexes seedTracks_ without a range check -- confirm callers guarantee validity.
+    TrackVec cleanSeedTracks;
+    cleanSeedTracks.reserve(cmsswTracks_.size());  // exactly one seed is appended per cmssw track
+
+    for (auto &&cmsswtrack : cmsswTracks_) {
+      cleanSeedTracks.emplace_back(seedTracks_[cmsswtrack.label()]);
+    }
+
+    seedTracks_.swap(cleanSeedTracks);
+
+    return seedTracks_.size();
+  }
+
+  void Event::relabel_bad_seedtracks() {
+    int newlabel = 0;
+    for (auto &&track : seedTracks_) {
+      if (track.label() < 0)
+        track.setLabel(--newlabel);  // negative labels are renumbered -1, -2, ... in seedTracks_ order
+    }
+  }
+
+  void Event::relabel_cmsswtracks_from_seeds() {
+    std::map<int, int> cmsswLabelMap;  // cmsswTracks_ index -> label of the seed at position cmssw.label()
+    for (size_t iseed = 0; iseed < seedTracks_.size(); iseed++) {
+      for (size_t icmssw = 0; icmssw < cmsswTracks_.size(); icmssw++) {
+        if (cmsswTracks_[icmssw].label() == static_cast<int>(iseed)) {
+          cmsswLabelMap[icmssw] = seedTracks_[iseed].label();
+          break;  // only the first cmssw track whose label equals this seed index is recorded
+        }
+      }
+    }
+    for (size_t icmssw = 0; icmssw < cmsswTracks_.size(); icmssw++) {
+      cmsswTracks_[icmssw].setLabel(cmsswLabelMap[icmssw]);  // operator[] yields 0 for tracks without an entry
+    }
+  }
+
+  //==============================================================================
+  // HitMask handling
+  //==============================================================================
+
+  void Event::fill_hitmask_bool_vectors(int track_algo, std::vector<std::vector<bool>> &layer_masks) {
+    // Convert from per-hit uint64_t to per layer bool-vectors for given
+    // iteration: layer_masks[l][i] tells whether bit track_algo of layerHitMasks_[l][i] is set.
+    // The bit is built in 64-bit arithmetic; an int-typed `1 << track_algo` breaks for track_algo >= 31.
+    const uint64_t iter_mask = uint64_t(1) << track_algo;
+
+    const int n_lay = (int)layerHits_.size();
+    layer_masks.resize(n_lay);
+
+    for (int l = 0; l < n_lay; ++l) {
+      const int n_hit = (int)layerHits_[l].size();
+      layer_masks[l].resize(n_hit);
+
+      for (int i = 0; i < n_hit; ++i) {
+        layer_masks[l][i] = (layerHitMasks_[l][i] & iter_mask) != 0;
+      }
+    }
+  }
+
+  void Event::fill_hitmask_bool_vectors(std::vector<int> &track_algo_vec, std::vector<std::vector<bool>> &layer_masks) {
+    // Convert from per-hit uint64_t to per layer bool-vectors for a list of
+    // iterations.
+    // A hit mask is set if it is set for _all_ listed iterations.
+    // (With an empty list the combined mask is 0 and every entry comes out true.)
+    uint64_t iter_mask = 0;
+    for (auto ta : track_algo_vec)
+      iter_mask |= uint64_t(1) << ta;  // 64-bit shift: an int-typed `1 << ta` breaks for ta >= 31
+
+    const int n_lay = (int)layerHits_.size();
+    layer_masks.resize(n_lay);
+
+    for (int l = 0; l < n_lay; ++l) {
+      const int n_hit = (int)layerHits_[l].size();
+      layer_masks[l].resize(n_hit);
+
+      for (int i = 0; i < n_hit; ++i) {
+        const uint64_t hitmasks = layerHitMasks_[l][i];
+        layer_masks[l][i] = (hitmasks & iter_mask) == iter_mask;  // all requested bits present
+      }
+    }
+  }
+
+  //==============================================================================
+  // DataFile
+  //==============================================================================
+
+  int DataFile::openRead(const std::string &fname, int expected_n_layers) {
+    // Opens fname, validates its header (exits on any mismatch) and returns the header's event count.
+    constexpr int min_ver = 4;
+    constexpr int max_ver = 6;
+    f_fp = fopen(fname.c_str(), "rb");
+    if (f_fp == nullptr || fread(&f_header, sizeof(DataFileHeader), 1, f_fp) != 1) {
+      fprintf(stderr, "Opening of input file '%s' or reading of its header failed.\n", fname.c_str());
+      exit(1);
+    }
+    if (f_header.f_magic != 0xBEEF) {
+      fprintf(stderr, "Incompatible input file (wrong magick).\n");
+      exit(1);
+    }
+    if (f_header.f_format_version < min_ver || f_header.f_format_version > max_ver) {
+      fprintf(stderr,
+              "Unsupported file version %d. Supported versions are from %d to %d.\n",
+              f_header.f_format_version,
+              min_ver,
+              max_ver);
+      exit(1);
+    }
+    if (f_header.f_sizeof_track != sizeof(Track)) {
+      fprintf(stderr,
+              "sizeof(Track) on file (%d) different from current value (%d).\n",
+              f_header.f_sizeof_track,
+              (int)sizeof(Track));
+      exit(1);
+    }
+    if (f_header.f_sizeof_hit != sizeof(Hit)) {
+      fprintf(stderr,
+              "sizeof(Hit) on file (%d) different from current value (%d).\n",
+              f_header.f_sizeof_hit,
+              (int)sizeof(Hit));
+      exit(1);
+    }
+    if (f_header.f_sizeof_hot != sizeof(HitOnTrack)) {
+      fprintf(stderr,
+              "sizeof(HitOnTrack) on file (%d) different from current value (%d).\n",
+              f_header.f_sizeof_hot,
+              (int)sizeof(HitOnTrack));
+      exit(1);
+    }
+    if (f_header.f_n_layers != expected_n_layers) {
+      fprintf(stderr,
+              "Number of layers on file (%d) is different from current TrackerInfo (%d).\n",
+              f_header.f_n_layers,
+              expected_n_layers);
+      exit(1);
+    }
+
+    printf("Opened file '%s', format version %d, n_layers %d, n_events %d\n",
+           fname.c_str(),
+           f_header.f_format_version,
+           f_header.f_n_layers,
+           f_header.f_n_events);
+    if (f_header.f_extra_sections) {
+      printf("  Extra sections:");
+      if (f_header.f_extra_sections & ES_SimTrackStates)
+        printf(" SimTrackStates");
+      if (f_header.f_extra_sections & ES_Seeds)
+        printf(" Seeds");
+      if (f_header.f_extra_sections & ES_CmsswTracks)
+        printf(" CmsswTracks");
+      printf("\n");
+    }
+    // The requested inputs must actually be present as extra sections of this file.
+    if (Config::seedInput == cmsswSeeds && !hasSeeds()) {
+      fprintf(stderr, "Reading of CmsswSeeds requested but data not available on file.\n");
+      exit(1);
+    }
+
+    if (Config::readCmsswTracks && !hasCmsswTracks()) {
+      fprintf(stderr, "Reading of CmsswTracks requested but data not available on file.\n");
+      exit(1);
+    }
+
+    return f_header.f_n_events;
+  }
+
+  void DataFile::openWrite(const std::string &fname, int n_layers, int n_ev, int extra_sections) {
+    f_header.f_n_layers = n_layers;
+    f_header.f_n_events = n_ev;
+    f_header.f_extra_sections = extra_sections;
+    // Create/truncate fname and store the header first; stop here rather than write through a null FILE*.
+    f_fp = fopen(fname.c_str(), "wb");
+    if (f_fp == nullptr || fwrite(&f_header, sizeof(DataFileHeader), 1, f_fp) != 1) { perror(fname.c_str()); exit(1); }
+  }
+
+  int DataFile::advancePosToNextEvent(FILE *fp) {
+    // Reads the event size stored at f_pos from fp, advances f_pos by that size and returns it.
+    // With Config::loopOverFile a read that hits end-of-file restarts right after the file header.
+    // If no size can be read, 0 is returned and f_pos is not advanced.
+    int evsize = 0;
+    std::lock_guard<std::mutex> readlock(f_next_ev_mutex);
+
+    fseek(fp, f_pos, SEEK_SET);
+    bool have_size = fread(&evsize, sizeof(int), 1, fp) == 1;
+    if (!have_size && Config::loopOverFile && feof(fp) != 0) {
+      // File ended, rewind back to beginning
+      f_pos = sizeof(DataFileHeader);
+      fseek(fp, f_pos, SEEK_SET);
+      have_size = fread(&evsize, sizeof(int), 1, fp) == 1;
+    }
+    if (!have_size)
+      evsize = 0;  // a failed fread() leaves evsize indeterminate; never add that to f_pos
+    f_pos += evsize;
+    return evsize;
+  }
+
+  void DataFile::skipNEvents(int n_to_skip) {
+    std::lock_guard<std::mutex> readlock(f_next_ev_mutex);
+    // Advance f_pos over up to n_to_skip events by following the size field stored in front of each.
+    while (n_to_skip-- > 0) {
+      int evsize = 0;
+      fseek(f_fp, f_pos, SEEK_SET);
+      if (fread(&evsize, sizeof(int), 1, f_fp) != 1)
+        break;  // no further size could be read: evsize is not valid, so do not move f_pos by it
+      f_pos += evsize;
+    }
+  }
+
+  void DataFile::close() {
+    fclose(f_fp);
+    f_fp = nullptr;                // no file attached any more
+    f_header = DataFileHeader();   // header back to its default-constructed values
+  }
+
+  void DataFile::CloseWrite(int n_written) {
+    if (n_written != f_header.f_n_events) {  // the header written so far holds a different event count
+      f_header.f_n_events = n_written;
+      fseek(f_fp, 0, SEEK_SET);  // overwrite the header at offset 0 with the corrected copy
+      fwrite(&f_header, sizeof(DataFileHeader), 1, f_fp);
+    }
+    close();
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/Event.h b/RecoTracker/MkFitCore/standalone/Event.h
new file mode 100644
index 0000000000000..aacc45daaa5a4
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Event.h
@@ -0,0 +1,123 @@
+#ifndef RecoTracker_MkFitCore_standalone_Event_h
+#define RecoTracker_MkFitCore_standalone_Event_h
+
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "Validation.h"
+#include "RecoTracker/MkFitCore/interface/Config.h"
+
+#include <mutex>
+
+namespace mkfit {
+
+  struct DataFile;
+
+  class Event {
+  public:
+    explicit Event(int evtID, int nLayers);
+    Event(Validation &v, int evtID, int nLayers);
+
+    void reset(int evtID);
+    void validate();
+    void printStats(const TrackVec &, TrackExtraVec &);
+
+    int evtID() const { return evtID_; }
+    void resetLayerHitMap(bool resetSimHits);
+    // I/O: whole events go through a DataFile, track vectors through a raw FILE stream.
+    void write_out(DataFile &data_file);
+    void read_in(DataFile &data_file, FILE *in_fp = 0);
+    int write_tracks(FILE *fp, const TrackVec &tracks);
+    int read_tracks(FILE *fp, TrackVec &tracks, bool skip_reading = false);
+
+    void setInputFromCMSSW(std::vector<HitVec> hits, TrackVec seeds);
+
+    void kludge_cms_hit_errors();
+
+    int use_seeds_from_cmsswtracks();  //special mode --> use only seeds which generated cmssw reco track
+    int clean_cms_simtracks();
+    int clean_cms_seedtracks(
+        TrackVec *seed_ptr = nullptr);    //operates on seedTracks_; returns the number of cleaned seeds
+    int clean_cms_seedtracks_badlabel();  //operates on seedTracks_, removes those with label < 0; returns seeds kept
+    void relabel_bad_seedtracks();          // seeds with label < 0 get distinct labels -1, -2, ...
+    void relabel_cmsswtracks_from_seeds();  // cmssw track label (a seed index) -> label of that seed
+
+    int select_tracks_iter(unsigned int n = 0);  //for cmssw input
+    // Expand layerHitMasks_ into per-layer bool vectors: for one algo bit, or for all bits of a list.
+    void fill_hitmask_bool_vectors(int track_algo, std::vector<std::vector<bool>> &layer_masks);
+    void fill_hitmask_bool_vectors(std::vector<int> &track_algo_vec, std::vector<std::vector<bool>> &layer_masks);
+
+    void print_tracks(const TrackVec &tracks, bool print_hits) const;
+
+    Validation &validation_;
+
+  private:
+    int evtID_;
+
+  public:
+    BeamSpot beamSpot_;  // XXXX Read/Write of BeamSpot + file-version bump or extra-section to be added.
+    std::vector<HitVec> layerHits_;
+    std::vector<std::vector<uint64_t>> layerHitMasks_;  //aligned with layerHits_
+    MCHitInfoVec simHitsInfo_;
+
+    TrackVec simTracks_, seedTracks_, candidateTracks_, fitTracks_;
+    TrackVec cmsswTracks_;
+    // validation sets these, so needs to be mutable
+    mutable TrackExtraVec simTracksExtra_, seedTracksExtra_, candidateTracksExtra_, fitTracksExtra_;
+    mutable TrackExtraVec cmsswTracksExtra_;
+
+    TSVec simTrackStates_;
+    static std::mutex printmutex;
+  };
+
+  typedef std::vector<Event> EventVec;
+
+  struct DataFileHeader {  // stored raw at the start of a data file (see DataFile::openRead / openWrite)
+    int f_magic = 0xBEEF;                   // openRead() rejects files with any other value
+    int f_format_version = 6;               // openRead() accepts versions 4 to 6
+    int f_sizeof_track = sizeof(Track);     // these three sizes are compared by openRead() against
+    int f_sizeof_hit = sizeof(Hit);         // the sizes of the current build
+    int f_sizeof_hot = sizeof(HitOnTrack);
+    int f_n_layers = -1;
+    int f_n_events = -1;  // DataFile::CloseWrite() rewrites this if the written count differs
+    // Bitwise OR of DataFile::ExtraSection flags.
+    int f_extra_sections = 0;
+
+    DataFileHeader() = default;
+  };
+
+  struct DataFile {
+    enum ExtraSection {  // bit flags stored in DataFileHeader::f_extra_sections
+      ES_SimTrackStates = 0x1,
+      ES_Seeds = 0x2,
+      ES_CmsswTracks = 0x4,
+      ES_HitIterMasks = 0x8,
+      ES_BeamSpot = 0x10
+    };
+
+    FILE *f_fp = 0;
+    long f_pos = sizeof(DataFileHeader);  // offset at which the next event size is read; starts after the header
+
+    DataFileHeader f_header;
+
+    std::mutex f_next_ev_mutex;  // held by advancePosToNextEvent() and skipNEvents() while they use f_pos
+
+    // ----------------------------------------------------------------
+    // Tests for the individual ExtraSection bits of the current header.
+    bool hasSimTrackStates() const { return f_header.f_extra_sections & ES_SimTrackStates; }
+    bool hasSeeds() const { return f_header.f_extra_sections & ES_Seeds; }
+    bool hasCmsswTracks() const { return f_header.f_extra_sections & ES_CmsswTracks; }
+    bool hasHitIterMasks() const { return f_header.f_extra_sections & ES_HitIterMasks; }
+    bool hasBeamSpot() const { return f_header.f_extra_sections & ES_BeamSpot; }
+
+    int openRead(const std::string &fname, int expected_n_layers);  // returns the header's n_events
+    void openWrite(const std::string &fname, int n_layers, int n_ev, int extra_sections = 0);
+    // Reads the event size at f_pos from fp, advances f_pos by it and returns it.
+    int advancePosToNextEvent(FILE *fp);
+
+    void skipNEvents(int n_to_skip);
+
+    void close();
+    void CloseWrite(int n_written);  //override nevents in the header and close
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/Geoms/CylCowWLids.cc b/RecoTracker/MkFitCore/standalone/Geoms/CylCowWLids.cc
new file mode 100644
index 0000000000000..a70c5568e5412
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Geoms/CylCowWLids.cc
@@ -0,0 +1,180 @@
+//---------------------------
+// Cylindrical Cow with Lids
+//---------------------------
+//
+// Intended coverage: |eta| < 2.4 with D_z_beam_spot = +-3 cm (3 sigma)
+// B-layer extends to 2.55.
+// Layers 1 and 2 have somewhat longer barrels. It is assumed
+// those will be needed / used for seed finding.
+//
+// Layers 3 - 9:
+//   Barrel:     0.0 - 1.0
+//   Transition: 1.0 - 1.4
+//   Endcap:     1.4 - 2.4
+//
+// Run root test/CylCowWLids.C to get a plot and dumps of
+// edge coordinates and etas.
+//
+// Eta partitions for B / T / EC
+
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+#include "RecoTracker/MkFitCore/interface/TrackerInfo.h"
+
+#include <cmath>
+
+using namespace mkfit;
+
+namespace {
+  float getTheta(float r, float z) { return std::atan2(r, z); }  // polar angle of (r, z), measured from the +z axis
+  // Pseudorapidity of the point (r, z): eta = -ln(tan(theta / 2)).
+  float getEta(float r, float z) { return -1.0f * std::log(std::tan(getTheta(r, z) / 2.0f)); }
+
+  // float getEta(float theta)
+  // {
+  //   return -1.0f * std::log( std::tan(theta/2.0f) );
+  // }
+  // tan(theta) for a given eta, with theta = 2 * atan(exp(-eta)), i.e. the inverse of the eta formula.
+  float getTgTheta(float eta) { return std::tan(2.0 * std::atan(std::exp(-eta))); }
+
+  class CylCowWLidsCreator {
+    TrackerInfo& m_trkinfo;
+    // FillTrackerInfo() populates m_trkinfo with 10 barrel layers and 9 endcap disks on each z side.
+    static constexpr float m_det_half_thickness = 0.005;  // for 100 micron total
+
+    //------------------------------------------------------------------------------
+    // Barrel layer lid: radius r -+ half thickness, spanning -z..z. eta is only used by the debug printf.
+    void add_barrel(int lid, float r, float z, float eta) {
+      // printf("Adding barrel layer r=%.3f z=%.3f eta_t=%.3f\n", r, z, eta);
+
+      LayerInfo& li = m_trkinfo.layer_nc(lid);
+
+      li.set_layer_type(LayerInfo::Barrel);
+
+      li.set_limits(r - m_det_half_thickness, r + m_det_half_thickness, -z, z);
+      li.set_propagate_to(li.rin());
+
+      li.set_q_bin(2.0);
+    }
+    // Barrel layer from radius and the eta at its edge: half-length z = r / tan(theta(eta)).
+    void add_barrel_r_eta(int lid, float r, float eta) {
+      float z = r / getTgTheta(eta);
+
+      add_barrel(lid, r, z, eta);
+    }
+    // Barrel layer from radius and half-length; the edge eta is derived from them.
+    void add_barrel_r_z(int lid, float r, float z) {
+      float eta = getEta(r, z);
+
+      add_barrel(lid, r, z, eta);
+    }
+    // Two endcap disks: layer lid at +z and layer lid + 9 at -z; r is the outer radius, eta fixes the inner one.
+    void add_endcap(int lid, float r, float z, float eta) {
+      float r_end = z * getTgTheta(eta);
+
+      // printf("Adding endcap layer r=%.3f z=%.3f r_l=%.3f eta_l=%.3f\n", r, z, r_end, eta);
+
+      {
+        LayerInfo& li = m_trkinfo.layer_nc(lid);
+
+        li.set_layer_type(LayerInfo::EndCapPos);
+
+        li.set_limits(r_end, r, z - m_det_half_thickness, z + m_det_half_thickness);
+        li.set_propagate_to(li.zmin());
+
+        li.set_q_bin(1.5);
+      }
+      {
+        lid += 9;
+        LayerInfo& li = m_trkinfo.layer_nc(lid);
+
+        li.set_layer_type(LayerInfo::EndCapNeg);
+
+        li.set_limits(r_end, r, -z - m_det_half_thickness, -z + m_det_half_thickness);
+        li.set_propagate_to(li.zmax());
+
+        li.set_q_bin(1.5);
+      }
+    }
+
+    //------------------------------------------------------------------------------
+
+  public:
+    CylCowWLidsCreator(TrackerInfo& ti) : m_trkinfo(ti) {}
+
+    void FillTrackerInfo() {
+      m_trkinfo.create_layers(10, 9, 9);
+
+      // Actual coverage for tracks with z = 3cm is 2.4
+      float full_eta = 2.5;
+      float full_eta_pix_0 = 2.55;  // To account for BS z-spread
+      float full_eta_ec_in[] = {0, 2.525, 2.515, full_eta};  // read at i = 1..3 below; entry [0] is unused
+      // NOTE(review): entry [3] was missing (out-of-bounds read for the third disk); full_eta is a stand-in.
+      float pix_0 = 4, pix_sep = 6;
+      float pix_z0 = 24, pix_zgrow = 6;
+
+      float sct_sep = 10;
+      float sct_0 = pix_0 + 2 * pix_sep + sct_sep;
+      float sct_zgrow = 10;
+      float sct_z0 = pix_z0 + 2 * pix_zgrow + sct_zgrow;
+
+      float pix_ec_zgap = 2;
+      float pix_ec_rextra = 2;
+
+      float sct_ec_zgap = 4;
+      float sct_ec_rextra = 4;
+
+      add_barrel_r_eta(0, pix_0, full_eta_pix_0);
+
+      add_barrel_r_z(1, pix_0 + 1 * pix_sep, pix_z0 + 1 * pix_zgrow);
+      add_barrel_r_z(2, pix_0 + 2 * pix_sep, pix_z0 + 2 * pix_zgrow);
+      add_barrel_r_z(3, pix_0 + 3 * pix_sep, pix_z0 + 3 * pix_zgrow);
+
+      for (int i = 0; i < 6; ++i) {
+        add_barrel_r_z(4 + i, sct_0 + i * sct_sep, sct_z0 + i * sct_zgrow);
+      }
+
+      for (int i = 1; i < 4; ++i) {
+        add_endcap(9 + i, pix_0 + i * pix_sep + pix_ec_rextra, pix_z0 + i * pix_zgrow + pix_ec_zgap, full_eta_ec_in[i]);
+      }
+      for (int i = 0; i < 6; ++i) {
+        add_endcap(13 + i, sct_0 + i * sct_sep + sct_ec_rextra, sct_z0 + i * sct_zgrow + sct_ec_zgap, full_eta);
+      }
+      // + endcap disks at -z
+    }
+  };
+
+  //============================================================================
+
+  void Create_CylCowWLids(TrackerInfo& ti, IterationsInfo& ii, bool verbose) {
+    // Install the propagation settings for this geometry as default, then build the layers in ti.
+    PropagationConfig prop;
+    prop.finding_requires_propagation_to_hit_pos = false;
+    prop.finding_inter_layer_pflags = PropagationFlags(PF_none);
+    prop.finding_intra_layer_pflags = PropagationFlags(PF_none);
+    prop.seed_fit_pflags = PropagationFlags(PF_none);
+    prop.forward_fit_pflags = PropagationFlags(PF_use_param_b_field);
+    prop.backward_fit_pflags = PropagationFlags(PF_use_param_b_field);
+    prop.pca_prop_pflags = PropagationFlags(PF_use_param_b_field);
+    prop.backward_fit_to_pca = Config::includePCA;
+    prop.set_as_default();
+
+    CylCowWLidsCreator(ti).FillTrackerInfo();
+
+    if (verbose)
+      printf("==========================================================================================\n");
+    printf("Create_CylCowWLids -- creation complete\n");
+    if (!verbose)
+      return;
+
+    // Verbose mode only: dump every layer between two separator lines.
+    printf("==========================================================================================\n");
+    for (int lay = 0; lay < ti.n_layers(); ++lay)
+      ti.layer(lay).print_layer();
+    printf("==========================================================================================\n");
+  }
+
+
+}  // namespace
+
+void* TrackerInfoCrator_ptr = (void*)Create_CylCowWLids;  // global handle to the file-local factory; NOTE(review): presumably looked up by symbol name, so keep the "Crator" spelling -- confirm
diff --git a/RecoTracker/MkFitCore/standalone/Geoms/Makefile b/RecoTracker/MkFitCore/standalone/Geoms/Makefile
new file mode 100644
index 0000000000000..53383818ba1f0
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Geoms/Makefile
@@ -0,0 +1,48 @@
+include ${SADIR}/Makefile.config
+# Builds each ${SADIR}/Geoms/*.cc into its own plugin ../<name>.so, linked against ../libMicCore.so.
+CPPEXTRA := ${USER_CPPFLAGS} ${DEFS} -I${SRCDIR}
+LDEXTRA  := ${USER_LDFLAGS}
+
+CPPFLAGS := ${CPPEXTRA} ${CPPFLAGS}
+CXXFLAGS += -fPIC ${USER_CXXFLAGS}
+LDFLAGS  += ${LDEXTRA}
+# NOTE(review): LDFLAGS (and with it USER_LDFLAGS) is not used by the link rule below -- confirm intent.
+CPPFLAGS_NO_ROOT := ${CPPEXTRA} ${CPPFLAGS_NO_ROOT}
+
+.PHONY: all clean distclean echo
+
+SRCS := $(wildcard ${SADIR}/Geoms/*.cc)
+SRCB := $(notdir ${SRCS})
+DEPS := $(SRCB:.cc=.d)
+OBJS := $(SRCB:.cc=.o)
+
+TGTS := $(basename ${OBJS})
+TGTS := $(addprefix ../, $(addsuffix .so, ${TGTS}))
+
+vpath %.cc ${SADIR}/Geoms
+
+all: ${TGTS}
+
+%.o: %.cc %.d
+	${CXX} ${CPPFLAGS} ${CXXFLAGS} ${VEC_HOST} -c -o $@ $<
+
+%.d: %.cc
+	${MAKEDEPEND} -o $@ $<
+# Object first, -lMicCore after it: the linker works left to right and may drop a library named too early.
+../%.so: %.o
+	${CXX} -shared -o $@ $< -L.. -lMicCore
+
+ifeq ($(filter clean distclean, ${MAKECMDGOALS}),)
+include ${DEPS}
+endif
+
+clean:
+	-rm -f *.so *.o *.om *.d *.optrpt
+
+distclean: clean
+	-rm -f ${TGTS}
+
+echo:
+	@echo SRCS = ${SRCS}
+	@echo DEPS = ${DEPS}
+	@echo OBJS = ${OBJS}
diff --git a/RecoTracker/MkFitCore/standalone/Makefile b/RecoTracker/MkFitCore/standalone/Makefile
new file mode 100644
index 0000000000000..0a6b72ba67868
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Makefile
@@ -0,0 +1,82 @@
+include ${SADIR}/Makefile.config
+# Builds ../libMicCore.so from the MkFitCore sources plus the standalone steering code in ${SADIR}.
+CPPFLAGS := -I. -I${SRCDIR} -I../mkFit-external ${CPPFLAGS}
+
+CORE_DIR := ${SRCDIR}/RecoTracker/MkFitCore
+
+LIB_CORE := ../libMicCore.so
+
+TGTS := ${LIB_CORE}
+# Every target that does not produce a file of its own name (auto-matriplex is a real stamp file).
+.PHONY: all clean clean-local distclean echo echo_cc_defs echo-srcs echo-flags echo-tbb
+
+all: ${TGTS}
+
+SRCS := $(wildcard ${CORE_DIR}/src/*.cc) \
+        $(wildcard ${CORE_DIR}/src/Ice/*.cc) \
+        $(wildcard ${CORE_DIR}/src/Matriplex/*.cc) \
+        $(wildcard ${SADIR}/*.cc)
+SRCB := $(notdir ${SRCS})
+DEPS := $(SRCB:.cc=.d)
+OBJS := $(SRCB:.cc=.o)
+
+vpath %.cc ${CORE_DIR}/src ${CORE_DIR}/src/Ice ${CORE_DIR}/src/Matriplex ${SADIR}
+
+AUTO_TGTS :=
+
+# Begin Matriplex
+
+auto-matriplex:
+###	${MAKE} -f ${CORE_DIR}/src/Matriplex auto && touch $@
+	touch $@
+
+AUTO_TGTS += auto-matriplex
+
+${DEPS}: auto-matriplex
+
+# End Matriplex
+
+ifeq ($(filter clean-local clean distclean, ${MAKECMDGOALS}),)
+include ${DEPS}
+endif
+
+clean-local:
+	-rm -f ${TGTS} *.d *.o *.om *.so
+	-rm -rf main.dSYM
+	-rm -rf plotting/*.so plotting/*.d plotting/*.pcm
+
+clean: clean-local
+
+distclean: clean-local
+	-rm -f ${AUTO_TGTS}
+	-rm -f *.optrpt
+	-rm -f ${LIB_CORE}
+###	cd Matriplex && ${MAKE} distclean
+
+${LIB_CORE}: ${OBJS}
+	@mkdir -p $(@D)
+	${CXX} ${CXXFLAGS} ${VEC_HOST} ${OBJS} -shared -o $@ ${LDFLAGS_HOST} ${LDFLAGS}
+
+${OBJS}: %.o: %.cc %.d
+	${CXX} ${CPPFLAGS} ${CXXFLAGS} ${VEC_HOST} -c -o $@ $<
+
+%.d: %.cc
+	${MAKEDEPEND} -o $@ $<
+
+echo:
+	@echo "CXX=${CXX}"
+	@echo SRCS = ${SRCS}
+	@echo DEPS = ${DEPS}
+	@echo OBJS = ${OBJS}
+
+echo_cc_defs:
+	${CXX} -dM -E -mavx2 - < /dev/null
+
+echo-srcs:
+	@echo ${SRCS}
+
+echo-flags:
+	@echo "CPPFLAGS=${CPPFLAGS}"
+
+echo-tbb:
+	@echo "TBB_GCC=${TBB_GCC}, TBB_PREFIX=${TBB_PREFIX}, TBB_ROOT=${TBB_ROOT}"
diff --git a/RecoTracker/MkFitCore/standalone/Makefile.config b/RecoTracker/MkFitCore/standalone/Makefile.config
new file mode 100644
index 0000000000000..da574a62e3cf9
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Makefile.config
@@ -0,0 +1,192 @@
+################################################################
+# Build notes
+################################################################
+
+# Requires a recent gcc, e.g.:
+#   . /opt/rh/devtoolset-2/enable
+# This is also needed for icc as it uses gcc headers.
+
+# To build mkFit -- Matriplex Kalman Fit:
+# - cd mkFit; make (or make -C mkFit).
+
+
+################################################################
+# Configuration section
+################################################################
+
+# -1. What to build - default is AVX
+# Define to build for AVX_512
+#AVX_512 := 1
+# Define to build for AVX2
+#AVX2    := 1
+# Define to build for SSE3
+#SSE3 := 1
+
+# 0. Use gcc-5 from MacPorts on OSX
+# OSXGCC5    := 1
+# Use clang from MacPorts on OSX; the value is used verbatim as the compiler command (see section 2)
+# OSXMPCLANG   := clang++-mp-<version>
+
+# 1. Use ROOT or not
+# Uncomment to enable root (the value "1" is not relevant, only whether the variable is defined)
+#WITH_ROOT := 1
+
+# 2. Use gcc (clang by default on mac) or icc
+# Comment out to force using standard c++.
+ifdef INTEL_LICENSE_FILE
+  # Define ICC to make it easier to switch to icpc
+  ICC := icc
+  CXX := ${ICC}
+else ifdef OSXGCC5
+  CXX := c++-mp-5
+  TBB_PREFIX := /opt/local
+else ifdef OSXMPCLANG
+  CXX := ${OSXMPCLANG} -Wall -Wno-unknown-pragmas -Wno-missing-braces
+  TBB_PREFIX := /opt/local
+endif
+
+# 3. Optimization
+# -O3 implies vectorization and simd (but not AVX)
+OPT := -g -O3
+
+# 4. Vectorization settings
+ifdef AVX_512
+VEC_GCC  := -march=native # -fopt-info-vec -mavx512f -mavx512cd
+VEC_ICC  := -xHost -qopt-zmm-usage=high # -xcore-avx512
+else ifdef AVX2
+VEC_GCC  := -mavx2 -mfma
+VEC_ICC  := -mavx2 -mfma
+else ifdef SSE3
+VEC_GCC  := -msse3
+VEC_ICC  := -msse3
+else
+VEC_GCC  := -mavx # -fopt-info-vec-all
+VEC_ICC  := -mavx
+endif
+
+# 5. Matriplex intrinsics, vtune
+# Comment out to disable intrinsics (supported for AVX512, AVX2 and AVX)
+USE_INTRINSICS := -DMPLEX_USE_INTRINSICS
+# To enforce given vector size (does not work with intrinsics!)
+#USE_INTRINSICS := -DMPT_SIZE=1
+USE_VTUNE_NOTIFY := 1
+
+# 6. MIC stuff - obsolete
+
+# 7. OSX hack (is there a good way to detect clang?)
+# MT needs this on OSX-10.8, c++ -v
+#    Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn)
+# OSX_CXXFLAGS := -stdlib=libc++
+# And with gcc-4.8.1 from cms we need this
+# OSX_LDFLAGS  := -lstdc++
+
+# 9. Check track state propagation for success, turns on simple
+# checks of filter convergence: used in SMatrix code mostly, still retain as toyMC propagation still uses this
+USE_STATE_VALIDITY_CHECKS := -DCHECKSTATEVALID
+
+# 10. Turn on multiple scattering: for toyMC SMatrix code. Scattering handled through material map in CMSSW
+#USE_SCATTERING := -DSCATTERING
+
+# 11. In SMatrix track building, use linear interpolation across a
+# volume instead of using the geometry
+#USE_LINEAR_INTERPOLATION := -DLINEARINTERP
+
+# 12. Use built tracks for fitting in SMatrix code, comment out to fit sim tracks
+#ENDTOEND := -DENDTOEND
+
+# 13. Intel Threading Building Blocks.  With icc uses system
+# TBB, otherwise requires a local installation, and paths will
+# have to be adjusted.
+WITH_TBB := 1
+
+# 14. Use inward fit in Conformal fit + final KF Fit: unused in mkFit, used in SMatrix
+#INWARD_FIT := -DINWARDFIT
+
+################################################################
+# Derived settings
+################################################################
+
+CPPFLAGS := ${USE_INTRINSICS} -DMKFIT_STANDALONE
+CXXFLAGS := -fPIC ${OPT} ${OSX_CXXFLAGS}
+
+LDFLAGS_HOST := 
+# Optional feature defines from the configuration section; unset ones expand to nothing.
+CPPFLAGS += ${USE_STATE_VALIDITY_CHECKS} ${USE_SCATTERING} ${USE_LINEAR_INTERPOLATION} ${ENDTOEND} ${INWARD_FIT} 
+# VTune hooks are added only when USE_VTUNE_NOTIFY is set and VTUNE_AMPLIFIER_XE_2017_DIR is defined.
+ifdef USE_VTUNE_NOTIFY
+  ifdef VTUNE_AMPLIFIER_XE_2017_DIR
+    CPPFLAGS     += -I$(VTUNE_AMPLIFIER_XE_2017_DIR)/include -DUSE_VTUNE_PAUSE
+    LDFLAGS_HOST += -L$(VTUNE_AMPLIFIER_XE_2017_DIR)/lib64 -littnotify
+  endif
+endif
+# VEC_HOST: the vectorization flags (section 4) matching the compiler in use.
+ifeq (${CXX}, ${ICC})
+  VEC_HOST := ${VEC_ICC}
+  CXXFLAGS += -qopt-report=5 -qopenmp-simd -qopt-report-phase=all
+else
+  VEC_HOST := ${VEC_GCC}
+endif
+# Warning/diagnostic set applied only when CXX is exactly "g++".
+ifeq ($(CXX), g++)
+  CXXFLAGS += -std=c++1z -ftree-vectorize -Werror=main -Werror=pointer-arith -Werror=overlength-strings -Wno-vla -Werror=overflow -Wstrict-overflow -Werror=array-bounds -Werror=format-contains-nul -Werror=type-limits -fvisibility-inlines-hidden -fno-math-errno --param vect-max-version-for-alias-checks=50 -Xassembler --compress-debug-sections -felide-constructors -fmessage-length=0 -Wall -Wno-non-template-friend -Wno-long-long -Wreturn-type -Wunused -Wparentheses -Wno-deprecated -Werror=return-type -Werror=missing-braces -Werror=unused-value -Werror=address -Werror=format -Werror=sign-compare -Werror=write-strings -Werror=delete-non-virtual-dtor -Wstrict-aliasing -Werror=narrowing -Werror=unused-but-set-variable -Werror=reorder -Werror=unused-variable -Werror=conversion-null -Werror=return-local-addr -Wnon-virtual-dtor -Werror=switch -fdiagnostics-show-option -Wno-unused-local-typedefs -Wno-attributes -Wno-psabi
+  CXXFLAGS += -fdiagnostics-color=auto -fdiagnostics-show-option -pthread -pipe -fopenmp-simd
+endif
+
+# Try to find a new enough TBB
+ifneq ($(CXX),icc)
+  ifndef TBB_PREFIX
+    ifdef CMSSW_BASE
+      CPPFLAGS += -I$(shell cd $$CMSSW_BASE && scram tool tag tbb INCLUDE)
+      LDFLAGS_HOST += -L$(shell cd $$CMSSW_BASE && scram tool tag tbb LIBDIR)
+    else ifdef TBB_GCC
+      TBB_PREFIX = $(TBB_GCC)
+    endif
+  endif
+endif
+
+ifdef WITH_TBB
+  # icc finds tbb in its own installation, but allow overriding in case it doesn't
+  ifdef TBB_PREFIX
+    CPPFLAGS += -I${TBB_PREFIX}/include
+    LDFLAGS  += -L${TBB_PREFIX}/lib -Wl,-rpath,${TBB_PREFIX}/lib
+  endif
+  CPPFLAGS += -DTBB
+  LDFLAGS  += -ltbb
+endif
+# Snapshot taken before any ROOT flags are added (":=" expands immediately).
+CPPFLAGS_NO_ROOT := ${CPPFLAGS} -DNO_ROOT
+LDFLAGS_NO_ROOT  := ${LDFLAGS}
+
+ifdef WITH_ROOT
+  CPPFLAGS += $(shell root-config --cflags)
+  LDFLAGS  += $(shell root-config --libs)
+else
+  MKFIT_ROOT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
+  CPPFLAGS += -DNO_ROOT -I$(MKFIT_ROOT_DIR)/from-root
+endif
+
+ifdef DEBUG
+  CPPFLAGS += -DDEBUG
+endif
+
+# Set stdlib at the very end, as other flags (i.e. ROOT) can override our choice for which version of c++
+CPPFLAGS += -std=c++1z
+
+################################################################
+# Dependency generation
+################################################################
+
+DEPEND_TGTS = -MQ '$(patsubst %.d,%.o,$@)'
+# The -MQ above names the matching .o file as the target of each generated .d rule.
+# With icc use gcc for dependency generation. Check if this is still true with icc-16.
+# MT-2016-08-30: icc 16.0.3 seems ok. Leaving this here until we update phiphi.
+# MAKEDEPEND uses "=" (expanded on use), so it also sees the CPPFLAGS additions made further down.
+ifeq (${CXX}, ${ICC})
+  MAKEDEPEND = gcc    -MM -MG ${DEPEND_TGTS} ${CPPFLAGS}
+else
+  MAKEDEPEND = ${CXX} -MM -MG ${DEPEND_TGTS} ${CPPFLAGS}
+endif
+# Flags supplied by the user are appended last.
+CPPFLAGS += ${CPPUSERFLAGS}
+CXXFLAGS += ${CXXUSERFLAGS}
+LDFLAGS  += ${LDUSERFLAGS}
diff --git a/RecoTracker/MkFitCore/standalone/README.md b/RecoTracker/MkFitCore/standalone/README.md
new file mode 100644
index 0000000000000..fb8ed71ab1d60
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/README.md
@@ -0,0 +1,863 @@
+# mkFit: a repository for vectorized, parallelized charged particle track reconstruction
+
+**Intro**: Below is a short README on setup steps, code change procedures, and some helpful pointers. Please read this thoroughly before checking out the code! As this is a markdown file, it is best viewed via a web browser.
+
+### Outline
+1) Test platforms
+2) How to checkout the code
+3) How to run the code
+4) How to make changes to the main development branch
+5) The benchmark and validation suite
+   1) Running the main script
+   2) Some (must read) advice on benchmarking
+   3) (Optional) Using additional scripts to display plots on the web
+   4) Interpreting the results
+      1) Benchmark results
+      2) Validation results
+      3) Other plots
+6) Submit an issue
+7) Condensed description of code
+8) Other helpful README's in the repository
+9) CMSSW integration
+   1) Considerations for `mkFit` code
+   2) Building and setting up `mkFit` for CMSSW
+      1) Build `mkFit`
+         1) Lxplus
+         2) Phi3
+      2) Set up `mkFit` as an external
+      3) Pull CMSSW code and build
+   3) Recipes for the impatient on phi3
+      1) Offline tracking
+      2) HLT tracking (iter0)
+   4) More thorough running instructions
+      1) Offline tracking
+         1) Customize functions
+         2) Timing measurements
+         3) Producing MultiTrackValidator plots
+      2) HLT tracking (iter0)
+   5) Interpretation of results
+      1) MultiTrackValidator plots
+      2) Timing
+10) Other useful information
+    1) Important Links
+    2) Tips and Tricks
+       1) Missing Libraries and Debugging
+       2) SSH passwordless login for benchmarking and web scripts
+    3) Acronyms/Abbreviations
+
+## Section 1: Test platforms
+
+- **phi1.t2.ucsd.edu**: [Intel Xeon Processor E5-2620](https://ark.intel.com/products/64594/Intel-Xeon-Processor-E5-2620-15M-Cache-2_00-GHz-7_20-GTs-Intel-QPI) _Sandy Bridge_ (referred to as SNB, phiphi, phi1)
+- **phi2.t2.ucsd.edu**: [Intel Xeon Phi Processor 7210](https://ark.intel.com/products/94033/Intel-Xeon-Phi-Processor-7210-16GB-1_30-GHz-64-core) _Knights Landing_ (referred to as KNL, phi2)
+- **phi3.t2.ucsd.edu**: [Intel Xeon Gold 6130 Processor](https://ark.intel.com/products/120492/Intel-Xeon-Gold-6130-Processor-22M-Cache-2_10-GHz) _Skylake Scalable Performance_ (referred to as SKL-Au, SKL-SP, phi3)
+- **lnx4108.classe.cornell.edu**: [Intel Xeon Silver 4116 Processor](https://ark.intel.com/products/120481/Intel-Xeon-Silver-4116-Processor-16_5M-Cache-2_10-GHz) _Skylake Scalable Performance_ (referred to as SKL-Ag, SKL-SP, lnx4108, LNX-S)
+- **lnx7188.classe.cornell.edu**: [Intel Xeon Gold 6142 Processor](https://ark.intel.com/content/www/us/en/ark/products/120487/intel-xeon-gold-6142-processor-22m-cache-2-60-ghz.html) _Skylake Scalable Performance_ (referred to as lnx7188,LNX-G)
+
+phi1, phi2, and phi3 are all managed across a virtual login server and therefore the home user spaces are shared. phi1, phi2, phi3, lnx7188, and lnx4108 also have /cvmfs mounted so you can source the environment needed to run the code.
+
+The main development platform is phi3. This is the recommended machine for beginning development and testing. Logging into any of the machines is achieved through ```ssh -X -Y <phi username>@phi<N>.t2.ucsd.edu```. It is recommended that you set up ssh key forwarding on your local machine so as to avoid typing in your password with every login, and more importantly, to avoid typing your password during the benchmarking (see Section 10.ii.b).
+
+**Extra platform configuration information**
+- phi1, phi3, and lnx4108 are dual socket machines and have two identical Xeons on each board
+- phi1, phi2, and phi3 all have TurboBoost disabled to disentangle some effects of dynamic frequency scaling with higher vectorization
+
+For further info on the configuration of each machine, use your favorite text file viewer to peruse the files ```/proc/cpuinfo``` and ```/proc/meminfo``` on each machine.
+
+## Section 2: How to checkout the code
+
+The master development branch is ```devel```, hosted on a [public GH repo](https://github.com/trackreco/mkFit) (referred to as ```trackreco/devel``` for the remainder of the README). This is a public repository, as are all forks of this repository. Development for mkFit is done on separate branches within a forked repository. Make sure to fork the repository to your own account first (using the "Fork" option at the top of the webpage), and push any development branches to your own forked repo first.
+
+Once forked, checkout a local copy by simply doing a git clone:
+
+```
+git clone git@github.com:<user>/mkFit
+```
+
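+where ```<user>``` is your GH username. In the remote-based git commands below, ```<user>``` instead denotes the name of the git remote: for your own fork this is ```origin``` by default, unless you renamed the remote to your username.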
+
+If you wish to add another user's repo to your local clone, do:
+
+```
+git remote add <user> git@github.com:<user>/mkFit
+```
+
+This is useful if you want to submit changes to another user's branches. To checkout a remote branch, do:
+
+```
+git fetch <user>
+git fetch <user> <branch>
+git checkout -b <branch> <user>/<branch>
+```
+
+## Section 3: How to run the code
+
+As already mentioned, the recommended test platform to run the code is phi3. Checkout a local repo on phi3 from your forked repo. To run the code out-of-the-box from the main ```devel``` branch, you will first need to source the environment:
+
+```
+source xeon_scripts/init-env.sh
+```
+
+You are free to put the lines from this script in your login scripts (.bashrc, .bash_profile, etc). However, encapsulate them within a function and then call that function upon logging into phi3. We want clean shells before launching any tests. Therefore, if you have any setup that sources something, disable it and do a fresh login before running any tests! 
+
+Now compile the code:
+
+```
+make -j 32 AVX2:=1
+```
+
+To run the code with some generic options, do:
+
+```
+./mkFit/mkFit --cmssw-n2seeds --input-file /data2/slava77/samples/2017/pass-c93773a/initialStep/PU70HS/10224.0_TTbar_13+TTbar_13TeV_TuneCUETP8M1_2017PU_GenSimFullINPUT+DigiFullPU_2017PU+RecoFullPU_2017PU+HARVESTFullPU_2017PU/memoryFile.fv3.clean.writeAll.CCC1620.recT.082418-25daeda.bin --build-ce --num-thr 64 --num-events 20
+```
+
+Consult Sections 7-8 for where to find more information on descriptions of the code, which list resources on where to find the full set of options for running the code.
+
+There are ways to run this code locally on macOS. Instructions for how to do this will be provided later. You will need to have XCode installed (through the AppStore), XCode command line tools, a ROOT6 binary (downloaded from the ROOT webpage), as well as TBB (through homebrew). 
+
+## Section 4: How to make changes to the main development branch
+
+Below are some rules and procedures on how to submit changes to the main development branch. Although not strictly enforced through settings on the main repo, please follow the rules below. This ensures we have a full history of the project, as we can trace any changes to compute or physics performance that are introduced (whether intentional or unintentional). 
+
+**Special note**: Do not commit directly to ```trackreco/devel```! This has caused issues in the past that made it difficult to track down changes in compute and physics performance. Please always submit a Pull Request first, ensuring it is reviewed and given the green light before hitting "Merge pull request". 
+
+1. Checkout a new branch on your local repo: ```git checkout -b <branch>```
+2. Make some changes on your local repo, and commit them to your branch: ```git commit -m "some meaningful text describing the changes"```
+3. If you have made multiple commits, see if you can squash them together to make the git history legible for review. If you do not know what you are doing with this, make sure to save a copy of the local branch as backup by simply checking out a new branch from the branch you are on, with something like: ```git checkout -b <branch_copy>```. Git provides a [tutorial on squashing commits](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History).
+4. Ensure you have pulled down the latest changes from the main development branch merged into your local development branch. ```git merge cerati devel``` can make a mess, so the preferred option is ```git rebase --onto <new_base_hash> <old_base_hash> <branch>```. CMSSW provides a nice explanation of [this rebase option](https://cms-sw.github.io/tutorial-resolve-conflicts.html).
+5. Test locally!
+   1. If you have not done so, clone your forked repo onto phi3, checking out your new branch.
+   2. Source the environment for phi3 as explained in Section 3.
+   3. Compile test: ```make -j 32 AVX2:=1```. Fix compilation errors if they are your fault or email the group / person responsible to fix their errors! 
+   4. Run benchmark test: ```./mkFit/mkFit --cmssw-n2seeds --input-file /data2/slava77/samples/2017/pass-4874f28/initialStep/PU70HS/10224.0_TTbar_13+TTbar_13TeV_TuneCUETP8M1_2017PU_GenSimFullINPUT+DigiFullPU_2017PU+RecoFullPU_2017PU+HARVESTFullPU_2017PU/a/memoryFile.fv3.clean.writeAll.recT.072617.bin --build-ce --num-thr 64 --num-events 20```. Ensure the test did not crash, and fix any segfaults / run-time errors! 
+   5. Compile with ROOT test: ```make -j 32 AVX2:=1 WITH_ROOT:=1```. Before compiling, make sure to do a ```make distclean```, as we do not want conflicting object definitions. Fix errors if compilation fails.
+   6. Run validation test:  ```./mkFit/mkFit --cmssw-n2seeds --input-file /data2/slava77/samples/2017/pass-4874f28/initialStep/PU70HS/10224.0_TTbar_13+TTbar_13TeV_TuneCUETP8M1_2017PU_GenSimFullINPUT+DigiFullPU_2017PU+RecoFullPU_2017PU+HARVESTFullPU_2017PU/a/memoryFile.fv3.clean.writeAll.recT.072617.bin --build-ce --num-thr 64 --num-events 20 --backward-fit-pca --cmssw-val-fhit-bprm```. Ensure the test did not crash! 
+6. Run the full benchmarking + validation suite on all platforms: follow procedure in Section 5 (below)! If you notice changes to compute or physics performance, make sure to understand why! Even if you are proposing a technical two-line change, please follow this step as it ensures we have a full history of changes.
+7. Prepare a Pull Request (PR)
+   1. Push your branch to your forked repo on GitHub: ```git push <forked_repo_name> <branch>```
+   2. [Navigate to the main GH](https://github.com/trackreco/mkFit)
+   3. Click on "New Pull Request"
+   4. Click on "Compare across forks", and navigate to your fork + branch you wish to merge as the "head fork + compare"
+   5. Provide a decent title, give a brief description of the proposed commits. Include a link to the benchmarking and validation plots in the description. If there are changes to the compute or physics performance, provide an explanation for why! If no changes are expected and none are seen, make sure to mention it.
+   6. (Optional) Nominate reviewers to check over the proposed changes.
+   7. Follow up on review comments! After pushing new commits to your branch, repeat big steps 5 and 6 (i.e. test locally and re-run the validation). Post a comment to the PR with the new plots.
+   8. Once given the green light, you can hit "Merge Pull Request", or ask someone else to do it.
+
+## Section 5: The benchmark and validation suite
+
+**Notes on nomenclature**
+- "benchmark": these are the compute performance tests (i.e. time and speedup)
+- "validation": these are the physics performance tests (i.e. track-finding efficiency, fake rate, etc.)
+
+We often use these words interchangeably to refer to the set of benchmark and validation tests as a single suite. So if you are asked to "run the benchmarking" or "run the validation": please run the full suite (unless specifically stated to run one or the other). In fact, the main scripts that run the full suite use "benchmark" in their name, even though they may refer to both the running of the compute and physics performance tests and plot comparisons.
+
+**Notes on samples**
+
+Currently, the full benchmark and validation suite uses simulated event data from CMSSW for ttbar events with an average of 70 pileup collisions per event. The binary file has over 5000 events to be used for high statistics testing of time performance. There also exist samples with a lower number of events for plain ttbar no pileup and ttbar + 30 pileup, used to measure the effects on physics performance when adding more complexity. Lastly, there also exists a sample for muon-gun events: 10 muons per event with no pileup. The muon-gun sample is used to show physics performance in a very clean detector environment. All of these samples are replicated on disk on all three platforms to make time measurements as repeatable and representative as possible. 
+
+### Section 5.i: Running the main script
+
+The main script for running the full suite can be launched from the top-level directory with:
+
+```
+./xeon_scripts/runBenchmark.sh ${suite} ${useARCH} ${lnxuser}
+```
+
+There are three options for running the full suite by passing one of the three strings to the parameter ```${suite}```:
+- ```full``` : runs compute and physics tests for all track finding routines (BH, STD, CE, FV)
+- ```forPR``` : runs compute and physics tests for track finding routines used for comparisons in pull requests (default setting: BH and CE for benchmarks, STD and CE for validation)
+- ```forConf``` : runs compute and physics tests for track finding routines used for conferences only (currently only CE)
+
+The ```full``` option currently takes a little more than half an hour, while the other tests take about 25 minutes. 
+
+Additionally, the ```${useARCH}``` option allows the benchmarks to be run on different computer clusters: 
+- ```${useARCH} = 0```: (default) runs on phi3 computers only. This option should be run from phi3.
+- ```${useARCH} = 1```: runs on lnx7188 and lnx4108 only. This option should be run from lnx7188.
+- ```${useARCH} = 2```: runs on both phi3 and lnx. This option should be run from phi3.
+- ```${useARCH} = 3```: runs on all phi computers (phi1, phi2 and phi3). This option should be run from phi3.
+- ```${useARCH} = 4```: runs on all phi computers (phi1, phi2 and phi3) as well as lnx7188 and lnx4108. This option should be run from phi3.
+
+
+- ```${lnxuser}``` denotes the username on the lnx computers. This is only needed if running on the lnx computers when the lnx username is different from the phi3 username.  
+
+Inside the main script, tests are submitted for phi1, phi2, and phi3 concurrently by: tarring up the local repo, sending the tarball to a disk space on the remote platform, compiling the untarred directory natively on the remote platform, and then sending back the log files to be analyzed on phi3. It should be noted that the tests for phi3 are simply run in the user home directory when logged into phi3 (although we could in principle ship the code to the work space disk on phi3). Because we run the tests for phi3 in the home directory, which is shared by all three machines, we pack and send the code to a remote _disk_ space _before_ launching the tests on phi3 from the home directory. The scripts that handle the remote testing are: 
+
+```
+./xeon_scripts/tarAndSendToRemote.sh ${remote_arch} ${suite}
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh ${ben_arch} ${suite}
+```
+
+When these scripts are called separately to run a test on particular platform, one of three options must be specified for ```${remote_arch}``` or ```${ben_arch}```: ```SNB```, ```KNL```, or ```SKL-SP```. The main script ```xeon_scripts/runBenchmark.sh``` will do this automatically for all three platforms. If the code is already resident on a given machine, it is sufficient to run:
+
+```
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh ${ben_arch} ${suite}
+```
+
+The appropriate strings should appear in place of ```${ben_arch}``` and ```${suite}```. In fact, this is the script called by ```xeon_scripts/runBenchmark.sh``` to launch tests on each of the platforms once the code is sent and unpacked.
+
+Within the main ```xeon_scripts/runBenchmark.sh``` script, there are two other scripts that make performance plots from the log files of compute performance tests:
+
+```
+./plotting/benchmarkPlots.sh ${suite} 
+./plotting/textDumpPlots.sh ${suite}
+```
+
+The first will produce the time and speedup plots, while the second produces distributions of basic kinematic quantities of the candidate track collections, comparing the results across the different platforms and different number of vector units and threads.
+
+The main physics performance script that is run is:
+
+```
+./val_scripts/validation-cmssw-benchmarks.sh ${suite}
+```
+
+The physics validation script also supports an option to produce results compatible with the standard tracking validation in CMSSW, the MultiTrackValidator (MTV). This can be run as:
+```
+./val_scripts/validation-cmssw-benchmarks.sh ${suite} --mtv-like-val
+```
+
+This script will run the validation on the building tests specified by the ```${suite}``` option. It will also produce the full set of physics performance plots and text files detailing the various physics rates.
+
+It should be mentioned that each of these scripts within ```./xeon_scripts/runBenchmark.sh``` can be launched on their own, as again, they each set the environment and run tests and/or plot making. However, for simplicity's sake, it is easiest when prepping for a PR to just run the master ```./xeon_scripts/runBenchmark.sh```.  If you want to test locally, it is of course possible to launch the scripts one at a time.
+
+### Section 5.ii: Some (must read) advice on benchmarking
+
+1. Since the repo tarball and log files are sent back and forth via ```scp``` in various subscripts, it is highly recommended you have SSH-forwarding set up to avoid having to type your password every time ```scp``` is called. This can be particularly annoying since the return of the log files is mostly indeterminate, since it is just when the scripts finish running on the remote they will be sent back. Coupled with ```nohup``` when launching the main script, the prompt will never appear, and the log files will then be lost, as the final step in remote testing is removing the copy of repo on the remote platform at the end of ```xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh```. See Section 10.ii.b for more information on how to set up SSH-forwarding and passwordless login.
+
+2. Before launching any tests, make sure the machines are quiet: we don't want to disturb someone who already is testing! Tests from different users at the same time will also skew the results of your own tests as the scripts make use of the full resources available on each platform at various points. 
+
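+3. Please run the full suite from phi3 with a clean login: make sure nothing has been sourced to set up the environment. The main script (as well as the called subscripts) will set the environment and some common variables shared between all subscripts by sourcing two scripts, ```xeon_scripts/init-env.sh``` and ```xeon_scripts/common-variables.sh ${suite}``` (the exact commands are shown in the code block at the end of this list).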
+
+4. Check the logs! A log with standard out and error is generated for each test launched. If a plot is empty, check the log corresponding to the test point that failed as this will be the first place to say where and how the test died (hopefully with a somewhat useful stack trace). If you are sure you are not responsible for the crash, email the group listserv to see if anyone else has experienced the issue (attaching the log file(s) for reference). If it cannot be resolved via email, it will be promoted to a GH Issue.
+
+```
+source xeon_scripts/init-env.sh
+source xeon_scripts/common-variables.sh ${suite}
+```
+
+### Section 5.iii: (Optional) Using additional scripts to display plots on the web
+
+After running the full suite, there is an additional set of scripts within the ```web/``` directory for organizing the output plots and text files for viewing them on the web. Make sure to read the ```web/README_WEBPLOTS.md``` first to setup an /afs or /eos web directory on LXPLUS. If you have your own website where you would rather post the results, just use ```web/collectBenchmarks.sh``` to tidy up the plots into neat directories before sending them somewhere else. More info on this script is below.
+
+The main script for collecting plots and sending them to LXPLUS can be called by:
+
+```
+./web/move-benchmarks.sh ${outdir_name} ${suite} ${afs_or_eos}
+```
+
+where again, ```${suite}``` defaults to ```forPR```. ```${outdir_name}``` will be the top-level directory where the output is collected and eventually shipped to LXPLUS. This script first calls ```./web/collectBenchmarks.sh ${outdir_name} ${suite}```, which will sort the files, and then calls the script ```./web/copyphp.sh```, which copies ```web/index.php``` into the ```${outdir_name}``` to have a nice GUI on the web, and finally calls ```./web/tarAndSendToLXPLUS.sh ${outdir_name} ${suite} ${afs_or_eos}```, which packs up the top-level output dir and copies it to either an /afs or /eos userspace on LXPLUS. 
+
+The option ```${afs_or_eos}``` takes either of the following arguments: ```afs``` or ```eos```, and defaults to ```eos```. The mapping of the username to the remote directories is in ```web/tarAndSendToLXPLUS.sh```. If an incorrect string is passed, the script will exit. 
+
+**IMPORTANT NOTES**
+1) AFS is being phased out at CERN, so the preferred option is ```eos```.
+
+2) There are some assumptions on the remote directory structure, naming, and files present in order for ```web/tarAndSendToLXPLUS.sh``` to work. Please consult ```web/README_WEBPLOTS.md``` for setting this up properly!
+
+**IMPORTANT DISCLAIMERS**
+
+1. There is a script: ```./xeon_scripts/trashSKL-SP.sh``` that is run at the very end of the ```./web/move-benchmarks.sh``` script that will delete log files, pngs, validation directories, root files, and the neat directory created to house all the validation plots.  This means that if the scp fails, the plots will still be deleted locally, and you will be forced to re-run the whole suite!!  You can of course comment this script out if this bothers you.
+
+2. ```web/tarAndSendToLXPLUS.sh``` executes a script remotely on LXPLUS when using AFS, which makes the directory readable to outside world. If you are uncomfortable with this, you can comment it out. If your website is on EOS, then please ignore this disclaimer.
+
+### Section 5.iv: Interpreting the results
+
+This section provides a brief overview in how to interpret the plots and logs from the tests that produced them. This section assumes the plots were organized with the ```web/collectBenchmarks.sh``` script.
+
+#### Section 5.iv.a: Benchmark results
+
+The "main" benchmark plots are organized into two folders:
+- Benchmarks: Will contain plots of the form ```${ben_arch}_CMSSW_TTbar_PU70_${ben_test}_${ben_trend}```
+- MultEvInFlight: Will contain plots of the form ```${ben_arch}_CMSSW_TTbar_PU70_MEIF_${ben_trend}```
+
+where the variables in the plot names are:  
+- ```${ben_arch}```: SNB (phi1 results), KNL (phi2 results), or SKL (phi3 results)
+- ```${ben_test}```: VU (vector units) or TH (threads)
+- ```${ben_trend}```: time or speedup, i.e. the y-axis points
+
+The plots in "Benchmarks" measure the time of the building sections only. These tests run over 20 events total, taking the average to measure the per event time for each building section. We discard the first event's time when computing the timing. The logs used for extracting the time information into plots are of the form: ```log_${ben_arch}_CMSSW_TTbar_PU70_${build}_NVU${nVU}_NTH${nTH}.txt```, where ```${build}``` is the building routine tested. 
+
+The plots in "MultEvInFlight" measure the performance of the full event loop time which includes I/O, seed cleaning, etc. These tests run over 20 events times the number of events in flight. The time plotted is the total time for all events divided by the number of events.
+
+The points in the speedup plots are simply produced by dividing the first point by each point in the trend. The ideal scaling line assumes that with an N increase in resources, the speedup is then N, i.e. the code is fully vectorized and parallelized with no stalls from memory bandwidth, latency, cache misses, etc. Ideal scaling also assumes no penalty from [dynamic frequency scaling](https://en.wikichip.org/wiki/intel/frequency_behavior). Intel lowers the base and turbo frequency as a function of the occupancy of the number of cores, which can make speedup plots look much worse than they really are. In addition, different instruction sets have different base and turbo frequency settings. Namely, SSE has the highest settings, AVX2 is at the midpoint, while AVX512 has the lowest.
+
+The "VU" tests measure the performance of the building sections as a function of the vector width. In hardware, of course, vector width is a fixed property equal to the maximum number of floats that can be processed by a VPU. Call this number N_max. One can force the hardware to underutilize its VPUs by compiling the code with an older instruction set, e.g., SSE instead of AVX; however, this would have effects beyond just shrinking the vectors. Therefore, for our "VU" tests, we mimic the effect of reducing vector width by setting the width of Matriplex types to various nVU values up to and including N_max. At nVU=1, the code is effectively serial: the compiler might choose not to vectorize Matriplex operations at all. At the maximum size, e.g. nVU=16 on SKL, Matriplex operations are fully vectorized and the VPU can be fully loaded with 16 floats to process these operations. For intermediate values of nVU, full-vector instructions probably will be used, but they may be masked so that the VPU is in reality only partially utilized.
+
+The vectorization tests only use a single thread. There is an additional point at the VU=N_max (SNB: 8, KNL, SKL: 16) with an open dot: this is a measure of the vectorization using intrinsics. 
+
+The "TH" tests measure the performance of the building sections as a function of the number of threads launched. These tests have vectorization fully enabled with intrinsics. It should be noted that we do not account for frequency scaling in the speedup plots.
+
+The building section has sections of code that are inherently serial (hit chi2 comparisons, copying tracks, etc.), so the vectorization and parallelization is not perfect. However, it is important to consider the effect of [Amdahl's Law](https://en.wikipedia.org/wiki/Amdahl%27s_law). Amdahl's law can be rewritten as:
+```
+    1-1/S
+p = -----
+    1-1/R
+```
+
+where, ```p``` is the fraction of the code that is vectorized/parallelized, ```S``` is the measured speedup, and ```R``` is the amount of speedup from increased resources. For example, we have seen that SKL clocks in at about a factor of three in speedup (S=3) for vectorization when fully vectorized (i.e. nVU=R=16), which suggests the code is 70% vectorized. Of course, this assumes no issues with memory bandwidth, cache misses, etc.
+ 
+We have seen that moving from nVU=1 to nVU=2 the improvement is minimal (and sometimes a loss in performance). One hypothetical reason for this (yet unconfirmed) is that the compiler is using an instruction set other than expected: either finding a way to use vector instructions with nVU=1, or choosing not to vectorize at nVU=2. Furthermore, at run time, the CPU will adjust its frequency depending on the instruction set being used (it runs slower for wider vectors). At present, the exact reasons for the detailed shape of the speedup-vs.-nVU curve are unknown.
+
+Lastly, it is important to consider the effects of hyperthreading in the "TH" tests. At nTH=number of cores, we typically see a clear discontinuity in the slope. The main hypothesis is that this is likely due to resource contention as two threads now share the same cache.
+
+#### Section 5.iv.b: Validation results
+
+The physics validation results are organized into two directories:
+- SimVal: SimTracks are used as the reference set of tracks
+- CMSSWVal: CMSSW tracks are used as the reference set of tracks
+
+Three different matching criteria are used for making associations between reconstructed tracks and reference tracks. Many of the details are enumerated in the validation manifesto, however, for simplicity, the main points are listed here. 
+
+- CMSSW tracks = "initial step" CMSSW tracks, after fitting in CMSSW (i.e. includes outlier rejection)
+- Reference tracks must satisfy:
+  - "Findable": require at least 12 layers (includes 4 seed layers, so only 8 outside of the seed are required)
+  - Sim tracks are required to have four hits that match a seed
+- To-be-validated reconstructed tracks must satisfy:
+  - "Good" tracks: require at least 10 layers
+  - If a mkFit track, 4 hits from seed are included in this 10. So, 6 additional hits must be found during building to be considered a "good" track.
+  - If a CMSSW track, up to 4 hits are included, as a seed hit may be missing from outlier rejection. So, a CMSSW track may have to find more than 6 layers during building to be considered a "good" track, as some hits from the seed may have been removed.
+- Matching Criteria:
+  - SimVal: reco track is matched to a sim track if >= 50% of hits on reco track match hits from a single sim track, excluding hits from the seed
+  - CMSSWVal + Build Tracks: reco track is matched to a CMSSW track if >= 50% of hits on reco track match hits from a single CMSSW track, excluding hits from the seed. Given that CMSSW can produce duplicates (although very low), if a reco track matches more than one CMSSW track, the CMSSW track with the highest match percentage is chosen.
+  - CMSSWVal + Fit Tracks: reco track is matched to a CMSSW track via a set of binned helix chi2 (track eta and track pT) and delta phi cuts
+- Fake = reco track NOT matching a ref. track, excluding matching to non-findable tracks
+- Figures of merit: 
+  - Efficiency = fraction of findable ref. tracks matched to a reco track
+  - Duplicate rate = fraction of matched ref. tracks with more than one match to a reco track
+  - Fake rate = fraction of "good" reco tracks without a match to a ref. track 
+
+In case the MTV-like validation is selected with the option ```--mtv-like-val```, the above requirements are replaced with the following:
+- Reference tracks:
+  - Sim tracks required to come from the hard-scatter interaction, originate from R<3.5 cm and |z|<30 cm, and with pseudorapidity |eta|<2.5 (no requirement to have four hits that match a seed)
+- All reconstructed tracks are considered "To-be-validated"
+- Matching Criteria:
+  - Reco track is matched to a sim track if > 75% of hits on reco track match hits from a single sim track (including hits from the seed)
+
+There are text files within these directories that contain the average numbers for each of the figures of merit, which start with "totals\_\*.txt." In addition, these directories contain nHit plots, as well as kinematic difference plots for matched tracks. Best matched plots are for differences with matched reco tracks with the best track score if more than one reco track matches a ref. track. 
+
+#### Section 5.iv.c: Other plots
+
+The last set of plots to consider are those that produce some kinematic distributions from the text file logs, in the directory: "PlotsFromDump." The distributions compare for each building routine run during the benchmarking the differences across platform and vector + thread setup. Ideally, the distributions should have all points lie on top of each other: there should be no dependency on platform or parallelization/vectorization setting for a specific track-finding routine. The text files that produce these plots have nearly the same form as those for benchmarking, except they also have "DumpForPlots" at the very end.
+
+The subdirectory for "Diffs" in "PlotsFromDump" are kinematic difference plots between mkFit and CMSSW. The matching is simple: we compare mkFit to CMSSW tracks for those that share the exact same CMSSW seed (since we clean some seeds out and CMSSW does not produce a track for every seed as well). The printouts that produce the dump have info to compare to sim tracks using the standard 50% hit matching as done in the SimVal. However, we do not produce these plots as it is redundant to the diff plots already in the validation plots.
+
+## Section 6: Submit an issue
+
+It may so happen that you discover a bug or that there is a known problem that needs further discussion outside of private emails/the main list-serv. If so, make sure to open an issue on the main repo by clicking on "Issues" on GH, then "Open an issue".  Provide a descriptive title and a description of the issue. Provide reference numbers to relevant PRs and other Issues with "#<number>".  Include a minimal working example to reproduce the problem, attaching log files of error messages and/or plots demonstrating the problem.
+
+Assign who you think is responsible for the code (which could be yourself!). If you have an idea that could solve the problem: propose it! If it requires a large change to the code, or may hamper performance in either physics or computing, make sure to detail the pros and cons of different approaches. 
+
+Close an issue after it has been resolved, providing a meaningful message + reference to where/how it was resolved.
+
+## Section 7: Condensed description of code
+
+### mkFit/mkFit.cc
+
+This file is where the ```main()``` function is called for running the executable ```./mkFit/mkFit```. The ```main()``` call simply sets up the command line options (and lists them), while the meat of the code is called via ```test_standard()```. Some of the command line options will set global variables within mkFit.cc, while others will set the value of variables in the ```Config``` namespace. Options that require strings are mapped to enums in the code, with the mapping specified via global functions at the top of mkFit.cc.
+
+```test_standard()``` does the majority of the work: running the toy simulation, reading or writing binary files, and running the various tests. The outer loop is a TBB parallel-for over the number of threads used for running multiple-events-in-flight (MEIF). The default is one event in flight. The inner loop is over the number of events specified for that thread. The number of events in total to run over can be specified as a command line option. When running multiple-events-in-flight, in order to have reasonable statistics from variable load from different events, it is advised to have at least 20 events per thread.  When we refer to "total loop time" of the code, we are timing the inner loop section for each event, which includes I/O. However, for the sake of the plots, we simply sum the time for all events and all threads, and divide by the number of events run to obtain an average per event time.
+
+Within the inner loop, a file is read in, then the various building and fitting tests are run. At the end of each event there is optional printout, as well as at the end of all the events for a thread. If running the validation with multiple-events-in-flight is enabled, you will have to ```hadd``` these files into one file before making plots. This is handled automatically within the scripts.
+
+### mkFit/buildtestMPlex.[h,cc]
+
+This code calls the various building routines, setting up the event, etc. The functions defined here are called in mkFit.cc. Functions called within this file are from MkBuilder.
+
+### mkFit/MkBase.h + mkFit/MkFitter.[h,cc] + mkFit/MkFinder.[h,cc]
+
+MkFinder and MkFitter derive from MkBase. High-level code for objects used by building and fitting routines in mkFit. These objects specify I/O operations from standard format to Matriplex format for different templated Matriplex objects (see Matrix[.h,.cc] for template definitions). 
+
+### mkFit/MkBuilder.[h,cc]
+
+Specifies building routines, seed prepping, validation prepping, etc. Code for building and backward fit routines using MkFinders, while seed fitting uses MkFitters. Objects from Event object are converted to their Matriplex-ready formats. Uses the layer plan to navigate which layer to go to for each track. Functions for the navigation are defined in SteeringParams.h.
+
+### Math/ directory
+
+Contains SMatrix headers, used for some operations on track objects (mostly validation and deprecated SMatrix building code -- see below).
+
+### Matriplex/ directory
+
+Contains low-level Matriplex library code for reading/writing into matriplex objects as well as elementary math operations (add, multiply). Includes perl scripts to autogenerate code based on matrix dimension size.
+
+### Geoms/ dir + TrackerInfo.[h,cc]
+
+Geometry plugin info. TrackerInfo sets up classes for layer objects. Geoms/ dir contains the actual layout (number scheme, layer attributes, etc) for each of the different geometries.
+
+### mkFit/PropagationMPlex.[h,cc,icc] + mkFit/KalmanUtilsMPlex.[h,cc,icc]
+
+Underlying code for propagation and Kalman update (gain) calculations in Matriplex form. The .icc files contain the low-level computations. Chi2 computations specified in KalmanUtilsMPlex.
+
+### mkFit/CandCloner.[h,cc]
+
+Code used in Clone Engine for bookkeeping + copying candidates after each layer during building. 
+
+### mkFit/HitStructures.[h,cc]
+
+Specifies MkBuilder + Matriplex friendly data formats for hits. Hits are placed in these containers before building.
+
+### Event.[h,cc]
+
+Most of the code is vestigial (see below). However, the Event object is a container for the different track collections and hit collection. There is code for seed processing, namely cleaning. There is also code relevant for validation and validation prep for different track collections.
+
+### Hit.[h,cc] + Track.[h,cc]
+
+Contain the Hit, Track, and TrackExtra classes. These are the "native" formats read from the binary file (read in from the Tracking NTuple). In principle, since we are planning to migrate to CMSSW eventually, these classes (as well as Event) may be trimmed to just read straight from CMSSW native formats.
+
+- Hit object contains hit parameters, covariance, and a global ID. The global ID is used for gaining more information on the MC generation of that hit.
+- Track object is simply the track parameters, covariance, charge, track ID, and hit indices + layers. 
+- TrackExtra contains additional information about each track, e.g. associated MC info, seed hits, etc. A Track's TrackExtra is accessed through the track label, which is the index inside the vector of tracks. 
+
+### Config.[h,cc]
+
+Contains the Config namespace. Specifies configurable parameters for the code. For example: number of candidates to create for each track, chi2 cut, number of seeds to process per thread, etc. Also contains functions used for dynamically setting other parameters based on options selected. 
+
+Tracker Geometry plugin also initialized here.
+
+### Validation code
+
+Described in validation manifesto. See Section 8 for more info on manifesto.
+
+### TO DO
+
+- flesh out sections as needed
+- GPU specific code?
+
+### Vestigial code
+
+There are some sections of code that are not in use anymore and/or are not regularly updated. A short list is here:
+- main.cc : Old SMatrix implementation of the code, which is sometimes referred to as the "serial" version of the code.
+- USolids/ : Directory for implementing USolids geometry package. Originally implemented in SMatrix code.
+- seedtest[.h,.cc] : SMatrix seeding
+- buildtest[.h,.cc] : SMatrix building
+- fittest[.h,.cc] : SMatrix fitting
+- ConformalUtils[.h,.cc] : SMatrix conformal fitter for seeding/fitting
+- (possibly) Propagation[.h,.cc] : currently in use by the currently defunct Simulation[.h,.cc]. In reality, will probably move simulation code to MPlex format, which will deprecate this code.
+- KalmanUtils[.h,.cc] : SMatrix Kalman Update
+- mkFit/seedtestMPlex[.h,.cc] and all code in MkBuilder[.h,.cc] related to finding seeds with our own algorithm
+- mkFit/ConformalUtils[.h,.cc] : used by the seeding, although could be revived for fitting
+- additional val_scripts/ and web/ scripts not automatically updated outside of main benchmarking code
+- mtorture test/ code 
+
+## Section 8: Other helpful README's in the repository
+
+Given that this is a living repository, the comments in the code may not always be enough. Here are some useful other README's within this repo:
+- after compiling the code, do: ```./mkFit/mkFit --help``` : Describes the full list of command line options, inputs, and defaults when running mkFit. The list can also be seen in the code in mkFit/mkFit.cc, although the defaults are hidden behind Config.[h,cc], as well as mkFit.cc.
+- cmssw-trackerinfo-desc.txt : Describes the structure of the CMS Phase-I geometry as represented within this repo.
+- index-desc.txt : Describes the various hit and track indices used by different sets of tracks throughout the different stages of the read in, seeding, building, fitting, and validation.
+- validation-desc.txt : The validation manifesto: (somewhat) up-to-date description of the full physics validation suite. It is complemented by a somewhat out-of-date [code flow diagram](https://indico.cern.ch/event/656884/contributions/2676532/attachments/1513662/2363067/validation_flow_diagram-v4.pdf).
+- web/README_WEBPLOTS.md : A short markdown file on how to setup a website with an AFS or EOS directory on LXPLUS (best when viewed from a web browser, like this README).
+
+## Section 9: CMSSW integration
+
+The supported CMSSW version is currently `11_2_0`. The
+integration of `mkFit` in CMSSW is based on setting it up as a CMSSW
+external.
+
+### Section 9.i: Considerations for `mkFit` code
+
+The multi-threaded CMSSW framework, and the iterative nature of CMS
+tracking impose some constraints on `mkFit` code (that are not all met
+yet). Note that not all are mandatory per se, but they would make the
+life easier for everybody.
+
+* A single instance of `mkFit` should correspond to a single track building iteration
+* There should be no global non-const variables
+  - Currently there are non-const global variables e.g. in `Config` namespace
+* All iteration-specific parameters should be passed from CMSSW to `mkFit` at run time
+
+### Section 9.ii: Building and setting up `mkFit` for CMSSW
+
+#### Section 9.ii.a: Build `mkFit`
+
+To be used from CMSSW the `mkFit` must be built with the CMSSW
+toolchain. Assuming you are in an empty directory, the following
+recipe will set up a CMSSW developer area and a `mkFit` area there,
+and compile `mkFit` using the CMSSW toolchain.
+
+**Note:** The recipes have been tested on `lxplus` and on `phi3`.
+Currently there is no working recipe to compile with `icc` on LPC.
+
+##### Section 9.ii.a.a: Lxplus
+
+```bash
+cmsrel CMSSW_11_2_0
+pushd CMSSW_11_2_0/src
+cmsenv
+git cms-init
+popd
+git clone git@github.com:trackreco/mkFit
+pushd mkFit
+make -j 12 TBB_PREFIX=$(dirname $(cd $CMSSW_BASE && scram tool tag tbb INCLUDE)) CXX=g++ WITH_ROOT=1 VEC_GCC="-march=core2"
+popd
+```
+
+##### Section 9.ii.a.b: Phi3
+
+```bash
+source /cvmfs/cms.cern.ch/cmsset_default.sh
+source /opt/intel/bin/compilervars.sh intel64
+export SCRAM_ARCH=slc7_amd64_gcc900
+cmsrel CMSSW_11_2_0
+pushd CMSSW_11_2_0/src
+cmsenv
+git cms-init
+popd
+git clone git@github.com:trackreco/mkFit
+pushd mkFit
+# for gcc CMSSW "default" build:
+#   1) call "unset INTEL_LICENSE_FILE", or do not source compilervars.sh above
+#   2) replace AVX* with VEC_GCC="-msse3"
+make -j 12 TBB_PREFIX=$(dirname $(cd $CMSSW_BASE && scram tool tag tbb INCLUDE)) WITH_ROOT=1 AVX2:=1
+popd
+```
+
+#### Section 9.ii.b: Set up `mkFit` as an external
+
+Assuming you are in the aforementioned parent directory, the following
+recipe will create a scram tool file, and set up scram to use it
+
+```bash
+pushd CMSSW_11_2_0/src
+cat <<EOF >mkfit.xml
+<tool name="mkfit" version="1.0">
+  <client>
+    <environment name="MKFITBASE" default="$PWD/../../mkFit"/>
+    <environment name="LIBDIR" default="\$MKFITBASE/lib"/>
+    <environment name="INCLUDE" default="\$MKFITBASE"/>
+  </client>
+  <runtime name="MKFIT_BASE" value="\$MKFITBASE"/>
+  <lib name="MicCore"/>
+  <lib name="MkFit"/>
+</tool>
+EOF
+scram setup mkfit.xml
+cmsenv
+```
+
+#### Section 9.ii.c: Pull CMSSW code and build
+
+The following recipe will pull the necessary CMSSW-side code and build it
+
+```bash
+# in CMSSW_11_2_0/src
+git cms-remote add trackreco
+git fetch trackreco
+git checkout -b CMSSW_11_2_0_mkFit_X trackreco/CMSSW_11_2_0_mkFit_X
+git cms-addpkg $(git diff $CMSSW_VERSION --name-only | cut -d/ -f-2 | uniq)
+git cms-checkdeps -a
+scram b -j 12
+```
+
+### Section 9.iii: Recipes for the impatient on phi3
+
+#### Section 9.iii.a: Offline tracking
+
+`trackingOnly` reconstruction, DQM, and VALIDATION.
+
+```bash
+# in CMSSW_11_2_0/src
+
+# sample = 10mu, ttbarnopu, ttbarpu35, ttbarpu50, ttbarpu70
+# mkfit = 'all', 'InitialStep', ..., 'InitialStep,LowPtQuadStep', ..., ''
+# timing = '', 'framework', 'FastTimerService'
+# (maxEvents = 0, <N>, -1)
+# nthreads = 1, <N>
+# nstreams = 0, <N>
+# trackingNtuple = '', 'generalTracks', 'InitialStep', ...
+# jsonPatch = '', <path-to-JSON-file>
+# for core pinning prepend e.g. for nthreads=8 "taskset -c 0,32,1,33,2,34,3,35" 
+#     0,32 will correspond to the same physical core with 2-way hyperthreading
+#     the step is 32 for phi3; check /proc/cpuinfo for same physical id
+cmsRun RecoTracker/MkFit/test/reco_cfg.py sample=ttbarpu50 timing=1
+```
+* The default values for the command line parameters are the first ones.
+* `mkfit=1` runs MkFit, `0` runs CMSSW tracking
+* The job produces `step3_inDQM.root` that needs to be "harvested" to
+  get a "normal" ROOT file with the histograms.
+* If `maxEvents` is set to `0`, the number of events to be processed
+  is set to a relatively small value depending on the sample for short
+  testing purposes.
+* Setting `maxEvents=-1` means to process all events.
+* `nthreads` sets the number of threads (default 1), and `nstreams`
+  the number of EDM streams (or events in flight, default 0, meaning
+  the same value as the number of threads)
+* [TrackingNtuple](https://github.com/cms-sw/cmssw/blob/master/Validation/RecoTrack/README.md#ntuple)
+  can be enabled either for general tracks (`generalTracks`) or for
+  individual iterations (e.g. `InitialStep`). See
+  [here](https://github.com/cms-sw/cmssw/blob/master/Validation/RecoTrack/README.md#using-tracks-from-a-single-iteration-as-an-input)
+  for how the track selection MVA and vertex collection are set
+  differently between the two modes.
+* Iteration configuration can be patched with a JSON file with
+  `jsonPatch` parameter (corresponds to `--json-patch` in the
+  standalone program)
+
+DQM harvesting
+```bash
+cmsRun RecoTracker/MkFit/test/reco_harvest_cfg.py
+```
+* Produces `DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root`
+
+Producing plots
+```bash
+makeTrackValidationPlots.py --extended --ptcut <DQM file> [<another DQM file>]
+```
+* Produces `plots` directory with PDF files and HTML pages for
+  navigation. Copy the directory to your web area of choice.
+* See `makeTrackValidationPlots.py --help` for more options
+
+#### Section 9.iii.b: HLT tracking (iter0)
+
+**Note: this subsection has not yet been updated to 11_2_0**
+
+HLT reconstruction
+
+```bash
+# in CMSSW_10_4_0_patch1/src
+
+# in addition to the offline tracking options
+# hltOnDemand = 0, 1
+# hltIncludeFourthHit = 0, 1
+cmsRun RecoTracker/MkFit/test/hlt_cfg.py sample=ttbarpu50 timing=1
+```
+* The default values for the command line parameters are the first ones.
+* For options that are same as in offline tracking, see above
+* Setting `hltOnDemand=1` makes the strip local reconstruction to be
+  run in the "on-demand" mode (which is the default in real HLT but
+  not here). Note that `hltOnDemand=1` works only with `mkfit=0`.
+* Setting `hltIncludeFourthHit=1` changes the (HLT-default) behavior
+  of the EDProducer that converts pixel tracks to `TrajectorySeed`
+  objects to include also the fourth, outermost hit of the pixel track
+  in the seed.
+
+DQM harvesting (unless running timing)
+```bash
+cmsRun RecoTracker/MkFit/test/hlt_harvest.py
+```
+
+Producing plots (unless running timing)
+```bash
+makeTrackValidationPlots.py --extended <DQM file> [<another DQM file>]
+```
+
+### Section 9.iv: More thorough instructions
+
+#### Section 9.iv.a: Offline tracking
+
+**Note: this subsection has not yet been updated to 11_2_0**
+
+The example below uses 2018 tracking-only workflow
+
+```bash
+# Generate configuration
+runTheMatrix.py -l 10824.1 --apply 2 --command "--customise RecoTracker/MkFit/customizeInitialStepToMkFit.customizeInitialStepToMkFit --customise RecoTracker/MkFit/customizeInitialStepOnly.customizeInitialStepOnly" -j 0
+cd 10824.1*
+# edit step3*RECO*.py to contain your desired (2018 RelVal MC) input files
+cmsRun step3*RECO*.py
+```
+
+The customize function replaces the initialStep track building module
+with `mkFit`. In principle the customize function should work with any
+reconstruction configuration file.
+
+By default `mkFit` is configured to use Clone Engine with N^2 seed
+cleaning, and to do the backward fit (to the innermost hit) within `mkFit`.
+
+For profiling it is suggested to replace the
+`customizeInitialStepOnly` customize function with
+`customizeInitialStepOnlyNoMTV`. See below for more details.
+
+##### Section 9.iv.a.a: Customize functions
+
+* `RecoTracker/MkFit/customizeInitialStepOnly.customizeInitialStepOnly`
+  * Run only the initialStep tracking. In practice this configuration
+    runs the initialStepPreSplitting iteration, but named as
+    initialStep. MultiTrackValidator is included, and configured to
+    monitor initialStep. Intended to provide the minimal configuration
+    for CMSSW tests.
+* `RecoTracker/MkFit/customizeInitialStepOnly.customizeInitialStepOnlyNoMTV`
+  * Otherwise same as `customizeInitialStepOnly` except drops
+    MultiTrackValidator. Intended for profiling.
+
+##### Section 9.iv.a.b: Timing measurements
+
+There are several options for the CMSSW module timing measurements:
+
+- [FastTimerService](https://twiki.cern.ch/twiki/bin/viewauth/CMS/FastTimerService)
+  * Produces timing measurements as histograms in the DQM root file
+  * `makeTrackValidationPlots.py` (see next subsection) produces plots of those
+     - "Timing" -> "iterationsCPU.pdf", look for "initialStep" histogram and "Building" bin
+- Framework report `process.options = cms.untracked.PSet(wantSummary = cms.untracked.bool(True))`
+  * Prints module timings to the standard output
+  * Look for the timing of `initialStepTrackCandidates`
+- [Timing module](https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideEDMTimingAndMemory)
+  * Prints module timings to the standard output
+  * Look for the timing of `initialStepTrackCandidates`
+
+
+##### Section 9.iv.a.c: Producing MultiTrackValidator plots
+
+The `step3` above runs also the [MultiTrackValidator](https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideMultiTrackValidator).
+
+To produce the plots, first run the DQM harvesting step
+
+```bash
+cmsRun step4_HARVESTING.py
+```
+
+which produces a `DQM_V0001_R000000001__Global__CMSSW_X_Y_Z__RECO.root` file that contains all the histograms. Rename the file to something reflecting the contents, and run
+
+```bash
+makeTrackValidationPlots.py --extended --limit-tracking-algo initialStep <DQM file> [<another DQM file> ...]
+```
+
+The script produces a directory `plots` that can be copied to any web
+area. Note that the script produces an `index.html` to ease the
+navigation.
+
+### Section 9.v: Interpretation of results
+
+#### Section 9.v.a: MultiTrackValidator plots
+
+As the recipe above replaces the initialStep track building, we are
+interested in the plots of "initialStep" (in the main page), and in
+the iteration-specific page the plots on the column "Built tracks".
+Technically these are the output of the final fit of the initialStep,
+but the difference wrt. `TrackCandidate`s of `MkFitProducer` should be
+negligible.
+
+In short, the relevant plots are
+- `effandfake*` show efficiency and fake+duplicate rate vs. various quantities
+- `dupandfake*` show fake, duplicate, and pileup rates vs. various quantities (pileup rate is not that interesting for our case)
+- `distsim*` show distributions for all and reconstructed TrackingParticles (numerators and denominators of efficiencies)
+- `dist*` show distributions for all, true, fake, and duplicate tracks (numerators and denominators of fake and duplicate rates)
+- `hitsAndPt` and `hitsLayers` show various information on hits and layers
+- `resolutions*` show track parameter resolutions vs eta and pT
+- `residual*` show track parameter residuals (bias) vs eta and pT
+- `pulls` shows track parameter pulls
+- `tuning` shows chi2/ndof, chi2 probability, chi2/ndof vs eta and pT residual
+- `mva1*` show various information on the BDT track selection
+
+The tracking MC truth matching criteria are different from the mkFit
+SimVal. In MTV a track is classified as a "true track" (and a matched
+SimTrack as "reconstructed") if more than 75 % of the clusters of the
+track are linked to a single SimTrack. A cluster is linked to a
+SimTrack if the SimTrack has induced any amount of charge to any of
+the digis (= pixel or strip) of the cluster.
+
+#### Section 9.v.b: Timing
+
+When looking at the per-module timing numbers, please see the following
+table for the relevant modules to look for, and what is their purpose.
+
+| **Module in offline** | **Module in HLT** | **Description** |
+|-----------------------|-------------------|-----------------|
+| `initialStepTrackCandidatesMkFitInput` | `hltIter0PFlowCkfTrackCandidatesMkFitInput` | Input data conversion |
+| `initialStepTrackCandidatesMkFit` | `hltIter0PFlowCkfTrackCandidatesMkFit` | MkFit itself |
+| `initialStepTrackCandidates` | `hltIter0PFlowCkfTrackCandidates` | Output data conversion |
+
+The MTV timing plot of initialStep "Building" includes the
+contributions of all three modules.
+
+
+
+## Section 10: Other useful information
+
+### Section 10.i: Important Links
+
+Project Links
+- [Main development GitHub](https://github.com/trackreco/mkFit)
+- [Our project website](https://trackreco.github.io) and the [GH repo](https://github.com/trackreco/trackreco.github.io-source) hosting the web files. Feel free to edit the website repo if you have contributed a presentation, poster, or paper. 
+- Out-of-date and no longer used [project twiki](https://twiki.cern.ch/twiki/bin/viewauth/CMS/MicTrkRnD)
+- [Indico meeting page](https://indico.cern.ch/category/8433)
+- Vidyo room: Parallel_Kalman_Filter_Tracking
+- Email list-serv: mic-trk-rd@cern.ch
+
+Other Useful References
+- [CMS Run1 Tracking Paper](https://arxiv.org/abs/1405.6569)
+- [CMS Public Tracking Results](https://twiki.cern.ch/twiki/bin/view/CMSPublic/PhysicsResultsTRK)
+- [Kalman Filter in Particle Physics, paper by Rudi Fruhwirth](https://inspirehep.net/record/259509?ln=en)
+- [Kalman Filter explained simply](https://128.232.0.20/~rmf25/papers/Understanding%20the%20Basis%20of%20the%20Kalman%20Filter.pdf)
+
+### Section 10.ii: Tips and Tricks
+
+#### Section 10.ii.a: Missing Libraries and Debugging
+
+When sourcing the environment on phi3 via ```source xeon_scripts/init-env.sh```, some paths will be unset and access to local binaries may be lost. For example, since we source ROOT (and its many dependencies) over CVMFS, there may be some conflicts in loading some applications. In fact, the shell may complain about missing environment variables (emacs loves to complain about TIFF). The best way around this is to simply use CVMFS as a crutch to load in what you need.
+
+This is particularly noticeable when trying to run a debugger. To compile the code, at a minimum, we must source icc + toolkits that give us libraries for c++14. We achieve this through the dependency loading of ROOT through CVMFS (previously, we sourced devtoolset-N to grab c++14 libraries). 
+
+After sourcing and compiling and then running only to find out there is some crash, when trying to load ```mkFit``` into ``gdb`` via ```gdb ./mkFit/mkFit```, it gives rather opaque error messages about missing Python paths.
+
+This can be overcome by loading ```gdb``` over CVMFS: ```source /cvmfs/cms.cern.ch/slc7_amd64_gcc630/external/gdb/7.12.1-omkpbe2/etc/profile.d/init.sh```. At this point, the application will run normally and debugging can commence.
+
+#### Section 10.ii.b: SSH passwordless login for benchmarking scripts and web scripts
+
+When running the benchmarks, a tarball of the working directory will be ```scp```'ed to phi2 and phi1 before running tests on phi3. After the tests complete on each platform, the log files will be ```scp```'ed back to phi3 concurrently. If you do not forward your ssh keys upon login to phi3, you will have to enter your password when first shipping the code over to phi2 and phi1, and also, at some undetermined point, enter it again to receive the logs.
+
+With your favorite text editor, enter the text below into ```~/.ssh/config``` on your local machine to avoid having to type in your password for login to any phi machine (N.B. some lines are optional):
+
+```
+Host phi*.t2.ucsd.edu
+     User <phi* username>
+     ForwardAgent yes
+# lines below are for using X11 on phi* to look at plots, open new windows for emacs, etc.
+     ForwardX11 yes
+     XAuthLocation /opt/X11/bin/xauth
+# lines below are specific to macOS	     
+     AddKeysToAgent yes 
+     UseKeychain yes
+```
+
+After the benchmarks run, you may elect to use the ```web/``` scripts to transfer plots to CERN website hosted on either LXPLUS EOS or AFS. The plots will be put into a tarball, ```scp```'ed over, and then untarred remotely via ```ssh```. To avoid typing in your password for the ```web/``` scripts, you will need to use a Kerberos ticket and also modify your ```.ssh/config``` file in your home directory on the _phi_ machines with the text below:
+
+```
+Host lxplus*.cern.ch
+     User <lxplus username>
+     ForwardAgent yes
+     ForwardX11 yes
+     GSSAPIAuthentication yes
+     GSSAPIDelegateCredentials yes
+```
+
+The last two lines are specific to Kerberos's handling of ssh, which is installed on all of the _phi_ machines. In order to open a Kerberos ticket, you will need to do:
+
+```
+kinit -f <lxplus username>@CERN.CH
+```
+
+and then enter your LXPLUS password. Kerberos will keep your ticket open for a few days to allow passwordless ```ssh``` into LXPLUS. After the ticket expires, you will need to enter that same command again. So, even if you only send plots once every month to LXPLUS, this reduces the number of times of typing in your LXPLUS password from two to one :).
+
+### Section 10.iii: Acronyms/Abbreviations:
+
+[Glossary of acronyms from CMS](https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookGlossary)
+
+- AVX: Advanced Vector Extensions [flavors of AVX: AVX, AVX2, AVX512]
+- BH: Best Hit (building routine that selects only the best hit per layer when performing track building)
+- BkFit: (B)ac(k)wards Fit, i.e. perform a KF fit backwards from the last layer on the track to the first layer / PCA
+- BS: Beamspot (i.e. the luminous region of interactions)
+- CCC: Charge Cluster Cut, used to remove hits that come from out-of-time pileup
+- CE: Clone Engine (building routine that keeps N candidates per seed, performing the KF update after hits have been saved)
+- CMS: Compact Muon Solenoid
+- CMSSW: CMS Software
+- CMSSWVal: CMSSWTrack Validation, use cmssw tracks as reference set of tracks for association
+- FV: Full Vector (building routine that uses a clever way of filling Matriplexes of tracks during track building to boost vectorization, current status: deprecated)
+- GH: GitHub
+- GPU: Graphics Processing Unit
+- GUI: Graphical User Interface
+- KF: Kalman Filter
+- KNL: Knights Landing
+- MEIF: Multiple-Events-In-Flight (method for splitting events into different tasks)
+- mkFit: (m)atriplex (k)alman filter (Fit)
+- MP: Multi-Processing
+- MTV: MultiTrackValidator
+- N^2: Local seed cleaning algorithm developed by Mario and Slava
+- PCA: Point of closest approach to either the origin or the BS
+- PR: Pull Request
+- Reco: Reconstruction
+- SimVal: SimTrack validation, use simtracks as reference set of tracks for association
+- SKL-SP: Skylake Scalable Performance
+- SNB: Sandy Bridge
+- SSE: Streaming SIMD Extensions
+- STD: Standard (building routine, like Clone Engine, but performs KF update before hits are saved to a track)
+- TBB: (Intel) Threading Building Blocks, open source library from Intel to perform tasks in a multithreaded environment
+- TH: Threads
+- VU: (loosely) Vector Units
diff --git a/RecoTracker/MkFitCore/standalone/README_multipleIterations.txt b/RecoTracker/MkFitCore/standalone/README_multipleIterations.txt
new file mode 100644
index 0000000000000..b62ca17217632
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/README_multipleIterations.txt
@@ -0,0 +1,83 @@
+# Description of implementation of multiple mkFit iterations
+
+- The branch is up-to-date with respect to devel.
+
+- The main changes in this branch affect the following files:
+
+(1) mkFit/SteeringParams.h:
+ - three additional classes, which are iteration-dependent:
+ (a) IterationParams: a container for 'nlayers_per_seed', 'maxCandsPerSeed', 'maxHolesPerCand', 'maxConsecHoles', and 'chi2Cut';
+ (b) IterationLayerConfig: a container for layer-specific iteration-dependent configurations (e.g., hit selection windows);
+ (c) IterationConfig: a container for all of the above, including a virtual function to import seeds (import_seeds)
+ - one additional struct, which is iteration-dependent:
+ (a) MkSeedPacket: a container of iteration-specific event objects ('m_seedEtaSeparators_', 'm_seedMinLastLayer_', 'm_seedMaxLastLayer_', 'm_layerHits_', 'm_inseeds', 'm_outtrks')
+
+(2) Geoms/CMS-2017.cc:
+ - an instance of IterationConfig is created in Geoms/CMS-2017.cc, to be passed to MkBuilder constructor, which sets all iteration-dependent objects/parameters.
+
+(3) mkFit/MkBuilder[.cc,.h]
+ - all iteration-dependent parameters (regions, steering parameters, etc.) are moved out of MkBuilder, and into IterationConfig, which must be passed to MkBuilder constructor to have one MkBuilder per iteration.
+
+
+-------------------------------------------------------------------------------
+
+MT Notes:
+
+* RegionOfSeedIndices rosi(m_event, region); <---- event
+
+* bkfit --> takes tracks from event->candidateTracks
+  This is a somewhat more general problem ... flow of tracks through processing and
+  when should they be copied out / extracted (found / fitted / etc).
+  Especially re validation.
+
+
+
+-------------------------------------------------------------------------------
+
+VALIDATION
+
+tested validation with 3 iterations scripts (for running validation forConf)
+
+./val_scripts/validation-cmssw-benchmarks-multiiter.sh
+./web/collectBenchmarks-multi.sh  
+
+the features added to the validation Trees are the following
+
+FR Trees
+  - algorithm: as the tree entries are by seed, the seed "algorithm" (i.e. the iteration number used in cmssw) is saved
+
+EFF Trees
+  
+  - itermask_[seed/build//fit]
+  - iterduplmask_[seed/build//fit]
+  - algo_seed
+
+  these are 3 binary masks of 64 bits, where bits are turned on depending on the iteration matching a sim track, as the entries are organized by sim track
+  
+  itermask_ :
+    a sim track is matching at least to a track with algorithm M if the bit M of the bit mask is on
+    multiple bits can be on, if the sim track matched to tracks of multiple iterations
+  
+  iterduplmask_ :
+    a sim track is matching at least twice (duplicate in the iteration) a track with algorithm M if the bit M of the bit mask is on
+
+  algo_seed:
+    to be used in SIMVALSEED
+    bit M is on if the seed matching to the sim track comes from the iteration with code M
+    
+      
+  how to use binary masks (example) : 
+
+  the simtrack matches to the iteration with algo = 4, 22, 23 ... ->  (itermask_[]>>algo)&1 
+  the simtrack matches twice to the iteration with algo = 4, 22, 23 ... ->  (iterduplmask_[]>>algo)&1 
+  the simtrack matches to a seed with algo = 4, 22, 23 ... ->  (algo_seed>>algo)&1 
+
+
+The script val_scripts/validation-cmssw-benchmarks-multiiter.sh produces 4 sets of plots: 3 iteration specific validation plots and 1 global validation
+
+The settings are the same as for the forConf suite, i.e. comparing CE (build-mimi) to CMSSW - no STD build. the validation setups are the usual SIMVAL and SIMVALSEED (MTV).
+In the iteration specific the itermask_[seed/build//fit] and iterduplmask_[seed/build//fit] are used to define efficiency and duplicate rates, algo_seed is also required in SIMVALSEED.
+The global validation is similar to the one used for the initial step only (no bit masks used). It can be useful to check the global absolute efficiency after adding iterations after each other.
+On the other hand, the comparison of fakes and duplicates between mkFit and cmssw is not totally fair, as different types of cleaning are applied to the two collections.
+
+
diff --git a/RecoTracker/MkFitCore/standalone/TTreeValidation.cc b/RecoTracker/MkFitCore/standalone/TTreeValidation.cc
new file mode 100644
index 0000000000000..1e2e93ac745f9
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/TTreeValidation.cc
@@ -0,0 +1,2978 @@
+#include "TTreeValidation.h"
+#include "Event.h"
+#include "RecoTracker/MkFitCore/interface/Config.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+#include "RecoTracker/MkFitCore/interface/IterationConfig.h"
+
+#ifndef NO_ROOT
+
+namespace mkfit {
+
+  TTreeValidation::TTreeValidation(std::string fileName, const TrackerInfo* trk_info) {
+    // Opens the output file and books only the trees enabled in Config; glock_ is held throughout.
+    std::lock_guard<std::mutex> guard(glock_);
+    gROOT->ProcessLine("#include <vector>");  // presumably so ROOT can handle std::vector branches
+    ntotallayers_fit_ = trk_info->n_layers();
+
+    // ROOT's context management implicitly assumes that a file is opened and closed on the
+    // same thread (KPM via DSR's ROOT wizardry). The local TContext below works around that:
+    // its destructor unregisters the context when it goes out of scope, so registration and
+    // unregistration happen on the same thread. (This is done for both TFiles and TTrees.)
+    TDirectory::TContext contextEraser;
+    f_.reset(TFile::Open(fileName.c_str(), "recreate"));
+
+    const bool bookSimTrees = Config::sim_val_for_cmssw || Config::sim_val;
+    if (bookSimTrees) {
+      initializeEfficiencyTree();
+      initializeFakeRateTree();
+    }
+    if (Config::cmssw_val) {
+      initializeCMSSWEfficiencyTree();
+      initializeCMSSWFakeRateTree();
+    }
+    if (Config::fit_val) {
+      for (int ifv = 0; ifv < nfvs_; ++ifv)
+        fvs_[ifv].resize(ntotallayers_fit_);
+      initializeFitTree();
+    }
+    initializeConfigTree();  // the config tree is always booked
+  }
+
+  void TTreeValidation::initializeEfficiencyTree() {
+    // Book "efftree" (attached to no ROOT directory) with one branch per efficiency quantity.
+    efftree_ = std::make_unique<TTree>("efftree", "efftree");
+    TTree& tree = *efftree_;
+    tree.SetDirectory(0);
+    tree.Branch("evtID", &evtID_eff_);
+    tree.Branch("mcID", &mcID_eff_);
+
+    tree.Branch("nHits_mc", &nHits_mc_eff_);
+    tree.Branch("nLayers_mc", &nLayers_mc_eff_);
+    tree.Branch("lastlyr_mc", &lastlyr_mc_eff_);
+
+    tree.Branch("seedID_seed", &seedID_seed_eff_);
+    tree.Branch("seedID_build", &seedID_build_eff_);
+    tree.Branch("seedID_fit", &seedID_fit_eff_);
+
+    tree.Branch("x_mc_gen", &x_mc_gen_eff_);
+    tree.Branch("y_mc_gen", &y_mc_gen_eff_);
+    tree.Branch("z_mc_gen", &z_mc_gen_eff_);
+
+    tree.Branch("pt_mc_gen", &pt_mc_gen_eff_);
+    tree.Branch("phi_mc_gen", &phi_mc_gen_eff_);
+    tree.Branch("eta_mc_gen", &eta_mc_gen_eff_);
+
+    tree.Branch("mcmask_seed", &mcmask_seed_eff_);
+    tree.Branch("mcmask_build", &mcmask_build_eff_);
+    tree.Branch("mcmask_fit", &mcmask_fit_eff_);
+
+    tree.Branch("mcTSmask_seed", &mcTSmask_seed_eff_);
+    tree.Branch("mcTSmask_build", &mcTSmask_build_eff_);
+    tree.Branch("mcTSmask_fit", &mcTSmask_fit_eff_);
+
+    tree.Branch("xhit_seed", &xhit_seed_eff_);
+    tree.Branch("xhit_build", &xhit_build_eff_);
+    tree.Branch("xhit_fit", &xhit_fit_eff_);
+
+    tree.Branch("yhit_seed", &yhit_seed_eff_);
+    tree.Branch("yhit_build", &yhit_build_eff_);
+    tree.Branch("yhit_fit", &yhit_fit_eff_);
+
+    tree.Branch("zhit_seed", &zhit_seed_eff_);
+    tree.Branch("zhit_build", &zhit_build_eff_);
+    tree.Branch("zhit_fit", &zhit_fit_eff_);
+
+    tree.Branch("pt_mc_seed", &pt_mc_seed_eff_);
+    tree.Branch("pt_seed", &pt_seed_eff_);
+    tree.Branch("ept_seed", &ept_seed_eff_);
+    tree.Branch("pt_mc_build", &pt_mc_build_eff_);
+    tree.Branch("pt_build", &pt_build_eff_);
+    tree.Branch("ept_build", &ept_build_eff_);
+    tree.Branch("pt_mc_fit", &pt_mc_fit_eff_);
+    tree.Branch("pt_fit", &pt_fit_eff_);
+    tree.Branch("ept_fit", &ept_fit_eff_);
+
+    tree.Branch("phi_mc_seed", &phi_mc_seed_eff_);
+    tree.Branch("phi_seed", &phi_seed_eff_);
+    tree.Branch("ephi_seed", &ephi_seed_eff_);
+    tree.Branch("phi_mc_build", &phi_mc_build_eff_);
+    tree.Branch("phi_build", &phi_build_eff_);
+    tree.Branch("ephi_build", &ephi_build_eff_);
+    tree.Branch("phi_mc_fit", &phi_mc_fit_eff_);
+    tree.Branch("phi_fit", &phi_fit_eff_);
+    tree.Branch("ephi_fit", &ephi_fit_eff_);
+
+    tree.Branch("eta_mc_seed", &eta_mc_seed_eff_);
+    tree.Branch("eta_seed", &eta_seed_eff_);
+    tree.Branch("eeta_seed", &eeta_seed_eff_);
+    tree.Branch("eta_mc_build", &eta_mc_build_eff_);
+    tree.Branch("eta_build", &eta_build_eff_);
+    tree.Branch("eeta_build", &eeta_build_eff_);
+    tree.Branch("eta_mc_fit", &eta_mc_fit_eff_);
+    tree.Branch("eta_fit", &eta_fit_eff_);
+    tree.Branch("eeta_fit", &eeta_fit_eff_);
+
+    tree.Branch("nHits_seed", &nHits_seed_eff_);
+    tree.Branch("nHits_build", &nHits_build_eff_);
+    tree.Branch("nHits_fit", &nHits_fit_eff_);
+
+    tree.Branch("nLayers_seed", &nLayers_seed_eff_);
+    tree.Branch("nLayers_build", &nLayers_build_eff_);
+    tree.Branch("nLayers_fit", &nLayers_fit_eff_);
+
+    tree.Branch("nHitsMatched_seed", &nHitsMatched_seed_eff_);
+    tree.Branch("nHitsMatched_build", &nHitsMatched_build_eff_);
+    tree.Branch("nHitsMatched_fit", &nHitsMatched_fit_eff_);
+
+    tree.Branch("fracHitsMatched_seed", &fracHitsMatched_seed_eff_);
+    tree.Branch("fracHitsMatched_build", &fracHitsMatched_build_eff_);
+    tree.Branch("fracHitsMatched_fit", &fracHitsMatched_fit_eff_);
+
+    tree.Branch("lastlyr_seed", &lastlyr_seed_eff_);
+    tree.Branch("lastlyr_build", &lastlyr_build_eff_);
+    tree.Branch("lastlyr_fit", &lastlyr_fit_eff_);
+
+    tree.Branch("dphi_seed", &dphi_seed_eff_);
+    tree.Branch("dphi_build", &dphi_build_eff_);
+    tree.Branch("dphi_fit", &dphi_fit_eff_);
+
+    tree.Branch("hitchi2_seed", &hitchi2_seed_eff_);
+    tree.Branch("hitchi2_build", &hitchi2_build_eff_);
+    tree.Branch("hitchi2_fit", &hitchi2_fit_eff_);
+
+    tree.Branch("score_seed", &score_seed_eff_);
+    tree.Branch("score_build", &score_build_eff_);
+    tree.Branch("score_fit", &score_fit_eff_);
+
+    tree.Branch("helixchi2_seed", &helixchi2_seed_eff_);
+    tree.Branch("helixchi2_build", &helixchi2_build_eff_);
+    tree.Branch("helixchi2_fit", &helixchi2_fit_eff_);
+
+    tree.Branch("duplmask_seed", &duplmask_seed_eff_);
+    tree.Branch("duplmask_build", &duplmask_build_eff_);
+    tree.Branch("duplmask_fit", &duplmask_fit_eff_);
+
+    tree.Branch("nTkMatches_seed", &nTkMatches_seed_eff_);
+    tree.Branch("nTkMatches_build", &nTkMatches_build_eff_);
+    tree.Branch("nTkMatches_fit", &nTkMatches_fit_eff_);
+
+    tree.Branch("itermask_seed", &itermask_seed_eff_);
+    tree.Branch("itermask_build", &itermask_build_eff_);
+    tree.Branch("itermask_fit", &itermask_fit_eff_);
+    tree.Branch("iterduplmask_seed", &iterduplmask_seed_eff_);
+    tree.Branch("iterduplmask_build", &iterduplmask_build_eff_);
+    tree.Branch("iterduplmask_fit", &iterduplmask_fit_eff_);
+    tree.Branch("algo_seed", &algo_seed_eff_);
+
+    if (Config::keepHitInfo) {
+      tree.Branch("hitlyrs_mc", &hitlyrs_mc_eff_);
+      tree.Branch("hitlyrs_seed", &hitlyrs_seed_eff_);
+      tree.Branch("hitlyrs_build", &hitlyrs_build_eff_);
+      tree.Branch("hitlyrs_fit", &hitlyrs_fit_eff_);
+
+      tree.Branch("hitidxs_mc", &hitidxs_mc_eff_);
+      tree.Branch("hitidxs_seed", &hitidxs_seed_eff_);
+      tree.Branch("hitidxs_build", &hitidxs_build_eff_);
+      tree.Branch("hitidxs_fit", &hitidxs_fit_eff_);
+
+      tree.Branch("hitmcTkIDs_mc", &hitmcTkIDs_mc_eff_);
+      tree.Branch("hitmcTkIDs_seed", &hitmcTkIDs_seed_eff_);
+      tree.Branch("hitmcTkIDs_build", &hitmcTkIDs_build_eff_);
+      tree.Branch("hitmcTkIDs_fit", &hitmcTkIDs_fit_eff_);
+
+      tree.Branch("hitxs_mc", &hitxs_mc_eff_);
+      tree.Branch("hitxs_seed", &hitxs_seed_eff_);
+      tree.Branch("hitxs_build", &hitxs_build_eff_);
+      tree.Branch("hitxs_fit", &hitxs_fit_eff_);
+
+      tree.Branch("hitys_mc", &hitys_mc_eff_);
+      tree.Branch("hitys_seed", &hitys_seed_eff_);
+      tree.Branch("hitys_build", &hitys_build_eff_);
+      tree.Branch("hitys_fit", &hitys_fit_eff_);
+
+      tree.Branch("hitzs_mc", &hitzs_mc_eff_);
+      tree.Branch("hitzs_seed", &hitzs_seed_eff_);
+      tree.Branch("hitzs_build", &hitzs_build_eff_);
+      tree.Branch("hitzs_fit", &hitzs_fit_eff_);
+    }
+  }
+
+  void TTreeValidation::initializeFakeRateTree() {
+    // Book "frtree" (attached to no ROOT directory) with one branch per fake-rate quantity.
+    frtree_ = std::make_unique<TTree>("frtree", "frtree");
+    TTree& tree = *frtree_;
+    tree.SetDirectory(0);
+    tree.Branch("evtID", &evtID_FR_);
+    tree.Branch("seedID", &seedID_FR_);
+
+    tree.Branch("seedmask_seed", &seedmask_seed_FR_);
+    tree.Branch("seedmask_build", &seedmask_build_FR_);
+    tree.Branch("seedmask_fit", &seedmask_fit_FR_);
+
+    tree.Branch("xhit_seed", &xhit_seed_FR_);
+    tree.Branch("xhit_build", &xhit_build_FR_);
+    tree.Branch("xhit_fit", &xhit_fit_FR_);
+
+    tree.Branch("yhit_seed", &yhit_seed_FR_);
+    tree.Branch("yhit_build", &yhit_build_FR_);
+    tree.Branch("yhit_fit", &yhit_fit_FR_);
+
+    tree.Branch("zhit_seed", &zhit_seed_FR_);
+    tree.Branch("zhit_build", &zhit_build_FR_);
+    tree.Branch("zhit_fit", &zhit_fit_FR_);
+
+    tree.Branch("pt_seed", &pt_seed_FR_);
+    tree.Branch("ept_seed", &ept_seed_FR_);
+    tree.Branch("pt_build", &pt_build_FR_);
+    tree.Branch("ept_build", &ept_build_FR_);
+    tree.Branch("pt_fit", &pt_fit_FR_);
+    tree.Branch("ept_fit", &ept_fit_FR_);
+
+    tree.Branch("phi_seed", &phi_seed_FR_);
+    tree.Branch("ephi_seed", &ephi_seed_FR_);
+    tree.Branch("phi_build", &phi_build_FR_);
+    tree.Branch("ephi_build", &ephi_build_FR_);
+    tree.Branch("phi_fit", &phi_fit_FR_);
+    tree.Branch("ephi_fit", &ephi_fit_FR_);
+
+    tree.Branch("eta_seed", &eta_seed_FR_);
+    tree.Branch("eeta_seed", &eeta_seed_FR_);
+    tree.Branch("eta_build", &eta_build_FR_);
+    tree.Branch("eeta_build", &eeta_build_FR_);
+    tree.Branch("eta_fit", &eta_fit_FR_);
+    tree.Branch("eeta_fit", &eeta_fit_FR_);
+
+    tree.Branch("nHits_seed", &nHits_seed_FR_);
+    tree.Branch("nHits_build", &nHits_build_FR_);
+    tree.Branch("nHits_fit", &nHits_fit_FR_);
+
+    tree.Branch("nLayers_seed", &nLayers_seed_FR_);
+    tree.Branch("nLayers_build", &nLayers_build_FR_);
+    tree.Branch("nLayers_fit", &nLayers_fit_FR_);
+
+    tree.Branch("nHitsMatched_seed", &nHitsMatched_seed_FR_);
+    tree.Branch("nHitsMatched_build", &nHitsMatched_build_FR_);
+    tree.Branch("nHitsMatched_fit", &nHitsMatched_fit_FR_);
+
+    tree.Branch("fracHitsMatched_seed", &fracHitsMatched_seed_FR_);
+    tree.Branch("fracHitsMatched_build", &fracHitsMatched_build_FR_);
+    tree.Branch("fracHitsMatched_fit", &fracHitsMatched_fit_FR_);
+
+    tree.Branch("lastlyr_seed", &lastlyr_seed_FR_);
+    tree.Branch("lastlyr_build", &lastlyr_build_FR_);
+    tree.Branch("lastlyr_fit", &lastlyr_fit_FR_);
+
+    tree.Branch("dphi_seed", &dphi_seed_FR_);
+    tree.Branch("dphi_build", &dphi_build_FR_);
+    tree.Branch("dphi_fit", &dphi_fit_FR_);
+
+    tree.Branch("hitchi2_seed", &hitchi2_seed_FR_);
+    tree.Branch("hitchi2_build", &hitchi2_build_FR_);
+    tree.Branch("hitchi2_fit", &hitchi2_fit_FR_);
+
+    tree.Branch("score_seed", &score_seed_FR_);
+    tree.Branch("score_build", &score_build_FR_);
+    tree.Branch("score_fit", &score_fit_FR_);
+
+    // sim (mc) information associated with the seed, build and fit tracks
+    tree.Branch("mcID_seed", &mcID_seed_FR_);
+    tree.Branch("mcID_build", &mcID_build_FR_);
+    tree.Branch("mcID_fit", &mcID_fit_FR_);
+
+    tree.Branch("mcmask_seed", &mcmask_seed_FR_);
+    tree.Branch("mcmask_build", &mcmask_build_FR_);
+    tree.Branch("mcmask_fit", &mcmask_fit_FR_);
+
+    tree.Branch("mcTSmask_seed", &mcTSmask_seed_FR_);
+    tree.Branch("mcTSmask_build", &mcTSmask_build_FR_);
+    tree.Branch("mcTSmask_fit", &mcTSmask_fit_FR_);
+
+    tree.Branch("pt_mc_seed", &pt_mc_seed_FR_);
+    tree.Branch("pt_mc_build", &pt_mc_build_FR_);
+    tree.Branch("pt_mc_fit", &pt_mc_fit_FR_);
+
+    tree.Branch("phi_mc_seed", &phi_mc_seed_FR_);
+    tree.Branch("phi_mc_build", &phi_mc_build_FR_);
+    tree.Branch("phi_mc_fit", &phi_mc_fit_FR_);
+
+    tree.Branch("eta_mc_seed", &eta_mc_seed_FR_);
+    tree.Branch("eta_mc_build", &eta_mc_build_FR_);
+    tree.Branch("eta_mc_fit", &eta_mc_fit_FR_);
+
+    tree.Branch("nHits_mc_seed", &nHits_mc_seed_FR_);
+    tree.Branch("nHits_mc_build", &nHits_mc_build_FR_);
+    tree.Branch("nHits_mc_fit", &nHits_mc_fit_FR_);
+
+    tree.Branch("nLayers_mc_seed", &nLayers_mc_seed_FR_);
+    tree.Branch("nLayers_mc_build", &nLayers_mc_build_FR_);
+    tree.Branch("nLayers_mc_fit", &nLayers_mc_fit_FR_);
+
+    tree.Branch("lastlyr_mc_seed", &lastlyr_mc_seed_FR_);
+    tree.Branch("lastlyr_mc_build", &lastlyr_mc_build_FR_);
+    tree.Branch("lastlyr_mc_fit", &lastlyr_mc_fit_FR_);
+
+    tree.Branch("helixchi2_seed", &helixchi2_seed_FR_);
+    tree.Branch("helixchi2_build", &helixchi2_build_FR_);
+    tree.Branch("helixchi2_fit", &helixchi2_fit_FR_);
+
+    tree.Branch("duplmask_seed", &duplmask_seed_FR_);
+    tree.Branch("duplmask_build", &duplmask_build_FR_);
+    tree.Branch("duplmask_fit", &duplmask_fit_FR_);
+
+    tree.Branch("iTkMatches_seed", &iTkMatches_seed_FR_);
+    tree.Branch("iTkMatches_build", &iTkMatches_build_FR_);
+    tree.Branch("iTkMatches_fit", &iTkMatches_fit_FR_);
+
+    tree.Branch("algorithm", &algorithm_FR_);
+
+    if (Config::keepHitInfo) {
+      tree.Branch("hitlyrs_seed", &hitlyrs_seed_FR_);
+      tree.Branch("hitlyrs_mc_seed", &hitlyrs_mc_seed_FR_);
+      tree.Branch("hitlyrs_build", &hitlyrs_build_FR_);
+      tree.Branch("hitlyrs_mc_build", &hitlyrs_mc_build_FR_);
+      tree.Branch("hitlyrs_fit", &hitlyrs_fit_FR_);
+      tree.Branch("hitlyrs_mc_fit", &hitlyrs_mc_fit_FR_);
+
+      tree.Branch("hitidxs_seed", &hitidxs_seed_FR_);
+      tree.Branch("hitidxs_mc_seed", &hitidxs_mc_seed_FR_);
+      tree.Branch("hitidxs_build", &hitidxs_build_FR_);
+      tree.Branch("hitidxs_mc_build", &hitidxs_mc_build_FR_);
+      tree.Branch("hitidxs_fit", &hitidxs_fit_FR_);
+      tree.Branch("hitidxs_mc_fit", &hitidxs_mc_fit_FR_);
+
+      tree.Branch("hitmcTkIDs_seed", &hitmcTkIDs_seed_FR_);
+      tree.Branch("hitmcTkIDs_mc_seed", &hitmcTkIDs_mc_seed_FR_);
+      tree.Branch("hitmcTkIDs_build", &hitmcTkIDs_build_FR_);
+      tree.Branch("hitmcTkIDs_mc_build", &hitmcTkIDs_mc_build_FR_);
+      tree.Branch("hitmcTkIDs_fit", &hitmcTkIDs_fit_FR_);
+      tree.Branch("hitmcTkIDs_mc_fit", &hitmcTkIDs_mc_fit_FR_);
+
+      tree.Branch("hitxs_seed", &hitxs_seed_FR_);
+      tree.Branch("hitxs_mc_seed", &hitxs_mc_seed_FR_);
+      tree.Branch("hitxs_build", &hitxs_build_FR_);
+      tree.Branch("hitxs_mc_build", &hitxs_mc_build_FR_);
+      tree.Branch("hitxs_fit", &hitxs_fit_FR_);
+      tree.Branch("hitxs_mc_fit", &hitxs_mc_fit_FR_);
+
+      tree.Branch("hitys_seed", &hitys_seed_FR_);
+      tree.Branch("hitys_mc_seed", &hitys_mc_seed_FR_);
+      tree.Branch("hitys_build", &hitys_build_FR_);
+      tree.Branch("hitys_mc_build", &hitys_mc_build_FR_);
+      tree.Branch("hitys_fit", &hitys_fit_FR_);
+      tree.Branch("hitys_mc_fit", &hitys_mc_fit_FR_);
+
+      tree.Branch("hitzs_seed", &hitzs_seed_FR_);
+      tree.Branch("hitzs_mc_seed", &hitzs_mc_seed_FR_);
+      tree.Branch("hitzs_build", &hitzs_build_FR_);
+      tree.Branch("hitzs_mc_build", &hitzs_mc_build_FR_);
+      tree.Branch("hitzs_fit", &hitzs_fit_FR_);
+      tree.Branch("hitzs_mc_fit", &hitzs_mc_fit_FR_);
+    }
+  }
+
+  void TTreeValidation::initializeConfigTree() {
+    // Book "configtree" (attached to no ROOT directory): config values plus real seeding parameters.
+    configtree_ = std::make_unique<TTree>("configtree", "configtree");
+    TTree& tree = *configtree_;
+    tree.SetDirectory(0);
+    tree.Branch("Ntracks", &Ntracks_);
+    tree.Branch("Nevents", &Nevents_);
+
+    tree.Branch("nLayers", &nLayers_);
+
+    tree.Branch("nlayers_per_seed", &nlayers_per_seed_);
+    tree.Branch("maxCand", &maxCand_);
+    tree.Branch("chi2Cut_min", &chi2Cut_min_);
+    tree.Branch("nSigma", &nSigma_);
+    tree.Branch("minDPhi", &minDPhi_);
+    tree.Branch("maxDPhi", &maxDPhi_);
+    tree.Branch("minDEta", &minDEta_);
+    tree.Branch("maxDEta", &maxDEta_);
+
+    tree.Branch("beamspotX", &beamspotX_);
+    tree.Branch("beamspotY", &beamspotY_);
+    tree.Branch("beamspotZ", &beamspotZ_);
+
+    tree.Branch("minSimPt", &minSimPt_);
+    tree.Branch("maxSimPt", &maxSimPt_);
+
+    tree.Branch("hitposerrXY", &hitposerrXY_);
+    tree.Branch("hitposerrZ", &hitposerrZ_);
+    tree.Branch("hitposerrR", &hitposerrR_);
+
+    tree.Branch("varXY", &varXY_);
+    tree.Branch("varZ", &varZ_);
+
+    tree.Branch("ptinverr049", &ptinverr049_);
+    tree.Branch("phierr049", &phierr049_);
+    tree.Branch("thetaerr049", &thetaerr049_);
+    tree.Branch("ptinverr012", &ptinverr012_);
+    tree.Branch("phierr012", &phierr012_);
+    tree.Branch("thetaerr012", &thetaerr012_);
+  }
+
+  void TTreeValidation::initializeCMSSWEfficiencyTree() {
+    // Book "cmsswefftree" (attached to no ROOT directory): efficiency relative to CMSSW reco tracks.
+    cmsswefftree_ = std::make_unique<TTree>("cmsswefftree", "cmsswefftree");
+    TTree& tree = *cmsswefftree_;
+    tree.SetDirectory(0);
+    tree.Branch("evtID", &evtID_ceff_);
+    tree.Branch("cmsswID", &cmsswID_ceff_);
+    tree.Branch("seedID_cmssw", &seedID_cmssw_ceff_);
+
+    // CMSSW track quantities
+    tree.Branch("x_cmssw", &x_cmssw_ceff_);
+    tree.Branch("y_cmssw", &y_cmssw_ceff_);
+    tree.Branch("z_cmssw", &z_cmssw_ceff_);
+
+    tree.Branch("pt_cmssw", &pt_cmssw_ceff_);
+    tree.Branch("phi_cmssw", &phi_cmssw_ceff_);
+    tree.Branch("eta_cmssw", &eta_cmssw_ceff_);
+
+    tree.Branch("nHits_cmssw", &nHits_cmssw_ceff_);
+    tree.Branch("nLayers_cmssw", &nLayers_cmssw_ceff_);
+    tree.Branch("lastlyr_cmssw", &lastlyr_cmssw_ceff_);
+
+    // build-track quantities
+    tree.Branch("cmsswmask_build", &cmsswmask_build_ceff_);
+    tree.Branch("seedID_build", &seedID_build_ceff_);
+    tree.Branch("mcTrackID_build", &mcTrackID_build_ceff_);
+
+    tree.Branch("pt_build", &pt_build_ceff_);
+    tree.Branch("ept_build", &ept_build_ceff_);
+    tree.Branch("phi_build", &phi_build_ceff_);
+    tree.Branch("ephi_build", &ephi_build_ceff_);
+    tree.Branch("eta_build", &eta_build_ceff_);
+    tree.Branch("eeta_build", &eeta_build_ceff_);
+
+    tree.Branch("x_mc_build", &x_mc_build_ceff_);
+    tree.Branch("y_mc_build", &y_mc_build_ceff_);
+    tree.Branch("z_mc_build", &z_mc_build_ceff_);
+    tree.Branch("pt_mc_build", &pt_mc_build_ceff_);
+    tree.Branch("phi_mc_build", &phi_mc_build_ceff_);
+    tree.Branch("eta_mc_build", &eta_mc_build_ceff_);
+
+    tree.Branch("nHits_build", &nHits_build_ceff_);
+    tree.Branch("nLayers_build", &nLayers_build_ceff_);
+    tree.Branch("nHitsMatched_build", &nHitsMatched_build_ceff_);
+    tree.Branch("fracHitsMatched_build", &fracHitsMatched_build_ceff_);
+    tree.Branch("lastlyr_build", &lastlyr_build_ceff_);
+
+    tree.Branch("xhit_build", &xhit_build_ceff_);
+    tree.Branch("yhit_build", &yhit_build_ceff_);
+    tree.Branch("zhit_build", &zhit_build_ceff_);
+
+    tree.Branch("hitchi2_build", &hitchi2_build_ceff_);
+    tree.Branch("helixchi2_build", &helixchi2_build_ceff_);
+    tree.Branch("score_build", &score_build_ceff_);
+    tree.Branch("dphi_build", &dphi_build_ceff_);
+
+    tree.Branch("duplmask_build", &duplmask_build_ceff_);
+    tree.Branch("nTkMatches_build", &nTkMatches_build_ceff_);
+
+    tree.Branch("itermask_build", &itermask_build_ceff_);
+    tree.Branch("iterduplmask_build", &iterduplmask_build_ceff_);
+
+    // fit-track quantities
+    tree.Branch("cmsswmask_fit", &cmsswmask_fit_ceff_);
+    tree.Branch("seedID_fit", &seedID_fit_ceff_);
+    tree.Branch("mcTrackID_fit", &mcTrackID_fit_ceff_);
+
+    tree.Branch("pt_fit", &pt_fit_ceff_);
+    tree.Branch("ept_fit", &ept_fit_ceff_);
+    tree.Branch("phi_fit", &phi_fit_ceff_);
+    tree.Branch("ephi_fit", &ephi_fit_ceff_);
+    tree.Branch("eta_fit", &eta_fit_ceff_);
+    tree.Branch("eeta_fit", &eeta_fit_ceff_);
+
+    tree.Branch("x_mc_fit", &x_mc_fit_ceff_);
+    tree.Branch("y_mc_fit", &y_mc_fit_ceff_);
+    tree.Branch("z_mc_fit", &z_mc_fit_ceff_);
+    tree.Branch("pt_mc_fit", &pt_mc_fit_ceff_);
+    tree.Branch("phi_mc_fit", &phi_mc_fit_ceff_);
+    tree.Branch("eta_mc_fit", &eta_mc_fit_ceff_);
+
+    tree.Branch("nHits_fit", &nHits_fit_ceff_);
+    tree.Branch("nLayers_fit", &nLayers_fit_ceff_);
+    tree.Branch("nHitsMatched_fit", &nHitsMatched_fit_ceff_);
+    tree.Branch("fracHitsMatched_fit", &fracHitsMatched_fit_ceff_);
+    tree.Branch("lastlyr_fit", &lastlyr_fit_ceff_);
+
+    tree.Branch("xhit_fit", &xhit_fit_ceff_);
+    tree.Branch("yhit_fit", &yhit_fit_ceff_);
+    tree.Branch("zhit_fit", &zhit_fit_ceff_);
+
+    tree.Branch("hitchi2_fit", &hitchi2_fit_ceff_);
+    tree.Branch("helixchi2_fit", &helixchi2_fit_ceff_);
+    tree.Branch("score_fit", &score_fit_ceff_);
+    tree.Branch("dphi_fit", &dphi_fit_ceff_);
+
+    tree.Branch("duplmask_fit", &duplmask_fit_ceff_);
+    tree.Branch("nTkMatches_fit", &nTkMatches_fit_ceff_);
+
+    tree.Branch("itermask_fit", &itermask_fit_ceff_);
+    tree.Branch("iterduplmask_fit", &iterduplmask_fit_ceff_);
+
+    tree.Branch("algo_seed", &algo_seed_ceff_);
+
+    if (Config::keepHitInfo) {
+      tree.Branch("hitlyrs_cmssw", &hitlyrs_cmssw_ceff_);
+      tree.Branch("hitlyrs_build", &hitlyrs_build_ceff_);
+      tree.Branch("hitlyrs_mc_build", &hitlyrs_mc_build_ceff_);
+      tree.Branch("hitlyrs_fit", &hitlyrs_fit_ceff_);
+      tree.Branch("hitlyrs_mc_fit", &hitlyrs_mc_fit_ceff_);
+
+      tree.Branch("hitidxs_cmssw", &hitidxs_cmssw_ceff_);
+      tree.Branch("hitidxs_build", &hitidxs_build_ceff_);
+      tree.Branch("hitidxs_mc_build", &hitidxs_mc_build_ceff_);
+      tree.Branch("hitidxs_fit", &hitidxs_fit_ceff_);
+      tree.Branch("hitidxs_mc_fit", &hitidxs_mc_fit_ceff_);
+    }
+  }
+
+  void TTreeValidation::initializeCMSSWFakeRateTree() {
+    // Book "cmsswfrtree" (attached to no ROOT directory): fake rate relative to CMSSW reco tracks.
+    cmsswfrtree_ = std::make_unique<TTree>("cmsswfrtree", "cmsswfrtree");
+    TTree& tree = *cmsswfrtree_;
+    tree.SetDirectory(0);
+    tree.Branch("evtID", &evtID_cFR_);
+    tree.Branch("seedID", &seedID_cFR_);
+    tree.Branch("mcTrackID", &mcTrackID_cFR_);
+
+    // mc quantities
+    tree.Branch("x_mc", &x_mc_cFR_);
+    tree.Branch("y_mc", &y_mc_cFR_);
+    tree.Branch("z_mc", &z_mc_cFR_);
+    tree.Branch("pt_mc", &pt_mc_cFR_);
+    tree.Branch("phi_mc", &phi_mc_cFR_);
+    tree.Branch("eta_mc", &eta_mc_cFR_);
+
+    // build-track quantities
+    tree.Branch("cmsswID_build", &cmsswID_build_cFR_);
+    tree.Branch("cmsswmask_build", &cmsswmask_build_cFR_);
+
+    tree.Branch("pt_build", &pt_build_cFR_);
+    tree.Branch("ept_build", &ept_build_cFR_);
+    tree.Branch("phi_build", &phi_build_cFR_);
+    tree.Branch("ephi_build", &ephi_build_cFR_);
+    tree.Branch("eta_build", &eta_build_cFR_);
+    tree.Branch("eeta_build", &eeta_build_cFR_);
+
+    tree.Branch("nHits_build", &nHits_build_cFR_);
+    tree.Branch("nLayers_build", &nLayers_build_cFR_);
+    tree.Branch("nHitsMatched_build", &nHitsMatched_build_cFR_);
+    tree.Branch("fracHitsMatched_build", &fracHitsMatched_build_cFR_);
+    tree.Branch("lastlyr_build", &lastlyr_build_cFR_);
+
+    tree.Branch("xhit_build", &xhit_build_cFR_);
+    tree.Branch("yhit_build", &yhit_build_cFR_);
+    tree.Branch("zhit_build", &zhit_build_cFR_);
+
+    tree.Branch("hitchi2_build", &hitchi2_build_cFR_);
+    tree.Branch("helixchi2_build", &helixchi2_build_cFR_);
+    tree.Branch("score_build", &score_build_cFR_);
+    tree.Branch("dphi_build", &dphi_build_cFR_);
+
+    tree.Branch("duplmask_build", &duplmask_build_cFR_);
+    tree.Branch("iTkMatches_build", &iTkMatches_build_cFR_);
+
+    tree.Branch("seedID_cmssw_build", &seedID_cmssw_build_cFR_);
+
+    tree.Branch("x_cmssw_build", &x_cmssw_build_cFR_);
+    tree.Branch("y_cmssw_build", &y_cmssw_build_cFR_);
+    tree.Branch("z_cmssw_build", &z_cmssw_build_cFR_);
+
+    tree.Branch("pt_cmssw_build", &pt_cmssw_build_cFR_);
+    tree.Branch("phi_cmssw_build", &phi_cmssw_build_cFR_);
+    tree.Branch("eta_cmssw_build", &eta_cmssw_build_cFR_);
+
+    tree.Branch("nHits_cmssw_build", &nHits_cmssw_build_cFR_);
+    tree.Branch("nLayers_cmssw_build", &nLayers_cmssw_build_cFR_);
+    tree.Branch("lastlyr_cmssw_build", &lastlyr_cmssw_build_cFR_);
+
+    // fit-track quantities
+    tree.Branch("cmsswID_fit", &cmsswID_fit_cFR_);
+    tree.Branch("cmsswmask_fit", &cmsswmask_fit_cFR_);
+
+    tree.Branch("pt_fit", &pt_fit_cFR_);
+    tree.Branch("ept_fit", &ept_fit_cFR_);
+    tree.Branch("phi_fit", &phi_fit_cFR_);
+    tree.Branch("ephi_fit", &ephi_fit_cFR_);
+    tree.Branch("eta_fit", &eta_fit_cFR_);
+    tree.Branch("eeta_fit", &eeta_fit_cFR_);
+
+    tree.Branch("nHits_fit", &nHits_fit_cFR_);
+    tree.Branch("nLayers_fit", &nLayers_fit_cFR_);
+    tree.Branch("nHitsMatched_fit", &nHitsMatched_fit_cFR_);
+    tree.Branch("fracHitsMatched_fit", &fracHitsMatched_fit_cFR_);
+    tree.Branch("lastlyr_fit", &lastlyr_fit_cFR_);
+
+    tree.Branch("xhit_fit", &xhit_fit_cFR_);
+    tree.Branch("yhit_fit", &yhit_fit_cFR_);
+    tree.Branch("zhit_fit", &zhit_fit_cFR_);
+
+    tree.Branch("hitchi2_fit", &hitchi2_fit_cFR_);
+    tree.Branch("helixchi2_fit", &helixchi2_fit_cFR_);
+    tree.Branch("score_fit", &score_fit_cFR_);
+    tree.Branch("dphi_fit", &dphi_fit_cFR_);
+
+    tree.Branch("duplmask_fit", &duplmask_fit_cFR_);
+    tree.Branch("iTkMatches_fit", &iTkMatches_fit_cFR_);
+
+    tree.Branch("seedID_cmssw_fit", &seedID_cmssw_fit_cFR_);
+
+    tree.Branch("x_cmssw_fit", &x_cmssw_fit_cFR_);
+    tree.Branch("y_cmssw_fit", &y_cmssw_fit_cFR_);
+    tree.Branch("z_cmssw_fit", &z_cmssw_fit_cFR_);
+
+    tree.Branch("pt_cmssw_fit", &pt_cmssw_fit_cFR_);
+    tree.Branch("phi_cmssw_fit", &phi_cmssw_fit_cFR_);
+    tree.Branch("eta_cmssw_fit", &eta_cmssw_fit_cFR_);
+
+    tree.Branch("nHits_cmssw_fit", &nHits_cmssw_fit_cFR_);
+    tree.Branch("nLayers_cmssw_fit", &nLayers_cmssw_fit_cFR_);
+    tree.Branch("lastlyr_cmssw_fit", &lastlyr_cmssw_fit_cFR_);
+
+    tree.Branch("algorithm", &algorithm_cFR_);
+
+    if (Config::keepHitInfo) {
+      tree.Branch("hitlyrs_mc", &hitlyrs_mc_cFR_);
+      tree.Branch("hitlyrs_build", &hitlyrs_build_cFR_);
+      tree.Branch("hitlyrs_cmssw_build", &hitlyrs_cmssw_build_cFR_);
+      tree.Branch("hitlyrs_fit", &hitlyrs_fit_cFR_);
+      tree.Branch("hitlyrs_cmssw_fit", &hitlyrs_cmssw_fit_cFR_);
+
+      tree.Branch("hitidxs_mc", &hitidxs_mc_cFR_);
+      tree.Branch("hitidxs_build", &hitidxs_build_cFR_);
+      tree.Branch("hitidxs_cmssw_build", &hitidxs_cmssw_build_cFR_);
+      tree.Branch("hitidxs_fit", &hitidxs_fit_cFR_);
+      tree.Branch("hitidxs_cmssw_fit", &hitidxs_cmssw_fit_cFR_);
+    }
+  }
+
+  void TTreeValidation::initializeFitTree() {
+    // Book "fittree": three /I scalars, then /F arrays whose length is the ntotallayers_fit_ leaf.
+    fittree_ = std::make_unique<TTree>("fittree", "fittree");
+    TTree& tree = *fittree_;
+    tree.SetDirectory(0);
+    tree.Branch("ntotallayers", &ntotallayers_fit_, "ntotallayers_fit_/I");
+    tree.Branch("tkid", &tkid_fit_, "tkid_fit_/I");
+    tree.Branch("evtid", &evtid_fit_, "evtid_fit_/I");
+    tree.Branch("z_prop", &z_prop_fit_, "z_prop_fit_[ntotallayers_fit_]/F");
+    tree.Branch("ez_prop", &ez_prop_fit_, "ez_prop_fit_[ntotallayers_fit_]/F");
+    tree.Branch("z_hit", &z_hit_fit_, "z_hit_fit_[ntotallayers_fit_]/F");
+    tree.Branch("ez_hit", &ez_hit_fit_, "ez_hit_fit_[ntotallayers_fit_]/F");
+    tree.Branch("z_sim", &z_sim_fit_, "z_sim_fit_[ntotallayers_fit_]/F");
+    tree.Branch("ez_sim", &ez_sim_fit_, "ez_sim_fit_[ntotallayers_fit_]/F");
+
+    tree.Branch("pphi_prop", &pphi_prop_fit_, "pphi_prop_fit_[ntotallayers_fit_]/F");
+    tree.Branch("epphi_prop", &epphi_prop_fit_, "epphi_prop_fit_[ntotallayers_fit_]/F");
+    tree.Branch("pphi_hit", &pphi_hit_fit_, "pphi_hit_fit_[ntotallayers_fit_]/F");
+    tree.Branch("epphi_hit", &epphi_hit_fit_, "epphi_hit_fit_[ntotallayers_fit_]/F");
+    tree.Branch("pphi_sim", &pphi_sim_fit_, "pphi_sim_fit_[ntotallayers_fit_]/F");
+    tree.Branch("epphi_sim", &epphi_sim_fit_, "epphi_sim_fit_[ntotallayers_fit_]/F");
+
+    tree.Branch("pt_up", &pt_up_fit_, "pt_up_fit_[ntotallayers_fit_]/F");
+    tree.Branch("ept_up", &ept_up_fit_, "ept_up_fit_[ntotallayers_fit_]/F");
+    tree.Branch("pt_sim", &pt_sim_fit_, "pt_sim_fit_[ntotallayers_fit_]/F");
+    tree.Branch("ept_sim", &ept_sim_fit_, "ept_sim_fit_[ntotallayers_fit_]/F");
+
+    tree.Branch("mphi_up", &mphi_up_fit_, "mphi_up_fit_[ntotallayers_fit_]/F");
+    tree.Branch("emphi_up", &emphi_up_fit_, "emphi_up_fit_[ntotallayers_fit_]/F");
+    tree.Branch("mphi_sim", &mphi_sim_fit_, "mphi_sim_fit_[ntotallayers_fit_]/F");
+    tree.Branch("emphi_sim", &emphi_sim_fit_, "emphi_sim_fit_[ntotallayers_fit_]/F");
+
+    tree.Branch("meta_up", &meta_up_fit_, "meta_up_fit_[ntotallayers_fit_]/F");
+    tree.Branch("emeta_up", &emeta_up_fit_, "emeta_up_fit_[ntotallayers_fit_]/F");
+    tree.Branch("meta_sim", &meta_sim_fit_, "meta_sim_fit_[ntotallayers_fit_]/F");
+    tree.Branch("emeta_sim", &emeta_sim_fit_, "emeta_sim_fit_[ntotallayers_fit_]/F");
+  }
+
+  void TTreeValidation::alignTracks(TrackVec& evt_tracks, TrackExtraVec& evt_extras, bool alignExtra) {
+    // Relabel tracks so label == position; optionally reorder the extras to match first.
+    std::lock_guard<std::mutex> guard(glock_);
+    const int nTracks = (int)evt_tracks.size();
+
+    if (alignExtra) {
+      // A track's current label (the old seedID) is its index into the existing extras,
+      // so gather the extras into the tracks' present ordering.
+      TrackExtraVec reordered(evt_tracks.size());
+      for (int idx = 0; idx < nTracks; idx++) {
+        const auto oldLabel = evt_tracks[idx].label();
+        reordered[idx] = evt_extras[oldLabel];
+      }
+      evt_extras = reordered;  // copy the reordered extras back over the originals
+    }
+
+    // Only now overwrite the labels; the old ones were needed for the lookup above.
+    for (int idx = 0; idx < nTracks; idx++) {
+      evt_tracks[idx].setLabel(idx);
+    }
+  }
+
+  void TTreeValidation::collectFitInfo(const FitVal& tmpfitval, int tkid, int layer) {
+    std::lock_guard<std::mutex> guard(glock_);  // held while the member map is written
+    auto& layerMap = fitValTkMapMap_[tkid];     // per-track entry
+    layerMap[layer] = tmpfitval;                // replaces any value stored earlier for this layer
+  }
+
+  void TTreeValidation::resetValidationMaps() {
+    std::lock_guard<std::mutex> guard(glock_);
+    // per-track, per-layer fit validation payloads
+    fitValTkMapMap_.clear();
+
+    // sim track --> matched seed / build / fit tracks
+    simToFitMap_.clear();
+    simToBuildMap_.clear();
+    simToSeedMap_.clear();
+
+    // seed track --> build / fit track
+    seedToFitMap_.clear();
+    seedToBuildMap_.clear();
+
+    // cmssw track --> matched build / fit tracks
+    cmsswToFitMap_.clear();
+    cmsswToBuildMap_.clear();
+
+    // seed label <--> cmssw track, both directions
+    cmsswToSeedMap_.clear();
+    seedToCmsswMap_.clear();
+
+    // build track --> cmssw track (exact pairing through seedIDs)
+    buildToCmsswMap_.clear();
+
+    // build <--> fit pairing (CMSSW only)
+    fitToBuildMap_.clear();
+    buildToFitMap_.clear();
+
+    // reco --> seed association used for sim_val_for_cmssw
+    fitToSeedMapDumbCMSSW_.clear();
+    candToSeedMapDumbCMSSW_.clear();
+  }
+
+  void TTreeValidation::setTrackExtras(Event& ev) {  // fill seed/build/fit extras with seed-hit and reference-track matching info
+    std::lock_guard<std::mutex> locker(glock_);  // held for the whole function
+
+    const auto& layerhits = ev.layerHits_;
+
+    if (Config::sim_val_for_cmssw || Config::sim_val) {  // sim-based validation: reference tracks are the sim tracks
+      const auto& simhits = ev.simHitsInfo_;
+      const auto& simtracks = ev.simTracks_;
+      const auto& seedtracks = ev.seedTracks_;
+      auto& seedextras = ev.seedTracksExtra_;
+      const auto& buildtracks = ev.candidateTracks_;
+      auto& buildextras = ev.candidateTracksExtra_;
+      const auto& fittracks = ev.fitTracks_;
+      auto& fitextras = ev.fitTracksExtra_;
+
+      // set mcTrackID for seed tracks
+      for (int itrack = 0; itrack < (int)seedtracks.size(); itrack++) {
+        const auto& track = seedtracks[itrack];
+        auto& extra = seedextras[itrack];
+
+        extra.findMatchingSeedHits(track, track, layerhits);  // the seed is passed as its own seed track
+        extra.setMCTrackIDInfo(
+            track,
+            layerhits,
+            simhits,
+            simtracks,
+            true,
+            (Config::seedInput == simSeeds));  // otherwise seeds are completely unmatched in ToyMC Sim Seeds
+      }
+
+      // set mcTrackID for built tracks
+      for (int itrack = 0; itrack < (int)buildtracks.size(); itrack++) {
+        const auto& track = buildtracks[itrack];
+        auto& extra = buildextras[itrack];
+
+        if (Config::sim_val) {
+          extra.findMatchingSeedHits(track, seedtracks[track.label()], layerhits);  // label indexes the seed collection directly
+        } else if (Config::sim_val_for_cmssw) {
+          extra.findMatchingSeedHits(track, seedtracks[candToSeedMapDumbCMSSW_[track.label()]], layerhits);  // NOTE(review): if this is a std::map, operator[] inserts 0 for an unknown label -- confirm every label is present
+        }
+
+        extra.setMCTrackIDInfo(track, layerhits, simhits, simtracks, false, (Config::seedInput == simSeeds));
+      }
+
+      // set mcTrackID for fit tracks
+      for (int itrack = 0; itrack < (int)fittracks.size(); itrack++) {
+        const auto& track = fittracks[itrack];
+        auto& extra = fitextras[itrack];
+
+        if (Config::sim_val) {
+          extra.findMatchingSeedHits(track, seedtracks[track.label()], layerhits);  // label indexes the seed collection directly
+        } else if (Config::sim_val_for_cmssw) {
+          extra.findMatchingSeedHits(track, seedtracks[fitToSeedMapDumbCMSSW_[track.label()]], layerhits);  // NOTE(review): same operator[] caveat as for built tracks
+        }
+
+        extra.setMCTrackIDInfo(track, layerhits, simhits, simtracks, false, (Config::seedInput == simSeeds));
+      }
+    }
+
+    if (Config::cmssw_val) {  // cmssw-based validation: reference tracks are the cmssw tracks
+      // store mcTrackID and seedID correctly
+      storeSeedAndMCID(ev);
+
+      const auto& cmsswtracks = ev.cmsswTracks_;
+      const auto& cmsswextras = ev.cmsswTracksExtra_;
+      const auto& seedtracks = ev.seedTracks_;
+      const auto& buildtracks = ev.candidateTracks_;
+      auto& buildextras = ev.candidateTracksExtra_;
+      const auto& fittracks = ev.fitTracks_;
+      auto& fitextras = ev.fitTracksExtra_;
+
+      // store seed hits, reduced parameters, hit map of cmssw tracks, and global hit map
+      RedTrackVec reducedCMSSW;
+      LayIdxIDVecMapMap cmsswHitIDMap;
+      setupCMSSWMatching(ev, reducedCMSSW, cmsswHitIDMap);
+
+      // set cmsswTrackID for built tracks
+      for (int itrack = 0; itrack < (int)buildtracks.size(); itrack++) {
+        const auto& track = buildtracks[itrack];
+        auto& extra = buildextras[itrack];
+
+        // set vector of hitsOnTrack for seed
+        extra.findMatchingSeedHits(track,
+                                   seedtracks[track.label()],
+                                   layerhits);  // itrack == track.label() == seedtrack index == seedtrack.label()
+
+        if (Config::cmsswMatchingFW == trkParamBased) {  // the matching strategy for built tracks comes from cmsswMatchingFW
+          extra.setCMSSWTrackIDInfoByTrkParams(track, layerhits, cmsswtracks, reducedCMSSW, true);
+        } else if (Config::cmsswMatchingFW == hitBased) {
+          extra.setCMSSWTrackIDInfoByHits(track,
+                                          cmsswHitIDMap,
+                                          cmsswtracks,
+                                          cmsswextras,
+                                          reducedCMSSW,
+                                          -1);             // == -1 for not passing truth info about cmssw tracks
+        } else if (Config::cmsswMatchingFW == labelBased)  // can only be used if using pure seeds!
+        {
+          extra.setCMSSWTrackIDInfoByHits(track,
+                                          cmsswHitIDMap,
+                                          cmsswtracks,
+                                          cmsswextras,
+                                          reducedCMSSW,
+                                          reducedCMSSW[cmsswtracks[buildToCmsswMap_[track.label()]].label()].label());  // label of the paired cmssw track; NOTE(review): an unpaired build track would presumably resolve to entry 0 -- confirm
+        } else {
+          std::cerr << "Specified CMSSW validation, but using an incorrect matching option! Exiting..." << std::endl;
+          exit(1);  // NOTE(review): glock_ is still locked at this point; exit() does not unwind the stack
+        }
+      }
+
+      // set cmsswTrackID for fit tracks
+      for (int itrack = 0; itrack < (int)fittracks.size(); itrack++) {
+        const auto& track = fittracks[itrack];
+        auto& extra = fitextras[itrack];
+
+        // set vector of hitsOnTrack for seed
+        extra.findMatchingSeedHits(track,
+                                   seedtracks[track.label()],
+                                   layerhits);  // itrack == track.label() == seedtrack index == seedtrack.label()
+
+        if (Config::cmsswMatchingBK == trkParamBased) {  // the matching strategy for fit tracks comes from cmsswMatchingBK
+          extra.setCMSSWTrackIDInfoByTrkParams(track, layerhits, cmsswtracks, reducedCMSSW, true);
+        } else if (Config::cmsswMatchingBK == hitBased) {
+          extra.setCMSSWTrackIDInfoByHits(track,
+                                          cmsswHitIDMap,
+                                          cmsswtracks,
+                                          cmsswextras,
+                                          reducedCMSSW,
+                                          -1);             // == -1 not passing truth info about cmssw
+        } else if (Config::cmsswMatchingBK == labelBased)  // can only be used if using pure seeds!
+        {
+          extra.setCMSSWTrackIDInfoByHits(
+              track,
+              cmsswHitIDMap,
+              cmsswtracks,
+              cmsswextras,
+              reducedCMSSW,
+              reducedCMSSW[cmsswtracks[buildToCmsswMap_[fitToBuildMap_[track.label()]]].label()].label());  // fit label --> build label --> cmssw label
+        } else {
+          std::cerr << "Specified CMSSW validation, but using an incorrect matching option! Exiting..." << std::endl;
+          exit(1);  // NOTE(review): glock_ is still locked at this point; exit() does not unwind the stack
+        }
+      }
+    }
+  }
+
+  void TTreeValidation::makeSimTkToRecoTksMaps(Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);  // held while all three maps are filled
+    // map sim track ids to the labels of their matched reco tracks, sorted within each sim track (simTracks set in order by default!)
+    TTreeValidation::mapRefTkToRecoTks(ev.seedTracks_, ev.seedTracksExtra_, simToSeedMap_);  // seed tracks
+    TTreeValidation::mapRefTkToRecoTks(ev.candidateTracks_, ev.candidateTracksExtra_, simToBuildMap_);  // build tracks
+    TTreeValidation::mapRefTkToRecoTks(ev.fitTracks_, ev.fitTracksExtra_, simToFitMap_);  // fit tracks
+  }
+
+  void TTreeValidation::mapRefTkToRecoTks(const TrackVec& evt_tracks,
+                                          TrackExtraVec& evt_extras,
+                                          TkIDToTkIDVecMap& refTkMap) {
+    // Build refTkMap: reference track ID --> labels of the reco tracks matched to it, best score first.
+    //   evt_tracks : reco tracks; their labels are used as indices into evt_tracks and evt_extras
+    //   evt_extras : extras parallel to evt_tracks; read for mcTrackID() / cmsswTrackID() and updated
+    //                through setDuplicateInfo(rank, isDuplicate)
+    //   refTkMap   : appended to; entries already present are kept and ranked together with the new ones
+    // mcTrackID is the key when Config::sim_val or Config::sim_val_for_cmssw is set, cmsswTrackID when
+    // Config::cmssw_val is set.
+
+    const bool useSimID = Config::sim_val_for_cmssw || Config::sim_val;
+    for (int itrack = 0; itrack < (int)evt_tracks.size(); ++itrack) {
+      const auto& track = evt_tracks[itrack];
+      auto& extra = evt_extras[itrack];
+
+      // negative IDs mark fakes: they are not stored in the map at all
+      if (useSimID && extra.mcTrackID() >= 0) {
+        refTkMap[extra.mcTrackID()].push_back(track.label());
+      }
+      if (Config::cmssw_val && extra.cmsswTrackID() >= 0) {
+        refTkMap[extra.cmsswTrackID()].push_back(track.label());
+      }
+    }
+
+    for (auto&& refTkMatches : refTkMap) {
+      auto& labels = refTkMatches.second;
+
+      if (labels.size() < 2) {
+        // no duplicates: the single match gets rank 0
+        evt_extras[labels[0]].setDuplicateInfo(0, bool(false));
+        continue;
+      }
+
+      // Duplicates (ghosts): order the labels so that the best score comes first. The labels are sorted
+      // in place, comparing the tracks they index; this performs the same comparisons as sorting copies
+      // of the tracks, without copying whole Track objects.
+      // (ordering by hits/chi2 through sortByHitsChi2 was the earlier, now unused, alternative)
+      std::sort(labels.begin(), labels.end(), [&evt_tracks](const int lhs, const int rhs) {
+        return sortByScoreCand(evt_tracks[lhs], evt_tracks[rhs]);
+      });
+
+      // keep storing the label carried by each indexed track, as the copy-based version did
+      for (auto& label : labels) {
+        label = evt_tracks[label].label();
+      }
+
+      // rank within the group (0 = best), used in fake rate trees
+      int duplicateID = 0;
+      for (const auto label : labels) {
+        evt_extras[label].setDuplicateInfo(duplicateID, bool(true));
+        duplicateID++;
+      }
+    }
+  }
+
+  void TTreeValidation::makeSeedTkToRecoTkMaps(Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);  // held while both maps are filled
+    // map seedID --> reco track label; the seed track collection is assumed to map to itself, unless some cuts are used
+    TTreeValidation::mapSeedTkToRecoTk(ev.candidateTracks_, ev.candidateTracksExtra_, seedToBuildMap_);  // build tracks
+    TTreeValidation::mapSeedTkToRecoTk(ev.fitTracks_, ev.fitTracksExtra_, seedToFitMap_);  // fit tracks
+  }
+
+  void TTreeValidation::mapSeedTkToRecoTk(const TrackVec& evt_tracks,
+                                          const TrackExtraVec& evt_extras,
+                                          TkIDToTkIDMap& seedTkMap) {
+    for (int pos = 0; pos < (int)evt_tracks.size(); ++pos) {  // a later track overwrites an earlier one with the same seedID
+      seedTkMap[evt_extras[evt_tracks[pos].label()].seedID()] = evt_tracks[pos].label();
+    }
+  }
+
+  void TTreeValidation::makeRecoTkToRecoTkMaps(Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);  // held while both directions are filled
+    TTreeValidation::makeRecoTkToRecoTkMap(  // build label --> fit label, paired through equal seedIDs
+        buildToFitMap_, ev.candidateTracks_, ev.candidateTracksExtra_, ev.fitTracks_, ev.fitTracksExtra_);
+    TTreeValidation::makeRecoTkToRecoTkMap(  // fit label --> build label, same pairing rule
+        fitToBuildMap_, ev.fitTracks_, ev.fitTracksExtra_, ev.candidateTracks_, ev.candidateTracksExtra_);
+  }
+
+  void TTreeValidation::makeRecoTkToRecoTkMap(TkIDToTkIDMap& refToPairMap,
+                                              const TrackVec& reftracks,
+                                              const TrackExtraVec& refextras,
+                                              const TrackVec& pairtracks,
+                                              const TrackExtraVec& pairextras) {
+    // Pair every reference track with the first track of the other collection that carries the same seedID.
+    // Labels are positions inside the vectors at this stage; the seedID is the label prior to relabeling.
+    for (const auto& ref : reftracks) {
+      const auto wantedSeed = refextras[ref.label()].seedID();
+      const auto match = std::find_if(pairtracks.begin(), pairtracks.end(), [&](const auto& cand) {
+        return pairextras[cand.label()].seedID() == wantedSeed;
+      });
+      if (match != pairtracks.end()) {
+        refToPairMap[ref.label()] = match->label();
+      }
+      // reference tracks without a partner get no entry in the map
+    }
+  }
+
+  void TTreeValidation::makeCMSSWTkToRecoTksMaps(Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);  // held while both maps are filled
+    // can reuse this function: the same grouping/ranking helper fills the cmssw --> reco maps
+    TTreeValidation::mapRefTkToRecoTks(ev.candidateTracks_, ev.candidateTracksExtra_, cmsswToBuildMap_);  // build tracks
+    TTreeValidation::mapRefTkToRecoTks(ev.fitTracks_, ev.fitTracksExtra_, cmsswToFitMap_);  // fit tracks
+  }
+
+  void TTreeValidation::makeSeedTkToCMSSWTkMap(Event& ev) {
+    // For each seed position, look up the first cmssw track labelled with that position and map
+    // the seed's own label to it; seeds without such a cmssw track get no entry.
+    const auto& cmssw = ev.cmsswTracks_;
+    const int nSeeds = (int)ev.seedTracks_.size();
+    for (int iseed = 0; iseed < nSeeds; iseed++) {
+      const auto hit = std::find_if(
+          cmssw.begin(), cmssw.end(), [iseed](const auto& trk) { return trk.label() == iseed; });
+      if (hit != cmssw.end())
+        seedToCmsswMap_[ev.seedTracks_[iseed].label()] = hit->label();
+    }
+  }
+
+  void TTreeValidation::makeCMSSWTkToSeedTkMap(Event& ev) {
+    // Invert seedToCmsswMap_: cmssw label --> position of the seed inside ev.seedTracks_.
+    // In cmssw validation the seed label is not the seed's index in the vector (the collections are not
+    // aligned), so the position has to be searched for; cmssw tracks are aligned (label == index).
+    const auto& seeds = ev.seedTracks_;
+    const auto seedsBegin = seeds.begin();
+    const auto seedsEnd = seeds.end();
+
+    for (const auto& entry : seedToCmsswMap_) {
+      const auto wantedLabel = entry.first;
+      const auto found =
+          std::find_if(seedsBegin, seedsEnd, [&](const auto& seed) { return seed.label() == wantedLabel; });
+      if (found == seedsEnd)
+        continue;  // no seed carries this label: leave the cmssw track unmapped
+
+      cmsswToSeedMap_[entry.second] = (int)(found - seedsBegin);
+    }
+  }
+
+  void TTreeValidation::makeRecoTkToSeedTkMapsDumbCMSSW(Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);  // held while both maps are filled
+    // special functions for matching seeds to reco tracks for sim_val_for_cmssw (pairing is done on equal seedIDs)
+    TTreeValidation::makeRecoTkToSeedTkMapDumbCMSSW(
+        ev.candidateTracksExtra_, ev.seedTracksExtra_, candToSeedMapDumbCMSSW_);  // build index --> seed index
+    TTreeValidation::makeRecoTkToSeedTkMapDumbCMSSW(ev.fitTracksExtra_, ev.seedTracksExtra_, fitToSeedMapDumbCMSSW_);  // fit index --> seed index
+  }
+
+  void TTreeValidation::makeRecoTkToSeedTkMapDumbCMSSW(const TrackExtraVec& recoextras,
+                                                       const TrackExtraVec& seedextras,
+                                                       TkIDToTkIDMap& recoToSeedMap) {
+    // reco index --> index of the first seed extra with the same seedID; unmatched reco tracks get no entry
+    const int nSeed = (int)seedextras.size();
+    for (int ireco = 0; ireco < (int)recoextras.size(); ireco++) {
+      const auto wanted = recoextras[ireco].seedID();
+      int iseed = 0;
+      while (iseed < nSeed && seedextras[iseed].seedID() != wanted)
+        iseed++;
+      if (iseed < nSeed)
+        recoToSeedMap[ireco] = iseed;
+    }
+  }
+
+  void TTreeValidation::setTrackScoresDumbCMSSW(Event& ev) {
+    // Every seed track is (re)scored. Build and fit tracks are (re)scored only when their index
+    // appears as a key in the matching reco-to-seed map; the mapped seed index itself is not
+    // read here.
+    for (auto& seed : ev.seedTracks_) {
+      seed.setScore(getScoreCand(seed));
+    }
+
+    // build tracks listed in candToSeedMapDumbCMSSW_
+    auto& builds = ev.candidateTracks_;
+    for (const auto& entry : candToSeedMapDumbCMSSW_) {
+      auto& build = builds[entry.first];
+      build.setScore(getScoreCand(build));
+    }
+
+    // fit tracks listed in fitToSeedMapDumbCMSSW_
+    auto& fits = ev.fitTracks_;
+    for (const auto& entry : fitToSeedMapDumbCMSSW_) {
+      auto& fit = fits[entry.first];
+      fit.setScore(getScoreCand(fit));
+    }
+  }
+
+  void TTreeValidation::storeSeedAndMCID(Event& ev) {
+    // Build tracks: keep the incoming seedID as mcTrackID, then replace the seedID by the cmssw label mapped
+    // to that seed in seedToCmsswMap_; seeds absent from the map get distinct negative IDs (-2, -3, ...).
+    // Fit tracks then copy both IDs from the build track they are paired with in fitToBuildMap_.
+    auto& buildextras = ev.candidateTracksExtra_;
+    auto& fitextras = ev.fitTracksExtra_;
+    const auto& cmsswtracks = ev.cmsswTracks_;
+    const auto& cmsswextras = ev.cmsswTracksExtra_;
+    // the exact build --> cmssw pairing is only filled when label-based matching is requested
+    const bool wantLabelMap = (Config::cmsswMatchingFW == labelBased || Config::cmsswMatchingBK == labelBased);
+    const int nBuild = (int)ev.candidateTracks_.size();
+    const int nCmssw = (int)cmsswextras.size();
+
+    int unmatchedID = -1;  // pre-decremented before use
+    for (int ibuild = 0; ibuild < nBuild; ibuild++) {
+      auto& extra = buildextras[ibuild];
+      const int oldSeedID = extra.seedID();
+      extra.setmcTrackID(oldSeedID);
+
+      const auto mapped = seedToCmsswMap_.find(oldSeedID);
+      if (mapped == seedToCmsswMap_.end()) {
+        extra.setseedID(--unmatchedID);
+        continue;
+      }
+      extra.setseedID(mapped->second);
+
+      // the first cmssw extra carrying the freshly assigned seedID wins
+      for (int ic = 0; wantLabelMap && ic < nCmssw; ic++) {
+        if (cmsswextras[ic].seedID() == extra.seedID()) {
+          buildToCmsswMap_[ibuild] = cmsswtracks[ic].label();
+          break;
+        }
+      }
+    }
+
+    for (int ifit = 0; ifit < (int)ev.fitTracks_.size(); ifit++) {
+      auto& paired = buildextras[fitToBuildMap_[ifit]];
+      fitextras[ifit].setmcTrackID(paired.mcTrackID());
+      fitextras[ifit].setseedID(paired.seedID());
+    }
+  }
+
+  void TTreeValidation::setupCMSSWMatching(const Event& ev,
+                                           RedTrackVec& reducedCMSSW,
+                                           LayIdxIDVecMapMap& cmsswHitIDMap) {  // fills both outputs from the cmssw tracks of ev
+    // get the tracks + hits + extras
+    const auto& layerhits = ev.layerHits_;
+    const auto& cmsswtracks = ev.cmsswTracks_;
+    auto& cmsswextras = ev.cmsswTracksExtra_;  // NOTE(review): ev is const yet findMatchingSeedHits is called on these below -- presumably the member is mutable; confirm
+    const auto& seedtracks = ev.seedTracks_;
+
+    // resize accordingly
+    reducedCMSSW.resize(cmsswtracks.size());  // one reduced track per cmssw track, stored at the same index
+
+    for (int itrack = 0; itrack < (int)cmsswtracks.size(); itrack++) {
+      // get the needed tracks and extras
+      auto& cmsswextra = cmsswextras[itrack];
+      const auto& cmsswtrack = cmsswtracks[itrack];
+      const auto& seedtrack = seedtracks[cmsswToSeedMap_[cmsswtrack.label()]];  // since cmsswtrack.label() == itrack
+
+      // set seed hits!
+      cmsswextra.findMatchingSeedHits(cmsswtrack, seedtrack, layerhits);
+
+      // get tmp vars
+      const auto seedID = cmsswextra.seedID();
+      const auto& params = cmsswtrack.parameters();
+      SVector2 tmpv(params[3], params[5]);  // track parameters 3 and 5; NOTE(review): presumably 1/pT and theta -- verify against Track
+
+      HitLayerMap tmpmap;  // this track's non-seed hit indices, grouped by layer
+      for (int ihit = 0; ihit < cmsswtrack.nTotalHits(); ihit++) {
+        const int lyr = cmsswtrack.getHitLyr(ihit);
+        const int idx = cmsswtrack.getHitIdx(ihit);
+
+        // don't bother with storing seed layers in reduced cmssw
+        if (cmsswextra.isSeedHit(lyr, idx))
+          continue;
+
+        if (lyr >= 0 && idx >= 0) {  // negative layer or index: nothing is stored for this slot
+          tmpmap[lyr].push_back(idx);
+          cmsswHitIDMap[lyr][idx].push_back(cmsswtrack.label());  // labels of all cmssw tracks using this (layer, index) hit
+        }
+      }
+
+      // index inside object is label (as cmsswtracks are now aligned)
+      reducedCMSSW[itrack] = ReducedTrack(cmsswtrack.label(), seedID, tmpv, cmsswtrack.momPhi(), tmpmap);
+    }
+  }
+
+  int TTreeValidation::getLastFoundHit(const int trackMCHitID, const int mcTrackID, const Event& ev) {
+    // Pick the sim hit to use for mcTrackID: the track's own last hit when it belongs to that sim track,
+    // otherwise whatever getMCHitIDFromLayer() reports for the sim track on the same layer.
+    const auto& lastHitInfo = ev.simHitsInfo_[trackMCHitID];
+    if (lastHitInfo.mcTrackID() == mcTrackID) {
+      return trackMCHitID;
+    }
+    return ev.simTracks_[mcTrackID].getMCHitIDFromLayer(ev.layerHits_, lastHitInfo.layer());
+  }
+
+  int TTreeValidation::getMaskAssignment(const int refID) {
+    // Translate a reference-track ID into a mask value:
+    //    1 : refID >= 0 (matched)
+    //   -2 : refID == -10
+    //    0, 2 or -1 : any other negative ID, depending on Config::inclusiveShorts and
+    //                 Config::mtvLikeValidation as spelled out below
+    if (refID >= 0) {
+      return 1;
+    }
+    if (refID == -10) {
+      return -2;
+    }
+    if (Config::inclusiveShorts) {  // only used by standard simval!
+      switch (refID) {
+        case -1:
+        case -5:
+        case -8:
+        case -9:
+          return 0;
+        case -2:
+          return 2;
+        default:  // every other negative ID, e.g. -3, -4, -6, -7
+          return -1;
+      }
+    }
+
+    // only count long tracks (in mtvLike: all reco tracks are counted!)
+    if (refID == -1 || refID == -9) {
+      return 0;
+    }
+    if (Config::mtvLikeValidation && refID == -4) {
+      return 2;
+    }
+    return -1;  // everything else, e.g. -2, -3, -5, -6, -7, -8: standard simval
+  }
+
+  void TTreeValidation::resetFitBranches() {
+    constexpr float kUnset = -1000.f;  // placeholder stored for layers without fit information
+    for (int lay = 0; lay < ntotallayers_fit_; ++lay) {
+      z_prop_fit_[lay] = kUnset;  // z: prop / hit / sim, each followed by its e* companion
+      ez_prop_fit_[lay] = kUnset;
+      z_hit_fit_[lay] = kUnset;
+      ez_hit_fit_[lay] = kUnset;
+      z_sim_fit_[lay] = kUnset;
+      ez_sim_fit_[lay] = kUnset;
+
+      pphi_prop_fit_[lay] = kUnset;  // pphi: prop / hit / sim
+      epphi_prop_fit_[lay] = kUnset;
+      pphi_hit_fit_[lay] = kUnset;
+      epphi_hit_fit_[lay] = kUnset;
+      pphi_sim_fit_[lay] = kUnset;
+      epphi_sim_fit_[lay] = kUnset;
+
+      pt_up_fit_[lay] = kUnset;  // pt: up / sim
+      ept_up_fit_[lay] = kUnset;
+      pt_sim_fit_[lay] = kUnset;
+      ept_sim_fit_[lay] = kUnset;
+
+      mphi_up_fit_[lay] = kUnset;  // mphi, then meta: up / sim
+      emphi_up_fit_[lay] = kUnset;
+      mphi_sim_fit_[lay] = kUnset;
+      emphi_sim_fit_[lay] = kUnset;
+      meta_up_fit_[lay] = kUnset;
+      emeta_up_fit_[lay] = kUnset;
+      meta_sim_fit_[lay] = kUnset;
+      emeta_sim_fit_[lay] = kUnset;
+    }
+  }
+
+  void TTreeValidation::fillFitTree(const Event& ev) {  // one fittree_ entry per track stored in fitValTkMapMap_
+    std::lock_guard<std::mutex> locker(glock_);  // held for the whole function
+
+    evtid_fit_ = ev.evtID();
+    const auto& simtracks = ev.simTracks_;
+    const auto& layerhits = ev.layerHits_;
+    const auto& simtrackstates = ev.simTrackStates_;
+
+    for (auto&& fitvalmapmap : fitValTkMapMap_) {
+      TTreeValidation::resetFitBranches();  // start from the -1000 placeholders so layers without info stay flagged
+
+      tkid_fit_ = fitvalmapmap.first;  // seed id (label) is the same as the mcID
+
+      const auto& simtrack = simtracks[tkid_fit_];  // the track id is used directly as sim track index
+      const auto& fitvalmap = fitvalmapmap.second;
+      for (int ilayer = 0; ilayer < ntotallayers_fit_; ++ilayer) {
+        if (fitvalmap.count(ilayer)) {  // only layers for which fit info was collected
+          const auto& hit = layerhits[ilayer][simtrack.getHitIdx(ilayer)];  // NOTE(review): the layer number is passed to getHitIdx -- presumably one sim hit per layer, stored in layer order; confirm
+          const auto& initTS = simtrackstates.at(hit.mcHitID());  // sim state looked up by the hit's mcHitID
+          const auto& fitval = fitvalmap.at(ilayer);
+
+          z_hit_fit_[ilayer] = hit.z();
+          ez_hit_fit_[ilayer] = std::sqrt(hit.ezz());  // square root taken here, unlike for the sim and propagated values
+          z_sim_fit_[ilayer] = initTS.z();
+          ez_sim_fit_[ilayer] = initTS.ezz();
+          z_prop_fit_[ilayer] = fitval.ppz;
+          ez_prop_fit_[ilayer] = fitval.eppz;
+
+          pphi_hit_fit_[ilayer] = hit.phi();
+          epphi_hit_fit_[ilayer] = std::sqrt(hit.ephi());  // square root taken here as well
+          pphi_sim_fit_[ilayer] = initTS.posPhi();
+          epphi_sim_fit_[ilayer] = initTS.eposPhi();
+          pphi_prop_fit_[ilayer] = fitval.ppphi;
+          epphi_prop_fit_[ilayer] = fitval.eppphi;
+
+          pt_up_fit_[ilayer] = fitval.upt;
+          ept_up_fit_[ilayer] = fitval.eupt;
+          pt_sim_fit_[ilayer] = initTS.pT();
+          ept_sim_fit_[ilayer] = initTS.epT();
+
+          mphi_up_fit_[ilayer] = fitval.umphi;
+          emphi_up_fit_[ilayer] = fitval.eumphi;
+          mphi_sim_fit_[ilayer] = initTS.momPhi();
+          emphi_sim_fit_[ilayer] = initTS.emomPhi();
+
+          meta_up_fit_[ilayer] = fitval.umeta;
+          emeta_up_fit_[ilayer] = fitval.eumeta;
+          meta_sim_fit_[ilayer] = initTS.momEta();
+          emeta_sim_fit_[ilayer] = initTS.emomEta();
+        }
+      }
+      fittree_->Fill();  // written once per track, after all layers have been visited
+    }
+  }
+
+  void TTreeValidation::fillFullHitInfo(const Event& ev,
+                                        const Track& track,
+                                        std::vector<int>& lyrs,
+                                        std::vector<int>& idxs,
+                                        std::vector<int>& mcTkIDs,
+                                        std::vector<float>& xs,
+                                        std::vector<float>& ys,
+                                        std::vector<float>& zs) {
+    // Per-hit dump of `track`: layer and index for every hit slot, plus mcTrackID and x/y/z for the
+    // slots that reference a stored hit. Each output ends up with exactly nTotalHits entries.
+    const auto& layerHits = ev.layerHits_;
+    const auto& simHitsInfo = ev.simHitsInfo_;
+
+    // lyrs / idxs are written for every slot below, so resize() is enough for them. The other outputs
+    // are only written for valid hits: assign() resets all their entries to the placeholder, whereas
+    // resize() would keep values left over from an earlier call on a non-empty vector.
+    const auto nTotalHits = track.nTotalHits();
+    lyrs.resize(nTotalHits);
+    idxs.resize(nTotalHits);
+    mcTkIDs.assign(nTotalHits, -99);
+    xs.assign(nTotalHits, -9999.f);
+    ys.assign(nTotalHits, -9999.f);
+    zs.assign(nTotalHits, -9999.f);
+
+    for (auto ihit = 0; ihit < nTotalHits; ihit++) {
+      const auto lyr = track.getHitLyr(ihit);
+      const auto idx = track.getHitIdx(ihit);
+      lyrs[ihit] = lyr;
+      idxs[ihit] = idx;
+
+      // a negative layer or index does not reference a stored hit: keep the placeholders
+      if (lyr < 0 || idx < 0)
+        continue;
+
+      const auto& hit = layerHits[lyr][idx];
+      mcTkIDs[ihit] = hit.mcTrackID(simHitsInfo);
+      xs[ihit] = hit.x();
+      ys[ihit] = hit.y();
+      zs[ihit] = hit.z();
+    }
+  }
+
+  void TTreeValidation::fillMinHitInfo(const Track& track, std::vector<int>& lyrs, std::vector<int>& idxs) {
+    for (int slot = 0, nSlots = track.nTotalHits(); slot < nSlots; ++slot) {  // appends; existing contents are kept
+      lyrs.push_back(track.getHitLyr(slot));
+      idxs.push_back(track.getHitIdx(slot));
+    }
+  }
+
+  void TTreeValidation::fillEfficiencyTree(const Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);
+
+    const auto ievt = ev.evtID();
+    const auto& evt_sim_tracks = ev.simTracks_;
+    const auto& evt_seed_tracks = ev.seedTracks_;
+    const auto& evt_seed_extras = ev.seedTracksExtra_;
+    const auto& evt_build_tracks = ev.candidateTracks_;
+    const auto& evt_build_extras = ev.candidateTracksExtra_;
+    const auto& evt_fit_tracks = ev.fitTracks_;
+    const auto& evt_fit_extras = ev.fitTracksExtra_;
+    const auto& evt_layer_hits = ev.layerHits_;
+    const auto& evt_sim_trackstates = ev.simTrackStates_;
+
+    unsigned int count = 0;
+    for (const auto& simtrack : evt_sim_tracks) {
+      // clear the branches first
+      if (Config::keepHitInfo) {
+        hitlyrs_mc_eff_.clear();
+        hitlyrs_seed_eff_.clear();
+        hitlyrs_build_eff_.clear();
+        hitlyrs_fit_eff_.clear();
+
+        hitidxs_mc_eff_.clear();
+        hitidxs_seed_eff_.clear();
+        hitidxs_build_eff_.clear();
+        hitidxs_fit_eff_.clear();
+
+        hitmcTkIDs_mc_eff_.clear();
+        hitmcTkIDs_seed_eff_.clear();
+        hitmcTkIDs_build_eff_.clear();
+        hitmcTkIDs_fit_eff_.clear();
+
+        hitxs_mc_eff_.clear();
+        hitxs_seed_eff_.clear();
+        hitxs_build_eff_.clear();
+        hitxs_fit_eff_.clear();
+
+        hitys_mc_eff_.clear();
+        hitys_seed_eff_.clear();
+        hitys_build_eff_.clear();
+        hitys_fit_eff_.clear();
+
+        hitzs_mc_eff_.clear();
+        hitzs_seed_eff_.clear();
+        hitzs_build_eff_.clear();
+        hitzs_fit_eff_.clear();
+      }
+
+      evtID_eff_ = ievt;
+      mcID_eff_ = simtrack.label();
+
+      // generated values
+      x_mc_gen_eff_ = simtrack.x();
+      y_mc_gen_eff_ = simtrack.y();
+      z_mc_gen_eff_ = simtrack.z();
+
+      pt_mc_gen_eff_ = simtrack.pT();
+      phi_mc_gen_eff_ = simtrack.momPhi();
+      eta_mc_gen_eff_ = simtrack.momEta();
+      nHits_mc_eff_ = simtrack.nFoundHits();  // could be that the sim track skips layers!
+      nLayers_mc_eff_ = simtrack.nUniqueLayers();
+      lastlyr_mc_eff_ = simtrack.getLastFoundHitLyr();
+
+      itermask_seed_eff_ = 0;
+      itermask_build_eff_ = 0;
+      itermask_fit_eff_ = 0;
+      iterduplmask_seed_eff_ = 0;
+      iterduplmask_build_eff_ = 0;
+      iterduplmask_fit_eff_ = 0;
+      algo_seed_eff_ = 0;
+
+      if (Config::mtvRequireSeeds) {
+        for (auto aa : ev.simTracksExtra_[count].seedAlgos()) {
+          algo_seed_eff_ = (algo_seed_eff_ | (1 << aa));
+        }
+      }
+      count++;
+
+      // hit indices
+      if (Config::keepHitInfo)
+        TTreeValidation::fillFullHitInfo(ev,
+                                         simtrack,
+                                         hitlyrs_mc_eff_,
+                                         hitidxs_mc_eff_,
+                                         hitmcTkIDs_mc_eff_,
+                                         hitxs_mc_eff_,
+                                         hitys_mc_eff_,
+                                         hitzs_mc_eff_);
+
+      // matched seed track
+      if (simToSeedMap_.count(mcID_eff_) &&
+          simtrack
+              .isFindable())  // recoToSim match : save best match with best score, i.e. simToSeedMap_[matched SimID][first element in vector]
+      {
+        for (unsigned int ii = 0; ii < simToSeedMap_[mcID_eff_].size(); ii++) {
+          const int theAlgo = evt_seed_tracks[simToSeedMap_[mcID_eff_][ii]].algoint();
+          if ((itermask_seed_eff_ >> theAlgo) & 1)
+            iterduplmask_seed_eff_ = (iterduplmask_seed_eff_ | (1 << theAlgo));  //filled at the second time
+          itermask_seed_eff_ = (itermask_seed_eff_ | (1 << theAlgo));
+        }
+        const auto& seedtrack =
+            evt_seed_tracks[simToSeedMap_[mcID_eff_][0]];            // returns seedTrack best matched to sim track
+        const auto& seedextra = evt_seed_extras[seedtrack.label()];  // returns track extra best aligned with seed track
+        mcmask_seed_eff_ = 1;                                        // quick logic for matched
+
+        seedID_seed_eff_ = seedextra.seedID();
+
+        // use this to access correct sim track layer params
+        const int mcHitID =
+            TTreeValidation::getLastFoundHit(seedtrack.getLastFoundMCHitID(evt_layer_hits), mcID_eff_, ev);
+        if (mcHitID >= 0 && Config::readSimTrackStates) {
+          const TrackState& initLayTS = evt_sim_trackstates[mcHitID];
+
+          pt_mc_seed_eff_ = initLayTS.pT();
+          phi_mc_seed_eff_ = initLayTS.momPhi();
+          eta_mc_seed_eff_ = initLayTS.momEta();
+          helixchi2_seed_eff_ = computeHelixChi2(initLayTS.parameters, seedtrack.parameters(), seedtrack.errors());
+
+          mcTSmask_seed_eff_ = 1;
+        } else if (Config::tryToSaveSimInfo)  // can enter this block if: we actually read sim track states, but could not find the mchit OR we chose not to read the sim track states
+        {
+          // reuse info already set
+          pt_mc_seed_eff_ = pt_mc_gen_eff_;
+          phi_mc_seed_eff_ = phi_mc_gen_eff_;
+          eta_mc_seed_eff_ = eta_mc_gen_eff_;
+          helixchi2_seed_eff_ = computeHelixChi2(simtrack.parameters(), seedtrack.parameters(), seedtrack.errors());
+
+          mcTSmask_seed_eff_ = 0;
+        } else {
+          pt_mc_seed_eff_ = -101;
+          phi_mc_seed_eff_ = -101;
+          eta_mc_seed_eff_ = -101;
+          helixchi2_seed_eff_ = -101;
+
+          mcTSmask_seed_eff_ = -2;
+        }
+
+        // last hit info
+        const Hit& lasthit = evt_layer_hits[seedtrack.getLastFoundHitLyr()][seedtrack.getLastFoundHitIdx()];
+        xhit_seed_eff_ = lasthit.x();
+        yhit_seed_eff_ = lasthit.y();
+        zhit_seed_eff_ = lasthit.z();
+
+        pt_seed_eff_ = seedtrack.pT();
+        ept_seed_eff_ = seedtrack.epT();
+        phi_seed_eff_ = seedtrack.momPhi();
+        ephi_seed_eff_ = seedtrack.emomPhi();
+        eta_seed_eff_ = seedtrack.momEta();
+        eeta_seed_eff_ = seedtrack.emomEta();
+
+        // rest of mc info
+        nHits_seed_eff_ = seedtrack.nFoundHits();
+        nLayers_seed_eff_ = seedtrack.nUniqueLayers();
+        nHitsMatched_seed_eff_ = seedextra.nHitsMatched();
+        fracHitsMatched_seed_eff_ = seedextra.fracHitsMatched();
+        lastlyr_seed_eff_ = seedtrack.getLastFoundHitLyr();
+
+        // swim dphi
+        dphi_seed_eff_ = seedextra.dPhi();
+
+        // quality info
+        hitchi2_seed_eff_ = seedtrack.chi2();  // currently not being used
+        score_seed_eff_ = seedtrack.score();   // currently a constant by definition
+
+        duplmask_seed_eff_ = seedextra.isDuplicate();
+        nTkMatches_seed_eff_ = simToSeedMap_[mcID_eff_].size();  // n reco matches to this sim track.
+
+        // hit indices
+        if (Config::keepHitInfo)
+          TTreeValidation::fillFullHitInfo(ev,
+                                           seedtrack,
+                                           hitlyrs_seed_eff_,
+                                           hitidxs_seed_eff_,
+                                           hitmcTkIDs_seed_eff_,
+                                           hitxs_seed_eff_,
+                                           hitys_seed_eff_,
+                                           hitzs_seed_eff_);
+      } else  // unmatched simTracks ... put -99 for all reco values to denote unmatched
+      {
+        mcmask_seed_eff_ = (simtrack.isFindable() ? 0 : -1);  // quick logic for not matched
+
+        seedID_seed_eff_ = -99;
+
+        pt_mc_seed_eff_ = -99;
+        phi_mc_seed_eff_ = -99;
+        eta_mc_seed_eff_ = -99;
+        helixchi2_seed_eff_ = -99;
+
+        mcTSmask_seed_eff_ = -1;  // mask means unmatched sim track
+
+        xhit_seed_eff_ = -2000;
+        yhit_seed_eff_ = -2000;
+        zhit_seed_eff_ = -2000;
+
+        pt_seed_eff_ = -99;
+        ept_seed_eff_ = -99;
+        phi_seed_eff_ = -99;
+        ephi_seed_eff_ = -99;
+        eta_seed_eff_ = -99;
+        eeta_seed_eff_ = -99;
+
+        nHits_seed_eff_ = -99;
+        nLayers_seed_eff_ = -99;
+        nHitsMatched_seed_eff_ = -99;
+        fracHitsMatched_seed_eff_ = -99;
+        lastlyr_seed_eff_ = -99;
+
+        dphi_seed_eff_ = -99;
+
+        hitchi2_seed_eff_ = -99;
+        score_seed_eff_ = -17000;
+
+        duplmask_seed_eff_ = -1;     // mask means unmatched sim track
+        nTkMatches_seed_eff_ = -99;  // unmatched
+      }
+
+      // matched build track
+      if (simToBuildMap_.count(mcID_eff_) &&
+          simtrack
+              .isFindable())  // recoToSim match : save best match with best score i.e. simToBuildMap_[matched SimID][first element in vector]
+      {
+        for (unsigned int ii = 0; ii < simToBuildMap_[mcID_eff_].size(); ii++) {
+          const int theAlgo = evt_build_tracks[simToBuildMap_[mcID_eff_][ii]].algoint();
+          if ((itermask_build_eff_ >> theAlgo) & 1)
+            iterduplmask_build_eff_ = (iterduplmask_build_eff_ | (1 << theAlgo));  //filled at the second time
+          itermask_build_eff_ = (itermask_build_eff_ | (1 << theAlgo));
+        }
+        const auto& buildtrack =
+            evt_build_tracks[simToBuildMap_[mcID_eff_][0]];  // returns buildTrack best matched to sim track
+        const auto& buildextra =
+            evt_build_extras[buildtrack.label()];  // returns track extra best aligned with build track
+        mcmask_build_eff_ = 1;                     // quick logic for matched
+
+        seedID_build_eff_ = buildextra.seedID();
+
+        // use this to access correct sim track layer params
+        const int mcHitID =
+            TTreeValidation::getLastFoundHit(buildtrack.getLastFoundMCHitID(evt_layer_hits), mcID_eff_, ev);
+        if (mcHitID >= 0 && Config::readSimTrackStates) {
+          const TrackState& initLayTS = evt_sim_trackstates[mcHitID];
+
+          pt_mc_build_eff_ = initLayTS.pT();
+          phi_mc_build_eff_ = initLayTS.momPhi();
+          eta_mc_build_eff_ = initLayTS.momEta();
+          helixchi2_build_eff_ = computeHelixChi2(initLayTS.parameters, buildtrack.parameters(), buildtrack.errors());
+
+          mcTSmask_build_eff_ = 1;
+        } else if (Config::tryToSaveSimInfo)  // can enter this block if: we actually read sim track states, but could not find the mchit OR we chose not to read the sim track states
+        {
+          // reuse info already set
+          pt_mc_build_eff_ = pt_mc_gen_eff_;
+          phi_mc_build_eff_ = phi_mc_gen_eff_;
+          eta_mc_build_eff_ = eta_mc_gen_eff_;
+          helixchi2_build_eff_ = computeHelixChi2(simtrack.parameters(), buildtrack.parameters(), buildtrack.errors());
+
+          mcTSmask_build_eff_ = 0;
+        } else {
+          pt_mc_build_eff_ = -101;
+          phi_mc_build_eff_ = -101;
+          eta_mc_build_eff_ = -101;
+          helixchi2_build_eff_ = -101;
+
+          mcTSmask_build_eff_ = -2;
+        }
+
+        // last hit info
+        const Hit& lasthit = evt_layer_hits[buildtrack.getLastFoundHitLyr()][buildtrack.getLastFoundHitIdx()];
+        xhit_build_eff_ = lasthit.x();
+        yhit_build_eff_ = lasthit.y();
+        zhit_build_eff_ = lasthit.z();
+
+        pt_build_eff_ = buildtrack.pT();
+        ept_build_eff_ = buildtrack.epT();
+        phi_build_eff_ = buildtrack.momPhi();
+        ephi_build_eff_ = buildtrack.emomPhi();
+        eta_build_eff_ = buildtrack.momEta();
+        eeta_build_eff_ = buildtrack.emomEta();
+
+        nHits_build_eff_ = buildtrack.nFoundHits();
+        nLayers_build_eff_ = buildtrack.nUniqueLayers();
+        nHitsMatched_build_eff_ = buildextra.nHitsMatched();
+        fracHitsMatched_build_eff_ = buildextra.fracHitsMatched();
+        lastlyr_build_eff_ = buildtrack.getLastFoundHitLyr();
+
+        // swim dphi
+        dphi_build_eff_ = buildextra.dPhi();
+
+        // quality info
+        hitchi2_build_eff_ = buildtrack.chi2();
+        score_build_eff_ = buildtrack.score();
+
+        duplmask_build_eff_ = buildextra.isDuplicate();
+        nTkMatches_build_eff_ = simToBuildMap_[mcID_eff_].size();  // n reco matches to this sim track.
+
+        // hit indices
+        if (Config::keepHitInfo)
+          TTreeValidation::fillFullHitInfo(ev,
+                                           buildtrack,
+                                           hitlyrs_build_eff_,
+                                           hitidxs_build_eff_,
+                                           hitmcTkIDs_build_eff_,
+                                           hitxs_build_eff_,
+                                           hitys_build_eff_,
+                                           hitzs_build_eff_);
+      } else  // unmatched simTracks ... put -99 for all reco values to denote unmatched
+      {
+        mcmask_build_eff_ = (simtrack.isFindable() ? 0 : -1);  // quick logic for not matched
+
+        seedID_build_eff_ = -99;
+
+        pt_mc_build_eff_ = -99;
+        phi_mc_build_eff_ = -99;
+        eta_mc_build_eff_ = -99;
+        helixchi2_build_eff_ = -99;
+
+        mcTSmask_build_eff_ = -1;
+
+        xhit_build_eff_ = -2000;
+        yhit_build_eff_ = -2000;
+        zhit_build_eff_ = -2000;
+
+        pt_build_eff_ = -99;
+        ept_build_eff_ = -99;
+        phi_build_eff_ = -99;
+        ephi_build_eff_ = -99;
+        eta_build_eff_ = -99;
+        eeta_build_eff_ = -99;
+
+        nHits_build_eff_ = -99;
+        nLayers_build_eff_ = -99;
+        nHitsMatched_build_eff_ = -99;
+        fracHitsMatched_build_eff_ = -99;
+        lastlyr_build_eff_ = -99;
+
+        dphi_build_eff_ = -99;
+
+        hitchi2_build_eff_ = -99;
+        score_build_eff_ = -17000;
+
+        duplmask_build_eff_ = -1;     // mask means unmatched sim track
+        nTkMatches_build_eff_ = -99;  // unmatched
+      }
+
+      // matched fit track
+      if (simToFitMap_.count(mcID_eff_) &&
+          simtrack
+              .isFindable())  // recoToSim match : save best match with best score i.e. simToFitMap_[matched SimID][first element in vector]
+      {
+        for (unsigned int ii = 0; ii < simToFitMap_[mcID_eff_].size(); ii++) {
+          const int theAlgo = evt_fit_tracks[simToFitMap_[mcID_eff_][ii]].algoint();
+          if ((itermask_fit_eff_ >> theAlgo) & 1)
+            iterduplmask_fit_eff_ = (iterduplmask_fit_eff_ | (1 << theAlgo));  //filled at the second time
+          itermask_fit_eff_ = (itermask_fit_eff_ | (1 << theAlgo));
+        }
+        const auto& fittrack =
+            evt_fit_tracks[simToFitMap_[mcID_eff_][0]];           // returns fitTrack best matched to sim track
+        const auto& fitextra = evt_fit_extras[fittrack.label()];  // returns track extra best aligned with fit track
+        mcmask_fit_eff_ = 1;                                      // quick logic for matched
+
+        seedID_fit_eff_ = fitextra.seedID();
+
+        // use this to access correct sim track layer params
+        const int mcHitID =
+            TTreeValidation::getLastFoundHit(fittrack.getLastFoundMCHitID(evt_layer_hits), mcID_eff_, ev);
+        if (mcHitID >= 0 && Config::readSimTrackStates) {
+          const TrackState& initLayTS = evt_sim_trackstates[mcHitID];
+
+          pt_mc_fit_eff_ = initLayTS.pT();
+          phi_mc_fit_eff_ = initLayTS.momPhi();
+          eta_mc_fit_eff_ = initLayTS.momEta();
+          helixchi2_fit_eff_ = computeHelixChi2(initLayTS.parameters, fittrack.parameters(), fittrack.errors());
+
+          mcTSmask_fit_eff_ = 1;
+        } else if (Config::tryToSaveSimInfo)  // can enter this block if: we actually read sim track states, but could not find the mchit OR we chose not to read the sim track states
+        {
+          // reuse info already set
+          pt_mc_fit_eff_ = pt_mc_gen_eff_;
+          phi_mc_fit_eff_ = phi_mc_gen_eff_;
+          eta_mc_fit_eff_ = eta_mc_gen_eff_;
+          helixchi2_fit_eff_ = computeHelixChi2(simtrack.parameters(), fittrack.parameters(), fittrack.errors());
+
+          mcTSmask_fit_eff_ = 0;
+        } else {
+          pt_mc_fit_eff_ = -101;
+          phi_mc_fit_eff_ = -101;
+          eta_mc_fit_eff_ = -101;
+          helixchi2_fit_eff_ = -101;
+
+          mcTSmask_fit_eff_ = -2;
+        }
+
+        // last hit info
+        const Hit& lasthit = evt_layer_hits[fittrack.getLastFoundHitLyr()][fittrack.getLastFoundHitIdx()];
+        xhit_fit_eff_ = lasthit.x();
+        yhit_fit_eff_ = lasthit.y();
+        zhit_fit_eff_ = lasthit.z();
+
+        pt_fit_eff_ = fittrack.pT();
+        ept_fit_eff_ = fittrack.epT();
+        phi_fit_eff_ = fittrack.momPhi();
+        ephi_fit_eff_ = fittrack.emomPhi();
+        eta_fit_eff_ = fittrack.momEta();
+        eeta_fit_eff_ = fittrack.emomEta();
+
+        // rest of mc info
+        nHits_fit_eff_ = fittrack.nFoundHits();
+        nLayers_fit_eff_ = fittrack.nUniqueLayers();
+        nHitsMatched_fit_eff_ = fitextra.nHitsMatched();
+        fracHitsMatched_fit_eff_ = fitextra.fracHitsMatched();
+        lastlyr_fit_eff_ = fittrack.getLastFoundHitLyr();
+
+        // swim dphi
+        dphi_fit_eff_ = fitextra.dPhi();
+
+        // quality info
+        hitchi2_fit_eff_ = fittrack.chi2();  // -10 when not used
+        score_fit_eff_ = fittrack.score();
+
+        duplmask_fit_eff_ = fitextra.isDuplicate();
+        nTkMatches_fit_eff_ = simToFitMap_[mcID_eff_].size();  // n reco matches to this sim track.
+
+        // hit indices
+        if (Config::keepHitInfo)
+          TTreeValidation::fillFullHitInfo(ev,
+                                           fittrack,
+                                           hitlyrs_fit_eff_,
+                                           hitidxs_fit_eff_,
+                                           hitmcTkIDs_fit_eff_,
+                                           hitxs_fit_eff_,
+                                           hitys_fit_eff_,
+                                           hitzs_fit_eff_);
+      } else  // unmatched simTracks ... put -99 for all reco values to denote unmatched
+      {
+        mcmask_fit_eff_ = (simtrack.isFindable() ? 0 : -1);  // quick logic for not matched
+
+        seedID_fit_eff_ = -99;
+
+        pt_mc_fit_eff_ = -99;
+        phi_mc_fit_eff_ = -99;
+        eta_mc_fit_eff_ = -99;
+        helixchi2_fit_eff_ = -99;
+
+        mcTSmask_fit_eff_ = -1;
+
+        xhit_fit_eff_ = -2000;
+        yhit_fit_eff_ = -2000;
+        zhit_fit_eff_ = -2000;
+
+        pt_fit_eff_ = -99;
+        ept_fit_eff_ = -99;
+        phi_fit_eff_ = -99;
+        ephi_fit_eff_ = -99;
+        eta_fit_eff_ = -99;
+        eeta_fit_eff_ = -99;
+
+        nHits_fit_eff_ = -99;
+        nLayers_fit_eff_ = -99;
+        nHitsMatched_fit_eff_ = -99;
+        fracHitsMatched_fit_eff_ = -99;
+        lastlyr_fit_eff_ = -99;
+
+        dphi_fit_eff_ = -99;
+
+        hitchi2_fit_eff_ = -99;
+        score_fit_eff_ = -17000;
+
+        duplmask_fit_eff_ = -1;     // mask means unmatched sim track
+        nTkMatches_fit_eff_ = -99;  // unmatched
+      }
+
+      efftree_->Fill();  // fill it once per sim track!
+    }
+  }
+
+  void TTreeValidation::fillFakeRateTree(const Event& ev) {  // writes one frtree_ entry per seed track in ev
+    std::lock_guard<std::mutex> locker(glock_);  // glock_ is held until this function returns
+
+    const auto ievt = ev.evtID();
+    const auto& evt_sim_tracks =
+        ev.simTracks_;  // store sim info at that final layer!!! --> gen info stored only in eff tree
+    const auto& evt_seed_tracks = ev.seedTracks_;
+    const auto& evt_seed_extras = ev.seedTracksExtra_;
+    const auto& evt_build_tracks = ev.candidateTracks_;
+    const auto& evt_build_extras = ev.candidateTracksExtra_;
+    const auto& evt_fit_tracks = ev.fitTracks_;
+    const auto& evt_fit_extras = ev.fitTracksExtra_;
+    const auto& evt_layer_hits = ev.layerHits_;
+    const auto& evt_sim_trackstates = ev.simTrackStates_;
+
+    for (const auto& seedtrack : evt_seed_tracks) {
+      if (Config::keepHitInfo) {  // clear all per-hit vectors before this seed is processed
+        hitlyrs_seed_FR_.clear();
+        hitlyrs_mc_seed_FR_.clear();
+        hitlyrs_build_FR_.clear();
+        hitlyrs_mc_build_FR_.clear();
+        hitlyrs_fit_FR_.clear();
+        hitlyrs_mc_fit_FR_.clear();
+
+        hitidxs_seed_FR_.clear();
+        hitidxs_mc_seed_FR_.clear();
+        hitidxs_build_FR_.clear();
+        hitidxs_mc_build_FR_.clear();
+        hitidxs_fit_FR_.clear();
+        hitidxs_mc_fit_FR_.clear();
+
+        hitmcTkIDs_seed_FR_.clear();
+        hitmcTkIDs_mc_seed_FR_.clear();
+        hitmcTkIDs_build_FR_.clear();
+        hitmcTkIDs_mc_build_FR_.clear();
+        hitmcTkIDs_fit_FR_.clear();
+        hitmcTkIDs_mc_fit_FR_.clear();
+
+        hitxs_seed_FR_.clear();
+        hitxs_mc_seed_FR_.clear();
+        hitxs_build_FR_.clear();
+        hitxs_mc_build_FR_.clear();
+        hitxs_fit_FR_.clear();
+        hitxs_mc_fit_FR_.clear();
+
+        hitys_seed_FR_.clear();
+        hitys_mc_seed_FR_.clear();
+        hitys_build_FR_.clear();
+        hitys_mc_build_FR_.clear();
+        hitys_fit_FR_.clear();
+        hitys_mc_fit_FR_.clear();
+
+        hitzs_seed_FR_.clear();
+        hitzs_mc_seed_FR_.clear();
+        hitzs_build_FR_.clear();
+        hitzs_mc_build_FR_.clear();
+        hitzs_fit_FR_.clear();
+        hitzs_mc_fit_FR_.clear();
+      }
+
+      evtID_FR_ = ievt;
+
+      // seed info
+      const auto& seedextra = evt_seed_extras[seedtrack.label()];
+      seedID_FR_ = seedextra.seedID();
+      seedmask_seed_FR_ =
+          1;  // automatically set to 1, because at the moment no cuts on seeds after conformal+KF fit.  seed triplets filtered by RZ chi2 before fitting.
+
+      // last hit info: the hit lookup below is disabled, so the seed hit position is written as 0
+      // const Hit& lasthit = evt_layer_hits[seedtrack.getLastFoundHitLyr()][seedtrack.getLastFoundHitIdx()];
+      xhit_seed_FR_ = 0;  //lasthit.x();
+      yhit_seed_FR_ = 0;  //lasthit.y();
+      zhit_seed_FR_ = 0;  //lasthit.z();
+
+      pt_seed_FR_ = seedtrack.pT();
+      ept_seed_FR_ = seedtrack.epT();
+      phi_seed_FR_ = seedtrack.momPhi();
+      ephi_seed_FR_ = seedtrack.emomPhi();
+      eta_seed_FR_ = seedtrack.momEta();
+      eeta_seed_FR_ = seedtrack.emomEta();
+
+      nHits_seed_FR_ = seedtrack.nFoundHits();
+      nLayers_seed_FR_ = seedtrack.nUniqueLayers();
+      nHitsMatched_seed_FR_ = seedextra.nHitsMatched();
+      fracHitsMatched_seed_FR_ = seedextra.fracHitsMatched();
+      lastlyr_seed_FR_ = seedtrack.getLastFoundHitLyr();
+
+      algorithm_FR_ = seedtrack.algoint();
+
+      // swim dphi
+      dphi_seed_FR_ = seedextra.dPhi();
+
+      // quality info
+      hitchi2_seed_FR_ = seedtrack.chi2();  //--> not currently used
+      score_seed_FR_ = seedtrack.score();
+
+      if (Config::keepHitInfo)
+        TTreeValidation::fillFullHitInfo(ev,
+                                         seedtrack,
+                                         hitlyrs_seed_FR_,
+                                         hitidxs_seed_FR_,
+                                         hitmcTkIDs_seed_FR_,
+                                         hitxs_seed_FR_,
+                                         hitys_seed_FR_,
+                                         hitzs_seed_FR_);
+
+      // sim info for seed track
+      mcID_seed_FR_ = seedextra.mcTrackID();
+      mcmask_seed_FR_ = TTreeValidation::getMaskAssignment(mcID_seed_FR_);
+
+      if (mcmask_seed_FR_ == 1)  // seed track matched to a sim track
+      {
+        const auto& simtrack = evt_sim_tracks[mcID_seed_FR_];
+
+        const int mcHitID =
+            TTreeValidation::getLastFoundHit(seedtrack.getLastFoundMCHitID(evt_layer_hits), mcID_seed_FR_, ev);
+        if (mcHitID >= 0 && Config::readSimTrackStates) {
+          const TrackState& initLayTS = evt_sim_trackstates[mcHitID];
+          pt_mc_seed_FR_ = initLayTS.pT();
+          phi_mc_seed_FR_ = initLayTS.momPhi();
+          eta_mc_seed_FR_ = initLayTS.momEta();
+          helixchi2_seed_FR_ = computeHelixChi2(initLayTS.parameters, seedtrack.parameters(), seedtrack.errors());
+
+          mcTSmask_seed_FR_ = 1;
+        } else if (Config::tryToSaveSimInfo) {  // no usable sim track state: fall back to the sim track's own parameters
+          pt_mc_seed_FR_ = simtrack.pT();
+          phi_mc_seed_FR_ = simtrack.momPhi();
+          eta_mc_seed_FR_ = simtrack.momEta();
+          helixchi2_seed_FR_ = computeHelixChi2(simtrack.parameters(), seedtrack.parameters(), seedtrack.errors());
+
+          mcTSmask_seed_FR_ = 0;
+        } else {  // no sim reference saved: -101 placeholders
+          pt_mc_seed_FR_ = -101;
+          phi_mc_seed_FR_ = -101;
+          eta_mc_seed_FR_ = -101;
+          helixchi2_seed_FR_ = -101;
+
+          mcTSmask_seed_FR_ = -2;
+        }
+
+        nHits_mc_seed_FR_ = simtrack.nFoundHits();
+        nLayers_mc_seed_FR_ = simtrack.nUniqueLayers();
+        lastlyr_mc_seed_FR_ = simtrack.getLastFoundHitLyr();
+
+        duplmask_seed_FR_ = seedextra.isDuplicate();
+        iTkMatches_seed_FR_ =
+            seedextra
+                .duplicateID();  // ith duplicate seed track, i = 0 "best" match, i > 0 "still matched, real reco, not as good as i-1 track"
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillFullHitInfo(ev,
+                                           simtrack,
+                                           hitlyrs_mc_seed_FR_,
+                                           hitidxs_mc_seed_FR_,
+                                           hitmcTkIDs_mc_seed_FR_,
+                                           hitxs_mc_seed_FR_,
+                                           hitys_mc_seed_FR_,
+                                           hitzs_mc_seed_FR_);
+      } else {
+        // -99 for all sim info for reco tracks not associated to sim tracks
+        pt_mc_seed_FR_ = -99;
+        phi_mc_seed_FR_ = -99;
+        eta_mc_seed_FR_ = -99;
+        helixchi2_seed_FR_ = -99;
+
+        mcTSmask_seed_FR_ = -1;
+
+        nHits_mc_seed_FR_ = -99;
+        nLayers_mc_seed_FR_ = -99;
+        lastlyr_mc_seed_FR_ = -99;
+
+        duplmask_seed_FR_ = -1;
+        iTkMatches_seed_FR_ = -99;
+      }
+
+      //==========================//
+
+      // fill build information if this seed has an entry in seedToBuildMap_ (track still alive)
+      if (seedToBuildMap_.count(seedID_FR_)) {
+        seedmask_build_FR_ = 1;  // quick logic
+
+        const auto& buildtrack = evt_build_tracks[seedToBuildMap_[seedID_FR_]];
+        const auto& buildextra = evt_build_extras[buildtrack.label()];
+
+        // last hit info
+        const Hit& lasthit = evt_layer_hits[buildtrack.getLastFoundHitLyr()][buildtrack.getLastFoundHitIdx()];
+        xhit_build_FR_ = lasthit.x();
+        yhit_build_FR_ = lasthit.y();
+        zhit_build_FR_ = lasthit.z();
+
+        pt_build_FR_ = buildtrack.pT();
+        ept_build_FR_ = buildtrack.epT();
+        phi_build_FR_ = buildtrack.momPhi();
+        ephi_build_FR_ = buildtrack.emomPhi();
+        eta_build_FR_ = buildtrack.momEta();
+        eeta_build_FR_ = buildtrack.emomEta();
+
+        nHits_build_FR_ = buildtrack.nFoundHits();
+        nLayers_build_FR_ = buildtrack.nUniqueLayers();
+        nHitsMatched_build_FR_ = buildextra.nHitsMatched();
+        fracHitsMatched_build_FR_ = buildextra.fracHitsMatched();
+        lastlyr_build_FR_ = buildtrack.getLastFoundHitLyr();
+
+        // swim dphi
+        dphi_build_FR_ = buildextra.dPhi();
+
+        // quality info
+        hitchi2_build_FR_ = buildtrack.chi2();
+        score_build_FR_ = buildtrack.score();
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillFullHitInfo(ev,
+                                           buildtrack,
+                                           hitlyrs_build_FR_,
+                                           hitidxs_build_FR_,
+                                           hitmcTkIDs_build_FR_,
+                                           hitxs_build_FR_,
+                                           hitys_build_FR_,
+                                           hitzs_build_FR_);
+
+        // sim info for build track
+        mcID_build_FR_ = buildextra.mcTrackID();
+        mcmask_build_FR_ = TTreeValidation::getMaskAssignment(mcID_build_FR_);
+
+        if (mcmask_build_FR_ == 1)  // build track matched to seed and sim
+        {
+          const auto& simtrack = evt_sim_tracks[mcID_build_FR_];
+
+          const int mcHitID =
+              TTreeValidation::getLastFoundHit(buildtrack.getLastFoundMCHitID(evt_layer_hits), mcID_build_FR_, ev);
+          if (mcHitID >= 0 && Config::readSimTrackStates) {
+            const TrackState& initLayTS = evt_sim_trackstates[mcHitID];
+            pt_mc_build_FR_ = initLayTS.pT();
+            phi_mc_build_FR_ = initLayTS.momPhi();
+            eta_mc_build_FR_ = initLayTS.momEta();
+            helixchi2_build_FR_ = computeHelixChi2(initLayTS.parameters, buildtrack.parameters(), buildtrack.errors());
+
+            mcTSmask_build_FR_ = 1;
+          } else if (Config::tryToSaveSimInfo) {  // no usable sim track state: fall back to the sim track's own parameters
+            pt_mc_build_FR_ = simtrack.pT();
+            phi_mc_build_FR_ = simtrack.momPhi();
+            eta_mc_build_FR_ = simtrack.momEta();
+            helixchi2_build_FR_ = computeHelixChi2(simtrack.parameters(), buildtrack.parameters(), buildtrack.errors());
+
+            mcTSmask_build_FR_ = 0;
+          } else {  // no sim reference saved: -101 placeholders
+            pt_mc_build_FR_ = -101;
+            phi_mc_build_FR_ = -101;
+            eta_mc_build_FR_ = -101;
+            helixchi2_build_FR_ = -101;
+
+            mcTSmask_build_FR_ = -2;
+          }
+
+          nHits_mc_build_FR_ = simtrack.nFoundHits();
+          nLayers_mc_build_FR_ = simtrack.nUniqueLayers();
+          lastlyr_mc_build_FR_ = simtrack.getLastFoundHitLyr();
+
+          duplmask_build_FR_ = buildextra.isDuplicate();
+          iTkMatches_build_FR_ =
+              buildextra
+                  .duplicateID();  // ith duplicate build track, i = 0 "best" match, i > 0 "still matched, real reco, not as good as i-1 track"
+
+          if (Config::keepHitInfo)
+            TTreeValidation::fillFullHitInfo(ev,
+                                             simtrack,
+                                             hitlyrs_mc_build_FR_,
+                                             hitidxs_mc_build_FR_,
+                                             hitmcTkIDs_mc_build_FR_,
+                                             hitxs_mc_build_FR_,
+                                             hitys_mc_build_FR_,
+                                             hitzs_mc_build_FR_);
+        } else  // build track matched only to seed not to sim
+        {
+          // -99 for all sim info for reco tracks not associated to sim tracks
+          pt_mc_build_FR_ = -99;
+          phi_mc_build_FR_ = -99;
+          eta_mc_build_FR_ = -99;
+          helixchi2_build_FR_ = -99;
+
+          mcTSmask_build_FR_ = -1;
+
+          nHits_mc_build_FR_ = -99;
+          nLayers_mc_build_FR_ = -99;
+          lastlyr_mc_build_FR_ = -99;
+
+          duplmask_build_FR_ = -1;
+          iTkMatches_build_FR_ = -99;
+        }  // matched seed to build, not build to sim
+      }
+
+      else  // seed has no matching build track (therefore no matching sim to build track)
+      {
+        seedmask_build_FR_ = 0;  // quick logic
+
+        // -3000 for position info if no build track for seed
+        xhit_build_FR_ = -3000;
+        yhit_build_FR_ = -3000;
+        zhit_build_FR_ = -3000;
+
+        // -100 for all reco info as no actual build track for this seed
+        pt_build_FR_ = -100;
+        ept_build_FR_ = -100;
+        phi_build_FR_ = -100;
+        ephi_build_FR_ = -100;
+        eta_build_FR_ = -100;
+        eeta_build_FR_ = -100;
+
+        nHits_build_FR_ = -100;
+        nLayers_build_FR_ = -100;
+        nHitsMatched_build_FR_ = -100;
+        fracHitsMatched_build_FR_ = -100;
+        lastlyr_build_FR_ = -100;
+
+        dphi_build_FR_ = -100;
+
+        hitchi2_build_FR_ = -100;
+        score_build_FR_ = -5001;
+
+        // keep -100 for all sim variables as no such reco exists for this seed
+        mcmask_build_FR_ = -2;  // do not want to count towards build FR
+        mcID_build_FR_ = -100;
+
+        pt_mc_build_FR_ = -100;
+        phi_mc_build_FR_ = -100;
+        eta_mc_build_FR_ = -100;
+        helixchi2_build_FR_ = -100;
+
+        mcTSmask_build_FR_ = -3;
+
+        nHits_mc_build_FR_ = -100;
+        nLayers_mc_build_FR_ = -100;
+        lastlyr_mc_build_FR_ = -100;
+
+        duplmask_build_FR_ = -2;
+        iTkMatches_build_FR_ = -100;
+      }
+
+      //============================// fit tracks
+      if (seedToFitMap_.count(seedID_FR_)) {
+        seedmask_fit_FR_ = 1;  // quick logic
+
+        const auto& fittrack = evt_fit_tracks[seedToFitMap_[seedID_FR_]];
+        const auto& fitextra = evt_fit_extras[fittrack.label()];
+
+        // last hit info
+        const Hit& lasthit = evt_layer_hits[fittrack.getLastFoundHitLyr()][fittrack.getLastFoundHitIdx()];
+        xhit_fit_FR_ = lasthit.x();
+        yhit_fit_FR_ = lasthit.y();
+        zhit_fit_FR_ = lasthit.z();
+
+        pt_fit_FR_ = fittrack.pT();
+        ept_fit_FR_ = fittrack.epT();
+        phi_fit_FR_ = fittrack.momPhi();
+        ephi_fit_FR_ = fittrack.emomPhi();
+        eta_fit_FR_ = fittrack.momEta();
+        eeta_fit_FR_ = fittrack.emomEta();
+
+        nHits_fit_FR_ = fittrack.nFoundHits();
+        nLayers_fit_FR_ = fittrack.nUniqueLayers();
+        nHitsMatched_fit_FR_ = fitextra.nHitsMatched();
+        fracHitsMatched_fit_FR_ = fitextra.fracHitsMatched();
+        lastlyr_fit_FR_ = fittrack.getLastFoundHitLyr();
+
+        // swim dphi
+        dphi_fit_FR_ = fitextra.dPhi();
+
+        // quality info
+        hitchi2_fit_FR_ = fittrack.chi2();  // -10 when not used
+        score_fit_FR_ = fittrack.score();
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillFullHitInfo(ev,
+                                           fittrack,
+                                           hitlyrs_fit_FR_,
+                                           hitidxs_fit_FR_,
+                                           hitmcTkIDs_fit_FR_,
+                                           hitxs_fit_FR_,
+                                           hitys_fit_FR_,
+                                           hitzs_fit_FR_);
+
+        // sim info for fit track
+        mcID_fit_FR_ = fitextra.mcTrackID();
+        mcmask_fit_FR_ = TTreeValidation::getMaskAssignment(mcID_fit_FR_);
+
+        if (mcmask_fit_FR_ == 1)  // fit track matched to seed and sim
+        {
+          const auto& simtrack = evt_sim_tracks[mcID_fit_FR_];
+
+          const int mcHitID = TTreeValidation::getLastFoundHit(
+              fittrack.getLastFoundMCHitID(evt_layer_hits), mcID_fit_FR_, ev);  // only works for outward fit for now
+          if (mcHitID >= 0 && Config::readSimTrackStates) {
+            const TrackState& initLayTS = evt_sim_trackstates[mcHitID];
+            pt_mc_fit_FR_ = initLayTS.pT();
+            phi_mc_fit_FR_ = initLayTS.momPhi();
+            eta_mc_fit_FR_ = initLayTS.momEta();
+            helixchi2_fit_FR_ = computeHelixChi2(initLayTS.parameters, fittrack.parameters(), fittrack.errors());
+
+            mcTSmask_fit_FR_ = 1;
+          } else if (Config::tryToSaveSimInfo) {  // no usable sim track state: fall back to the sim track's own parameters
+            pt_mc_fit_FR_ = simtrack.pT();
+            phi_mc_fit_FR_ = simtrack.momPhi();
+            eta_mc_fit_FR_ = simtrack.momEta();
+            helixchi2_fit_FR_ = computeHelixChi2(simtrack.parameters(), fittrack.parameters(), fittrack.errors());
+
+            mcTSmask_fit_FR_ = 0;
+          } else {  // no sim reference saved: -101 placeholders
+            pt_mc_fit_FR_ = -101;
+            phi_mc_fit_FR_ = -101;
+            eta_mc_fit_FR_ = -101;
+            helixchi2_fit_FR_ = -101;
+
+            mcTSmask_fit_FR_ = -2;
+          }
+
+          nHits_mc_fit_FR_ = simtrack.nFoundHits();
+          nLayers_mc_fit_FR_ = simtrack.nUniqueLayers();
+          lastlyr_mc_fit_FR_ = simtrack.getLastFoundHitLyr();
+
+          duplmask_fit_FR_ = fitextra.isDuplicate();
+          iTkMatches_fit_FR_ =
+              fitextra
+                  .duplicateID();  // ith duplicate fit track, i = 0 "best" match, i > 0 "still matched, real reco, not as good as i-1 track"
+
+          if (Config::keepHitInfo)
+            TTreeValidation::fillFullHitInfo(ev,
+                                             simtrack,
+                                             hitlyrs_mc_fit_FR_,
+                                             hitidxs_mc_fit_FR_,
+                                             hitmcTkIDs_mc_fit_FR_,
+                                             hitxs_mc_fit_FR_,
+                                             hitys_mc_fit_FR_,
+                                             hitzs_mc_fit_FR_);
+        } else  // fit track matched only to seed not to sim
+        {
+          // -99 for all sim info for reco tracks not associated to sim tracks
+          pt_mc_fit_FR_ = -99;
+          phi_mc_fit_FR_ = -99;
+          eta_mc_fit_FR_ = -99;
+          helixchi2_fit_FR_ = -99;
+
+          mcTSmask_fit_FR_ = -1;
+
+          nHits_mc_fit_FR_ = -99;
+          nLayers_mc_fit_FR_ = -99;
+          lastlyr_mc_fit_FR_ = -99;
+
+          duplmask_fit_FR_ = -1;
+          iTkMatches_fit_FR_ = -99;
+        }  // matched seed to fit, not fit to sim
+      }
+
+      else  // seed has no matching fit track (therefore no matching sim to fit track)
+      {
+        seedmask_fit_FR_ = 0;  // quick logic
+
+        // -3000 for position info if no fit track for seed
+        xhit_fit_FR_ = -3000;
+        yhit_fit_FR_ = -3000;
+        zhit_fit_FR_ = -3000;
+
+        // -100 for all reco info as no actual fit track for this seed
+        pt_fit_FR_ = -100;
+        ept_fit_FR_ = -100;
+        phi_fit_FR_ = -100;
+        ephi_fit_FR_ = -100;
+        eta_fit_FR_ = -100;
+        eeta_fit_FR_ = -100;
+
+        nHits_fit_FR_ = -100;
+        nLayers_fit_FR_ = -100;
+        nHitsMatched_fit_FR_ = -100;
+        fracHitsMatched_fit_FR_ = -100;
+        lastlyr_fit_FR_ = -100;
+
+        dphi_fit_FR_ = -100;
+
+        hitchi2_fit_FR_ = -100;
+        score_fit_FR_ = -5001;
+
+        // keep -100 for all sim variables as no such reco exists for this seed
+        mcmask_fit_FR_ = -2;  // do not want to count towards fit FR
+        mcID_fit_FR_ = -100;
+
+        pt_mc_fit_FR_ = -100;
+        phi_mc_fit_FR_ = -100;
+        eta_mc_fit_FR_ = -100;
+        helixchi2_fit_FR_ = -100;
+
+        mcTSmask_fit_FR_ = -3;
+
+        nHits_mc_fit_FR_ = -100;
+        nLayers_mc_fit_FR_ = -100;
+        lastlyr_mc_fit_FR_ = -100;
+
+        duplmask_fit_FR_ = -2;
+        iTkMatches_fit_FR_ = -100;
+      }
+
+      frtree_->Fill();  // fill once per seed!
+    }                   // end of loop over seed tracks
+  }
+
+  void TTreeValidation::fillConfigTree() {
+    std::lock_guard<std::mutex> locker(glock_);  // glock_ is held until the function returns
+    // Copy the current Config values into the *_ variables below, then append one entry to configtree_.
+    Ntracks_ = Config::nTracks;
+    Nevents_ = Config::nEvents;
+
+    nLayers_ = Config::nLayers;
+    // iteration-dependent parameters are read from the first ItrInfo entry only
+    nlayers_per_seed_ = Config::ItrInfo[0].m_params.nlayers_per_seed;
+    maxCand_ = Config::ItrInfo[0].m_params.maxCandsPerSeed;
+    chi2Cut_min_ = Config::ItrInfo[0].m_params.chi2Cut_min;
+    nSigma_ = Config::nSigma;
+    minDPhi_ = Config::minDPhi;
+    maxDPhi_ = Config::maxDPhi;
+    minDEta_ = Config::minDEta;
+    maxDEta_ = Config::maxDEta;
+
+    beamspotX_ = Config::beamspotX;
+    beamspotY_ = Config::beamspotY;
+    beamspotZ_ = Config::beamspotZ;
+
+    minSimPt_ = Config::minSimPt;
+    maxSimPt_ = Config::maxSimPt;
+
+    hitposerrXY_ = Config::hitposerrXY;
+    hitposerrZ_ = Config::hitposerrZ;
+    hitposerrR_ = Config::hitposerrR;
+    varXY_ = Config::varXY;
+    varZ_ = Config::varZ;
+
+    ptinverr049_ = Config::ptinverr049;
+    phierr049_ = Config::phierr049;
+    thetaerr049_ = Config::thetaerr049;
+    ptinverr012_ = Config::ptinverr012;
+    phierr012_ = Config::phierr012;
+    thetaerr012_ = Config::thetaerr012;
+    // NOTE(review): presumably the variables above are bound to configtree_ branches elsewhere -- confirm
+    configtree_->Fill();
+  }
+
+  void TTreeValidation::fillCMSSWEfficiencyTree(const Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);
+    // One cmsswefftree_ entry per CMSSW track; info missing for unmatched tracks is stored as negative sentinels.
+    const auto ievt = ev.evtID();
+    const auto& evt_sim_tracks = ev.simTracks_;
+    const auto& evt_cmssw_tracks = ev.cmsswTracks_;
+    const auto& evt_cmssw_extras = ev.cmsswTracksExtra_;
+    const auto& evt_build_tracks = ev.candidateTracks_;
+    const auto& evt_build_extras = ev.candidateTracksExtra_;
+    const auto& evt_fit_tracks = ev.fitTracks_;
+    const auto& evt_fit_extras = ev.fitTracksExtra_;
+    const auto& evt_layer_hits = ev.layerHits_;
+
+    for (const auto& cmsswtrack : evt_cmssw_tracks) {
+      // clear hit info
+      if (Config::keepHitInfo) {
+        hitlyrs_cmssw_ceff_.clear();
+        hitlyrs_build_ceff_.clear();
+        hitlyrs_mc_build_ceff_.clear();
+        hitlyrs_fit_ceff_.clear();
+        hitlyrs_mc_fit_ceff_.clear();
+
+        hitidxs_cmssw_ceff_.clear();
+        hitidxs_build_ceff_.clear();
+        hitidxs_mc_build_ceff_.clear();
+        hitidxs_fit_ceff_.clear();
+        hitidxs_mc_fit_ceff_.clear();
+      }
+
+      const auto& cmsswextra = evt_cmssw_extras[cmsswtrack.label()];
+
+      evtID_ceff_ = ievt;
+      cmsswID_ceff_ = cmsswtrack.label();
+      seedID_cmssw_ceff_ = cmsswextra.seedID();
+
+      // PCA parameters
+      x_cmssw_ceff_ = cmsswtrack.x();
+      y_cmssw_ceff_ = cmsswtrack.y();
+      z_cmssw_ceff_ = cmsswtrack.z();
+
+      pt_cmssw_ceff_ = cmsswtrack.pT();
+      phi_cmssw_ceff_ = cmsswtrack.momPhi();
+      eta_cmssw_ceff_ = cmsswtrack.momEta();
+
+      nHits_cmssw_ceff_ = cmsswtrack.nFoundHits();
+      nLayers_cmssw_ceff_ = cmsswtrack.nUniqueLayers();
+      lastlyr_cmssw_ceff_ = cmsswtrack.getLastFoundHitLyr();
+
+      itermask_build_ceff_ = 0;
+      itermask_fit_ceff_ = 0;
+      iterduplmask_build_ceff_ = 0;
+      iterduplmask_fit_ceff_ = 0;
+      algo_seed_ceff_ = 0;
+      // one bit per seed algo; 1ULL makes the shift 64 bits wide so indices >= 31 do not overflow int
+      for (auto aa : cmsswextra.seedAlgos())
+        algo_seed_ceff_ = (algo_seed_ceff_ | (1ULL << aa));
+
+      if (Config::keepHitInfo)
+        TTreeValidation::fillMinHitInfo(cmsswtrack, hitlyrs_cmssw_ceff_, hitidxs_cmssw_ceff_);
+
+      // matched build track
+      if (cmsswToBuildMap_.count(cmsswID_ceff_) &&
+          cmsswtrack
+              .isFindable())  // recoToCmssw match : save best match with best score i.e. cmsswToBuildMap_[matched CmsswID][first element in vector]
+      {
+        for (unsigned int ii = 0; ii < cmsswToBuildMap_[cmsswID_ceff_].size(); ii++) {
+          const int theAlgo = evt_build_tracks[cmsswToBuildMap_[cmsswID_ceff_][ii]].algoint();
+          if ((itermask_build_ceff_ >> theAlgo) & 1)
+            iterduplmask_build_ceff_ = (iterduplmask_build_ceff_ | (1ULL << theAlgo));  //filled at the second time
+          itermask_build_ceff_ = (itermask_build_ceff_ | (1ULL << theAlgo));
+        }
+
+        const auto& buildtrack =
+            evt_build_tracks[cmsswToBuildMap_[cmsswID_ceff_][0]];  // returns buildTrack best matched to cmssw track
+        const auto& buildextra =
+            evt_build_extras[buildtrack.label()];  // returns track extra best aligned with build track
+        cmsswmask_build_ceff_ = 1;                 // quick logic for matched
+
+        seedID_build_ceff_ = buildextra.seedID();
+        mcTrackID_build_ceff_ = buildextra.mcTrackID();
+
+        // track parameters
+        pt_build_ceff_ = buildtrack.pT();
+        ept_build_ceff_ = buildtrack.epT();
+        phi_build_ceff_ = buildtrack.momPhi();
+        ephi_build_ceff_ = buildtrack.emomPhi();
+        eta_build_ceff_ = buildtrack.momEta();
+        eeta_build_ceff_ = buildtrack.emomEta();
+
+        // gen info
+        if (mcTrackID_build_ceff_ >= 0) {
+          const auto& simtrack = evt_sim_tracks[mcTrackID_build_ceff_];
+          x_mc_build_ceff_ = simtrack.x();
+          y_mc_build_ceff_ = simtrack.y();
+          z_mc_build_ceff_ = simtrack.z();
+          pt_mc_build_ceff_ = simtrack.pT();
+          phi_mc_build_ceff_ = simtrack.momPhi();
+          eta_mc_build_ceff_ = simtrack.momEta();
+
+          if (Config::keepHitInfo)
+            TTreeValidation::fillMinHitInfo(simtrack, hitlyrs_mc_build_ceff_, hitidxs_mc_build_ceff_);
+        } else {
+          x_mc_build_ceff_ = -1000;
+          y_mc_build_ceff_ = -1000;
+          z_mc_build_ceff_ = -1000;
+          pt_mc_build_ceff_ = -99;
+          phi_mc_build_ceff_ = -99;
+          eta_mc_build_ceff_ = -99;
+        }
+
+        // hit/layer info
+        nHits_build_ceff_ = buildtrack.nFoundHits();
+        nLayers_build_ceff_ = buildtrack.nUniqueLayers();
+        nHitsMatched_build_ceff_ = buildextra.nHitsMatched();
+        fracHitsMatched_build_ceff_ = buildextra.fracHitsMatched();
+        lastlyr_build_ceff_ = buildtrack.getLastFoundHitLyr();
+
+        // hit info
+        const Hit& lasthit = evt_layer_hits[buildtrack.getLastFoundHitLyr()][buildtrack.getLastFoundHitIdx()];
+        xhit_build_ceff_ = lasthit.x();
+        yhit_build_ceff_ = lasthit.y();
+        zhit_build_ceff_ = lasthit.z();
+
+        // quality info
+        hitchi2_build_ceff_ = buildtrack.chi2();
+        helixchi2_build_ceff_ = buildextra.helixChi2();
+        score_build_ceff_ = buildtrack.score();
+
+        // swim dphi
+        dphi_build_ceff_ = buildextra.dPhi();
+
+        // duplicate info
+        duplmask_build_ceff_ = buildextra.isDuplicate();
+        nTkMatches_build_ceff_ = cmsswToBuildMap_[cmsswID_ceff_].size();  // n reco matches to this cmssw track.
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillMinHitInfo(buildtrack, hitlyrs_build_ceff_, hitidxs_build_ceff_);
+      } else  // unmatched cmsswtracks ... put -99 for all reco values to denote unmatched
+      {
+        cmsswmask_build_ceff_ = (cmsswtrack.isFindable() ? 0 : -1);  // quick logic for not matched
+
+        seedID_build_ceff_ = -99;
+        mcTrackID_build_ceff_ = -99;
+
+        pt_build_ceff_ = -99;
+        ept_build_ceff_ = -99;
+        phi_build_ceff_ = -99;
+        ephi_build_ceff_ = -99;
+        eta_build_ceff_ = -99;
+        eeta_build_ceff_ = -99;
+
+        x_mc_build_ceff_ = -2000;
+        y_mc_build_ceff_ = -2000;
+        z_mc_build_ceff_ = -2000;
+        pt_mc_build_ceff_ = -99;
+        phi_mc_build_ceff_ = -99;
+        eta_mc_build_ceff_ = -99;
+
+        nHits_build_ceff_ = -99;
+        nLayers_build_ceff_ = -99;
+        nHitsMatched_build_ceff_ = -99;
+        fracHitsMatched_build_ceff_ = -99;
+        lastlyr_build_ceff_ = -99;
+
+        xhit_build_ceff_ = -2000;
+        yhit_build_ceff_ = -2000;
+        zhit_build_ceff_ = -2000;
+
+        hitchi2_build_ceff_ = -99;
+        helixchi2_build_ceff_ = -99;
+        score_build_ceff_ = -17000;
+
+        dphi_build_ceff_ = -99;
+
+        duplmask_build_ceff_ = -1;     // mask means unmatched cmssw track
+        nTkMatches_build_ceff_ = -99;  // unmatched
+      }
+
+      // matched fit track
+      if (cmsswToFitMap_.count(cmsswID_ceff_) &&
+          cmsswtrack
+              .isFindable())  // recoToCmssw match : save best match with best score i.e. cmsswToFitMap_[matched CmsswID][first element in vector]
+      {
+        for (unsigned int ii = 0; ii < cmsswToFitMap_[cmsswID_ceff_].size(); ii++) {
+          const int theAlgo = evt_fit_tracks[cmsswToFitMap_[cmsswID_ceff_][ii]].algoint();  // map holds fit-track indices
+          if ((itermask_fit_ceff_ >> theAlgo) & 1)
+            iterduplmask_fit_ceff_ = (iterduplmask_fit_ceff_ | (1ULL << theAlgo));  //filled at the second time
+          itermask_fit_ceff_ = (itermask_fit_ceff_ | (1ULL << theAlgo));
+        }
+
+        const auto& fittrack =
+            evt_fit_tracks[cmsswToFitMap_[cmsswID_ceff_][0]];     // returns fitTrack best matched to cmssw track
+        const auto& fitextra = evt_fit_extras[fittrack.label()];  // returns track extra best aligned with fit track
+        cmsswmask_fit_ceff_ = 1;                                  // quick logic for matched
+
+        seedID_fit_ceff_ = fitextra.seedID();
+        mcTrackID_fit_ceff_ = fitextra.mcTrackID();
+
+        // track parameters
+        pt_fit_ceff_ = fittrack.pT();
+        ept_fit_ceff_ = fittrack.epT();
+        phi_fit_ceff_ = fittrack.momPhi();
+        ephi_fit_ceff_ = fittrack.emomPhi();
+        eta_fit_ceff_ = fittrack.momEta();
+        eeta_fit_ceff_ = fittrack.emomEta();
+
+        // gen info
+        if (mcTrackID_fit_ceff_ >= 0) {
+          const auto& simtrack = evt_sim_tracks[mcTrackID_fit_ceff_];
+          x_mc_fit_ceff_ = simtrack.x();
+          y_mc_fit_ceff_ = simtrack.y();
+          z_mc_fit_ceff_ = simtrack.z();
+          pt_mc_fit_ceff_ = simtrack.pT();
+          phi_mc_fit_ceff_ = simtrack.momPhi();
+          eta_mc_fit_ceff_ = simtrack.momEta();
+
+          if (Config::keepHitInfo)
+            TTreeValidation::fillMinHitInfo(simtrack, hitlyrs_mc_fit_ceff_, hitidxs_mc_fit_ceff_);
+        } else {
+          x_mc_fit_ceff_ = -1000;
+          y_mc_fit_ceff_ = -1000;
+          z_mc_fit_ceff_ = -1000;
+          pt_mc_fit_ceff_ = -99;
+          phi_mc_fit_ceff_ = -99;
+          eta_mc_fit_ceff_ = -99;
+        }
+
+        // hit/layer info
+        nHits_fit_ceff_ = fittrack.nFoundHits();
+        nLayers_fit_ceff_ = fittrack.nUniqueLayers();
+        nHitsMatched_fit_ceff_ = fitextra.nHitsMatched();
+        fracHitsMatched_fit_ceff_ = fitextra.fracHitsMatched();
+        lastlyr_fit_ceff_ = fittrack.getLastFoundHitLyr();
+
+        // hit info
+        const Hit& lasthit = evt_layer_hits[fittrack.getLastFoundHitLyr()][fittrack.getLastFoundHitIdx()];
+        xhit_fit_ceff_ = lasthit.x();
+        yhit_fit_ceff_ = lasthit.y();
+        zhit_fit_ceff_ = lasthit.z();
+
+        // quality info
+        hitchi2_fit_ceff_ = fittrack.chi2();
+        helixchi2_fit_ceff_ = fitextra.helixChi2();
+        score_fit_ceff_ = fittrack.score();
+
+        // swim dphi
+        dphi_fit_ceff_ = fitextra.dPhi();
+
+        // duplicate info
+        duplmask_fit_ceff_ = fitextra.isDuplicate();
+        nTkMatches_fit_ceff_ = cmsswToFitMap_[cmsswID_ceff_].size();  // n reco matches to this cmssw track.
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillMinHitInfo(fittrack, hitlyrs_fit_ceff_, hitidxs_fit_ceff_);
+      } else  // unmatched cmsswtracks ... put -99 for all reco values to denote unmatched
+      {
+        cmsswmask_fit_ceff_ = (cmsswtrack.isFindable() ? 0 : -1);  // quick logic for not matched
+
+        seedID_fit_ceff_ = -99;
+        mcTrackID_fit_ceff_ = -99;
+
+        pt_fit_ceff_ = -99;
+        ept_fit_ceff_ = -99;
+        phi_fit_ceff_ = -99;
+        ephi_fit_ceff_ = -99;
+        eta_fit_ceff_ = -99;
+        eeta_fit_ceff_ = -99;
+
+        x_mc_fit_ceff_ = -2000;
+        y_mc_fit_ceff_ = -2000;
+        z_mc_fit_ceff_ = -2000;
+        pt_mc_fit_ceff_ = -99;
+        phi_mc_fit_ceff_ = -99;
+        eta_mc_fit_ceff_ = -99;
+
+        nHits_fit_ceff_ = -99;
+        nLayers_fit_ceff_ = -99;
+        nHitsMatched_fit_ceff_ = -99;
+        fracHitsMatched_fit_ceff_ = -99;
+        lastlyr_fit_ceff_ = -99;
+
+        xhit_fit_ceff_ = -2000;
+        yhit_fit_ceff_ = -2000;
+        zhit_fit_ceff_ = -2000;
+
+        hitchi2_fit_ceff_ = -99;
+        helixchi2_fit_ceff_ = -99;
+        score_fit_ceff_ = -17000;
+
+        dphi_fit_ceff_ = -99;
+
+        duplmask_fit_ceff_ = -1;     // mask means unmatched cmssw track
+        nTkMatches_fit_ceff_ = -99;  // unmatched
+      }
+
+      cmsswefftree_->Fill();
+    }
+  }
+
+  void TTreeValidation::fillCMSSWFakeRateTree(const Event& ev) {
+    std::lock_guard<std::mutex> locker(glock_);
+    // One cmsswfrtree_ entry per build track, with its CMSSW match and, when buildToFitMap_ has one, its fit track.
+    auto ievt = ev.evtID();
+    const auto& evt_sim_tracks = ev.simTracks_;
+    const auto& evt_cmssw_tracks = ev.cmsswTracks_;
+    const auto& evt_cmssw_extras = ev.cmsswTracksExtra_;
+    const auto& evt_build_tracks = ev.candidateTracks_;
+    const auto& evt_build_extras = ev.candidateTracksExtra_;
+    const auto& evt_fit_tracks = ev.fitTracks_;
+    const auto& evt_fit_extras = ev.fitTracksExtra_;
+    const auto& evt_layer_hits = ev.layerHits_;
+
+    for (const auto& buildtrack : evt_build_tracks) {
+      if (Config::keepHitInfo) {
+        hitlyrs_mc_cFR_.clear();
+        hitlyrs_build_cFR_.clear();
+        hitlyrs_cmssw_build_cFR_.clear();
+        hitlyrs_fit_cFR_.clear();
+        hitlyrs_cmssw_fit_cFR_.clear();
+
+        hitidxs_mc_cFR_.clear();
+        hitidxs_build_cFR_.clear();
+        hitidxs_cmssw_build_cFR_.clear();
+        hitidxs_fit_cFR_.clear();
+        hitidxs_cmssw_fit_cFR_.clear();
+      }
+
+      algorithm_cFR_ = buildtrack.algoint();
+
+      const auto& buildextra = evt_build_extras[buildtrack.label()];
+
+      // same for fit and build tracks
+      evtID_cFR_ = ievt;
+      seedID_cFR_ = buildextra.seedID();
+      mcTrackID_cFR_ = buildextra.mcTrackID();
+
+      // track parameters
+      pt_build_cFR_ = buildtrack.pT();
+      ept_build_cFR_ = buildtrack.epT();
+      phi_build_cFR_ = buildtrack.momPhi();
+      ephi_build_cFR_ = buildtrack.emomPhi();
+      eta_build_cFR_ = buildtrack.momEta();
+      eeta_build_cFR_ = buildtrack.emomEta();
+
+      // gen info
+      if (mcTrackID_cFR_ >= 0) {
+        const auto& simtrack = evt_sim_tracks[mcTrackID_cFR_];
+        x_mc_cFR_ = simtrack.x();
+        y_mc_cFR_ = simtrack.y();
+        z_mc_cFR_ = simtrack.z();
+        pt_mc_cFR_ = simtrack.pT();
+        phi_mc_cFR_ = simtrack.momPhi();
+        eta_mc_cFR_ = simtrack.momEta();
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillMinHitInfo(simtrack, hitlyrs_mc_cFR_, hitidxs_mc_cFR_);
+      } else {
+        x_mc_cFR_ = -1000;
+        y_mc_cFR_ = -1000;
+        z_mc_cFR_ = -1000;
+        pt_mc_cFR_ = -99;
+        phi_mc_cFR_ = -99;
+        eta_mc_cFR_ = -99;
+      }
+
+      // hit/layer info
+      nHits_build_cFR_ = buildtrack.nFoundHits();
+      nLayers_build_cFR_ = buildtrack.nUniqueLayers();
+      nHitsMatched_build_cFR_ = buildextra.nHitsMatched();
+      fracHitsMatched_build_cFR_ = buildextra.fracHitsMatched();
+      lastlyr_build_cFR_ = buildtrack.getLastFoundHitLyr();
+
+      // hit info
+      const Hit& lasthit = evt_layer_hits[buildtrack.getLastFoundHitLyr()][buildtrack.getLastFoundHitIdx()];
+      xhit_build_cFR_ = lasthit.x();
+      yhit_build_cFR_ = lasthit.y();
+      zhit_build_cFR_ = lasthit.z();
+
+      // quality info
+      hitchi2_build_cFR_ = buildtrack.chi2();
+      helixchi2_build_cFR_ = buildextra.helixChi2();
+      score_build_cFR_ = buildtrack.score();
+
+      // stored dphi
+      dphi_build_cFR_ = buildextra.dPhi();
+
+      if (Config::keepHitInfo)
+        TTreeValidation::fillMinHitInfo(buildtrack, hitlyrs_build_cFR_, hitidxs_build_cFR_);
+
+      // cmssw match?
+      cmsswID_build_cFR_ = buildextra.cmsswTrackID();
+      cmsswmask_build_cFR_ = TTreeValidation::getMaskAssignment(cmsswID_build_cFR_);
+
+      if (cmsswmask_build_cFR_ == 1)  // matched track to cmssw
+      {
+        const auto& cmsswtrack = evt_cmssw_tracks[cmsswID_build_cFR_];
+        const auto& cmsswextra = evt_cmssw_extras[cmsswtrack.label()];
+
+        seedID_cmssw_build_cFR_ = cmsswextra.seedID();
+
+        x_cmssw_build_cFR_ = cmsswtrack.x();
+        y_cmssw_build_cFR_ = cmsswtrack.y();
+        z_cmssw_build_cFR_ = cmsswtrack.z();
+
+        pt_cmssw_build_cFR_ = cmsswtrack.pT();
+        phi_cmssw_build_cFR_ = cmsswtrack.momPhi();
+        eta_cmssw_build_cFR_ = cmsswtrack.momEta();
+
+        nHits_cmssw_build_cFR_ = cmsswtrack.nFoundHits();
+        nLayers_cmssw_build_cFR_ = cmsswtrack.nUniqueLayers();
+        lastlyr_cmssw_build_cFR_ = cmsswtrack.getLastFoundHitLyr();
+
+        // duplicate info
+        duplmask_build_cFR_ = buildextra.isDuplicate();
+        iTkMatches_build_cFR_ = buildextra.duplicateID();
+
+        if (Config::keepHitInfo)
+          TTreeValidation::fillMinHitInfo(cmsswtrack, hitlyrs_cmssw_build_cFR_, hitidxs_cmssw_build_cFR_);
+      } else  // build track without a cmssw match ... put -99 (-2000 for positions) for all cmssw values
+      {
+        seedID_cmssw_build_cFR_ = -99;
+
+        x_cmssw_build_cFR_ = -2000;
+        y_cmssw_build_cFR_ = -2000;
+        z_cmssw_build_cFR_ = -2000;
+
+        pt_cmssw_build_cFR_ = -99;
+        phi_cmssw_build_cFR_ = -99;
+        eta_cmssw_build_cFR_ = -99;
+
+        nHits_cmssw_build_cFR_ = -99;
+        nLayers_cmssw_build_cFR_ = -99;
+        lastlyr_cmssw_build_cFR_ = -99;
+
+        duplmask_build_cFR_ = -1;
+        iTkMatches_build_cFR_ = -99;
+      }
+
+      // ensure there is a fit track to mess with
+      if (buildToFitMap_.count(buildtrack.label())) {
+        const auto& fittrack = evt_fit_tracks[buildToFitMap_[buildtrack.label()]];
+        const auto& fitextra = evt_fit_extras[fittrack.label()];
+
+        // track parameters
+        pt_fit_cFR_ = fittrack.pT();
+        ept_fit_cFR_ = fittrack.epT();
+        phi_fit_cFR_ = fittrack.momPhi();
+        ephi_fit_cFR_ = fittrack.emomPhi();
+        eta_fit_cFR_ = fittrack.momEta();
+        eeta_fit_cFR_ = fittrack.emomEta();
+
+        // hit/layer info
+        nHits_fit_cFR_ = fittrack.nFoundHits();
+        nLayers_fit_cFR_ = fittrack.nUniqueLayers();
+        nHitsMatched_fit_cFR_ = fitextra.nHitsMatched();
+        fracHitsMatched_fit_cFR_ = fitextra.fracHitsMatched();
+        lastlyr_fit_cFR_ = fittrack.getLastFoundHitLyr();
+
+        // hit info
+        const Hit& lasthit = evt_layer_hits[fittrack.getLastFoundHitLyr()][fittrack.getLastFoundHitIdx()];
+        xhit_fit_cFR_ = lasthit.x();
+        yhit_fit_cFR_ = lasthit.y();
+        zhit_fit_cFR_ = lasthit.z();
+
+        // chi2 info
+        hitchi2_fit_cFR_ = fittrack.chi2();
+        helixchi2_fit_cFR_ = fitextra.helixChi2();
+        score_fit_cFR_ = fittrack.score();
+
+        // stored dphi
+        dphi_fit_cFR_ = fitextra.dPhi();
+
+        if (Config::keepHitInfo)  // the fit hit vectors must come from the fit track, not the build track
+          TTreeValidation::fillMinHitInfo(fittrack, hitlyrs_fit_cFR_, hitidxs_fit_cFR_);
+
+        // cmssw match?
+        cmsswID_fit_cFR_ = fitextra.cmsswTrackID();
+        cmsswmask_fit_cFR_ = TTreeValidation::getMaskAssignment(cmsswID_fit_cFR_);
+
+        if (cmsswmask_fit_cFR_ == 1)  // matched track to cmssw
+        {
+          const auto& cmsswtrack = evt_cmssw_tracks[cmsswID_fit_cFR_];
+          const auto& cmsswextra = evt_cmssw_extras[cmsswtrack.label()];
+
+          seedID_cmssw_fit_cFR_ = cmsswextra.seedID();
+
+          x_cmssw_fit_cFR_ = cmsswtrack.x();
+          y_cmssw_fit_cFR_ = cmsswtrack.y();
+          z_cmssw_fit_cFR_ = cmsswtrack.z();
+
+          pt_cmssw_fit_cFR_ = cmsswtrack.pT();
+          phi_cmssw_fit_cFR_ = cmsswtrack.momPhi();
+          eta_cmssw_fit_cFR_ = cmsswtrack.momEta();
+
+          nHits_cmssw_fit_cFR_ = cmsswtrack.nFoundHits();
+          nLayers_cmssw_fit_cFR_ = cmsswtrack.nUniqueLayers();
+          lastlyr_cmssw_fit_cFR_ = cmsswtrack.getLastFoundHitLyr();
+
+          // duplicate info
+          duplmask_fit_cFR_ = fitextra.isDuplicate();
+          iTkMatches_fit_cFR_ = fitextra.duplicateID();
+
+          if (Config::keepHitInfo)  // reference-track hits: taken from the matched cmssw track, as in the build section
+            TTreeValidation::fillMinHitInfo(cmsswtrack, hitlyrs_cmssw_fit_cFR_, hitidxs_cmssw_fit_cFR_);
+        } else  // fit track without a cmssw match ... put -99 (-2000 for positions) for all cmssw values
+        {
+          seedID_cmssw_fit_cFR_ = -99;
+
+          x_cmssw_fit_cFR_ = -2000;
+          y_cmssw_fit_cFR_ = -2000;
+          z_cmssw_fit_cFR_ = -2000;
+
+          pt_cmssw_fit_cFR_ = -99;
+          phi_cmssw_fit_cFR_ = -99;
+          eta_cmssw_fit_cFR_ = -99;
+
+          nHits_cmssw_fit_cFR_ = -99;
+          nLayers_cmssw_fit_cFR_ = -99;
+          lastlyr_cmssw_fit_cFR_ = -99;
+
+          duplmask_fit_cFR_ = -1;
+          iTkMatches_fit_cFR_ = -99;
+        }
+      } else  // no fit track to match to a build track!
+      {
+        pt_fit_cFR_ = -100;
+        ept_fit_cFR_ = -100;
+        phi_fit_cFR_ = -100;
+        ephi_fit_cFR_ = -100;
+        eta_fit_cFR_ = -100;
+        eeta_fit_cFR_ = -100;
+
+        nHits_fit_cFR_ = -100;
+        nLayers_fit_cFR_ = -100;
+        nHitsMatched_fit_cFR_ = -100;
+        fracHitsMatched_fit_cFR_ = -100;
+        lastlyr_fit_cFR_ = -100;
+
+        xhit_fit_cFR_ = -3000;
+        yhit_fit_cFR_ = -3000;
+        zhit_fit_cFR_ = -3000;
+
+        hitchi2_fit_cFR_ = -100;
+        helixchi2_fit_cFR_ = -100;
+        score_fit_cFR_ = -5001;
+        dphi_fit_cFR_ = -100;
+
+        cmsswID_fit_cFR_ = -100;
+        cmsswmask_fit_cFR_ = -2;
+
+        seedID_cmssw_fit_cFR_ = -100;
+
+        x_cmssw_fit_cFR_ = -3000;
+        y_cmssw_fit_cFR_ = -3000;
+        z_cmssw_fit_cFR_ = -3000;
+
+        pt_cmssw_fit_cFR_ = -100;
+        phi_cmssw_fit_cFR_ = -100;
+        eta_cmssw_fit_cFR_ = -100;
+
+        nHits_cmssw_fit_cFR_ = -100;
+        nLayers_cmssw_fit_cFR_ = -100;
+        lastlyr_cmssw_fit_cFR_ = -100;
+
+        duplmask_fit_cFR_ = -2;
+        iTkMatches_fit_cFR_ = -100;
+      }
+
+      cmsswfrtree_->Fill();
+    }
+  }
+
+  void TTreeValidation::saveTTrees() {
+    std::lock_guard<std::mutex> locker(glock_);  // glock_ is held until the function returns
+    f_->cd();
+    // Each enabled validation mode attaches its trees to the output file f_ and writes them.
+    if (Config::sim_val_for_cmssw || Config::sim_val) {
+      efftree_->SetDirectory(f_.get());
+      efftree_->Write();
+
+      frtree_->SetDirectory(f_.get());
+      frtree_->Write();
+    }
+    if (Config::cmssw_val) {
+      cmsswefftree_->SetDirectory(f_.get());
+      cmsswefftree_->Write();
+
+      cmsswfrtree_->SetDirectory(f_.get());
+      cmsswfrtree_->Write();
+    }
+    if (Config::fit_val) {
+      fittree_->SetDirectory(f_.get());
+      fittree_->Write();
+    }
+    // the config tree is written unconditionally, whichever validation flags are set
+    configtree_->SetDirectory(f_.get());
+    configtree_->Write();
+  }
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/TTreeValidation.h b/RecoTracker/MkFitCore/standalone/TTreeValidation.h
new file mode 100644
index 0000000000000..dced37bd20a30
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/TTreeValidation.h
@@ -0,0 +1,421 @@
+#ifndef RecoTracker_MkFitCore_standalone_TTreeValidation_h
+#define RecoTracker_MkFitCore_standalone_TTreeValidation_h
+
+#include "Validation.h"
+
+#ifdef NO_ROOT
+namespace mkfit {
+
+  class TTreeValidation : public Validation {  // stand-in compiled when NO_ROOT is defined; adds nothing to Validation
+  public:
+    TTreeValidation(std::string) {}  // the string argument is unnamed and unused
+  };
+}  // end namespace mkfit
+#else
+
+#include <unordered_map>
+#include <mutex>
+#include "TFile.h"
+#include "TTree.h"
+#include "TROOT.h"
+
+namespace mkfit {
+  // FitVal defined in Validation.h
+  typedef std::map<int, FitVal> FitValLayMap;
+  typedef std::unordered_map<int, FitValLayMap> TkIDtoFitValLayMapMap;
+
+  class TTreeValidation : public Validation {
+  public:
+    TTreeValidation(std::string fileName, const TrackerInfo* trk_info);
+    ~TTreeValidation() {}
+
+    void initializeEfficiencyTree();
+    void initializeFakeRateTree();
+    void initializeConfigTree();
+    void initializeCMSSWEfficiencyTree();
+    void initializeCMSSWFakeRateTree();
+    void initializeFitTree();
+
+    void alignTracks(TrackVec& evt_tracks, TrackExtraVec& evt_extra, bool alignExtra) override;
+
+    void collectFitInfo(const FitVal& tmpfitval, int tkid, int layer) override;
+
+    void resetValidationMaps() override;
+    void resetFitBranches();
+
+    void setTrackExtras(Event& ev) override;
+
+    void makeSimTkToRecoTksMaps(Event& ev) override;
+    void mapRefTkToRecoTks(const TrackVec& evt_tracks, TrackExtraVec& evt_extras, TkIDToTkIDVecMap& refTkMap);
+    void makeSeedTkToRecoTkMaps(Event& ev) override;
+    void mapSeedTkToRecoTk(const TrackVec& evt_tracks, const TrackExtraVec& evt_extras, TkIDToTkIDMap& seedTkMap);
+    void makeRecoTkToRecoTkMaps(Event& ev) override;
+    void makeRecoTkToRecoTkMap(TkIDToTkIDMap& refToPairMap,
+                               const TrackVec& reftracks,
+                               const TrackExtraVec& refextras,
+                               const TrackVec& pairtracks,
+                               const TrackExtraVec& pairextras);
+    void makeCMSSWTkToRecoTksMaps(Event& ev) override;
+    void makeSeedTkToCMSSWTkMap(Event& ev) override;
+    void makeCMSSWTkToSeedTkMap(Event& ev) override;
+    void makeRecoTkToSeedTkMapsDumbCMSSW(Event& ev) override;
+    void makeRecoTkToSeedTkMapDumbCMSSW(const TrackExtraVec& recoextras,
+                                        const TrackExtraVec& seedextras,
+                                        TkIDToTkIDMap& recoToSeedMap);
+
+    void setTrackScoresDumbCMSSW(Event&) override;
+
+    void storeSeedAndMCID(Event& ev);
+    void setupCMSSWMatching(const Event& ev, RedTrackVec& reducedCMSSW, LayIdxIDVecMapMap& cmsswHitIDMap);
+
+    int getLastFoundHit(const int trackMCHitID, const int mcTrackID, const Event& ev);
+    int getMaskAssignment(const int refID);
+
+    void fillMinHitInfo(const Track& track, std::vector<int>& lyrs, std::vector<int>& idxs);
+    void fillFullHitInfo(const Event& ev,
+                         const Track& track,
+                         std::vector<int>& lyrs,
+                         std::vector<int>& idxs,
+                         std::vector<int>& mcTkIDs,
+                         std::vector<float>& xs,
+                         std::vector<float>& ys,
+                         std::vector<float>& zs);
+
+    void fillEfficiencyTree(const Event& ev) override;
+    void fillFakeRateTree(const Event& ev) override;
+    void fillConfigTree() override;
+    void fillCMSSWEfficiencyTree(const Event& ev) override;
+    void fillCMSSWFakeRateTree(const Event& ev) override;
+    void fillFitTree(const Event& ev) override;
+
+    void saveTTrees() override;
+
+  private:
+    std::unique_ptr<TFile> f_;  // output file!
+
+    TkIDtoFitValLayMapMap fitValTkMapMap_;  // map used for fit validation in mplex
+
+    // Sim to Reco Maps
+    TkIDToTkIDVecMap simToSeedMap_;
+    TkIDToTkIDVecMap simToBuildMap_;
+    TkIDToTkIDVecMap simToFitMap_;
+
+    // Reco to Reco Maps
+    TkIDToTkIDMap seedToBuildMap_;
+    TkIDToTkIDMap seedToFitMap_;
+
+    // CMSSW to Reco Maps
+    TkIDToTkIDVecMap cmsswToBuildMap_;
+    TkIDToTkIDVecMap cmsswToFitMap_;
+
+    // Special map for CMSSW tracks to seed track labels --> NOT used for fake rate!!
+    TkIDToTkIDMap seedToCmsswMap_;
+    TkIDToTkIDMap cmsswToSeedMap_;
+
+    // Special map for geting exact CMSSW track that originate build track from seed track through seedIDs
+    TkIDToTkIDMap buildToCmsswMap_;
+
+    // Special map for associating candidate to fit tracks in CMSSW only
+    TkIDToTkIDMap buildToFitMap_;
+    TkIDToTkIDMap fitToBuildMap_;
+
+    // Special map for associating reco tracks to seed tracks for sim_val_for_cmssw
+    TkIDToTkIDMap candToSeedMapDumbCMSSW_;
+    TkIDToTkIDMap fitToSeedMapDumbCMSSW_;
+
+    // Efficiency Tree
+    std::unique_ptr<TTree> efftree_;
+    int evtID_eff_ = 0, mcID_eff_ = 0;
+    int mcmask_seed_eff_ = 0, mcmask_build_eff_ = 0, mcmask_fit_eff_ = 0;
+    int seedID_seed_eff_ = 0, seedID_build_eff_ = 0, seedID_fit_eff_ = 0;
+    int mcTSmask_seed_eff_ = 0, mcTSmask_build_eff_ = 0, mcTSmask_fit_eff_ = 0;
+
+    // for efficiency and duplicate rate plots
+    float x_mc_gen_eff_ = 0., y_mc_gen_eff_ = 0., z_mc_gen_eff_ = 0.;
+    float pt_mc_gen_eff_ = 0., phi_mc_gen_eff_ = 0., eta_mc_gen_eff_ = 0.;
+    float nLayers_mc_eff_;
+    int nHits_mc_eff_ = 0, lastlyr_mc_eff_ = 0;
+
+    // for getting last hit positions track ended up on
+    float xhit_seed_eff_ = 0., xhit_build_eff_ = 0., xhit_fit_eff_ = 0.;
+    float yhit_seed_eff_ = 0., yhit_build_eff_ = 0., yhit_fit_eff_ = 0.;
+    float zhit_seed_eff_ = 0., zhit_build_eff_ = 0., zhit_fit_eff_ = 0.;
+
+    // for track resolutions / pulls
+    float pt_mc_seed_eff_ = 0., pt_mc_build_eff_ = 0., pt_mc_fit_eff_ = 0.;
+    float pt_seed_eff_ = 0., pt_build_eff_ = 0., pt_fit_eff_ = 0., ept_seed_eff_ = 0., ept_build_eff_ = 0.,
+          ept_fit_eff_ = 0.;
+    float phi_mc_seed_eff_ = 0., phi_mc_build_eff_ = 0., phi_mc_fit_eff_ = 0.;
+    float phi_seed_eff_ = 0., phi_build_eff_ = 0., phi_fit_eff_ = 0., ephi_seed_eff_ = 0., ephi_build_eff_ = 0.,
+          ephi_fit_eff_ = 0.;
+    float eta_mc_seed_eff_ = 0., eta_mc_build_eff_ = 0., eta_mc_fit_eff_ = 0.;
+    float eta_seed_eff_ = 0., eta_build_eff_ = 0., eta_fit_eff_ = 0., eeta_seed_eff_ = 0., eeta_build_eff_ = 0.,
+          eeta_fit_eff_ = 0.;
+
+    // for hit countings
+    int nHits_seed_eff_ = 0, nHits_build_eff_ = 0, nHits_fit_eff_ = 0;
+    float nLayers_seed_eff_ = 0, nLayers_build_eff_ = 0, nLayers_fit_eff_ = 0;
+    int nHitsMatched_seed_eff_ = 0, nHitsMatched_build_eff_ = 0, nHitsMatched_fit_eff_ = 0;
+    float fracHitsMatched_seed_eff_ = 0, fracHitsMatched_build_eff_ = 0, fracHitsMatched_fit_eff_ = 0;
+    int lastlyr_seed_eff_ = 0, lastlyr_build_eff_ = 0, lastlyr_fit_eff_ = 0;
+
+    // swim phi
+    float dphi_seed_eff_ = 0., dphi_build_eff_ = 0., dphi_fit_eff_ = 0.;
+
+    // quality info of tracks
+    float hitchi2_seed_eff_ = 0., hitchi2_build_eff_ = 0., hitchi2_fit_eff_ = 0.;
+    float helixchi2_seed_eff_ = 0., helixchi2_build_eff_ = 0., helixchi2_fit_eff_ = 0.;
+    int score_seed_eff_ = 0, score_build_eff_ = 0, score_fit_eff_ = 0;
+
+    // for duplicate track matches
+    int duplmask_seed_eff_ = 0, duplmask_build_eff_ = 0, duplmask_fit_eff_ = 0;
+    int nTkMatches_seed_eff_ = 0, nTkMatches_build_eff_ = 0, nTkMatches_fit_eff_ = 0;
+
+    //iterations provenance for each match - a bit mask similar to the ones used in the hit masking
+    ULong64_t itermask_seed_eff_ = 0, itermask_build_eff_ = 0, itermask_fit_eff_ = 0;
+    ULong64_t iterduplmask_seed_eff_ = 0, iterduplmask_build_eff_ = 0, iterduplmask_fit_eff_ = 0;
+    ULong64_t algo_seed_eff_ = 0;
+
+    // hit info
+    std::vector<int> hitlyrs_mc_eff_, hitlyrs_seed_eff_, hitlyrs_build_eff_, hitlyrs_fit_eff_;
+    std::vector<int> hitidxs_mc_eff_, hitidxs_seed_eff_, hitidxs_build_eff_, hitidxs_fit_eff_;
+    std::vector<int> hitmcTkIDs_mc_eff_, hitmcTkIDs_seed_eff_, hitmcTkIDs_build_eff_, hitmcTkIDs_fit_eff_;
+    std::vector<float> hitxs_mc_eff_, hitxs_seed_eff_, hitxs_build_eff_, hitxs_fit_eff_;
+    std::vector<float> hitys_mc_eff_, hitys_seed_eff_, hitys_build_eff_, hitys_fit_eff_;
+    std::vector<float> hitzs_mc_eff_, hitzs_seed_eff_, hitzs_build_eff_, hitzs_fit_eff_;
+
+    // Fake Rate tree and variables
+    std::unique_ptr<TTree> frtree_;
+    int evtID_FR_ = 0, seedID_FR_ = 0;
+
+    int seedmask_seed_FR_ = 0, seedmask_build_FR_ = 0, seedmask_fit_FR_ = 0;
+
+    // for getting last hit positions track ended up on
+    float xhit_seed_FR_ = 0., xhit_build_FR_ = 0., xhit_fit_FR_ = 0.;
+    float yhit_seed_FR_ = 0., yhit_build_FR_ = 0., yhit_fit_FR_ = 0.;
+    float zhit_seed_FR_ = 0., zhit_build_FR_ = 0., zhit_fit_FR_ = 0.;
+
+    // track state info
+    float pt_mc_seed_FR_ = 0., pt_mc_build_FR_ = 0., pt_mc_fit_FR_ = 0.;
+    float pt_seed_FR_ = 0., pt_build_FR_ = 0., pt_fit_FR_ = 0., ept_seed_FR_ = 0., ept_build_FR_ = 0., ept_fit_FR_ = 0.;
+    float phi_mc_seed_FR_ = 0., phi_mc_build_FR_ = 0., phi_mc_fit_FR_ = 0.;
+    float phi_seed_FR_ = 0., phi_build_FR_ = 0., phi_fit_FR_ = 0., ephi_seed_FR_ = 0., ephi_build_FR_ = 0.,
+          ephi_fit_FR_ = 0.;
+    float eta_mc_seed_FR_ = 0., eta_mc_build_FR_ = 0., eta_mc_fit_FR_ = 0.;
+    float eta_seed_FR_ = 0., eta_build_FR_ = 0., eta_fit_FR_ = 0., eeta_seed_FR_ = 0., eeta_build_FR_ = 0.,
+          eeta_fit_FR_ = 0.;
+
+    int nHits_seed_FR_ = 0, nHits_build_FR_ = 0, nHits_fit_FR_ = 0;
+    float nLayers_seed_FR_ = 0, nLayers_build_FR_ = 0, nLayers_fit_FR_ = 0;
+    int nHitsMatched_seed_FR_ = 0, nHitsMatched_build_FR_ = 0, nHitsMatched_fit_FR_ = 0;
+    float fracHitsMatched_seed_FR_ = 0, fracHitsMatched_build_FR_ = 0, fracHitsMatched_fit_FR_ = 0;
+    int lastlyr_seed_FR_ = 0, lastlyr_build_FR_ = 0, lastlyr_fit_FR_ = 0;
+    int algorithm_FR_ = 0;
+
+    // swim phi
+    float dphi_seed_FR_ = 0., dphi_build_FR_ = 0., dphi_fit_FR_ = 0.;
+
+    float hitchi2_seed_FR_ = 0., hitchi2_build_FR_ = 0., hitchi2_fit_FR_ = 0.;
+    int score_seed_FR_ = 0, score_build_FR_ = 0, score_fit_FR_ = 0;
+
+    int mcID_seed_FR_ = 0, mcID_build_FR_ = 0, mcID_fit_FR_ = 0;
+    int mcmask_seed_FR_ = 0, mcmask_build_FR_ = 0, mcmask_fit_FR_ = 0;
+    int mcTSmask_seed_FR_ = 0, mcTSmask_build_FR_ = 0, mcTSmask_fit_FR_ = 0;
+    int nHits_mc_seed_FR_ = 0, nHits_mc_build_FR_ = 0, nHits_mc_fit_FR_ = 0;
+    float nLayers_mc_seed_FR_ = 0, nLayers_mc_build_FR_ = 0, nLayers_mc_fit_FR_ = 0;
+    int lastlyr_mc_seed_FR_ = 0, lastlyr_mc_build_FR_ = 0, lastlyr_mc_fit_FR_ = 0;
+
+    float helixchi2_seed_FR_ = 0., helixchi2_build_FR_ = 0., helixchi2_fit_FR_ = 0.;
+
+    int duplmask_seed_FR_ = 0, duplmask_build_FR_ = 0, duplmask_fit_FR_ = 0;
+    int iTkMatches_seed_FR_ = 0, iTkMatches_build_FR_ = 0, iTkMatches_fit_FR_ = 0;
+
+    // hit info
+    std::vector<int> hitlyrs_seed_FR_, hitlyrs_build_FR_, hitlyrs_fit_FR_, hitlyrs_mc_seed_FR_, hitlyrs_mc_build_FR_,
+        hitlyrs_mc_fit_FR_;
+    std::vector<int> hitidxs_seed_FR_, hitidxs_build_FR_, hitidxs_fit_FR_, hitidxs_mc_seed_FR_, hitidxs_mc_build_FR_,
+        hitidxs_mc_fit_FR_;
+    std::vector<int> hitmcTkIDs_seed_FR_, hitmcTkIDs_build_FR_, hitmcTkIDs_fit_FR_, hitmcTkIDs_mc_seed_FR_,
+        hitmcTkIDs_mc_build_FR_, hitmcTkIDs_mc_fit_FR_;
+    std::vector<float> hitxs_seed_FR_, hitxs_build_FR_, hitxs_fit_FR_, hitxs_mc_seed_FR_, hitxs_mc_build_FR_,
+        hitxs_mc_fit_FR_;
+    std::vector<float> hitys_seed_FR_, hitys_build_FR_, hitys_fit_FR_, hitys_mc_seed_FR_, hitys_mc_build_FR_,
+        hitys_mc_fit_FR_;
+    std::vector<float> hitzs_seed_FR_, hitzs_build_FR_, hitzs_fit_FR_, hitzs_mc_seed_FR_, hitzs_mc_build_FR_,
+        hitzs_mc_fit_FR_;
+
+    // Configuration tree
+    std::unique_ptr<TTree> configtree_;
+    int Ntracks_ = 0, Nevents_ = 0;
+    float nLayers_ = 0;
+    int nlayers_per_seed_ = 0, maxCand_ = 0;
+    float chi2Cut_min_ = 0., nSigma_ = 0., minDPhi_ = 0., maxDPhi_ = 0., minDEta_ = 0., maxDEta_ = 0.;
+    float beamspotX_ = 0., beamspotY_ = 0., beamspotZ_ = 0.;
+    float minSimPt_ = 0., maxSimPt_ = 0.;
+    float hitposerrXY_ = 0., hitposerrZ_ = 0., hitposerrR_ = 0.;
+    float varXY_ = 0., varZ_ = 0.;
+    float ptinverr049_ = 0., phierr049_ = 0., thetaerr049_ = 0., ptinverr012_ = 0., phierr012_ = 0., thetaerr012_ = 0.;
+
+    // CMSSW Efficiency tree
+    std::unique_ptr<TTree> cmsswefftree_;
+    int evtID_ceff_ = 0, cmsswID_ceff_ = 0, seedID_cmssw_ceff_ = 0;
+
+    float x_cmssw_ceff_ = 0., y_cmssw_ceff_ = 0., z_cmssw_ceff_ = 0.;
+    float pt_cmssw_ceff_ = 0., phi_cmssw_ceff_ = 0., eta_cmssw_ceff_ = 0.;
+    float nLayers_cmssw_ceff_ = 0;
+    int nHits_cmssw_ceff_ = 0, lastlyr_cmssw_ceff_ = 0;
+
+    // build
+    int seedID_build_ceff_ = 0, mcTrackID_build_ceff_ = 0;
+    int cmsswmask_build_ceff_ = 0;
+
+    float pt_build_ceff_ = 0., ept_build_ceff_ = 0.;
+    float phi_build_ceff_ = 0., ephi_build_ceff_ = 0.;
+    float eta_build_ceff_ = 0., eeta_build_ceff_ = 0.;
+
+    float x_mc_build_ceff_ = 0., y_mc_build_ceff_ = 0., z_mc_build_ceff_ = 0.;
+    float pt_mc_build_ceff_ = 0., phi_mc_build_ceff_ = 0., eta_mc_build_ceff_ = 0.;
+
+    float nLayers_build_ceff_ = 0;
+    int nHits_build_ceff_ = 0, nHitsMatched_build_ceff_ = 0, lastlyr_build_ceff_ = 0;
+    float fracHitsMatched_build_ceff_ = 0;
+
+    float xhit_build_ceff_ = 0., yhit_build_ceff_ = 0., zhit_build_ceff_ = 0.;
+
+    // chi2 of tracks + phi swim
+    float hitchi2_build_ceff_ = 0., helixchi2_build_ceff_ = 0.;
+    int score_build_ceff_ = 0;
+    float dphi_build_ceff_ = 0.;
+
+    int duplmask_build_ceff_ = 0, nTkMatches_build_ceff_ = 0;
+
+    // fit
+    int seedID_fit_ceff_ = 0, mcTrackID_fit_ceff_ = 0;
+    int cmsswmask_fit_ceff_ = 0;
+
+    float pt_fit_ceff_ = 0., ept_fit_ceff_ = 0.;
+    float phi_fit_ceff_ = 0., ephi_fit_ceff_ = 0.;
+    float eta_fit_ceff_ = 0., eeta_fit_ceff_ = 0.;
+
+    float x_mc_fit_ceff_ = 0., y_mc_fit_ceff_ = 0., z_mc_fit_ceff_ = 0.;
+    float pt_mc_fit_ceff_ = 0., phi_mc_fit_ceff_ = 0., eta_mc_fit_ceff_ = 0.;
+
+    float nLayers_fit_ceff_ = 0;
+    int nHits_fit_ceff_ = 0, nHitsMatched_fit_ceff_ = 0, lastlyr_fit_ceff_ = 0;
+    float fracHitsMatched_fit_ceff_ = 0;
+
+    float xhit_fit_ceff_ = 0., yhit_fit_ceff_ = 0., zhit_fit_ceff_ = 0.;
+
+    // chi2 of tracks + phi swim
+    float hitchi2_fit_ceff_ = 0., helixchi2_fit_ceff_ = 0.;
+    int score_fit_ceff_ = 0;
+    float dphi_fit_ceff_ = 0.;
+
+    int duplmask_fit_ceff_ = 0, nTkMatches_fit_ceff_ = 0;
+
+    ULong64_t itermask_build_ceff_ = 0, itermask_fit_ceff_ = 0;
+    ULong64_t iterduplmask_build_ceff_ = 0, iterduplmask_fit_ceff_ = 0;
+    ULong64_t algo_seed_ceff_ = 0;
+
+    // hit info
+    std::vector<int> hitlyrs_cmssw_ceff_, hitlyrs_build_ceff_, hitlyrs_mc_build_ceff_, hitlyrs_fit_ceff_,
+        hitlyrs_mc_fit_ceff_;
+    std::vector<int> hitidxs_cmssw_ceff_, hitidxs_build_ceff_, hitidxs_mc_build_ceff_, hitidxs_fit_ceff_,
+        hitidxs_mc_fit_ceff_;
+
+    // CMSSW FakeRate tree
+    std::unique_ptr<TTree> cmsswfrtree_;
+    int evtID_cFR_ = 0, seedID_cFR_ = 0, mcTrackID_cFR_ = 0;
+
+    //seed algo
+    int algorithm_cFR_ = 0;
+
+    // build info
+    int cmsswID_build_cFR_ = 0, cmsswmask_build_cFR_ = 0;
+
+    float pt_build_cFR_ = 0., ept_build_cFR_ = 0.;
+    float phi_build_cFR_ = 0., ephi_build_cFR_ = 0.;
+    float eta_build_cFR_ = 0., eeta_build_cFR_ = 0.;
+
+    float x_mc_cFR_ = 0., y_mc_cFR_ = 0., z_mc_cFR_ = 0.;
+    float pt_mc_cFR_ = 0., phi_mc_cFR_ = 0., eta_mc_cFR_ = 0.;
+
+    float nLayers_build_cFR_ = 0;
+    int nHits_build_cFR_ = 0, nHitsMatched_build_cFR_ = 0, lastlyr_build_cFR_ = 0;
+    float fracHitsMatched_build_cFR_ = 0;
+
+    float xhit_build_cFR_ = 0., yhit_build_cFR_ = 0., zhit_build_cFR_ = 0.;
+
+    // chi2 of tracks
+    float hitchi2_build_cFR_ = 0., helixchi2_build_cFR_ = 0.;
+    int score_build_cFR_ = 0;
+    float dphi_build_cFR_ = 0.;
+
+    // for duplicate track matches
+    int duplmask_build_cFR_ = 0, iTkMatches_build_cFR_ = 0;
+
+    // cmssw info
+    int seedID_cmssw_build_cFR_ = 0;
+    float x_cmssw_build_cFR_ = 0., y_cmssw_build_cFR_ = 0., z_cmssw_build_cFR_ = 0.;
+    float pt_cmssw_build_cFR_ = 0., phi_cmssw_build_cFR_ = 0., eta_cmssw_build_cFR_ = 0.;
+    float nLayers_cmssw_build_cFR_ = 0;
+    int nHits_cmssw_build_cFR_ = 0, lastlyr_cmssw_build_cFR_ = 0;
+
+    // fit info
+    int cmsswID_fit_cFR_ = 0, cmsswmask_fit_cFR_ = 0;
+
+    float pt_fit_cFR_ = 0., ept_fit_cFR_ = 0.;
+    float phi_fit_cFR_ = 0., ephi_fit_cFR_ = 0.;
+    float eta_fit_cFR_ = 0., eeta_fit_cFR_ = 0.;
+
+    float nLayers_fit_cFR_ = 0;
+    int nHits_fit_cFR_ = 0, nHitsMatched_fit_cFR_ = 0, lastlyr_fit_cFR_ = 0;
+    float fracHitsMatched_fit_cFR_ = 0;
+
+    float xhit_fit_cFR_ = 0., yhit_fit_cFR_ = 0., zhit_fit_cFR_ = 0.;
+
+    // chi2 of tracks
+    float hitchi2_fit_cFR_ = 0., helixchi2_fit_cFR_ = 0.;
+    int score_fit_cFR_ = 0;
+    float dphi_fit_cFR_ = 0.;
+
+    // for duplicate track matches
+    int duplmask_fit_cFR_ = 0, iTkMatches_fit_cFR_ = 0;
+
+    // cmssw info
+    int seedID_cmssw_fit_cFR_ = 0;
+    float x_cmssw_fit_cFR_ = 0., y_cmssw_fit_cFR_ = 0., z_cmssw_fit_cFR_ = 0.;
+    float pt_cmssw_fit_cFR_ = 0., phi_cmssw_fit_cFR_ = 0., eta_cmssw_fit_cFR_ = 0.;
+    float nLayers_cmssw_fit_cFR_ = 0;
+    int nHits_cmssw_fit_cFR_ = 0, lastlyr_cmssw_fit_cFR_ = 0;
+
+    // hit info
+    std::vector<int> hitlyrs_mc_cFR_, hitlyrs_build_cFR_, hitlyrs_cmssw_build_cFR_, hitlyrs_fit_cFR_,
+        hitlyrs_cmssw_fit_cFR_;
+    std::vector<int> hitidxs_mc_cFR_, hitidxs_build_cFR_, hitidxs_cmssw_build_cFR_, hitidxs_fit_cFR_,
+        hitidxs_cmssw_fit_cFR_;
+
+    // Fit tree (for fine tuning z-phi windows and such) --> MPlex only
+    std::unique_ptr<TTree> fittree_;
+    int ntotallayers_fit_ = 0, tkid_fit_ = 0, evtid_fit_ = 0;
+
+    static const int nfvs_ = 24;
+    std::vector<float> fvs_[nfvs_];
+
+    std::vector<float>&z_prop_fit_ = fvs_[0], &ez_prop_fit_ = fvs_[1];
+    std::vector<float>&z_hit_fit_ = fvs_[2], &ez_hit_fit_ = fvs_[3], &z_sim_fit_ = fvs_[4], &ez_sim_fit_ = fvs_[5];
+    std::vector<float>&pphi_prop_fit_ = fvs_[6], &epphi_prop_fit_ = fvs_[7];
+    std::vector<float>&pphi_hit_fit_ = fvs_[8], &epphi_hit_fit_ = fvs_[9], &pphi_sim_fit_ = fvs_[10],
+    &epphi_sim_fit_ = fvs_[11];
+    std::vector<float>&pt_up_fit_ = fvs_[12], &ept_up_fit_ = fvs_[13], &pt_sim_fit_ = fvs_[14],
+    &ept_sim_fit_ = fvs_[15];
+    std::vector<float>&mphi_up_fit_ = fvs_[16], &emphi_up_fit_ = fvs_[17], &mphi_sim_fit_ = fvs_[18],
+    &emphi_sim_fit_ = fvs_[19];
+    std::vector<float>&meta_up_fit_ = fvs_[20], &emeta_up_fit_ = fvs_[21], &meta_sim_fit_ = fvs_[22],
+    &emeta_sim_fit_ = fvs_[23];
+
+    std::mutex glock_;
+  };
+}  // end namespace mkfit
+#endif
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/TrackExtra.cc b/RecoTracker/MkFitCore/standalone/TrackExtra.cc
new file mode 100644
index 0000000000000..8954b4eef1ce8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/TrackExtra.cc
@@ -0,0 +1,513 @@
+#include "RecoTracker/MkFitCore/standalone/TrackExtra.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+
+//#define DEBUG
+#include "RecoTracker/MkFitCore/src/Debug.h"
+
+namespace mkfit {
+
+  //==============================================================================
+  // TrackExtra
+  //==============================================================================
+
+  // Collect the hits that reco_trk has in common with seed_trk.
+  //
+  // A hit is shared when both tracks reference the same layer and the same hit
+  // index on that layer, and that index lies inside layerHits[layer]. Each shared
+  // hit is appended to matchedSeedHits_ as (index, layer); existing entries of
+  // matchedSeedHits_ are left as they are.
+  //
+  //   reco_trk   track whose hits are examined
+  //   seed_trk   seed track the hits are compared against
+  //   layerHits  per-layer hit collections, indexed by layer number
+  void TrackExtra::findMatchingSeedHits(const Track& reco_trk,
+                                        const Track& seed_trk,
+                                        const std::vector<HitVec>& layerHits) {
+    // a usable hit sits on an existing layer (lyr >= 0) and indexes into that layer's hits
+    const auto isRealHit = [&layerHits](const int lyr, const int idx) {
+      return (lyr >= 0) && (idx >= 0) && (static_cast<size_t>(idx) < layerHits[lyr].size());
+    };
+
+    // compare every reco hit with every seed hit
+    for (int iReco = 0; iReco < reco_trk.nTotalHits(); ++iReco) {
+      const int recoLyr = reco_trk.getHitLyr(iReco);
+      const int recoIdx = reco_trk.getHitIdx(iReco);
+
+      // nothing can match a reco slot that does not hold a real hit
+      if (!isRealHit(recoLyr, recoIdx))
+        continue;
+
+      for (int iSeed = 0; iSeed < seed_trk.nTotalHits(); ++iSeed) {
+        const int seedLyr = seed_trk.getHitLyr(iSeed);
+        const int seedIdx = seed_trk.getHitIdx(iSeed);
+
+        // the layer comparison comes first, so layerHits is only ever indexed
+        // with the reco layer that was already checked above
+        const bool sameHit = (seedLyr == recoLyr) && isRealHit(seedLyr, seedIdx) && (seedIdx == recoIdx);
+        if (sameHit)
+          matchedSeedHits_.emplace_back(seedIdx, seedLyr);
+      }
+    }
+  }
+
+  bool TrackExtra::isSeedHit(const int lyr, const int idx) const {
+    // true when the (lyr, idx) pair is one of the seed hits stored in matchedSeedHits_
+    const auto sameHit = [lyr, idx](const auto& hit) { return (hit.layer == lyr) && (hit.index == idx); };
+    return std::any_of(matchedSeedHits_.begin(), matchedSeedHits_.end(), sameHit);
+  }
+
+  // Turn a raw reference-track match into the final track ID / status code.
+  //   foundHits   number of hits found on the track
+  //   minHits     minimum number of found hits for the track to count as long enough
+  //   reftracks   reference tracks, indexed by refTrackID and by trueID
+  //   trueID      reference track known beforehand for this track; negative when there is none
+  //   duplicate   non-zero flags the track as a duplicate
+  //   refTrackID  matched reference track (>= 0), or -1 when nothing was matched
+  // Returns:
+  //   -10                duplicate
+  //   refTrackID (>= 0)  matched, reference findable, enough hits
+  //   -2                 matched, reference findable, too few hits
+  //   -3 / -4            matched, reference not findable, too few / enough hits
+  //   -1                 unmatched, trueID findable, enough hits
+  //   -5                 unmatched, trueID findable, too few hits
+  //   -6 / -7            unmatched, trueID not findable, too few / enough hits
+  //   -8 / -9            unmatched, no trueID, too few / enough hits
+  //   any other negative refTrackID is handed back unchanged
+  int TrackExtra::modifyRefTrackID(const int foundHits,
+                                   const int minHits,
+                                   const TrackVec& reftracks,
+                                   const int trueID,
+                                   const int duplicate,
+                                   int refTrackID) {
+    if (duplicate)
+      return -10;
+
+    const bool tooShort = (foundHits < minHits);
+
+    // matched to a reference track
+    if (refTrackID >= 0) {
+      if (reftracks[refTrackID].isFindable())
+        return (tooShort ? -2 : refTrackID);
+      return (tooShort ? -3 : -4);
+    }
+
+    // only the plain "unmatched" code (-1) is refined any further
+    if (refTrackID != -1)
+      return refTrackID;
+
+    if (trueID < 0)
+      return (tooShort ? -8 : -9);
+    if (reftracks[trueID].isFindable())
+      return (tooShort ? -5 : refTrackID);
+    return (tooShort ? -6 : -7);
+  }
+
+  // Match a reco track to a sim track by majority vote over its hits' mcTrackIDs (50% criterion, 75% if mtvLikeValidation)
+  void TrackExtra::setMCTrackIDInfo(const Track& trk,
+                                    const std::vector<HitVec>& layerHits,
+                                    const MCHitInfoVec& globalHitInfo,
+                                    const TrackVec& simtracks,
+                                    const bool isSeed,
+                                    const bool isPure) {
+    dprintf("TrackExtra::setMCTrackIDInfo for track with label %d, total hits %d, found hits %d\n",
+            trk.label(),
+            trk.nTotalHits(),
+            trk.nFoundHits());
+
+    std::vector<int> mcTrackIDs;         // mcTrackID of every usable hit on the reco track (one entry per hit)
+    int nSeedHits = nMatchedSeedHits();  // number of seed hits, subtracted from the hit counts below
+
+    // loop over all hits stored in reco track, storing the mcTrackID of each real hit
+    for (int ihit = 0; ihit < trk.nTotalHits(); ++ihit) {
+      const int lyr = trk.getHitLyr(ihit);
+      const int idx = trk.getHitIdx(ihit);
+
+      // ensure layer exists
+      if (lyr < 0)
+        continue;
+
+      // skip seed hits (unless we use mtv-like validation or are validating the seed tracks themselves)
+      if (!Config::mtvLikeValidation && !isSeed && isSeedHit(lyr, idx))
+        continue;
+
+      // make sure it is a real hit
+      if ((idx >= 0) && (static_cast<size_t>(idx) < layerHits[lyr].size())) {
+        // get mchitid and then get mcTrackID
+        const int mchitid = layerHits[lyr][idx].mcHitID();
+        mcTrackIDs.push_back(globalHitInfo[mchitid].mcTrackID());
+
+        dprintf("  ihit=%3d   trk.hit_idx=%4d  trk.hit_lyr=%2d   mchitid=%4d  mctrkid=%3d\n",
+                ihit,
+                idx,
+                lyr,
+                mchitid,
+                globalHitInfo[mchitid].mcTrackID());
+      } else {
+        dprintf("  ihit=%3d   trk.hit_idx=%4d  trk.hit_lyr=%2d\n", ihit, idx, lyr);
+      }
+    }
+
+    int mccount = 0;          // number of hits contributed by the most frequent mcTrackID
+    int mcTrackID = -1;       // most frequent mcTrackID, -1 while none has been found
+    if (!mcTrackIDs.empty())  // protection against tracks which do not make it past the seed
+    {
+      // sorting makes equal mcTrackIDs adjacent, so each run below can be counted in one pass
+      std::sort(mcTrackIDs.begin(), mcTrackIDs.end());
+
+      // don't count bad mcTrackIDs (id < 0)
+      mcTrackIDs.erase(std::remove_if(mcTrackIDs.begin(), mcTrackIDs.end(), [](const int id) { return id < 0; }),
+                       mcTrackIDs.end());
+
+      int n_ids = mcTrackIDs.size();
+      int i = 0;
+      while (i < n_ids) {  // [i, j) is one run of identical ids
+        int j = i + 1;
+        while (j < n_ids && mcTrackIDs[j] == mcTrackIDs[i])
+          ++j;
+
+        int n = j - i;
+        if (mcTrackIDs[i] >= 0 && n > mccount) {  // strict '>': on a tie the smaller id (seen first) is kept
+          mcTrackID = mcTrackIDs[i];
+          mccount = n;
+        }
+        i = j;
+      }
+
+      // total found hits in hit index array, excluding seed if necessary
+      const int nCandHits = ((Config::mtvLikeValidation || isSeed) ? trk.nFoundHits() : trk.nFoundHits() - nSeedHits);
+
+      // matching criterion: more than 75% of the hits (mtv-like validation), otherwise at least 50%
+      if ((Config::mtvLikeValidation ? (4 * mccount > 3 * nCandHits) : (2 * mccount >= nCandHits))) {
+        // for pure seeds, require that the most matched sim track is the seed's own (seedID_)
+        if (isPure) {
+          if (mcTrackID == seedID_)
+            mcTrackID_ = mcTrackID;
+          else
+            mcTrackID_ = -1;  // somehow, this guy followed another simtrack!
+        } else {
+          mcTrackID_ = mcTrackID;
+        }
+      } else  // failed the matching criterion
+      {
+        mcTrackID_ = -1;
+      }
+
+      // recount hits from seedID_ for pure seeds; NOTE(review): tests the local mcTrackID (-1 only if no valid id is left), not mcTrackID_ - confirm
+      if (isPure && mcTrackID == -1) {
+        mccount = 0;
+        for (auto id : mcTrackIDs) {
+          if (id == seedID_)
+            mccount++;
+        }
+      }
+
+      // store matched hit info
+      nHitsMatched_ = mccount;
+      fracHitsMatched_ = float(nHitsMatched_) / float(nCandHits);
+
+      // compute dPhi w.r.t. the most frequent sim track (even if it failed the criterion above); -99 if there is none
+      dPhi_ =
+          (mcTrackID >= 0 ? squashPhiGeneral(simtracks[mcTrackID].swimPhiToR(trk.x(), trk.y()) - trk.momPhi()) : -99.f);
+    } else {
+      mcTrackID_ = mcTrackID;  // no usable hits: stays at the default of -1
+      nHitsMatched_ = -99;
+      fracHitsMatched_ = -99.f;
+      dPhi_ = -99.f;
+    }
+
+    // Modify mcTrackID based on length of track (excluding seed tracks, of course) and findability
+    if (!isSeed) {
+      mcTrackID_ = modifyRefTrackID(trk.nFoundHits() - nSeedHits,
+                                    Config::nMinFoundHits - nSeedHits,
+                                    simtracks,
+                                    (isPure ? seedID_ : -1),
+                                    trk.getDuplicateValue(),
+                                    mcTrackID_);
+    }
+
+    dprint("Track " << trk.label() << " best mc track " << mcTrackID_ << " count " << mccount << "/"
+                    << trk.nFoundHits());
+  }
+
+  using idchi2Pair = std::pair<int, float>;  // (cmssw track label, chi2)
+  using idchi2PairVec = std::vector<idchi2Pair>;
+  // ordering predicate for std::sort: the candidate with the smaller chi2 comes first
+  inline bool sortIDsByChi2(const idchi2Pair& lhs, const idchi2Pair& rhs) { return lhs.second < rhs.second; }
+
+  // Translate a track pT into the bin index (0-5) used to look up the pT-dependent
+  // CMSSW matching cuts. The bin is the number of upper edges that pT has reached,
+  // so values below 0.75 give 0 and values of 10 or more give 5; NaN compares
+  // false against every edge and therefore also ends up in bin 5.
+  inline int getMatchBin(const float pt) {
+    constexpr float upperEdges[] = {0.75f, 1.f, 2.f, 5.f, 10.f};
+    int bin = 0;
+    for (const float edge : upperEdges) {
+      if (pt < edge)
+        break;
+      ++bin;
+    }
+    return bin;
+  }
+
+  // Match trk to a cmssw track by track parameters. Candidates are the cmssw tracks
+  // whose helix chi2 w.r.t. trk is below the pT-binned chi2 cut; among them the one
+  // with the smallest |dPhi| below the pT-binned dPhi cut is chosen. With isBkFit the
+  // cmssw momentum phi is used directly, otherwise its phi swum to the reco position.
+  // Fills cmsswTrackID_, helixChi2_, dPhi_, nHitsMatched_ and fracHitsMatched_.
+  void TrackExtra::setCMSSWTrackIDInfoByTrkParams(const Track& trk,
+                                                  const std::vector<HitVec>& layerHits,
+                                                  const TrackVec& cmsswtracks,
+                                                  const RedTrackVec& redcmsswtracks,
+                                                  const bool isBkFit) {
+    const SVector6& trkParams = trk.parameters();
+    const SMatrixSym66& trkErrs = trk.errors();
+
+    // get bin used for cuts in dphi, chi2 based on pt
+    const int bin = getMatchBin(trk.pT());
+
+    // temps needed for chi2: reduced state built from reco parameters 3 and 5 and their covariance
+    SVector2 trkParamsR;
+    trkParamsR[0] = trkParams[3];
+    trkParamsR[1] = trkParams[5];
+
+    SMatrixSym22 trkErrsR;
+    trkErrsR[0][0] = trkErrs[3][3];
+    trkErrsR[1][1] = trkErrs[5][5];
+    trkErrsR[0][1] = trkErrs[3][5];
+    trkErrsR[1][0] = trkErrs[5][3];
+
+    // cands is vector of possible cmssw tracks we could match, as (label, chi2) pairs
+    idchi2PairVec cands;
+
+    // first check for cmssw tracks we match by chi2
+    for (const auto& redcmsswtrack : redcmsswtracks) {
+      const float chi2 = std::abs(computeHelixChi2(redcmsswtrack.parameters(), trkParamsR, trkErrsR, false));
+      if (chi2 < Config::minCMSSWMatchChi2[bin])
+        cands.emplace_back(redcmsswtrack.label(), chi2);
+    }
+
+    // get min chi2 (stays at -1e6 when no cmssw track passed the chi2 cut)
+    float minchi2 = -1e6;
+    if (!cands.empty()) {
+      std::sort(cands.begin(), cands.end(), sortIDsByChi2);  // in case we just want to stop at the first dPhi match
+      minchi2 = cands.front().second;
+    }
+
+    // set up defaults: without a dPhi match the cut value and the minimum chi2 are what gets stored
+    int cmsswTrackID = -1;
+    int nHitsMatched = 0;
+    float bestdPhi = Config::minCMSSWMatchdPhi[bin];
+    float bestchi2 = minchi2;
+
+    // loop over possible cmssw tracks
+    for (auto&& cand : cands) {
+      const auto label = cand.first;
+      const auto& cmsswtrack = cmsswtracks[label];
+
+      // get diff in track mom. phi: swim phi of cmssw track to reco track R if forward built tracks
+      const float diffPhi =
+          squashPhiGeneral((isBkFit ? cmsswtrack.momPhi() : cmsswtrack.swimPhiToR(trk.x(), trk.y())) - trk.momPhi());
+
+      // check for best matched track by phi
+      if (std::abs(diffPhi) < std::abs(bestdPhi)) {
+        const HitLayerMap& hitLayerMap = redcmsswtracks[label].hitLayerMap();
+        int matched = 0;
+        // loop over hits on reco track
+        for (int ihit = 0; ihit < trk.nTotalHits(); ihit++) {
+          const int lyr = trk.getHitLyr(ihit);
+          const int idx = trk.getHitIdx(ihit);
+
+          // skip seed layers
+          if (isSeedHit(lyr, idx))
+            continue;
+
+          // skip if bad index or cmssw track does not have that layer
+          if (idx < 0 || !hitLayerMap.count(lyr))
+            continue;
+
+          // loop over hits in layer for the cmssw track
+          for (auto cidx : hitLayerMap.at(lyr)) {
+            // since we can only pick up on hit on a layer, break loop after finding hit
+            if (cidx == idx) {
+              matched++;
+              break;
+            }
+          }
+        }  // end loop over hits on reco track
+
+        bestdPhi = diffPhi;
+        nHitsMatched = matched;
+        cmsswTrackID = label;
+        bestchi2 = cand.second;
+      }  // end check over dPhi
+    }    // end loop over cands
+
+    // set cmsswTrackID
+    cmsswTrackID_ = cmsswTrackID;  // defaults to -1!
+    helixChi2_ = bestchi2;
+    dPhi_ = bestdPhi;
+
+    const int nSeedHits = nMatchedSeedHits();
+
+    // Modify cmsswTrackID based on length and findability
+    cmsswTrackID_ = modifyRefTrackID(trk.nFoundHits() - nSeedHits,
+                                     Config::nMinFoundHits - nSeedHits,
+                                     cmsswtracks,
+                                     -1,
+                                     trk.getDuplicateValue(),
+                                     cmsswTrackID_);
+
+    // other important info; a track with no hits beyond the seed would give 0/0 (NaN), so it gets the -99 sentinel
+    nHitsMatched_ = nHitsMatched;
+    const int nCandHits = trk.nFoundHits() - nSeedHits;  // seed hits may already be included!
+    fracHitsMatched_ = (nCandHits > 0 ? float(nHitsMatched_) / float(nCandHits) : -99.f);
+  }
+
+  void TrackExtra::setCMSSWTrackIDInfoByHits(const Track& trk,
+                                             const LayIdxIDVecMapMap& cmsswHitIDMap,
+                                             const TrackVec& cmsswtracks,
+                                             const TrackExtraVec& cmsswextras,
+                                             const RedTrackVec& redcmsswtracks,
+                                             const int cmsswlabel) {
+    // Match trk to a cmssw track by shared non-seed hits (50% criterion); cmsswlabel >= 0 means pure seeds, matched by cmsswlabel
+
+    // map of cmssw labels, and hits matched to that label
+    std::unordered_map<int, int> labelMatchMap;
+
+    // loop over mkfit track hits
+    for (int ihit = 0; ihit < trk.nTotalHits(); ihit++) {
+      const int lyr = trk.getHitLyr(ihit);
+      const int idx = trk.getHitIdx(ihit);
+
+      if (lyr < 0 || idx < 0)
+        continue;  // standard check
+      if (isSeedHit(lyr, idx))
+        continue;  // skip seed layers
+      if (!cmsswHitIDMap.count(lyr))
+        continue;  // make sure at least one cmssw track has this hit lyr!
+      if (!cmsswHitIDMap.at(lyr).count(idx))
+        continue;  // make sure at least one cmssw track has this hit id!
+      {
+        for (const auto label : cmsswHitIDMap.at(lyr).at(idx)) {
+          labelMatchMap[label]++;  // every cmssw track that owns this hit gets one more match
+        }
+      }
+    }
+
+    // make list of cmssw tracks that pass criteria --> could have multiple overlapping tracks!
+    std::vector<int> labelMatchVec;
+    for (const auto labelMatchPair : labelMatchMap) {
+      const auto cmsswlabel = labelMatchPair.first;  // NOTE: this local shadows the cmsswlabel parameter inside the loop
+      const auto nMatchedHits = labelMatchPair.second;
+
+      // 50% matching criterion: matched hits vs. the cmssw track's unique layers minus its matched seed hits
+      if ((2 * nMatchedHits) >= (cmsswtracks[cmsswlabel].nUniqueLayers() - cmsswextras[cmsswlabel].nMatchedSeedHits()))
+        labelMatchVec.push_back(cmsswlabel);
+    }
+
+    // initialize tmpID for later use
+    int cmsswTrackID = -1;
+
+    // protect against no matches!
+    if (!labelMatchVec.empty()) {
+      // sort by best matched: most hits matched , then ratio of matches (i.e. which cmssw track is shorter)
+      std::sort(labelMatchVec.begin(), labelMatchVec.end(), [&](const int label1, const int label2) {
+        if (labelMatchMap[label1] == labelMatchMap[label2]) {
+          const auto& track1 = cmsswtracks[label1];
+          const auto& track2 = cmsswtracks[label2];
+
+          const auto& extra1 = cmsswextras[label1];
+          const auto& extra2 = cmsswextras[label2];
+
+          return ((track1.nUniqueLayers() - extra1.nMatchedSeedHits()) <
+                  (track2.nUniqueLayers() - extra2.nMatchedSeedHits()));
+        }
+        return labelMatchMap[label1] > labelMatchMap[label2];
+      });
+
+      // pick the best-ranked track: most matched hits, ties going to the shorter cmssw track
+      cmsswTrackID = labelMatchVec.front();
+
+      // set cmsswTrackID_ (if cmsswlabel >= 0, we are matching by label and label exists!)
+      if (cmsswlabel >= 0) {
+        if (cmsswTrackID == cmsswlabel) {
+          cmsswTrackID_ = cmsswTrackID;
+        } else {
+          cmsswTrackID = cmsswlabel;  // use this for later
+          cmsswTrackID_ = -1;
+        }
+      } else  // not matching by pure id
+      {
+        cmsswTrackID_ = cmsswTrackID;  // the best-ranked track is matched
+      }
+
+      // set nHits matched to cmssw track
+      nHitsMatched_ = labelMatchMap[cmsswTrackID];
+    } else  // did not match a single cmssw track with 50% hits shared
+    {
+      // by default sets to -1
+      cmsswTrackID_ = cmsswTrackID;
+
+      // tmp variable
+      int nHitsMatched = 0;
+
+      // use truth info
+      if (cmsswlabel >= 0) {
+        cmsswTrackID = cmsswlabel;
+        nHitsMatched = labelMatchMap[cmsswTrackID];  // operator[] yields 0 if this label shares no hit
+      } else {
+        // just get the cmssw track with the most matches!
+        for (const auto labelMatchPair : labelMatchMap) {
+          if (labelMatchPair.second > nHitsMatched) {
+            cmsswTrackID = labelMatchPair.first;
+            nHitsMatched = labelMatchPair.second;
+          }
+        }
+      }
+
+      nHitsMatched_ = nHitsMatched;
+    }
+
+    // set chi2, dphi based on tmp cmsswTrackID
+    if (cmsswTrackID >= 0) {
+      // get tmps for chi2, dphi
+      const SVector6& trkParams = trk.parameters();
+      const SMatrixSym66& trkErrs = trk.errors();
+
+      // temps needed for chi2
+      SVector2 trkParamsR;
+      trkParamsR[0] = trkParams[3];
+      trkParamsR[1] = trkParams[5];
+
+      SMatrixSym22 trkErrsR;
+      trkErrsR[0][0] = trkErrs[3][3];
+      trkErrsR[1][1] = trkErrs[5][5];
+      trkErrsR[0][1] = trkErrs[3][5];
+      trkErrsR[1][0] = trkErrs[5][3];
+
+      // set chi2 and dphi
+      helixChi2_ = std::abs(computeHelixChi2(redcmsswtracks[cmsswTrackID].parameters(), trkParamsR, trkErrsR, false));
+      dPhi_ = squashPhiGeneral(cmsswtracks[cmsswTrackID].swimPhiToR(trk.x(), trk.y()) - trk.momPhi());
+    } else {
+      helixChi2_ = -99.f;
+      dPhi_ = -99.f;
+    }
+
+    // get nSeedHits
+    const int nSeedHits = nMatchedSeedHits();
+
+    // Modify cmsswTrackID based on length and findability
+    cmsswTrackID_ = modifyRefTrackID(trk.nFoundHits() - nSeedHits,
+                                     Config::nMinFoundHits - nSeedHits,
+                                     cmsswtracks,
+                                     cmsswlabel,
+                                     trk.getDuplicateValue(),
+                                     cmsswTrackID_);
+
+    // matched fraction w.r.t. the cmssw track's non-seed layers; NOTE(review): denominator is not checked for zero - confirm
+    fracHitsMatched_ = (cmsswTrackID >= 0 ? (float(nHitsMatched_) / float(cmsswtracks[cmsswTrackID].nUniqueLayers() -
+                                                                          cmsswextras[cmsswTrackID].nMatchedSeedHits()))
+                                          : 0.f);
+  }
+
+}  // namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/TrackExtra.h b/RecoTracker/MkFitCore/standalone/TrackExtra.h
new file mode 100644
index 0000000000000..cbf6eebb0a51a
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/TrackExtra.h
@@ -0,0 +1,131 @@
+#ifndef RecoTracker_MkFitCore_standalone_TrackExtra_h
+#define RecoTracker_MkFitCore_standalone_TrackExtra_h
+
+#include "RecoTracker/MkFitCore/interface/Track.h"
+
+#include <array>
+#include <map>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace mkfit {
+  //==============================================================================
+  // ReducedTrack
+  //==============================================================================
+
+  struct ReducedTrack  // used for cmssw reco track validation
+  {
+  public:
+    ReducedTrack() {}  // NOTE: label_, seedID_ and phi_ are left uninitialized here
+    ReducedTrack(const int label, const int seedID, const SVector2& params, const float phi, const HitLayerMap& hitmap)
+        : label_(label), seedID_(seedID), parameters_(params), phi_(phi), hitLayerMap_(hitmap) {}
+    // Read-only accessors; momPhi() returns the phi value given to the constructor.
+    int label() const { return label_; }
+    int seedID() const { return seedID_; }
+    const SVector2& parameters() const { return parameters_; }
+    float momPhi() const { return phi_; }
+    const HitLayerMap& hitLayerMap() const { return hitLayerMap_; }
+    // The data members are public as well, so they can also be accessed directly.
+    int label_;
+    int seedID_;
+    SVector2 parameters_;
+    float phi_;
+    HitLayerMap hitLayerMap_;
+  };
+  typedef std::vector<ReducedTrack> RedTrackVec;
+
+  typedef std::map<int, std::map<int, std::vector<int>>> LayIdxIDVecMapMap;
+  typedef std::map<int, std::unordered_set<int>> TrkIDLaySetMap;
+  typedef std::array<int, 2> PairIdx;
+  typedef std::vector<PairIdx> PairIdxVec;
+  typedef std::array<int, 3> TripletIdx;
+  typedef std::vector<TripletIdx> TripletIdxVec;
+
+  //==============================================================================
+  // TrackExtra
+  //==============================================================================
+
+  class TrackExtra;
+  typedef std::vector<TrackExtra> TrackExtraVec;
+
+  class TrackExtra {
+  public:
+    TrackExtra() : seedID_(std::numeric_limits<int>::max()) {}
+    TrackExtra(int seedID) : seedID_(seedID) {}
+    // NOTE: both constructors set only seedID_; the other scalar members have no initializer.
+    int modifyRefTrackID(const int foundHits,
+                         const int minHits,
+                         const TrackVec& reftracks,
+                         const int trueID,
+                         const int duplicate,
+                         int refTrackID);
+    void setMCTrackIDInfo(const Track& trk,
+                          const std::vector<HitVec>& layerHits,
+                          const MCHitInfoVec& globalHitInfo,
+                          const TrackVec& simtracks,
+                          const bool isSeed,
+                          const bool isPure);
+    void setCMSSWTrackIDInfoByTrkParams(const Track& trk,
+                                        const std::vector<HitVec>& layerHits,
+                                        const TrackVec& cmsswtracks,
+                                        const RedTrackVec& redcmsswtracks,
+                                        const bool isBkFit);
+    void setCMSSWTrackIDInfoByHits(const Track& trk,
+                                   const LayIdxIDVecMapMap& cmsswHitIDMap,
+                                   const TrackVec& cmsswtracks,
+                                   const TrackExtraVec& cmsswextras,
+                                   const RedTrackVec& redcmsswtracks,
+                                   const int cmsswlabel);
+    int mcTrackID() const { return mcTrackID_; }
+    int nHitsMatched() const { return nHitsMatched_; }
+    float fracHitsMatched() const { return fracHitsMatched_; }
+    int seedID() const { return seedID_; }
+    bool isDuplicate() const { return isDuplicate_; }
+    int duplicateID() const { return duplicateID_; }
+    void setDuplicateInfo(int duplicateID, bool isDuplicate) {
+      duplicateID_ = duplicateID;
+      isDuplicate_ = isDuplicate;
+    }
+    int cmsswTrackID() const { return cmsswTrackID_; }
+    float helixChi2() const { return helixChi2_; }
+    float dPhi() const { return dPhi_; }
+    void findMatchingSeedHits(const Track& reco_trk, const Track& seed_trk, const std::vector<HitVec>& layerHits);
+    bool isSeedHit(const int lyr, const int idx) const;
+    int nMatchedSeedHits() const { return matchedSeedHits_.size(); }
+
+    void setmcTrackID(int mcTrackID) { mcTrackID_ = mcTrackID; }
+    void setseedID(int seedID) { seedID_ = seedID; }
+    // Seed algorithm list; seedAlgos() hands out a reference so callers do not pay for a vector copy.
+    void addAlgo(int algo) { seedAlgos_.push_back(algo); }
+    const std::vector<int>& seedAlgos() const { return seedAlgos_; }
+
+  private:
+    friend class Track;
+
+    int mcTrackID_;
+    int nHitsMatched_;
+    float fracHitsMatched_;
+    int seedID_;
+    int duplicateID_;
+    bool isDuplicate_;
+    int cmsswTrackID_;
+    float helixChi2_;  // set to -99.f by setCMSSWTrackIDInfoByHits() when no reference track matched
+    float dPhi_;       // same -99.f convention as helixChi2_
+    HoTVec matchedSeedHits_;
+    std::vector<int> seedAlgos_;
+  };
+
+  typedef std::vector<TrackState> TSVec;
+  typedef std::vector<TSVec> TkIDToTSVecVec;
+  typedef std::vector<std::pair<int, TrackState>> TSLayerPairVec;
+  typedef std::vector<std::pair<int, float>> FltLayerPairVec;  // used exclusively for debugtree
+
+  // Map typedefs needed for mapping different sets of tracks to another
+  typedef std::unordered_map<int, int> TkIDToTkIDMap;
+  typedef std::unordered_map<int, std::vector<int>> TkIDToTkIDVecMap;
+  typedef std::unordered_map<int, TrackState> TkIDToTSMap;
+  typedef std::unordered_map<int, TSLayerPairVec> TkIDToTSLayerPairVecMap;
+
+}  // namespace mkfit
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/Validation.cc b/RecoTracker/MkFitCore/standalone/Validation.cc
new file mode 100644
index 0000000000000..2751dc522f3a3
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Validation.cc
@@ -0,0 +1,16 @@
+#include "TTreeValidation.h"
+
+namespace mkfit {
+
+  Validation* Validation::make_validation(const std::string& fileName, const TrackerInfo* trk_info) {
+#ifndef NO_ROOT
+    const bool wantTrees = Config::sim_val_for_cmssw || Config::sim_val || Config::fit_val || Config::cmssw_val;
+    if (wantTrees)  // any validation mode switched on selects the TTree-based implementation
+      return new TTreeValidation(fileName, trk_info);
+#endif
+    return new Validation();  // otherwise (or without ROOT) fall back to the plain base object
+  }
+
+  Validation::Validation() {}
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/Validation.h b/RecoTracker/MkFitCore/standalone/Validation.h
new file mode 100644
index 0000000000000..271fc68a6b5c9
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/Validation.h
@@ -0,0 +1,82 @@
+#ifndef RecoTracker_MkFitCore_standalone_Validation_h
+#define RecoTracker_MkFitCore_standalone_Validation_h
+
+#include "RecoTracker/MkFitCore/interface/Track.h"
+#include "RecoTracker/MkFitCore/standalone/ConfigStandalone.h"
+#include "RecoTracker/MkFitCore/standalone/TrackExtra.h"
+
+namespace mkfit {
+
+  class Event;
+
+  // Fit Validation objects -- mplex only
+  struct FitVal {
+  public:
+    FitVal() {}  // NOTE: leaves every member uninitialized
+    FitVal(float ppz,
+           float eppz,
+           float ppphi,
+           float eppphi,
+           float upt,
+           float eupt,
+           float umphi,
+           float eumphi,
+           float umeta,
+           float eumeta)
+        : ppz(ppz),
+          eppz(eppz),
+          ppphi(ppphi),
+          eppphi(eppphi),
+          upt(upt),
+          eupt(eupt),
+          umphi(umphi),
+          eumphi(eumphi),
+          umeta(umeta),
+          eumeta(eumeta) {}
+
+    // Member naming: an optional leading 'e' = error (already sqrt),
+    // followed by 'p' or 'u' = propagated or updated,
+    // followed by 'p' or 'm'/nothing = position or momentum, then the quantity itself.
+    float ppz, eppz, ppphi, eppphi;
+    float upt, eupt, umphi, eumphi, umeta, eumeta;
+  };
+
+  class Validation {
+  public:
+    virtual ~Validation() {}
+    // All hooks below have empty default bodies, so this base class by itself does nothing.
+    virtual void alignTracks(TrackVec&, TrackExtraVec&, bool) {}
+
+    virtual void resetValidationMaps() {}
+    virtual void resetDebugVectors() {}
+
+    virtual void collectFitInfo(const FitVal&, int, int) {}
+
+    virtual void setTrackExtras(Event& ev) {}
+    virtual void makeSimTkToRecoTksMaps(Event&) {}
+    virtual void makeSeedTkToRecoTkMaps(Event&) {}
+    virtual void makeRecoTkToRecoTkMaps(Event&) {}
+    virtual void makeCMSSWTkToRecoTksMaps(Event&) {}
+    virtual void makeSeedTkToCMSSWTkMap(Event&) {}
+    virtual void makeCMSSWTkToSeedTkMap(Event&) {}
+    virtual void makeRecoTkToSeedTkMapsDumbCMSSW(Event&) {}
+
+    virtual void setTrackScoresDumbCMSSW(Event&) {}
+
+    virtual void fillEfficiencyTree(const Event&) {}
+    virtual void fillFakeRateTree(const Event&) {}
+    virtual void fillConfigTree() {}
+    virtual void fillCMSSWEfficiencyTree(const Event&) {}
+    virtual void fillCMSSWFakeRateTree(const Event&) {}
+    virtual void fillFitTree(const Event&) {}
+
+    virtual void saveTTrees() {}
+    // Factory (defined in Validation.cc): returns an object created with new; presumably the caller owns it.
+    static Validation* make_validation(const std::string&, const TrackerInfo*);
+
+  protected:
+    Validation();
+  };
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/attic/BestCands.h b/RecoTracker/MkFitCore/standalone/attic/BestCands.h
new file mode 100644
index 0000000000000..8908845ace8c3
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/attic/BestCands.h
@@ -0,0 +1,233 @@
+#ifndef RecoTracker_MkFitCore_standalone_attic_BestCands_h
+#define RecoTracker_MkFitCore_standalone_attic_BestCands_h
+
+#include "Config.h"
+
+#include <cstdio>
+#include <limits>
+
+namespace CandsGPU {
+
+  constexpr int trkIdx_sentinel = -1;
+  constexpr int hitIdx_sentinel = -1;
+  constexpr int nhits_sentinel = -1;
+  constexpr float chi2_sentinel = std::numeric_limits<float>::max();
+
+  template <typename T>
+  __device__ void swap_values(T& a, T& b) {
+    T saved_a(a);  // hold on to the old value of a while a is overwritten
+    a = b;
+    b = saved_a;
+  }
+
+  template <int MaxCandsPerSeed, int BlockSize>
+  struct BestCands {
+    // AoS would generate bank conflicts when used in SM
+    int trkIdx[MaxCandsPerSeed][BlockSize];
+    int hitIdx[MaxCandsPerSeed][BlockSize];
+    int nhits[MaxCandsPerSeed][BlockSize];
+    float chi2[MaxCandsPerSeed][BlockSize];
+    // All four arrays are indexed as [heap slot][track]; slot 0 is the heap root.
+    __device__ void reset(int itrack);
+    __device__ void update(int itrack, int cand_trIdx, int cand_hitIdx, int cand_nhits, float cand_chi2);
+    __device__ void heapify(int itrack, int idx, int heap_size);
+    __device__ int left(int idx);
+    __device__ int right(int idx);
+    // better(): more hits wins, equal hits are decided by the smaller chi2.
+    __device__ bool better(int icand_fst, int fst, int icand_snd, int snd);
+
+    __device__ void heap_sort(int icand, int heap_size);
+    __device__ void merge_cands_for_seed(int iseed, int icand);
+    __device__ void swap_nodes(int icand_fst, int fst, int icand_snd, int snd);
+    __device__ void copy_node(int icand_fst, int fst, int icand_snd, int snd);
+
+    __device__ int count_valid_cands(int itrack);
+
+    // TODO: Should really return a IdxChi2List
+    __device__ void get_cand_info(
+        const int tid, const int cid, int& my_trkIdx, int& my_hitIdx, int& my_nhits, float& my_chi2);
+    __device__ int get_nhits(const int tid, const int cid) { return nhits[cid][tid]; }
+
+    __device__ void print_heap(const int tid);
+  };
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::reset(int itrack) {
+    for (int slot = 0; slot < M; ++slot) {  // every heap slot of this track becomes empty
+      chi2[slot][itrack] = chi2_sentinel;
+      nhits[slot][itrack] = nhits_sentinel;
+      hitIdx[slot][itrack] = hitIdx_sentinel;
+      trkIdx[slot][itrack] = trkIdx_sentinel;
+    }
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::update(int itrack, int cand_trIdx, int cand_hitIdx, int cand_nhits, float cand_chi2) {
+    if (cand_nhits < nhits[0][itrack])  // fewer hits than the current root entry: ignore
+      return;
+    if (cand_chi2 > chi2[0][itrack])  // NOTE(review): applied even when cand_nhits is larger, unlike better()
+      return;
+    trkIdx[0][itrack] = cand_trIdx;
+    hitIdx[0][itrack] = cand_hitIdx;
+    nhits[0][itrack] = cand_nhits;
+    chi2[0][itrack] = cand_chi2;
+    // The root entry was overwritten above; sift it down to restore the heap order.
+    heapify(itrack, 0, M);
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::print_heap(const int tid) {
+    for (int cid = 0; cid < M; cid++) {  // one line per heap slot; trkIdx is not printed
+      printf(">>>>> tid %d rowIdx %d hitIdx %d nhits %d chi2 %f\n",
+             tid,
+             cid,
+             hitIdx[cid][tid],
+             nhits[cid][tid],
+             chi2[cid][tid]);
+    }
+  }
+
+  template <int M, int B>
+  __device__ int BestCands<M, B>::left(int idx) {
+    return 2 * idx + 1;  // left child in the array-backed binary heap
+  }
+
+  template <int M, int B>
+  __device__ int BestCands<M, B>::right(int idx) {
+    return 2 * idx + 2;  // right child in the array-backed binary heap
+  }
+
+  template <int M, int B>
+  __device__ bool BestCands<M, B>::better(int icand_fst, int fst, int icand_snd, int snd) {
+    const int n_fst = nhits[fst][icand_fst], n_snd = nhits[snd][icand_snd];
+    return n_fst != n_snd ? n_fst > n_snd : chi2[fst][icand_fst] < chi2[snd][icand_snd];  // ties: lower chi2 wins
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::swap_nodes(int icand_fst, int fst, int icand_snd, int snd) {
+    swap_values(trkIdx[fst][icand_fst], trkIdx[snd][icand_snd]);  // swaps every field of the two entries
+    swap_values(hitIdx[fst][icand_fst], hitIdx[snd][icand_snd]);
+    swap_values(nhits[fst][icand_fst], nhits[snd][icand_snd]);
+    swap_values(chi2[fst][icand_fst], chi2[snd][icand_snd]);
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::copy_node(int icand_fst, int fst, int icand_snd, int snd) {
+    trkIdx[snd][icand_snd] = trkIdx[fst][icand_fst];  // direction: (fst, icand_fst) is copied into (snd, icand_snd)
+    hitIdx[snd][icand_snd] = hitIdx[fst][icand_fst];
+    nhits[snd][icand_snd] = nhits[fst][icand_fst];
+    chi2[snd][icand_snd] = chi2[fst][icand_fst];
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::heapify(int icand, int idx, int heap_size) {
+    // Sift the node at idx down so that the worst candidate (as ranked by better()) ends up on top.
+    int smallest = -1;
+    while (idx != smallest) {
+      if (idx < 0 || idx >= heap_size / 2)
+        return;
+      // Despite its name, 'smallest' ends up as the worst of idx and its children.
+      smallest = idx;
+      if (heap_size > left(idx) && better(icand, smallest, icand, left(idx)))
+        smallest = left(idx);
+      if (heap_size > right(idx) && better(icand, smallest, icand, right(idx)))
+        smallest = right(idx);
+
+      if (smallest != idx) {
+        swap_nodes(icand, smallest, icand, idx);
+        idx = smallest;
+        smallest = -1;
+      }
+    }
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::merge_cands_for_seed(int iseed, int icand) {
+    int itrack = iseed * M + icand;  // second array index used for this (seed, candidate) pair
+// TODO: Need a better way to reduce candidates.
+//       So far, binary tree reduction is a bit slower than the naive approach
+#if 1
+    if (icand) {
+      heap_sort(itrack, M);
+    }
+    __syncthreads();  // cand 0 waits;
+    if (icand)
+      return;  // reduction by the first cand of each seed
+    // Candidate 0 folds the sorted entries of the seed's other candidates into its own heap.
+    for (int i = itrack + 1; i < itrack + M; ++i) {  // over cands
+      for (int j = 0; j < M; ++j) {                  // inside heap
+        if (better(i, j, itrack, 0)) {
+          copy_node(i, j, itrack, 0);
+          heapify(itrack, 0, M);
+        } else {
+          break;
+        }
+      }
+    }
+    heap_sort(itrack, M);
+    __syncthreads();  // TODO: Volta: sync only on MaxCandsPerSeeds threads
+#else
+    // Disabled alternative: pairwise reduction with a doubling step size.
+    for (int step = 2; step <= Config::maxCandsPerSeed; step <<= 1) {
+      if (icand % step == step / 2) {
+        heap_sort(itrack, M);
+      }
+      __syncthreads();
+
+      if (icand % step == 0) {
+        int i = itrack + step / 2;
+        if ((i < iseed * M + M) && (i < B) && (icand + step / 2 < M)) {
+          for (int j = 0; j < M; ++j) {  // inside heap
+            if (better(i, j, itrack, 0)) {
+              copy_node(i, j, itrack, 0);
+              heapify(itrack, 0, M);
+            } else {
+              break;
+            }
+          }
+        }
+      }
+      //__syncthreads();
+    }
+
+    if (icand == 0) {
+      heap_sort(itrack, M);
+    }
+    __syncthreads();
+#endif
+  }
+
+  template <int M, int B>
+  __device__ void BestCands<M, B>::heap_sort(int icand, int heap_size) {
+    // Expects a heap whose root holds the worst entry. Each pass parks the root
+    // behind the shrinking heap and then repairs what is left of the heap.
+    for (int last = heap_size - 1; last > 0; --last) {
+      swap_nodes(icand, 0, icand, last);
+      heapify(icand, 0, last);  // the heap now spans slots [0, last)
+    }
+  }
+
+  template <int MaxCandsPerSeed, int BlockSize>
+  __device__ void BestCands<MaxCandsPerSeed, BlockSize>::get_cand_info(
+      const int tid, const int cid, int& my_trkIdx, int& my_hitIdx, int& my_nhits, float& my_chi2) {
+    if (cid < MaxCandsPerSeed && tid < BlockSize) {  // outputs stay untouched otherwise; negative indices unchecked
+      my_trkIdx = trkIdx[cid][tid];
+      my_hitIdx = hitIdx[cid][tid];
+      my_nhits = nhits[cid][tid];
+      my_chi2 = chi2[cid][tid];
+    }
+  }
+
+  template <int M, int B>
+  __device__ int BestCands<M, B>::count_valid_cands(int itrack) {
+    int n_valid = 0;
+    for (int slot = 0; slot < M; ++slot) {
+      // a slot counts as used once its track index differs from the sentinel
+      n_valid += (trkIdx[slot][itrack] != trkIdx_sentinel) ? 1 : 0;
+    }
+    return n_valid;
+  }
+
+}  // namespace CandsGPU
+
+#endif  // RecoTracker_MkFitCore_standalone_attic_BestCands_h
diff --git a/RecoTracker/MkFitCore/standalone/attic/align_alloc.h b/RecoTracker/MkFitCore/standalone/attic/align_alloc.h
new file mode 100644
index 0000000000000..c947aeba0fb40
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/attic/align_alloc.h
@@ -0,0 +1,116 @@
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <new>
+#include <stdexcept>
+
+/**
+ * Allocator for aligned data.
+ *
+ * Modified from the Mallocator from Stephan T. Lavavej.
+ * <http://blogs.msdn.com/b/vcblog/archive/2008/08/28/the-mallocator.aspx>
+ */
+template <typename T, std::size_t Alignment>
+class aligned_allocator {
+  // std::aligned_alloc only accepts valid alignments, which are powers of two.
+  static_assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0, "Alignment must be a power of two");
+
+public:
+  // The following will be the same for virtually all allocators.
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T value_type;
+  typedef std::size_t size_type;
+  typedef std::ptrdiff_t difference_type;
+
+  T* address(T& r) const { return &r; }
+
+  const T* address(const T& s) const { return &s; }
+
+  std::size_t max_size() const {
+    // Largest element count whose byte size can still be rounded up to a
+    // multiple of Alignment (see allocate()) without wrapping std::size_t.
+    return (static_cast<std::size_t>(0) - Alignment) / sizeof(T);
+  }
+
+  // The following must be the same for all allocators.
+  template <typename U>
+  struct rebind {
+    typedef aligned_allocator<U, Alignment> other;
+  };
+
+  bool operator!=(const aligned_allocator& other) const { return !(*this == other); }
+
+  void construct(T* const p, const T& t) const {
+    void* const pv = static_cast<void*>(p);
+
+    new (pv) T(t);
+  }
+
+  void construct(T* const p) { return construct(p, value_type()); }
+
+  void destroy(T* const p) const { p->~T(); }
+
+  // Returns true if and only if storage allocated from *this
+  // can be deallocated from other, and vice versa.
+  // Always returns true for stateless allocators.
+  bool operator==(const aligned_allocator& other) const { return true; }
+
+  // Default constructor, copy constructor, rebinding constructor, and destructor.
+  // Empty for stateless allocators.
+  aligned_allocator() {}
+
+  aligned_allocator(const aligned_allocator&) {}
+
+  template <typename U>
+  aligned_allocator(const aligned_allocator<U, Alignment>&) {}
+
+  ~aligned_allocator() {}
+
+  // The following will be different for each allocator.
+  // Returns storage for n objects of type T aligned to Alignment bytes, or a
+  // null pointer when n == 0. Throws std::length_error if n exceeds max_size()
+  // and std::bad_alloc if the allocation fails.
+  T* allocate(const std::size_t n) const {
+    // The return value of allocate(0) is unspecified; return null instead of
+    // depending on the implementation-defined result of a zero-size request.
+    if (n == 0) {
+      return nullptr;
+    }
+
+    // All allocators should contain an integer overflow check.
+    // The Standardization Committee recommends that std::length_error
+    // be thrown in the case of integer overflow.
+    if (n > max_size()) {
+      throw std::length_error("aligned_allocator<T>::allocate() - Integer overflow.");
+    }
+
+    // std::aligned_alloc requires the size to be an integral multiple of the
+    // alignment, so round the request up; max_size() keeps this from wrapping.
+    const std::size_t bytes = (n * sizeof(T) + Alignment - 1) / Alignment * Alignment;
+    void* const pv = std::aligned_alloc(Alignment, bytes);
+
+    // Allocators should throw std::bad_alloc in the case of memory allocation failure.
+    if (pv == nullptr) {
+      throw std::bad_alloc();
+    }
+
+    return static_cast<T*>(pv);
+  }
+
+  // Releases storage obtained from allocate(); the element count is not needed.
+  void deallocate(T* const p, const std::size_t /* n */) const { std::free(p); }
+
+  // The following will be the same for all allocators that ignore hints.
+  template <typename U>
+  T* allocate(const std::size_t n, const U* /* const hint */) const {
+    return allocate(n);
+  }
+
+  // Allocators are not required to be assignable, so the assignment operator
+  // is declared private and left unimplemented.
+private:
+  aligned_allocator& operator=(const aligned_allocator&);
+};
diff --git a/RecoTracker/MkFitCore/standalone/attic/fittestMPlex.cc b/RecoTracker/MkFitCore/standalone/attic/fittestMPlex.cc
new file mode 100644
index 0000000000000..a3dd9d93444e2
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/attic/fittestMPlex.cc
@@ -0,0 +1,105 @@
+#include "Matrix.h"
+//#define DEBUG
+#include <Debug.h>
+
+#include "MkFitter.h"
+
+#ifndef NO_ROOT
+#include "TFile.h"
+#include "TTree.h"
+#include <mutex>
+#endif
+
+#include "oneapi/tbb/parallel_for.h"
+
+#include <iostream>
+#include <memory>
+
+#if defined(USE_VTUNE_PAUSE)
+#include "ittnotify.h"
+#endif
+
+//==============================================================================
+// runFittingTestPlex
+//==============================================================================
+
+#include "Pool.h"
+namespace {
+  struct ExecutionContext {
+    mkfit::Pool<mkfit::MkFitter> m_fitters;
+
+    void populate(int n_thr) { m_fitters.populate(n_thr - m_fitters.size()); }
+  };
+
+  ExecutionContext g_exe_ctx;
+  auto retfitr = [](mkfit::MkFitter* mkfp) { g_exe_ctx.m_fitters.ReturnToPool(mkfp); };
+}  // namespace
+
+namespace mkfit {
+
+  double runFittingTestPlex(Event& ev, std::vector<Track>& rectracks) {
+    g_exe_ctx.populate(Config::numThreadsFinder);
+    std::vector<Track>& simtracks = ev.simTracks_;
+
+    const int Nhits = Config::nLayers;
+    // XXX What if there's a missing / double layer?
+    // Eventually, should sort track vector by number of hits!
+    // And pass the number in on each "setup" call.
+    // Reserves should be made for maximum possible number (but this is just
+    // measurement errors, params).
+
+    int theEnd = simtracks.size();
+    int count = (theEnd + NN - 1) / NN;
+
+#ifdef USE_VTUNE_PAUSE
+    __SSC_MARK(0x111);  // use this to resume Intel SDE at the same point
+    __itt_resume();
+#endif
+
+    double time = dtime();
+
+    tbb::parallel_for(tbb::blocked_range<int>(0, count, std::max(1, Config::numSeedsPerTask / NN)),
+                      [&](const tbb::blocked_range<int>& i) {
+                        std::unique_ptr<MkFitter, decltype(retfitr)> mkfp(g_exe_ctx.m_fitters.GetFromPool(), retfitr);
+                        mkfp->setNhits(Nhits);
+                        for (int it = i.begin(); it < i.end(); ++it) {
+                          int itrack = it * NN;
+                          int end = itrack + NN;
+                          /*
+         * MT, trying to slurp and fit at the same time ...
+	  if (theEnd < end) {
+	    end = theEnd;
+	    mkfp->inputTracksAndHits(simtracks, ev.layerHits_, itrack, end);
+	  } else {
+	    mkfp->slurpInTracksAndHits(simtracks, ev.layerHits_, itrack, end); // only safe for a full matriplex
+	  }
+	  
+	  if (Config::cf_fitting) mkfp->ConformalFitTracks(true, itrack, end);
+	  mkfp->FitTracks(end - itrack, &ev, true);
+        */
+
+                          mkfp->inputTracksForFit(simtracks, itrack, end);
+
+                          // XXXX MT - for this need 3 points in ... right
+                          // XXXX if (Config::cf_fitting) mkfp->ConformalFitTracks(true, itrack, end);
+
+                          mkfp->fitTracksWithInterSlurp(ev.layerHits_, end - itrack);
+
+                          mkfp->outputFittedTracks(rectracks, itrack, end);
+                        }
+                      });
+
+    time = dtime() - time;
+
+#ifdef USE_VTUNE_PAUSE
+    __itt_pause();
+    __SSC_MARK(0x222);  // use this to pause Intel SDE at the same point
+#endif
+
+    if (Config::fit_val)
+      ev.validate();
+
+    return time;
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/attic/fittestMPlex.h b/RecoTracker/MkFitCore/standalone/attic/fittestMPlex.h
new file mode 100644
index 0000000000000..429b370dd8edb
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/attic/fittestMPlex.h
@@ -0,0 +1,12 @@
+#ifndef RecoTracker_MkFitCore_standalone_attic_fittestMPlex_h
+#define RecoTracker_MkFitCore_standalone_attic_fittestMPlex_h
+
+#include "Event.h"
+#include "Track.h"
+
+namespace mkfit {
+
+  double runFittingTestPlex(Event& ev, std::vector<Track>& rectracks);
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.cc b/RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.cc
new file mode 100644
index 0000000000000..7ed366b87e074
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.cc
@@ -0,0 +1,151 @@
+#include "seedtestMPlex.h"
+#include "oneapi/tbb/parallel_for.h"
+
+// #define DEBUG
+#include "Debug.h"
+
+namespace mkfit {
+
+  inline void intersectThirdLayer(
+      const float a, const float b, const float hit1_x, const float hit1_y, float& lay2_x, float& lay2_y) {
+    const float a2 = a * a;
+    const float b2 = b * b;
+    const float a2b2 = a2 + b2;
+    const float lay2rad2 = (Config::fRadialSpacing * Config::fRadialSpacing) * 9.0f;  // average third radius squared
+    const float maxCurvR2 = Config::maxCurvR * Config::maxCurvR;
+    // NOTE(review): the argument of std::sqrt is not checked; a negative value gives NaN.
+    const float quad =
+        std::sqrt(2.0f * maxCurvR2 * (a2b2 + lay2rad2) - (a2b2 - lay2rad2) * (a2b2 - lay2rad2) - maxCurvR2 * maxCurvR2);
+    const float pos[2] = {(a2 * a + a * (b2 + lay2rad2 - maxCurvR2) - b * quad) / a2b2,
+                          (b2 * b + b * (a2 + lay2rad2 - maxCurvR2) + a * quad) / a2b2};
+    const float neg[2] = {(a2 * a + a * (b2 + lay2rad2 - maxCurvR2) + b * quad) / a2b2,
+                          (b2 * b + b * (a2 + lay2rad2 - maxCurvR2) - a * quad) / a2b2};
+
+    // two candidate points were computed: keep the one closer to the hit passed in as (hit1_x, hit1_y)
+    if (getHypot(pos[0] - hit1_x, pos[1] - hit1_y) < getHypot(neg[0] - hit1_x, neg[1] - hit1_y)) {
+      lay2_x = pos[0];
+      lay2_y = pos[1];
+    } else {
+      lay2_x = neg[0];
+      lay2_y = neg[1];
+    }
+  }
+
+  void findSeedsByRoadSearch(TripletIdxConVec& seed_idcs,
+                             std::vector<LayerOfHits>& evt_lay_hits,
+                             int lay1_size,
+                             Event*& ev) {
+#ifdef DEBUG
+    bool debug(false);
+#endif
+
+    // MIMI hack: Config::nlayers_per_seed = 4
+    // const float seed_z2cut = (Config::nlayers_per_seed * Config::fRadialSpacing) / std::tan(2.0f*std::atan(std::exp(-1.0f*Config::dEtaSeedTrip)));
+#ifdef DEBUG
+    const float seed_z2cut =
+        (4 * Config::fRadialSpacing) / std::tan(2.0f * std::atan(std::exp(-1.0f * Config::dEtaSeedTrip)));
+#endif
+
+    // 0 = first layer, 1 = second layer, 2 = third layer
+    const LayerOfHits& lay1_hits = evt_lay_hits[1];
+    LayerOfHits& lay0_hits = evt_lay_hits[0];
+    LayerOfHits& lay2_hits = evt_lay_hits[2];
+
+    tbb::parallel_for(
+        tbb::blocked_range<int>(0, lay1_size, std::max(1, Config::numHitsPerTask)),
+        [&](const tbb::blocked_range<int>& i) {
+          TripletIdxVec temp_thr_seed_idcs;
+          for (int ihit1 = i.begin(); ihit1 < i.end(); ++ihit1) {
+            const Hit& hit1 = lay1_hits.refHit(ihit1);
+            const float hit1_z = hit1.z();
+
+            dprint("ihit1: " << ihit1 << " mcTrackID: " << hit1.mcTrackID(ev->simHitsInfo_) << " phi: " << hit1.phi()
+                             << " z: " << hit1.z());
+            dprint(" predphi: " << hit1.phi() << "+/-" << Config::lay01angdiff << " predz: " << hit1.z() / 2.0f << "+/-"
+                                << Config::seed_z0cut / 2.0f << std::endl);
+
+            std::vector<int> cand_hit0_indices;  // pass by reference
+            // MIMI lay0_hits.selectHitIndices(hit1_z/2.0f,hit1.phi(),Config::seed_z0cut/2.0f,Config::lay01angdiff,cand_hit0_indices,true,false);
+            // loop over first layer hits
+            for (auto&& ihit0 : cand_hit0_indices) {
+              const Hit& hit0 = lay0_hits.refHit(ihit0);
+              const float hit0_z = hit0.z();
+              const float hit0_x = hit0.x();
+              const float hit0_y = hit0.y();
+              const float hit1_x = hit1.x();
+              const float hit1_y = hit1.y();
+              const float hit01_r2 = getRad2(hit0_x - hit1_x, hit0_y - hit1_y);
+
+              const float quad = std::sqrt((4.0f * Config::maxCurvR * Config::maxCurvR - hit01_r2) / hit01_r2);
+
+              // center of negative curved track
+              const float aneg = 0.5f * ((hit0_x + hit1_x) - (hit0_y - hit1_y) * quad);
+              const float bneg = 0.5f * ((hit0_y + hit1_y) + (hit0_x - hit1_x) * quad);
+
+              // negative points of intersection with third layer
+              float lay2_negx = 0.0f, lay2_negy = 0.0f;
+              intersectThirdLayer(aneg, bneg, hit1_x, hit1_y, lay2_negx, lay2_negy);
+#ifdef DEBUG
+              const float lay2_negphi = getPhi(lay2_negx, lay2_negy);
+#endif
+
+              // center of positive curved track
+              const float apos = 0.5f * ((hit0_x + hit1_x) + (hit0_y - hit1_y) * quad);
+              const float bpos = 0.5f * ((hit0_y + hit1_y) - (hit0_x - hit1_x) * quad);
+
+              // positive points of intersection with third layer
+              float lay2_posx = 0.0f, lay2_posy = 0.0f;
+              intersectThirdLayer(apos, bpos, hit1_x, hit1_y, lay2_posx, lay2_posy);
+#ifdef DEBUG
+              const float lay2_posphi = getPhi(lay2_posx, lay2_posy);
+#endif
+
+              std::vector<int> cand_hit2_indices;
+              // MIMI lay2_hits.selectHitIndices((2.0f*hit1_z-hit0_z),(lay2_posphi+lay2_negphi)/2.0f,
+              // MIMI seed_z2cut,(lay2_posphi-lay2_negphi)/2.0f,
+              // MIMI cand_hit2_indices,true,false);
+
+              dprint(" ihit0: " << ihit0 << " mcTrackID: " << hit0.mcTrackID(ev->simHitsInfo_) << " phi: " << hit0.phi()
+                                << " z: " << hit0.z());
+              dprint("  predphi: " << (lay2_posphi + lay2_negphi) / 2.0f << "+/-" << (lay2_posphi - lay2_negphi) / 2.0f
+                                   << " predz: " << 2.0f * hit1_z - hit0_z << "+/-" << seed_z2cut << std::endl);
+
+          // loop over candidate third layer hits
+          //temp_thr_seed_idcs.reserve(temp_thr_seed_idcs.size()+cand_hit2_indices.size());
+#pragma omp simd
+              for (size_t idx = 0; idx < cand_hit2_indices.size(); ++idx) {
+                const int ihit2 = cand_hit2_indices[idx];
+                const Hit& hit2 = lay2_hits.refHit(ihit2);
+
+                const float lay1_predz = (hit0_z + hit2.z()) / 2.0f;
+                // filter by residual of second layer hit
+                if (std::abs(lay1_predz - hit1_z) > Config::seed_z1cut)
+                  continue;
+
+                const float hit2_x = hit2.x();
+                const float hit2_y = hit2.y();
+
+                // now fit a circle, extract pT and d0 from center and radius
+                const float mr = (hit1_y - hit0_y) / (hit1_x - hit0_x);
+                const float mt = (hit2_y - hit1_y) / (hit2_x - hit1_x);
+                const float a = (mr * mt * (hit2_y - hit0_y) + mr * (hit1_x + hit2_x) - mt * (hit0_x + hit1_x)) /
+                                (2.0f * (mr - mt));
+                const float b = -1.0f * (a - (hit0_x + hit1_x) / 2.0f) / mr + (hit0_y + hit1_y) / 2.0f;
+                const float r = getHypot(hit0_x - a, hit0_y - b);
+
+                // filter by d0 cut 5mm, pT cut 0.5 GeV (radius of 0.5 GeV track)
+                if ((r < Config::maxCurvR) || (std::abs(getHypot(a, b) - r) > Config::seed_d0cut))
+                  continue;
+
+                dprint(" ihit2: " << ihit2 << " mcTrackID: " << hit2.mcTrackID(ev->simHitsInfo_)
+                                  << " phi: " << hit2.phi() << " z: " << hit2.z());
+
+                temp_thr_seed_idcs.emplace_back(TripletIdx{{ihit0, ihit1, ihit2}});
+              }  // end loop over third layer matches
+            }    // end loop over first layer matches
+          }      // end chunk of hits for parallel for
+          seed_idcs.grow_by(temp_thr_seed_idcs.begin(), temp_thr_seed_idcs.end());
+        });  // end parallel for loop over second layer hits
+  }
+
+}  // end namespace mkfit
diff --git a/RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.h b/RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.h
new file mode 100644
index 0000000000000..da224326a7322
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/attic/seedtestMPlex.h
@@ -0,0 +1,16 @@
+#ifndef RecoTracker_MkFitCore_standalone_attic_seedtestMPlex_h
+#define RecoTracker_MkFitCore_standalone_attic_seedtestMPlex_h
+
+#include "Event.h"
+#include "Track.h"
+#include "HitStructures.h"
+
+namespace mkfit {
+  // Road-search seed finding; presumably appends hit-index triplets to seed_idcs -- confirm in the implementation.
+  void findSeedsByRoadSearch(TripletIdxConVec& seed_idcs,
+                             std::vector<LayerOfHits>& evt_lay_hits,
+                             int lay1_size,
+                             Event*& ev);
+
+}  // end namespace mkfit
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/cmssw-trackerinfo-desc.txt b/RecoTracker/MkFitCore/standalone/cmssw-trackerinfo-desc.txt
new file mode 100644
index 0000000000000..026e60f066596
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/cmssw-trackerinfo-desc.txt
@@ -0,0 +1,80 @@
+Storage of hits in Track objects
+================================
+
+struct HitOnTrack
+{
+  int index : 24;
+  int layer :  8;
+
+  HitOnTrack()             : index(-1), layer (-1) {}
+  HitOnTrack(int i, int l) : index( i), layer ( l) {}
+};
+
+index - index of hit within the layer it belongs to;
+layer - mapped from CMSSW detector/layer as described below.
+
+
+CMSSW -> TrackerInfo layer numbering
+====================================
+
+CMSSW uses detector (1 - 6) / layer numbering, our code uses a single layer
+index.
+
+stereo = simhit_detId & 3; 0 - single layer, 1 - stereo, 2 - dual layer
+
+*** 1. PIXB, 4 layers
+1 -> 0
+2 -> 1
+3 -> 2
+4 -> 3
+
+*** 3. TIB, 4 layers, 2 innermost are stereo
+1m -> 4
+1s -> 5
+2m -> 6
+2s -> 7
+3  -> 8
+4  -> 9
+
+*** 5. TOB, 6 layers, 2 innermost are stereo
+1m -> 10
+1s -> 11
+2m -> 12
+2s -> 13
+3  -> 14
+4  -> 15
+5  -> 16
+6  -> 17
+
+*** 2. PIXE, 3 layers; separate +z | -z !!!
+1 -> 18 | 45
+2 -> 19 | 46
+3 -> 20 | 47
+
+*** 4. TID, 3 layers, all partially stereo (bottom 2 modules)
+1m -> 21 | 48
+1s -> 22 | 49
+2m -> 23 | 50
+2s -> 24 | 51
+3m -> 25 | 52
+3s -> 26 | 53
+
+*** 6. TEC, 9 layers, all partially stereo (middle one modules, some bottom fringes)
+1m -> 27 | 54
+1s -> 28 | 55
+2m -> 29 | 56
+2s -> 30 | 57
+3m -> 31 | 58
+3s -> 32 | 59
+4m -> 33 | 60
+4s -> 34 | 61
+5m -> 35 | 62
+5s -> 36 | 63
+6m -> 37 | 64
+6s -> 38 | 65
+7m -> 39 | 66
+7s -> 40 | 67
+8m -> 41 | 68
+8s -> 42 | 69
+9m -> 43 | 70
+9s -> 44 | 71
diff --git a/RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier-do-it.pl b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier-do-it.pl
new file mode 100644
index 0000000000000..2dac1a24cbacc
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier-do-it.pl
@@ -0,0 +1,71 @@
+#!/usr/bin/perl
+# Apply the identifier renames listed in a rules file (one "from [to]" entry per line) to the source files.
+die "Usage: $0 replacement-rules-file" unless -r $ARGV[0];
+
+# $TARGET = "functions";
+$TARGET = "data-members";
+# $PRE / $POST: one capture group each, matched right before / after the identifier and put back unchanged.
+if ($TARGET eq "functions") {
+  $PRE = '(::|\s|\.|->|"|\(|\[)';
+  $POST = '(\s*\()';
+}
+elsif ($TARGET eq "data-members") {
+  $PRE = '(\W)';
+  $POST = '(\W)';
+  push @FILES, map { "MkFitCore/src/Mk$_.h" } qw{Base Fitter Finder};
+  push @FILES, map { "MkFitCore/src/Mk$_.cc" } qw{Fitter Finder};
+}
+
+open(F, "$ARGV[0]");
+
+while (my $l = <F>) {
+  next if $l =~ m/^\s*$/;    # skip blank lines
+  next if $l =~ m/^\s*#/;    # skip comment lines
+  chomp $l;
+
+  my ($from, $to) = $l =~ m/^\s*(\w+)(?:\s+(\w+)\s*)?$/;
+  # No explicit replacement given: derive it from the original name.
+  if (not defined $to) {
+    if ($TARGET eq "functions") {
+      $to = lcfirst($from);
+    }
+    elsif ($TARGET eq "data-members") { $to = 'm_' . $from; }
+  }
+
+  # my @matches = `find . -name \*.h -or -name \*.cc | xargs grep -P '${PRE}${from}${POST}'`;
+  # print "Replace '$from' --> '$to' in\n ", join(" ", @matches), "\n";
+  # next;
+
+  my @matched_files;
+  if ($TARGET eq "functions") {
+    @matched_files = split("\n", `find . -name \*.h -or -name \*.cc | xargs grep -l -P '${PRE}${from}${POST}'`);
+  }
+  elsif ($TARGET eq "data-members") { @matched_files = @FILES; }
+
+  next unless @matched_files;
+
+  print "Replace '$from' --> '$to' in ", join(" ", @matched_files), "\n";
+
+  for my $fname (@matched_files) {
+    # Slurp the whole file so that the substitution below works on a single string.
+    my $xxx = $/;
+    undef $/;
+    open(X, '<', $fname) or die "Can not open $fname for reading";
+    my $file = <X>;
+    close(X);
+    $/ = $xxx;
+
+    $file =~ s/${PRE}${from}${POST}/$1${to}$2/msg;
+    # Print all lines of the updated text containing $from (also matches names that embed it, e.g. m_<from>).
+    my @matches = $file =~ m/^.*$from.*$/mg;
+    print $fname, "\n  ", join("\n  ", @matches), "\n";
+
+    open(X, '>', $fname) or die "Can not open $fname for writing";
+    print X $file;
+    close(X);
+  }
+
+  print "\n";
+}
+
+close(F);
diff --git a/RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier.pl b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier.pl
new file mode 100644
index 0000000000000..9ad7fc935b13f
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-identifier.pl
@@ -0,0 +1,102 @@
+#!/usr/bin/perl
+#
+# Scan the selected headers and print identifiers that are candidates for renaming:
+#  - TARGET "functions":    capitalized names that look like function declarations or
+#                           definitions, excluding class / struct names (constructors);
+#  - TARGET "data-members": names that look like data-member declarations.
+# Detection is regex-based only. Header paths are relative to the current directory.
+# In "functions" mode the output ('# In <header>:' lines followed by one name per line)
+# appears to be usable as the rules file of re-identifier-do-it.pl -- confirm before use.
+
+# For full function lowercasing
+# @headers = grep { chomp; $_ !~ m!(attic|Ice|CMS-2017|MatriplexCommon|binnor)!; } `find . -name \*.h`;
+
+# For MkFinder cleanup
+@headers = map { "MkFitCore/src/Mk$_.h"; } qw{Base Fitter Finder};
+
+# $TARGET = "functions";
+$TARGET = "data-members";
+
+# 1. Grep for classes, structs - so we don't try to fix ctors and dtors.
+
+%ClassesStructs = ();
+%Funcs = ();
+
+%HeaderText = ();
+
+for my $h (@headers) {
+  # local $/ = undef;
+  unless (open(F, '<', $h)) {
+    warn "Can not open $h for reading, skipping\n";
+    next;
+  }
+  my @lines = <F>;
+  close F;
+
+  # filter out //-style commented lines
+  @lines = grep { $_ !~ m!^\s*//!o; } @lines;
+
+  if ($TARGET eq "data-members") {
+    # filter out class / struct declarations (also forward decls)
+    @lines = grep { $_ !~ m!^\s*(class|struct)\s\w!o; } @lines;
+  }
+
+  my $f = join('', @lines);
+
+  # filter out /*-style commented code
+  $f =~ s!/\*.*?\*/!!omsg;
+
+  if ($TARGET eq "data-members") {
+    # filter out default value assignments
+    $f =~ s!\s*=\s*\w+\s*;!;!omsg;
+  }
+
+  # remember every class / struct name declared in this header
+  my @css = $f =~ m/\s(?:class|struct)\s+(\w+)/omsg;
+
+  # print "In $h: ", join(" ", @css), "\n";
+
+  for my $cs (@css) { $ClassesStructs{$cs} = 1; }
+
+  # keep the stripped text for the passes below
+  $HeaderText{$h} = $f;
+}
+
+# 2. Grep for stuff that looks like functions and starts with a capital letter
+if ($TARGET eq "functions") {
+  for my $h (keys %HeaderText) {
+    # capitalized name, argument list, optional const, then ';' or '{'
+    my @foos = $HeaderText{$h} =~ m/\s([A-Z]\w+)\([^)]*\)\s*(?:const)?\s*(?:;|\{)/omsg;
+
+    # print "In $h: ", join(" ", @foos), "\n";
+
+    # report each name once and skip class / struct names
+    my @ffoos;
+    for my $foo (@foos) {
+      next if exists $ClassesStructs{$foo};
+      next if exists $Funcs{$foo};
+      $Funcs{$foo} = 1;
+
+      # Needed just for printout
+      push @ffoos, $foo;
+    }
+
+    if (scalar @ffoos) {
+      print "# In $h:\n  ", join("\n  ", @ffoos), "\n";
+    }
+  }
+}
+
+# 3. Grep for stuff that looks like data members
+elsif ($TARGET eq "data-members") {
+  for my $h (keys %HeaderText) {
+    # my @mmbs = $HeaderText{$h} =~ m/^\s*(?:[\w<>]\s+)+(\w+)(?:\s*\[[\w+:]\])*\s*;/omg;
+    # type token, one whitespace, optional & or *, member name, optional [..] extents, then ';'
+    my @mmbs = $HeaderText{$h} =~ m/(?:[\w<>]+)\s[&*]?(\w+)(?:\s*\[[\w:]+\])*\s*;/omg;
+
+    print "In $h: ", join(" ", @mmbs), "\n";
+  }
+}
+else {
+  die "Unsupported TARGET $TARGET";
+}
diff --git a/RecoTracker/MkFitCore/standalone/code-mod-tools/re-include-guard.pl b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-include-guard.pl
new file mode 100755
index 0000000000000..fdd3a6a63d393
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-include-guard.pl
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+# Check / fix the include guards of the headers found below the current directory.
+# all files: `find RecoTracker -name \*.h -or -name \*.cc -or -name \*.icc -or -name \*.acc`;
+
+$depth = shift;    # optional argument: value passed to find -maxdepth (default 1)
+$depth = 1 unless defined $depth;
+
+$pref = `pwd`; chomp $pref;
+$pref =~ s!.*/(RecoTracker/.*)!$1!;
+
+@headers = map { chomp; s!^./!!; $_; } `find . -maxdepth $depth -name \\*.h`;
+
+print "PREF: $pref\n";
+print "HEADERS:\n", join("\n", @headers);
+print "\n\n";
+
+# Slurp mode: each header is read as one string.
+undef $/;
+
+
+for my $file (@headers)
+{
+    if (not open(F, '<', $file)) { print "  CANNOT read $file: $!\n"; next; }
+    $f = <F>;
+    close F;
+
+    # Expected guard: path starting at RecoTracker/ with '/' and the final '.' turned into '_'.
+    my $incguard = "$pref/$file";
+    $incguard =~ s!/!_!og;
+    $incguard =~ s!\.(h|H)$!_$1!;
+    $incguard =~ s![^A-Za-z0-9_]!_!g;    # anything else (e.g. '-') is not valid in a macro name
+
+    print "$file   --> $incguard\n";
+
+    if ($f =~ m/^#ifndef\s+(.*)\s+#define\s+(.*)/m)
+    {
+        my $same = $1 eq $2;
+        if (not $same) { print "ERRORRORR incguard ifdef/defnot mathcing --- FIXFIXFIX\n"; next; }
+        my $correct = $1 eq $incguard;
+        print "  found existing include guard $1, $2 -- same $same, correct $correct\n";
+
+        if (not $correct)
+        {
+            print "  FIXFIXFIX\n";
+            $f =~ s/^#ifndef\s+(.*)\s+#define\s+(.*)/#ifndef $incguard\n#define $incguard/m;
+
+            # Write back only when the guard changed, so untouched headers keep their timestamps.
+            if (not open(F, '>', $file)) { print "  CANNOT write $file: $!\n"; next; }
+            print F $f;
+            close F;
+        }
+    }
+    else
+    {
+        print "  NONONO include guard\n";
+    }
+}
diff --git a/RecoTracker/MkFitCore/standalone/code-mod-tools/re-include.pl b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-include.pl
new file mode 100755
index 0000000000000..250dda4f238cc
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/code-mod-tools/re-include.pl
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+# Rewrite quoted #include lines in RecoTracker/ sources to the full package path of the named header.
+$RUNDIR = shift || '/data2/matevz/CMSSW_12_2_0_pre2/src';    # optional argument: CMSSW src/ dir to run in
+
+$pwd = `pwd`; chomp $pwd; die "Has to be run in $RUNDIR" unless $pwd eq $RUNDIR;
+
+# all files: `find RecoTracker -name \*.h -or -name \*.cc -or -name \*.icc -or -name \*.acc`;
+
+### Setup headers, short-headers and header map. Exclude standalone/
+
+@headers = grep { chomp; $_ !~ m!/(?:standalone)/!; } `find RecoTracker -name \*.h`;
+
+@sheaders = map { my $a = $_; $a = $1 if $a =~ m!.*/([^/]+)$!; $a; } @headers;
+
+$NH = scalar(@headers);
+# NOTE: headers are keyed by basename; if two headers share a basename the later one wins.
+%hmap = ();
+for (my $i=0; $i<$NH; ++$i) { $hmap{$sheaders[$i]} = $headers[$i];}
+
+
+# Setup files to process, filter out stuff we don't want to touch.
+
+@files = grep { chomp; $_ !~ m!/(?:attic|dusty-chest)/!; } `find RecoTracker -name \*.h -or -name \*.cc`;
+
+print "HEADERS:\n";
+for (my $i=0; $i<$NH; ++$i) { print "  ", $headers[$i], "  -->  ", $sheaders[$i], "\n"; }
+print "\n\n";
+print "FILES:\n", join("\n", @files);
+print "\n\n";
+
+
+################################################################
+
+for my $file (@files)
+{
+    if (not open(F, '<', $file)) { print "  CANNOT read $file: $!\n"; next; }
+    @lines = <F>;
+    close F;
+
+    # $insrc: directory of $file when it lives inside a src/ or plugins/ tree (false otherwise);
+    # headers below that same directory are then included by their path relative to it.
+    my $insrc = $file =~ m!^(.*/(?:src|plugins)(?:/.*)?)/[^/]+$!;
+    $insrc = $1 if $insrc;
+
+    print "Processing file $file, N_lines = ", scalar(@lines), ", in_src = ", $insrc, "\n";
+
+    my $changed = 0;
+
+    for my $l (@lines)
+    {
+        if ($l =~ m!^#include\s+"([^"]*)"!)
+        {
+            my $sh = $1; $sh = $1 if $sh =~ m!.*/([^/]+)$!;
+
+            my $have = exists $hmap{$sh};
+
+            my $line_to_print = $l;
+            chomp $line_to_print;
+            print "Found includeline $line_to_print -- $sh --> $hmap{$sh}\n";
+
+            if ($have)
+            {
+                # replace the include path ... but first check if these are in the same src/ directory.
+                my $full_inc = $hmap{$sh};
+                if ($insrc && ($full_inc =~ s!^\Q${insrc}\E/!!))
+                {
+                    print "   QQQQQQQ File and include in the same src/ --> shortening to ${full_inc}\n";
+                }
+
+                # Substitute only the quoted path so that a trailing comment on the line survives.
+                my $new_l = $l;
+                $new_l =~ s!^#include\s+"[^"]*"!#include "${full_inc}"!;
+                if ($new_l ne $l) { $l = $new_l; $changed = 1; }
+                print "  new line is $l";
+            }
+        }
+    }
+
+    if ($changed)
+    {
+        if (not open(F, '>', $file)) { print "  CANNOT write $file: $!\n"; next; }
+        print F @lines;
+        close F;
+    }
+}
diff --git a/RecoTracker/MkFitCore/standalone/configure b/RecoTracker/MkFitCore/standalone/configure
new file mode 100755
index 0000000000000..ee4b37ebe69b8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/configure
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Setup for standalone build. To be further improved.
+
+path=$1
+
+if [ -z "$path" ] || [ ! -d "$path" ]; then
+    echo "Argument must be an existing directory."
+    exit 1
+fi
+
+if [ ! -d "$path/RecoTracker" ] ||
+   [ ! -d "$path/RecoTracker/MkFitCore" ] ||
+   [ ! -d "$path/RecoTracker/MkFitCMS" ]; then
+    echo "Source directory must contain RecoTracker/MkFitCore/ and RecoTracker/MkFitCMS/."
+    exit 1
+fi
+
+mkdir -p objs-Core objs-Core-Geoms objs-CMS objs-CMS-Geoms
+
+git clone git@github.com:trackreco/mkFit-external.git
+
+sa=$path/RecoTracker/MkFitCore/standalone
+
+### ln -fs $sa/Makefile.config .
+
+cat > Makefile <<END
+export SRCDIR := $path
+export SADIR  := $path/RecoTracker/MkFitCore/standalone
+export SACMS  := $path/RecoTracker/MkFitCMS/standalone
+
+### include Makefile.config
+
+all:
+
+
+SUBPKGS := Core CylCowWLids CMS
+
+%:
+	\${MAKE} -C objs-Core -f \${SADIR}/Makefile \$@
+	\${MAKE} -C objs-Core-Geoms -f \${SADIR}/Geoms/Makefile \$@
+	\${MAKE} -C objs-CMS -f \${SACMS}/Makefile \$@
+	\${MAKE} -C objs-CMS-Geoms -f \${SACMS}/Geoms/Makefile \$@
+END
diff --git a/RecoTracker/MkFitCore/standalone/index-desc.txt b/RecoTracker/MkFitCore/standalone/index-desc.txt
new file mode 100644
index 0000000000000..7a434ab74f9cc
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/index-desc.txt
@@ -0,0 +1,143 @@
+This README provides light documentation on the various use of indices within the code.
+
+---------
+ Outline
+---------
+
+Section 1: Hit Indices
+Section 2: mcHitID
+Section 3: Ref Track ID (mcTrackID, cmsswTrackID)
+Section 4: Track Label
+
+------------------------
+ Section 1: Hit Indices
+------------------------
+
+Hit indices attached to HitOnTrack array for each track are described below. 
+
+Idx >= 0  : Track contains a reconstructed hit. Idx indicates position within container where hit resides. Before building in mkFit, idx (with layer number) points to the position within Event::layerHits_ [vector of vector of hits, first index: layer, second index: hit idx]. Just prior to building, hit idxs are translated from their position within layerHits_ to their position within EventOfHits::m_layers_of_hits [vector of LayerOfHits, first index is layer, LayerOfHits is the class containing the array of Hits]. Building uses layers_of_hits, then for validation, the hits are translated back to layerHits_.
+
+Idx == -1 : Track has not found a hit on this layer (true miss).
+
+Idx == -2 : Track has reached maximum number of holes/misses (-1).
+
+Idx == -3 : Track not in sensitive region of detector, does not count towards efficiency.
+
+Idx == -4 : ??
+
+Idx == -7 : Dummy hit to mark the location of an inactive module
+
+Idx == -9 : Reference track contained a hit that has since been filtered out via CCC before written to memory file. -9 hits are NOT stored in layerHits_.
+
+
+--------------------
+ Section 2: mcHitID
+--------------------
+
+Each hit has a unique identifier, independent of layer number, known as mcHitID, even if the hit was not created by a track (could be pure noise!).
+
+ID >= 0  : Position within Event::simHitsInfo_ (type: vector<MCHitInfo>). Provides additional information about hit: if hit originates from true MC track, if it is a looper hit, etc.
+
+N.B. Word on mcTrackID within MCHitInfo: >= 0 indicates matched to reference track, == -1 a hit not generated from a saved sim track.
+
+-------------------------
+ Section 3: Ref Track ID
+-------------------------
+
+Reference track ID can refer to CMSSW tracks as reference or Sim tracks as reference, i.e. mcTrackID or cmsswTrackID. The ID is stored within TrackExtra for each reco track during validation. Please see validation manifesto for more details on the process of assigning the ref track ID.
+
+ID >=   0 : a long reco track is matched to a findable reference track (will equal label of reference track and therefore position of ref track within container)
+ID ==  -1 : a long reco track is a true fake, enters numerator and denominator of the fake rate (FR)
+ID ==  -2 : a short reco track is matched to a findable reference track
+ID ==  -3 : a short reco track is matched to an unfindable reference track
+ID ==  -4 : a long reco track is matched to an unfindable reference track
+ID ==  -5 : a short reco track is unmatched to a findable reference track
+ID ==  -6 : a short reco track is unmatched to an unfindable reference track
+ID ==  -7 : a long reco track is unmatched to an unfindable reference track
+ID ==  -8 : a short reco track is unmatched to a track that may not have a reference or the reference track set is not given
+ID ==  -9 : a long reco track is unmatched to a track that may not have a reference or the reference track set is not given
+
+------------------------
+ Section 4: Track Label
+------------------------
+
+** Taken from validation manifesto! ** 
+** Originally from Issue #99 on https://github.com/cerati/mictest **
+
+## Introduction
+
+The label currently has multiple meanings depending on the type of track and where it is in the pipeline between seeding, building, and validation.  To begin, allow me to map out the differences in inputs for the various validation sequences, and the associated track associator function:
+
+1. ToyMC Geom + sim seeds: setMCTrackIDInfo()
+2. CMSSW Geom + sim seeds: setMCTrackIDInfo()
+3. ToyMC Geom + found seeds: setMCTrackIDInfo()
+4. CMSSW Geom + cmssw seeds: setCMSSWTrackIDInfoByTrkParams() or setCMSSWTrackIDInfoByHits()
+5. CMSSW Geom + cmssw seeds + external CMSSW tracks as reference + N^2 cleaning + track parameter matching: setCMSSWTrackIDInfoByTrkParams()
+6. CMSSW Geom + cmssw seeds + external CMSSW tracks as reference + pure seeds: setCMSSWTrackIDInfoByHits(), specifying pure seeds
+7. CMSSW Geom + cmssw seeds + external CMSSW tracks as reference + N^2 cleaning + hit matching: setCMSSWTrackIDInfoByHits()
+8. CMSSW Geom + cmssw tracks as input + sim tracks as reference: setMCTrackIDInfo()
+
+## Important note about hits and relation to the label:
+As a reminder, all hits that originate from a simulated particle will have a **mcHitID_** >= 0.  This is the index to the vector of simHitInfo_, where each element of the vector contains additional information about the hit.  Most importantly, it stores the **mcTrackID_**  that the hit originated from.  
+
+As such, the following must be respected for the tracks inside simTracks_: **label_** == **mcTrackID_** == **position** inside the track vector.  If the simTracks_ are moved, shuffled, sorted, deleted, etc., this means that the matching of candidate tracks via **mcTrackID_'s** via hits via **mcHitID_** will be ruined!
+
+## Case 1. and 2.
+
+In both 1. and 2., the seeds are generated from the simtracks, and as such their **label_** == **mcTrackID_**.  Before the building starts, the seeds can be moved around and into different structures. Regardless, for each seed, a candidate track is created with its **label_** equal to the **label_** of the seed it originated from. At the end of building, the candidate tracks are dumped into their conventional candidateTracks_ collection.  At this point, the **label_** of the track may not be pointing to its **position** inside the vector, but still uniquely identifies it as to which seed it came from.  
+
+So we then create a TrackExtra for the track, storing the **label_** as the **seedID**, and then reassign the **label_** of track to be its **position** inside the candidate track vector.  We actually do this for the seed and fit tracks also.  Each track collection has an associated track extra collection, indexed the same such that candidateTracks_[i] has an associated candidateTracksExtra_[i].
+
+The associator is run for each candidate track, using the fact that the now stored **seedID_** also points to the correct **mcTrackID_** this candidate was created from, counting the number of hits in the candidate track after the seed matching this id.  If more than 50% are matched, the candidate track now sets its track extra **mcTrackID_** == **seedID_**.
+
+We then produce two maps to map the candidate tracks:
+1. simToCandidates: 
+ - map key = **mcTrackID**
+ - mapped value = vector of candidate track **label_'s**, where the **label_'s** now represent the **positions** in the candidate vector for tracks who have the **mcTrackID_** in question
+
+2. seedToCandidates:
+ - map key = **seedID_**
+ - mapped value = **label_** of candidate track (again, the **label_** now being the **position** inside the track vector)
+
+These maps are then used to get the associated sim and reco information for the trees.
+
+## Case 3. and 4.
+
+In both 3. and 4., the seedTracks are not intrinsically related to the simTracks_.  For 3., the seeds are generated from find_seeds(), and the **label_** assigned to the track is just the index at which the seed was created.  For 4., the **label_** is the **mcTrackID** for the sim track it is most closely associated to (as given to us from CMSSW), if it exists.
+
+If using 4., we do relabeling prior to seed cleaning. If **label_** >= 0, the label stays the same, and becomes its **seedID_**.  In the case of the N^2 cleaning, some seeds may remain which have a **label_** == -1.  Since there might be more than one and we want to uniquely identify them after building, we reassign the **label_'s** with an increasing negative number. So the first seed track with **label_** == -1 has label == -1, the second track with **label_** == -1 then has a new **label_** == -2, the third track is assigned -3, etc.  This can also occur in the pure CMSSW seeds (i.e. cmssw seeds that turn into cmssw reco tracks in the cmsswTracks_ collection), in which case we do the relabeling in the same fashion.  The CMSSW seeds are then read in and cleaned.
+
+It is clear here that **label_** of the seed track does not have to equal the **position** inside the track vector!  So the building proceeds in the same manner as 1. and 2., where each seed first generates a single candidate track with a **label_** equal to the seed **label_** which happens to be its **seedID_**.  The candidateTracks_ are dumped out in some order, where the **label_** is still the **seedID_**.
+
+We then generate a TrackExtra for each candidate track (and seed and fit tracks), with the **seedID_** set to the **label_**, then reassigning the **label_** to be the **position** inside the track vector.
+
+The associator is run, now just counting how many hits on the candidate track are matched to a single **mcTrackID**.  If the fraction of hits matching a single **mcTrackID** is greater than 50%, then the track extra **mcTrackID_** is set to the matched **mcTrackID**.
+
+The associate maps are then used in the same fashion as described above. 
+
+## Case 5.
+
+The seed cleaning and labeling is the same as described for 4. The only difference now is that we run a special sequence before the seeds are cleaned and then sorted in eta, storing the original index position of the seed track as a mapped value of the seed track label.  This is because we wish to keep track of which cmssw track originates from which seed track, and the matching is such that the cmssw track label (before being reassigned to its position) == seedID of the track, which equals the position of a seed track inside the track vector. Not all seed tracks will have this property, as not all seeds become cmssw tracks.
+
+The building proceeds, tracks are dumped out, track extra **seedID_** are set to the track candidate **label_**, and the **label_** is reassigned to the track's **position** inside the vector. We also take the chance to generate a track extra for the CMSSW tracks, storing the **label_** as the **seedID_**, and reassigning the **label_** to the CMSSW track's position inside the cmsswTracks_ vector.  
+
+Afterwards, we set the **mcTrackID** of the candidate track == **seedID** (as described previously). In addition, if the candidate track **label_** is mapped, then we set the **seedID_** == mapped value (i.e. the seedID of the cmssw track before it was realigned --> the position of the seed track in its vector before it was moved in eta bins).  
+
+The candidate track to CMSSW associator is run, matching by chi2 and dphi.  If track finds at least one CMSSW track with a match, the **cmsswTrackID_** is set to the **label_** of the CMSSW track.  We then produce a map of the CMSSW tracks to the candidate mkFit tracks.
+
+cmsswToCandidates: 
+ - map key = **cmsswTrackID** (which is now the position of a cmssw track in cmsswTracks_)
+ - mapped value = vector of candidate track **label_'s**, where the **label_'s** now represent the **positions** in the candidate vector for tracks who have the **cmsswTrackID_** in question
+
+## Case 6.
+
+Can only be used with PURE SEEDS. The meaning of the label here is still the same as in case 5.  Now, of course, we have a "pure" efficiency denominator made of all the CMSSW reco tracks. A match is declared if the mkFit track shares 50% of its hits after the seed with the CMSSW track it was supposed to match (i.e. pure seeds).
+
+## Case 7.
+
+Use only with CMSSW validation: counts how many hits from mkFit track are matched to cmssw with a map for labels, as CMSSW tracks can share hits! Then loop over map, storing labels in  vec that have 50% of hits matched to single CMSSW track after common seed (denom is mkFit track nHits).  Then sort by matched CMSSW tracks for each mkFit track, selecting the one with the highest match.
+
+## Case 8.
+
+CMSSW tracks retain label and seed id as before, no building done, just remapping to keep track of sim and seed track info.
+Uses special maps and functions to properly do sim track matching: Event::relabel_cmsswtracks_from_seeds() + TTreeValidation::makeRecoTkToSeedTkMapsDumbCMSSW()
diff --git a/RecoTracker/MkFitCore/standalone/plotting/Common.hh b/RecoTracker/MkFitCore/standalone/plotting/Common.hh
new file mode 100644
index 0000000000000..445e275b942e2
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/Common.hh
@@ -0,0 +1,304 @@
+#ifndef _Common_
+#define _Common_
+
+#include "TString.h"
+#include "TColor.h"
+#include "TStyle.h"
+#include "TFile.h"
+#include "TGraphErrors.h"
+#include "TCanvas.h"
+#include "TLegend.h"
+#include "TMarker.h"
+#include "TAxis.h"
+#include "TH1F.h"
+#include "TF1.h"
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <vector>
+namespace {
+  void setupStyle() {  // apply the global gStyle settings shared by the plotting code
+    gStyle->SetOptStat(0);   // 0: no statistics box (per ROOT TStyle)
+    gStyle->SetPadTickX(1);  // 1: tick marks also on the opposite x side of the pad (per ROOT TStyle)
+    gStyle->SetPadTickY(1);  // same for the y axis
+  }
+};  // namespace
+
+enum ArchEnum { SNB, KNL, SKL, LNXG, LNXS };  // architecture tags selectable via setupARCHEnum()
+
+namespace {
+  ArchEnum ARCH;  // set by setupARCHEnum(), read by setupArch()
+  void setupARCHEnum(const TString& arch) {  // substring match on arch, first hit wins; exit(1) if none matches
+    if (arch.Contains("SNB"))
+      ARCH = SNB;
+    else if (arch.Contains("KNL"))
+      ARCH = KNL;
+    else if (arch.Contains("SKL"))
+      ARCH = SKL;
+    else if (arch.Contains("LNX-S"))
+      ARCH = LNXS;
+    else if (arch.Contains("LNX-G"))
+      ARCH = LNXG;
+    else {
+      std::cerr << arch.Data() << " is not an allowed architecture! Exiting... " << std::endl;
+      exit(1);
+    }
+  }
+};  // namespace
+
+struct ArchOpts {  // per-architecture plot limits, filled by setupArch()
+  Int_t vumin;  // vumin / vumax: vector-width range -- presumably x-axis limits, confirm in MakeOverlay
+  Int_t vumax;
+
+  Int_t thmin;  // thmin / thmax: thread-count range (same caveat)
+  Int_t thmax;
+
+  Double_t vutimemin;  // *timemin / *timemax: presumably y-axis limits of the build-time plots
+  Double_t vutimemax;
+
+  Double_t thtimemin;
+  Double_t thtimemax;
+
+  Double_t vuspeedupmin;  // *speedupmin / *speedupmax: presumably y-axis limits of the speedup plots
+  Double_t vuspeedupmax;
+
+  Double_t thspeedupmin;
+  Double_t thspeedupmax;
+
+  Double_t thmeiftimemin;  // thmeif*: limits for the "meif" plots; meaning of "meif" not visible here -- TODO confirm
+  Double_t thmeiftimemax;
+
+  Double_t thmeifspeedupmin;
+  Double_t thmeifspeedupmax;
+};
+
+namespace {
+  ArchOpts arch_opt;  // limits for the architecture currently selected in ARCH
+  void setupArch() {  // fill arch_opt from ARCH (call setupARCHEnum() first); exit(1) on an unknown value
+    if (ARCH == SNB) {
+      arch_opt.vumin = 1;
+      arch_opt.vumax = 8;
+
+      arch_opt.thmin = 1;
+      arch_opt.thmax = 24;
+
+      arch_opt.vutimemin = 0.;
+      arch_opt.vutimemax = 0.5;
+
+      arch_opt.thtimemin = 0.001;
+      arch_opt.thtimemax = 1.;
+
+      arch_opt.vuspeedupmin = 0.;
+      arch_opt.vuspeedupmax = arch_opt.vumax;
+
+      arch_opt.thspeedupmin = 0.;
+      arch_opt.thspeedupmax = arch_opt.thmax;
+
+      arch_opt.thmeiftimemin = 0.01;
+      arch_opt.thmeiftimemax = 0.5;
+
+      arch_opt.thmeifspeedupmin = 0.;
+      arch_opt.thmeifspeedupmax = arch_opt.thmax;
+    } else if (ARCH == KNL) {
+      arch_opt.vumin = 1;
+      arch_opt.vumax = 16;
+
+      arch_opt.thmin = 1;
+      arch_opt.thmax = 256;
+
+      arch_opt.vutimemin = 0.;
+      arch_opt.vutimemax = 1.5;
+
+      arch_opt.thtimemin = 0.001;
+      arch_opt.thtimemax = 1.;
+
+      arch_opt.vuspeedupmin = 0.;
+      arch_opt.vuspeedupmax = arch_opt.vumax;
+
+      arch_opt.thspeedupmin = 0.;
+      arch_opt.thspeedupmax = 80.;
+
+      arch_opt.thmeiftimemin = 0.001;
+      arch_opt.thmeiftimemax = arch_opt.thtimemax;
+
+      arch_opt.thmeifspeedupmin = 0.;
+      arch_opt.thmeifspeedupmax = arch_opt.thspeedupmax;
+    } else if (ARCH == SKL) {  // NOTE: the SKL, LNXG and LNXS branches currently assign identical values
+      arch_opt.vumin = 1;
+      arch_opt.vumax = 16;
+
+      arch_opt.thmin = 1;
+      arch_opt.thmax = 64;
+
+      arch_opt.vutimemin = 0.;
+      arch_opt.vutimemax = 0.25;
+
+      arch_opt.thtimemin = 0.0001;
+      arch_opt.thtimemax = 1.;
+
+      arch_opt.vuspeedupmin = 0.;
+      arch_opt.vuspeedupmax = arch_opt.vumax;
+
+      arch_opt.thspeedupmin = 0.;
+      arch_opt.thspeedupmax = arch_opt.thmax / 2;
+
+      arch_opt.thmeiftimemin = 0.001;
+      arch_opt.thmeiftimemax = arch_opt.thtimemax;
+
+      arch_opt.thmeifspeedupmin = 0.;
+      arch_opt.thmeifspeedupmax = arch_opt.thspeedupmax;
+    } else if (ARCH == LNXG) {
+      arch_opt.vumin = 1;
+      arch_opt.vumax = 16;
+
+      arch_opt.thmin = 1;
+      arch_opt.thmax = 64;
+
+      arch_opt.vutimemin = 0.;
+      arch_opt.vutimemax = 0.25;
+
+      arch_opt.thtimemin = 0.0001;
+      arch_opt.thtimemax = 1.;
+
+      arch_opt.vuspeedupmin = 0.;
+      arch_opt.vuspeedupmax = arch_opt.vumax;
+
+      arch_opt.thspeedupmin = 0.;
+      arch_opt.thspeedupmax = arch_opt.thmax / 2;
+
+      arch_opt.thmeiftimemin = 0.001;
+      arch_opt.thmeiftimemax = arch_opt.thtimemax;
+
+      arch_opt.thmeifspeedupmin = 0.;
+      arch_opt.thmeifspeedupmax = arch_opt.thspeedupmax;
+    } else if (ARCH == LNXS) {
+      arch_opt.vumin = 1;
+      arch_opt.vumax = 16;
+
+      arch_opt.thmin = 1;
+      arch_opt.thmax = 64;
+
+      arch_opt.vutimemin = 0.;
+      arch_opt.vutimemax = 0.25;
+
+      arch_opt.thtimemin = 0.0001;
+      arch_opt.thtimemax = 1.;
+
+      arch_opt.vuspeedupmin = 0.;
+      arch_opt.vuspeedupmax = arch_opt.vumax;
+
+      arch_opt.thspeedupmin = 0.;
+      arch_opt.thspeedupmax = arch_opt.thmax / 2;
+
+      arch_opt.thmeiftimemin = 0.001;
+      arch_opt.thmeiftimemax = arch_opt.thtimemax;
+
+      arch_opt.thmeifspeedupmin = 0.;
+      arch_opt.thmeifspeedupmax = arch_opt.thspeedupmax;
+    } else {
+      std::cerr << "How did this happen?? You did not specify one of the allowed ARCHs!" << std::endl;
+      exit(1);
+    }
+  }
+};  // namespace
+
+enum SuiteEnum { full, forPR, forConf, val };  // validation suites selectable via setupSUITEEnum()
+
+namespace {
+  SuiteEnum SUITE;  // set by setupSUITEEnum(), read by setupBuilds()
+  void setupSUITEEnum(const TString& suite) {  // substring match on suite, first hit wins; exit(1) if none matches
+    if (suite.Contains("full"))
+      SUITE = full;
+    else if (suite.Contains("forPR"))
+      SUITE = forPR;
+    else if (suite.Contains("forConf"))
+      SUITE = forConf;
+    else if (suite.Contains("val"))
+      SUITE = val;
+    else {
+      std::cerr << suite.Data() << " is not an allowed validation suite! Exiting... " << std::endl;
+      exit(1);
+    }
+  }
+};  // namespace
+
+struct BuildOpts {  // one build routine as shown in the plots
+  BuildOpts() {}
+  BuildOpts(const TString& name, const Color_t color, const TString& label) : name(name), color(color), label(label) {}
+
+  TString name;   // short key, e.g. "BH", "STD", "CE"
+  Color_t color;  // ROOT color assigned to this build
+  TString label;  // human-readable name, e.g. "Best Hit"
+};
+typedef std::vector<BuildOpts> BOVec;
+typedef std::map<TString, BuildOpts> BOMap;  // keyed by BuildOpts::name in setupBuilds()
+
+namespace {
+  BOVec builds;    // build configurations selected for the current SUITE, in insertion order
+  UInt_t nbuilds;  // cached builds.size()
+  // Fill builds for the current SUITE (call setupSUITEEnum() first) and update nbuilds.
+  void setupBuilds(const Bool_t isBenchmark, const Bool_t includeCMSSW) {
+    // start from scratch so that a repeated call does not append duplicate entries
+    builds.clear();
+    // tmp map to fill builds vector
+    BOMap buildsMap;
+    buildsMap["BH"] = {"BH", kBlue, "Best Hit"};
+    buildsMap["STD"] = {"STD", kGreen + 1, "Standard"};
+    buildsMap["CE"] = {"CE", kRed, "Clone Engine"};
+    buildsMap["FV"] = {"FV", kMagenta, "Full Vector"};
+    buildsMap["CMSSW"] = {"CMSSW", kBlack, "CMSSW"};
+
+    // KPM: Consult ./xeon_scripts/common-variables.sh to match routines to suite
+    if (SUITE == full) {
+      builds.emplace_back(buildsMap["BH"]);
+      builds.emplace_back(buildsMap["STD"]);
+      builds.emplace_back(buildsMap["CE"]);
+      builds.emplace_back(buildsMap["FV"]);
+    } else if (SUITE == forPR) {
+      if (isBenchmark) {
+        builds.emplace_back(buildsMap["BH"]);
+        builds.emplace_back(buildsMap["CE"]);
+      } else {
+        builds.emplace_back(buildsMap["STD"]);
+        builds.emplace_back(buildsMap["CE"]);
+      }
+    } else if (SUITE == forConf) {
+      builds.emplace_back(buildsMap["CE"]);
+      builds.back().label = "mkFit";  // change label in legend for conference
+    } else if (SUITE == val) {
+      if (isBenchmark) {
+        std::cout << "INFO: val mode has an empty set for isBenchmark" << std::endl;
+      } else {
+        builds.emplace_back(buildsMap["STD"]);
+        builds.emplace_back(buildsMap["CE"]);
+      }
+    } else {
+      std::cerr << "How did this happen?? You did not specify one of the allowed SUITEs!" << std::endl;
+      exit(1);
+    }
+
+    // always check for adding in CMSSW --> never true for isBenchmark
+    if (includeCMSSW)
+      builds.emplace_back(buildsMap["CMSSW"]);
+    nbuilds = builds.size();
+  }
+};  // namespace
+// Widen [min, max] in place with the bin contents of hist; inline because it is defined in a header.
+inline void GetMinMaxHist(const TH1F* hist, Double_t& min, Double_t& max) {
+  for (auto ibin = 1; ibin <= hist->GetNbinsX(); ibin++) {
+    const auto content = hist->GetBinContent(ibin);
+    // bins whose content is exactly 0 never lower the minimum
+    if (content < min && content != 0.0)
+      min = content;
+    if (content > max)
+      max = content;
+  }
+}
+// Set minimum / maximum of hist with headroom (factor 2 if isLogy, else 1.05); inline: defined in a header.
+inline void SetMinMaxHist(TH1F* hist, const Double_t min, const Double_t max, const Bool_t isLogy) {
+  hist->SetMinimum(isLogy ? min / 2.0 : min / 1.05);
+  hist->SetMaximum(isLogy ? max * 2.0 : max * 1.05);
+}
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.cpp b/RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.cpp
new file mode 100644
index 0000000000000..3bee5c0a34407
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.cpp
@@ -0,0 +1,212 @@
+#include "PlotBenchmarks.hh"
+
+#include <iostream>
+
+PlotBenchmarks::PlotBenchmarks(const TString& arch, const TString& sample, const TString& suite)
+    : arch(arch), sample(sample), suite(suite) {
+  // Configure the plotting style, then open the input file or exit with a message:
+  // GetGraphs() dereferences `file` unconditionally.
+  setupStyle();
+  const TString fname = "benchmark_" + arch + "_" + sample + ".root";
+  file = TFile::Open(fname.Data());
+  if (file == (TFile*)NULL) {
+    std::cerr << "File: " << fname.Data() << " does not exist!!! Exiting..." << std::endl;
+    exit(1);
+  }
+
+  // setup arch enum, arch options and suite enum
+  setupARCHEnum(arch);
+  setupArch();
+  setupSUITEEnum(suite);
+
+  // setup build options : true for isBenchmark-type plots, false for no CMSSW
+  setupBuilds(true, false);
+}
+
+PlotBenchmarks::~PlotBenchmarks() { delete file; }  // releases the TFile obtained in the constructor
+
+void PlotBenchmarks::RunBenchmarkPlots() {
+  // Produces four overlays: build time and speedup, each versus vector width (VU) and versus thread count (TH).
+  const TString fixedTH = "1";
+  const TString fixedVU = Form("%iint", arch_opt.vumax);
+  const TString tagTH = " [nTH=" + fixedTH + "]";  // title suffix of the VU plots
+  const TString tagVU = " [nVU=" + fixedVU + "]";  // title suffix of the TH plots
+
+  // axis titles
+  const TString axisVU = "Matriplex Vector Width [floats]";
+  const TString axisTH = "Number of Threads";
+  const TString axisTime = "Average Build Time per Event [s]";
+  const TString axisSpeedup = "Average Build Speedup per Event";
+
+  // one MakeOverlay() call per plot; all axis ranges come from arch_opt
+  MakeOverlay("VU_time",
+              sample + " Vectorization Benchmark on " + arch + tagTH,
+              axisVU,
+              axisTime,
+              arch_opt.vumin,
+              arch_opt.vumax,
+              arch_opt.vutimemin,
+              arch_opt.vutimemax);
+
+  MakeOverlay("TH_time",
+              sample + " Parallelization Benchmark on " + arch + tagVU,
+              axisTH,
+              axisTime,
+              arch_opt.thmin,
+              arch_opt.thmax,
+              arch_opt.thtimemin,
+              arch_opt.thtimemax);
+
+  MakeOverlay("VU_speedup",
+              sample + " Vectorization Speedup on " + arch + tagTH,
+              axisVU,
+              axisSpeedup,
+              arch_opt.vumin,
+              arch_opt.vumax,
+              arch_opt.vuspeedupmin,
+              arch_opt.vuspeedupmax);
+
+  MakeOverlay("TH_speedup",
+              sample + " Parallelization Speedup on " + arch + tagVU,
+              axisTH,
+              axisSpeedup,
+              arch_opt.thmin,
+              arch_opt.thmax,
+              arch_opt.thspeedupmin,
+              arch_opt.thspeedupmax);
+}
+
+void PlotBenchmarks::MakeOverlay(const TString& text,
+                                 const TString& title,
+                                 const TString& xtitle,
+                                 const TString& ytitle,
+                                 const Double_t xmin,
+                                 const Double_t xmax,
+                                 const Double_t ymin,
+                                 const Double_t ymax) {
+  // Overlays the "g_<build>_<text>" graph of every build and saves <arch>_<sample>_<text>.png plus a log-x copy.
+  const Bool_t isVU = text.Contains("VU", TString::kExact);
+  const Bool_t isSpeedup = text.Contains("speedup", TString::kExact);
+
+  // canvas
+  auto canv = new TCanvas();
+  canv->cd();
+  canv->SetGridy();
+  if (!isVU && !isSpeedup)  // only the time-vs-threads plot gets a log y-axis
+    canv->SetLogy();
+
+  // legend
+  const Double_t x1 = (isSpeedup ? 0.20 : 0.60);  // draw legend on left for speedup plots as this part is empty
+  const Double_t y1 = 0.65;
+  const Double_t ylength = builds.size() * 0.05;  // adjust size of legend for how many build routines we are plotting
+  auto leg = new TLegend(x1, y1, x1 + 0.25, y1 + ylength);
+  leg->SetBorderSize(0);
+
+  // setup tgraphs
+  TGEVec graphs(nbuilds);
+  PlotBenchmarks::GetGraphs(graphs, text, title, xtitle, ytitle);
+
+  // get tgraphs for intrinsic plot (entries stay null for non-VU plots)
+  TGEVec graphs_int(nbuilds);
+  if (isVU)
+    PlotBenchmarks::GetGraphs(graphs_int, text + "_int", title, xtitle, ytitle);
+
+  // Draw graphs; ndrawn counts the graphs actually drawn so far
+  for (auto i = 0U, ndrawn = 0U; i < nbuilds; i++) {
+    auto& graph = graphs[i];
+    auto& graph_int = graphs_int[i];
+    auto& build = builds[i];
+
+    // draw if graph exists: the first one drawn creates the axes ("A"), even when earlier builds had no graph
+    if (graph) {
+      graph->GetXaxis()->SetRangeUser(xmin, xmax);
+      graph->GetYaxis()->SetRangeUser(ymin, ymax);
+      graph->Draw(ndrawn++ > 0 ? "LP SAME" : "ALP");
+
+      // add point for VU with intrinsics
+      if (isVU && graph_int) {
+        graph_int->GetXaxis()->SetRangeUser(xmin, xmax);
+        graph_int->GetYaxis()->SetRangeUser(ymin, ymax);
+        graph_int->Draw("P SAME");
+      }
+
+      // add to legend
+      leg->AddEntry(graph, build.label.Data(), "LP");
+    }
+  }
+
+  // Draw speedup line (y = x) on speedup plots only
+  TF1* scaling = NULL;
+  if (isSpeedup) {
+    scaling = new TF1("ideal_scaling",
+                      "x",
+                      (isVU ? arch_opt.vumin : arch_opt.thmin),
+                      (isVU ? arch_opt.vuspeedupmax : arch_opt.thspeedupmax));
+    scaling->SetLineColor(kBlack);
+    scaling->SetLineStyle(kDashed);
+    scaling->SetLineWidth(2);
+    scaling->Draw("SAME");
+    leg->AddEntry(scaling, "Ideal Scaling", "l");
+  }
+
+  // Draw legend last
+  leg->Draw("SAME");
+
+  // Save the png
+  const TString outname = arch + "_" + sample + "_" + text;
+  canv->SaveAs(outname + ".png");
+
+  // Save log-x version
+  canv->SetLogx();
+  for (auto i = 0U; i < nbuilds; i++) {
+    auto& graph = graphs[i];
+    auto& graph_int = graphs_int[i];
+
+    // need to reset range with logx
+    if (graph) {
+      graph->GetXaxis()->SetRangeUser(xmin, xmax);
+      graph->GetYaxis()->SetRangeUser(ymin, ymax);
+      if (isVU && graph_int) {
+        graph_int->GetXaxis()->SetRangeUser(xmin, xmax);
+        graph_int->GetYaxis()->SetRangeUser(ymin, ymax);
+      }
+    }
+  }
+  canv->Update();
+  canv->SaveAs(outname + "_logx.png");
+
+  // delete everything (deleting a null graph pointer is a no-op)
+  for (auto i = 0U; i < nbuilds; i++) {
+    delete graphs[i];
+    if (isVU)
+      delete graphs_int[i];
+  }
+  if (isSpeedup)
+    delete scaling;
+  delete leg;
+  delete canv;
+}
+
+void PlotBenchmarks::GetGraphs(
+    TGEVec& graphs, const TString& text, const TString& title, const TString& xtitle, const TString& ytitle) {
+  // Stores in graphs[i] the object "g_<build name>_<text>" fetched from the input file for every configured build
+  // and restyles it in that build's color; a null result is stored as-is (MakeOverlay() checks before drawing).
+  const auto marker = text.Contains("_int", TString::kExact) ? kOpenCircle : kFullCircle;  // open = "_int" graphs
+  const TString titles = title + ";" + xtitle + ";" + ytitle;
+
+  for (auto ib = 0U; ib < nbuilds; ib++) {
+    const auto& opts = builds[ib];
+
+    // look the graph up by key and hand the pointer to the caller
+    auto* const fetched = (TGraphErrors*)file->Get("g_" + opts.name + "_" + text);
+    graphs[ib] = fetched;
+    if (!fetched)
+      continue;  // nothing to restyle for this build
+
+    fetched->SetTitle(titles);
+    fetched->SetLineWidth(2);
+    fetched->SetLineColor(opts.color);
+    fetched->SetMarkerStyle(marker);
+    fetched->SetMarkerColor(opts.color);
+  }
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.hh b/RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.hh
new file mode 100644
index 0000000000000..674f16c739baa
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotBenchmarks.hh
@@ -0,0 +1,32 @@
+#ifndef _PlotBenchmarks_
+#define _PlotBenchmarks_
+
+#include "Common.hh"
+
+typedef std::vector<TGraphErrors*> TGEVec;
+
+class PlotBenchmarks {
+public:
+  PlotBenchmarks(const TString& arch, const TString& sample, const TString& suite);
+  PlotBenchmarks(const PlotBenchmarks&) = delete;  // owns `file`: a copy would delete it twice
+  ~PlotBenchmarks();
+  void RunBenchmarkPlots();  // makes the VU/TH time and speedup overlays
+  void MakeOverlay(const TString& text,
+                   const TString& title,
+                   const TString& xtitle,
+                   const TString& ytitle,
+                   const Double_t xmin,
+                   const Double_t xmax,
+                   const Double_t ymin,
+                   const Double_t ymax);
+  void GetGraphs(
+      TGEVec& graphs, const TString& text, const TString& title, const TString& xtitle, const TString& ytitle);
+
+private:
+  const TString arch;    // used in the input file name, plot titles and output names
+  const TString sample;  // used in the input file name, plot titles and output names
+  const TString suite;   // passed to setupSUITEEnum() by the constructor
+  TFile* file;           // input file, opened in the constructor and deleted in the destructor
+};
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.cpp b/RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.cpp
new file mode 100644
index 0000000000000..f5cdf8a631ce6
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.cpp
@@ -0,0 +1,146 @@
+#include "PlotMEIFBenchmarks.hh"
+
+PlotMEIFBenchmarks::PlotMEIFBenchmarks(const TString& arch, const TString& sample, const TString& build)
+    : arch(arch), sample(sample), build(build) {
+  // Configure the plotting style, then open the input file or exit with a message:
+  // MakeOverlay() dereferences `file` unconditionally.
+  setupStyle();
+  const TString fname = "benchmarkMEIF_" + arch + "_" + sample + "_" + build + ".root";
+  file = TFile::Open(fname.Data());
+  if (file == (TFile*)NULL) {
+    std::cerr << "File: " << fname.Data() << " does not exist!!! Exiting..." << std::endl;
+    exit(1);
+  }
+
+  // arch enum first (presumably sets ARCH, which setupEvents() reads), then arch options and the event list
+  setupARCHEnum(arch);
+  setupArch();
+  setupEvents();
+}
+
+PlotMEIFBenchmarks::~PlotMEIFBenchmarks() { delete file; }  // releases the TFile obtained in the constructor
+
+void PlotMEIFBenchmarks::RunMEIFBenchmarkPlots() {
+  // Makes the time and speedup overlays versus thread count; nvu is the vector-width tag quoted in both titles
+  const TString nvu = Form("%iint", arch_opt.vumax);
+
+  // x-axis title
+  const TString xtitleth = "Number of Threads";
+
+  // y-axis titles
+  const TString ytitletime = "Average Time per Event [s]";
+  const TString ytitlespeedup = "Average Speedup per Event";
+
+  // Do the overlaying! Axis ranges come from arch_opt.
+  PlotMEIFBenchmarks::MakeOverlay(
+      "time",
+      build + " " + sample + " Multiple Events in Flight Benchmark on " + arch + " [nVU=" + nvu + "]",
+      xtitleth,
+      ytitletime,
+      arch_opt.thmin,
+      arch_opt.thmax,
+      arch_opt.thmeiftimemin,
+      arch_opt.thmeiftimemax);
+
+  PlotMEIFBenchmarks::MakeOverlay(
+      "speedup",
+      build + " " + sample + " Multiple Events in Flight Speedup on " + arch + " [nVU=" + nvu + "]",
+      xtitleth,
+      ytitlespeedup,
+      arch_opt.thmin,
+      arch_opt.thmax,
+      arch_opt.thmeifspeedupmin,
+      arch_opt.thmeifspeedupmax);
+}
+
+void PlotMEIFBenchmarks::MakeOverlay(const TString& text,
+                                     const TString& title,
+                                     const TString& xtitle,
+                                     const TString& ytitle,
+                                     const Double_t xmin,
+                                     const Double_t xmax,
+                                     const Double_t ymin,
+                                     const Double_t ymax) {
+  // Overlays one "g_<build>_MEIF_nEV<N>_<text>" graph per entry of `events`; saves a png and a log-x png
+  const Bool_t isSpeedup = text.Contains("speedup", TString::kExact);
+
+  // canvas: log y-axis for everything except the speedup plot
+  auto canv = new TCanvas();
+  canv->cd();
+  canv->SetGridy();
+  if (!isSpeedup)
+    canv->SetLogy();
+  canv->DrawFrame(xmin, ymin, xmax, ymax, "");
+
+  // legend: placed on the left for the speedup plot, on the right otherwise
+  const Double_t x1 = (isSpeedup ? 0.20 : 0.60);
+  const Double_t y1 = 0.65;
+  auto leg = new TLegend(x1, y1, x1 + 0.25, y1 + 0.2);
+  leg->SetBorderSize(0);
+
+  // get tgraphs for meif and draw
+  TGVec graphs(nevents);
+  for (auto i = 0U; i < nevents; i++) {
+    const auto& event = events[i];
+    auto& graph = graphs[i];
+
+    const TString nEV = Form("%i", event.nev);
+    graph = (TGraph*)file->Get("g_" + build + "_MEIF_nEV" + nEV + "_" + text);
+
+    if (graph) {
+      // restyle a bit: title, then line/marker in this event count's color
+      graph->SetTitle(title + ";" + xtitle + ";" + ytitle);
+
+      graph->SetLineWidth(2);
+      graph->SetLineColor(event.color);
+      graph->SetMarkerStyle(kFullCircle);
+      graph->SetMarkerColor(event.color);
+      graph->GetXaxis()->SetRangeUser(xmin, xmax);
+      graph->GetYaxis()->SetRangeUser(ymin, ymax);
+
+      // draw and add to legend (only the graph at index 0 is drawn with the axis option "A")
+      graph->Draw(i > 0 ? "LP SAME" : "ALP");
+      leg->AddEntry(graph, Form("%i Events", event.nev), "LP");
+    }
+  }
+
+  // Draw ideal scaling line (y = x) on the speedup plot only
+  TF1* scaling = NULL;
+  if (isSpeedup) {
+    scaling = new TF1("ideal_scaling", "x", arch_opt.thmin, arch_opt.thmeifspeedupmax);
+    scaling->SetLineColor(kBlack);
+    scaling->SetLineStyle(kDashed);
+    scaling->SetLineWidth(2);
+    scaling->Draw("SAME");
+    leg->AddEntry(scaling, "Ideal Scaling", "l");
+  }
+
+  // draw legend last
+  leg->Draw("SAME");
+
+  // Save the png
+  const TString outname = arch + "_" + sample + "_" + build + "_MEIF_" + text;
+  canv->SaveAs(outname + ".png");
+
+  // Save log-x version of the same canvas
+  canv->SetLogx();
+  for (auto i = 0U; i < nevents; i++) {
+    auto& graph = graphs[i];
+
+    // reset axes for logx
+    if (graph) {
+      graph->GetXaxis()->SetRangeUser(xmin, xmax);
+      graph->GetYaxis()->SetRangeUser(ymin, ymax);
+    }
+  }
+  canv->Update();
+  canv->SaveAs(outname + "_logx.png");
+
+  // delete everything created or fetched above (null graph entries are harmless to delete)
+  for (auto& graph : graphs)
+    delete graph;
+  if (isSpeedup)
+    delete scaling;
+  delete leg;
+  delete canv;
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.hh b/RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.hh
new file mode 100644
index 0000000000000..5bc6d01e5e95e
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotMEIFBenchmarks.hh
@@ -0,0 +1,67 @@
+#ifndef _PlotMEIFBenchmarks_
+#define _PlotMEIFBenchmarks_
+
+#include "Common.hh"
+
+#include "TGraph.h"
+
+struct EventOpts {
+  EventOpts() : nev(0), color(kBlack) {}  // defined defaults instead of indeterminate members
+  EventOpts(const Int_t nev, const Color_t color) : nev(nev), color(color) {}
+
+  Int_t nev;      // event count of this curve (used in the graph name and the legend entry)
+  Color_t color;  // line and marker color of this curve
+};
+typedef std::vector<EventOpts> EOVec;
+
+namespace {
+  EOVec events;    // one entry per MEIF curve to plot, filled by setupEvents()
+  UInt_t nevents;  // events.size(), cached by setupEvents()
+  void setupEvents() {
+    // N.B.: Consult ./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh for matching MEIF to arch
+    events.clear();  // rebuild from scratch so that a repeated call does not duplicate entries
+    events.emplace_back(1, kBlack);
+    events.emplace_back(2, kBlue);
+    events.emplace_back(4, kGreen + 1);
+    events.emplace_back(8, kRed);
+    events.emplace_back((ARCH == SNB ? 12 : 16), kMagenta);
+
+    if (ARCH == KNL || ARCH == SKL || ARCH == LNXG || ARCH == LNXS) {
+      events.emplace_back(32, kAzure + 10);
+      events.emplace_back(64, kOrange + 3);
+    }
+    if (ARCH == KNL) {
+      events.emplace_back(128, kViolet - 1);
+    }
+
+    // set nevents once events is set
+    nevents = events.size();
+  }
+}  // namespace
+
+typedef std::vector<TGraph*> TGVec;
+
+class PlotMEIFBenchmarks {
+public:
+  PlotMEIFBenchmarks(const TString& arch, const TString& sample, const TString& build);
+  PlotMEIFBenchmarks(const PlotMEIFBenchmarks&) = delete;  // owns `file`: a copy would delete it twice
+  ~PlotMEIFBenchmarks();
+  void RunMEIFBenchmarkPlots();
+  void MakeOverlay(const TString& text,
+                   const TString& title,
+                   const TString& xtitle,
+                   const TString& ytitle,
+                   const Double_t xmin,
+                   const Double_t xmax,
+                   const Double_t ymin,
+                   const Double_t ymax);
+
+private:
+  const TString arch;
+  const TString sample;
+  const TString build;
+
+  TFile* file;  // input file, opened in the constructor and deleted in the destructor
+};
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotValidation.cpp b/RecoTracker/MkFitCore/standalone/plotting/PlotValidation.cpp
new file mode 100644
index 0000000000000..2986ffb3302fd
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotValidation.cpp
@@ -0,0 +1,1137 @@
+#include "PlotValidation.hh"
+
+//////////////////////////////
+// Some light documentation //
+//////////////////////////////
+
+// Indices are as follows
+// e == entries in trees
+// i == kinematic variables: pt, eta, phi
+// j == reco track collections: seed, build, fit
+// k == pt cuts
+// l == rates: eff, dupl rate, ineff (efftree), eff, dupl rate, fake rate (print)
+// m == eta regions: brl, trans, ec (efftree)
+// n == track quality plots: nHits, fracHits, track score (frtree and print)
+// o == matched reco collections: all reco, fake, all match, best match (frtree), all reco, fake, and best match (print)
+// p == diff of kinematic variables: dnhits, dinvpt, deta, dphi (frtree)
+// q == n directories in frtree
+
+// Variable name scheme
+// *_var(s)_* == kinematic variables (index i)
+// *_trk(s)_* == reco track collections (index j)
+// *_ref* == variable associated to reference tracks (CMSSW or Sim)
+// kinematic variable name comes before trk name to maintain consistency with branch names
+// mask name also obeys this rule: which type of mask, then which reco track collection for association
+// in case of "_ref_trk", this means this is a reference track variable associated to a given reco track collection
+
+// f* == data member
+// s at the start == "string" version of the variable
+// h at the start == a different string version
+
+PlotValidation::PlotValidation(const TString& inName,
+                               const TString& outName,
+                               const Bool_t cmsswComp,  // read the "cmssw*" trees instead of "efftree"/"frtree"
+                               const int algo,          // > 0: keep only frtree entries with algorithm == algo
+                               const Bool_t mvInput,    // Validation() calls MoveInput() when set
+                               const Bool_t rmSuffix,   // suppress the "_iter<algo>" suffix on the output name
+                               const Bool_t saveAs,
+                               const TString& outType)
+    : fInName(inName),
+      fOutName(outName),
+      fCmsswComp(cmsswComp),
+      fAlgo(algo),
+      fMvInput(mvInput),
+      fRmSuffix(rmSuffix),
+      fSaveAs(saveAs),
+      fOutType(outType) {
+  // Setup: style, output directory (named per iteration unless rmSuffix), bins and common variables
+  PlotValidation::SetupStyle();
+  if (fAlgo > 0 && !fRmSuffix)
+    fOutName = fOutName + "_iter" + algo;
+  PlotValidation::MakeOutDir(fOutName);
+  PlotValidation::SetupBins();
+  PlotValidation::SetupCommonVars();
+
+  // Get input root file or exit!
+  fInRoot = TFile::Open(fInName.Data());
+  if (fInRoot == (TFile*)NULL) {
+    std::cerr << "File: " << fInName.Data() << " does not exist!!! Exiting..." << std::endl;
+    exit(1);
+  }
+  gROOT->cd();  // NOTE(review): presumably so that the CopyTree() result below is not attached to fInRoot - confirm
+  efftree = (TTree*)fInRoot->Get((fCmsswComp ? "cmsswefftree" : "efftree"));
+  frtree = (TTree*)fInRoot->Get((fCmsswComp ? "cmsswfrtree" : "frtree"));
+  if (algo > 0)  // NOTE(review): neither tree is null-checked; a file lacking them would crash here or later
+    frtree = frtree->CopyTree(Form("algorithm==%i", algo));
+  // make output root file
+  fOutRoot = new TFile(fOutName + "/plots.root", "RECREATE");
+}
+
+PlotValidation::~PlotValidation() {
+  delete efftree;   // trees are deleted before the input file they were read from
+  delete frtree;    // for algo > 0 this is the CopyTree() result made in the constructor
+  delete fInRoot;
+  delete fOutRoot;  // will delete all pointers to subdirectory
+}
+
+void PlotValidation::Validation(int algo) {  // runs the three stages below for `algo`, then optionally MoveInput()
+  std::cout << "Computing Efficiency, Inefficiency, and Duplicate Rate ..." << std::endl;
+  PlotValidation::PlotEffTree(algo);
+
+  std::cout << "Computing Fake Rate, <nHits/track>, and kinematic diffs to " << fSRefTitle.Data() << " tracks ..."
+            << std::endl;
+  PlotValidation::PlotFRTree(algo);
+
+  std::cout << "Printing Totals ..." << std::endl;
+  PlotValidation::PrintTotals(algo);
+
+  if (fMvInput)  // set from the constructor's mvInput argument
+    PlotValidation::MoveInput();
+}
+
+// Loop over efficiency tree: produce efficiency, inefficiency per region of tracker, and duplicate rate.
+// One TEfficiency is booked per (variable, track collection, pt cut, rate), inefficiency being split further by
+// |eta| region; for algo > 0 only bit `algo` of the iteration masks counts. Plots are deleted after being saved.
+void PlotValidation::PlotEffTree(int algo) {
+  ////////////////////////////////////////////
+  // Declare strings for branches and plots //
+  ////////////////////////////////////////////
+
+  const TStrVec rates = {"eff", "dr", "ineff"};
+  const TStrVec srates = {"Efficiency", "Duplicate Rate", "Inefficiency"};
+  const UInt_t nrates = rates.size();
+
+  const TStrVec regs = {"brl", "trans", "ec"};
+  const TStrVec sregs = {"Barrel", "Transition", "Endcap"};
+  const FltVec etacuts = {0, 0.9, 1.7, 2.45};
+  const UInt_t nregs = regs.size();
+
+  //////////////////////////
+  // Create the new plots //
+  //////////////////////////
+
+  TEffRefMap plots;
+  for (auto i = 0U; i < fNVars; i++)  // loop over fVars
+  {
+    const auto& var = fVars[i];
+    const auto& svar = fSVars[i];
+    const auto& sunit = fSUnits[i];
+
+    // get bins for the variable of interest
+    const auto& varbins = fVarBins[i];
+    const Double_t* bins = &varbins[0];
+
+    for (auto j = 0U; j < fNTrks; j++)  // loop over tracks
+    {
+      const auto& trk = fTrks[j];
+      const auto& strk = fSTrks[j];
+
+      for (auto k = 0U; k < fNPtCuts; k++)  // loop over pt cuts
+      {
+        const auto& sptcut = fSPtCuts[k];
+        const auto& hptcut = fHPtCuts[k];
+
+        for (auto l = 0U; l < nrates; l++)  // loop over which rate
+        {
+          const auto& rate = rates[l];
+          const auto& srate = srates[l];
+
+          // plot names and key
+          const TString plotkey = Form("%i_%i_%i_%i", i, j, k, l);
+          const TString plotname = Form("%s_", fCmsswComp ? "cmssw" : "sim") + var + "_" + trk + "_pt" + hptcut;
+          const TString plottitle = strk + " Track " + srate + " vs " + fSRefTitle + " " + svar + " {" + fSVarPt +
+                                    " > " + sptcut + " " + fSUnitPt + "};" + svar + sunit + ";" + srate;
+
+          // eff and dr not split by region
+          if (l < 2) {
+            const TString tmpname = rate + "_" + plotname;
+            plots[plotkey] = new TEfficiency(tmpname.Data(), plottitle.Data(), varbins.size() - 1, bins);
+          } else  // ineff split by region
+          {
+            for (auto m = 0U; m < nregs; m++)  // loop over regions for inefficiency
+            {
+              const auto& reg = regs[m];
+              const auto& sreg = sregs[m];
+
+              const TString tmpkey = Form("%s_%i", plotkey.Data(), m);
+              const TString tmpname = rate + "_" + reg + "_" + plotname;
+              const TString tmptitle = strk + " Track " + srate + " vs " + fSRefTitle + " " + svar + "{" + fSVarPt +
+                                       " > " + sptcut + " " + fSUnitPt + ", " + sreg + "};" + svar + sunit + ";" +
+                                       srate;
+
+              plots[tmpkey] = new TEfficiency(tmpname.Data(), tmptitle.Data(), varbins.size() - 1, bins);
+            }  // end loop over regions
+          }    // end check over plots
+        }      // end loop over plots
+      }        // end loop over pt cuts
+    }          // end loop over tracks
+  }            // end loop over variables
+
+  ////////////////////////////////////////
+  // Floats/Ints to be filled for trees //
+  ////////////////////////////////////////
+
+  // Initialize var arrays, SetBranchAddress
+  FltVec vars_ref(fNVars);            // first index is var. only for ref values! so no extra index
+  TBrRefVec vars_ref_br(fNVars);      // tbranch for each var
+  for (auto i = 0U; i < fNVars; i++)  // loop over vars index
+  {
+    const auto& var = fVars[i];
+    auto& var_ref = vars_ref[i];
+    auto& var_ref_br = vars_ref_br[i];
+
+    // initialize var, branches
+    var_ref = 0.;
+    var_ref_br = 0;
+
+    // Set var branch
+    efftree->SetBranchAddress(
+        var + "_" + ((fSRefVar == "cmssw" || var != "nLayers") ? fSRefVar : fSRefVarTrk), &var_ref, &var_ref_br);
+  }
+
+  // Initialize masks, set branch addresses
+  IntVec refmask_trks(fNTrks);        // need to know if sim track associated to a given reco track type
+  TBrRefVec refmask_trks_br(fNTrks);  // tbranch for each trk
+
+  IntVec duplmask_trks(fNTrks);        // need to know if sim track associated to a given reco track type more than once
+  TBrRefVec duplmask_trks_br(fNTrks);  // tbranch for each trk
+
+  std::vector<ULong64_t> itermask_trks(fNTrks);
+  TBrRefVec itermask_trks_br(fNTrks);
+
+  std::vector<ULong64_t> iterduplmask_trks(fNTrks);
+  TBrRefVec iterduplmask_trks_br(fNTrks);
+
+  ULong64_t algoseed_trk = 0;  // for SIMVALSEED; initialized here so it is defined even when fNTrks == 0
+  TBranch* algoseed_trk_br = 0;
+
+  for (auto j = 0U; j < fNTrks; j++)  // loop over trks index
+  {
+    const auto& trk = fTrks[j];
+    auto& refmask_trk = refmask_trks[j];
+    auto& refmask_trk_br = refmask_trks_br[j];
+    auto& duplmask_trk = duplmask_trks[j];
+    auto& duplmask_trk_br = duplmask_trks_br[j];
+    auto& itermask_trk = itermask_trks[j];
+    auto& itermask_trk_br = itermask_trks_br[j];
+
+    auto& iterduplmask_trk = iterduplmask_trks[j];
+    auto& iterduplmask_trk_br = iterduplmask_trks_br[j];
+
+    // initialize mcmask, branches
+    refmask_trk = 0;
+    refmask_trk_br = 0;
+
+    // initialize duplmask, branches
+    duplmask_trk = 0;
+    duplmask_trk_br = 0;
+
+    // initialize itermask, branches
+    itermask_trk = 0;
+    itermask_trk_br = 0;
+
+    iterduplmask_trk = 0;
+    iterduplmask_trk_br = 0;
+
+    // Set branches
+    efftree->SetBranchAddress(fSRefMask + "mask_" + trk, &refmask_trk, &refmask_trk_br);
+    efftree->SetBranchAddress("duplmask_" + trk, &duplmask_trk, &duplmask_trk_br);
+    efftree->SetBranchAddress("itermask_" + trk, &itermask_trk, &itermask_trk_br);
+    efftree->SetBranchAddress("iterduplmask_" + trk, &iterduplmask_trk, &iterduplmask_trk_br);
+  }
+  efftree->SetBranchAddress("algo_seed", &algoseed_trk, &algoseed_trk_br);
+  ///////////////////////////////////////////////////
+  // Fill histos, compute rates from tree branches //
+  ///////////////////////////////////////////////////
+
+  // loop over entries
+  const auto nentries = efftree->GetEntries();
+  for (Long64_t e = 0; e < nentries; e++) {
+    // get branches
+    for (auto i = 0U; i < fNVars; i++) {
+      auto& var_ref_br = vars_ref_br[i];
+
+      var_ref_br->GetEntry(e);
+    }
+    for (auto j = 0U; j < fNTrks; j++) {
+      auto& refmask_trk_br = refmask_trks_br[j];
+      auto& duplmask_trk_br = duplmask_trks_br[j];
+      auto& itermask_trk_br = itermask_trks_br[j];
+      auto& iterduplmask_trk_br = iterduplmask_trks_br[j];
+
+      refmask_trk_br->GetEntry(e);
+      duplmask_trk_br->GetEntry(e);
+      itermask_trk_br->GetEntry(e);
+      iterduplmask_trk_br->GetEntry(e);
+    }
+    if (algoseed_trk_br)  // null if "algo_seed" could not be bound; algoseed_trk then stays 0 (no seed-algo cut)
+      algoseed_trk_br->GetEntry(e);
+    // use for cuts
+    const auto pt_ref = vars_ref[0];
+
+    // loop over plot indices
+    for (auto k = 0U; k < fNPtCuts; k++)  // loop over pt cuts
+    {
+      const auto ptcut = fPtCuts[k];
+
+      if (pt_ref < ptcut)
+        continue;  // cut on tracks with a low pt
+
+      for (auto i = 0U; i < fNVars; i++)  // loop over vars index
+      {
+        const auto var_ref = vars_ref[i];
+
+        for (auto j = 0U; j < fNTrks; j++)  // loop over trks index
+        {
+          const auto refmask_trk = refmask_trks[j];
+          const auto duplmask_trk = duplmask_trks[j];
+          const auto itermask_trk = itermask_trks[j];
+          const auto iterduplmask_trk = iterduplmask_trks[j];
+
+          const auto effIteration = algo > 0 ? ((itermask_trk >> algo) & 1) : 1;
+          const auto oneIteration = algo > 0 ? ((iterduplmask_trk >> algo) & 1) : 1;
+          const auto ineffIteration = algo > 0 ? (((itermask_trk >> algo) & 1) == 0) : (refmask_trk == 0);
+          const auto seedalgo_flag = (algoseed_trk > 0 && algo > 0) ? ((algoseed_trk >> algo) & 1) : 1;
+
+          // plot key base
+          const TString basekey = Form("%i_%i_%i", i, j, k);
+
+          // efficiency calculation: need ref track to be findable
+          if (refmask_trk != -1 && seedalgo_flag)
+            plots[basekey + "_0"]->Fill((refmask_trk == 1) && effIteration,
+                                        var_ref);  // ref track must be associated to enter numerator (==1)
+
+          // duplicate rate calculation: need ref track to be matched at least once
+          if (duplmask_trk != -1 && effIteration && seedalgo_flag)
+            plots[basekey + "_1"]->Fill((duplmask_trk == 1) && oneIteration,
+                                        var_ref);  // ref track is matched at least twice
+
+          // inefficiency calculation: need ref track to be findable
+          if (refmask_trk != -1) {
+            for (auto m = 0U; m < nregs; m++) {
+              const auto eta_ref = std::abs(vars_ref[1]);
+              const auto etalow = etacuts[m];
+              const auto etaup = etacuts[m + 1];
+
+              // ref track must be UNassociated (==0) to enter numerator of inefficiency
+              if ((eta_ref >= etalow) && (eta_ref < etaup))
+                plots[Form("%s_2_%i", basekey.Data(), m)]->Fill(ineffIteration, var_ref);
+            }  // end loop over regions
+          }    // end check over ref tracks being findable
+
+        }  // end loop over fTrks
+      }    // end loop over fVars
+    }      // end loop over fPtCuts
+  }        // end loop over entry in tree
+
+  /////////////////
+  // Make output //
+  /////////////////
+
+  // make subdirs
+  TStrVec dirnames = {"efficiency", "duplicaterate", "inefficiency"};
+  for (auto& dirname : dirnames)
+    dirname += fSRefDir;
+
+  TDirRefVec subdirs(nrates);
+  for (auto l = 0U; l < nrates; l++)
+    subdirs[l] = PlotValidation::MakeSubDirs(dirnames[l]);
+
+  // Draw, divide, and save efficiency plots
+  for (auto i = 0U; i < fNVars; i++) {
+    for (auto j = 0U; j < fNTrks; j++) {
+      for (auto k = 0U; k < fNPtCuts; k++) {
+        for (auto l = 0U; l < nrates; l++) {
+          const auto& dirname = dirnames[l];
+          auto& subdir = subdirs[l];
+
+          const TString plotkey = Form("%i_%i_%i_%i", i, j, k, l);
+          if (l < 2)  // efficiency and duplicate rate
+          {
+            PlotValidation::DrawWriteSavePlot(plots[plotkey], subdir, dirname, "AP");
+            delete plots[plotkey];
+          } else {
+            for (auto m = 0U; m < nregs; m++) {
+              const TString tmpkey = Form("%s_%i", plotkey.Data(), m);
+              PlotValidation::DrawWriteSavePlot(plots[tmpkey], subdir, dirname, "AP");
+              delete plots[tmpkey];
+            }  // end loop over regions
+          }    // end check over plots
+        }      // end loop over plots
+      }        // end loop over pt cuts
+    }          // end loop over tracks
+  }            // end loop over variables
+}
+
+// loop over fake rate tree, producing fake rate, nHits/track, score, and kinematic diffs to cmssw
+void PlotValidation::PlotFRTree(int algo) {
+  ////////////////////////////////////////////
+  // Declare strings for branches and plots //
+  ////////////////////////////////////////////
+
+  // info for quality info (nHits,score), kinematic diffs
+  const TStrVec colls = {"allreco", "fake", "allmatch", "bestmatch"};
+  const TStrVec scolls = {"All Reco", "Fake", "All Match", "Best Match"};
+  const UInt_t ncolls = colls.size();
+
+  // get bins ready
+  const DblVecVec trkqualbins = {fNHitsBins, fFracHitsBins, fScoreBins};
+
+  // diffs
+  const TStrVec dvars = {"dnHits", "dinvpt", "deta", "dphi"};
+  const TStrVec sdvars = {"nHits", "1/p_{T}", "#eta", "#phi"};
+  const UInt_t ndvars = dvars.size();
+
+  // get bins ready
+  const DblVecVec dvarbins = {fDNHitsBins, fDInvPtBins, fDEtaBins, fDPhiBins};
+
+  //////////////////////////
+  // Create and new plots //
+  //////////////////////////
+
+  TEffRefMap plots;
+  TH1FRefMap hists;
+  for (auto j = 0U; j < fNTrks; j++)  // loop over track collection
+  {
+    const auto& trk = fTrks[j];
+    const auto& strk = fSTrks[j];
+
+    for (auto k = 0U; k < fNPtCuts; k++)  // loop over pt cuts
+    {
+      const auto& sptcut = fSPtCuts[k];
+      const auto& hptcut = fHPtCuts[k];
+
+      // initialize efficiency plots
+      for (auto i = 0U; i < fNVars; i++)  // loop over vars
+      {
+        const auto& var = fVars[i];
+        const auto& svar = fSVars[i];
+        const auto& sunit = fSUnits[i];
+
+        // plot names and key
+        const TString plotkey = Form("%i_%i_%i", i, j, k);
+        const TString plotname = "fr_reco_" + var + "_" + trk + "_pt" + hptcut;
+        const TString plottitle = strk + " Track Fake Rate vs Reco " + svar + " {" + fSVarPt + " > " + sptcut + " " +
+                                  fSUnitPt + "};" + svar + sunit + ";Fake Rate";
+
+        // get bins for the variable of interest
+        const auto& varbins = fVarBins[i];
+        const Double_t* bins = &varbins[0];
+
+        plots[plotkey] = new TEfficiency(plotname.Data(), plottitle.Data(), varbins.size() - 1, bins);
+      }  // end loop over vars for efficiency
+
+      // initialize track quality plots
+      for (auto n = 0U; n < fNTrkQual; n++)  // loop over quality vars
+      {
+        const auto& trkqual = fTrkQual[n];
+        const auto& strkqual = fSTrkQual[n];
+
+        // get bins for the variable of interest
+        const auto& varbins = trkqualbins[n];
+        const Double_t* bins = &varbins[0];
+
+        for (auto o = 0U; o < ncolls; o++)  // loop over collection of tracks
+        {
+          const auto& coll = colls[o];
+          const auto& scoll = scolls[o];
+
+          // plot names and key
+          const TString histkey = Form("%i_%i_%i_%i", j, k, n, o);
+          const TString histname = "h_" + trkqual + "_" + coll + "_" + trk + "_pt" + hptcut;
+          const TString histtitle = scoll + " " + strk + " Track vs " + strkqual + " {" + fSVarPt + " > " + sptcut +
+                                    " " + fSUnitPt + "};" + strkqual + ";nTracks";
+
+          // Numerator only type plots only!
+          hists[histkey] = new TH1F(histname.Data(), histtitle.Data(), varbins.size() - 1, bins);
+          hists[histkey]->Sumw2();
+        }  // end loop over tracks collections
+      }    // end loop over hit plots
+
+      // initialize diff plots
+      for (auto p = 0U; p < ndvars; p++)  // loop over kin diff vars
+      {
+        const auto& dvar = dvars[p];
+        const auto& sdvar = sdvars[p];
+
+        // get bins for the variable of interest
+        const auto& varbins = dvarbins[p];
+        const Double_t* bins = &varbins[0];
+
+        // loop over collection of tracks for only matched tracks
+        for (auto o = 2U; o < ncolls; o++) {
+          const auto& coll = colls[o];
+          const auto& scoll = scolls[o];
+
+          // plot names and key
+          const TString histkey = Form("%i_%i_d_%i_%i", j, k, p, o);
+          const TString histname = "h_" + dvar + "_" + coll + "_" + trk + "_pt" + hptcut;
+          const TString histtitle = "#Delta" + sdvar + "(" + scoll + " " + strk + "," + fSRefTitle + ") {" + fSVarPt +
+                                    " > " + sptcut + " " + fSUnitPt + "};" + sdvar + "^{" + scoll + " " + strk + "}-" +
+                                    sdvar + "^{" + fSRefTitle + "};nTracks";
+
+          // Numerator only type plots only!
+          hists[histkey] = new TH1F(histname.Data(), histtitle.Data(), varbins.size() - 1, bins);
+          hists[histkey]->Sumw2();
+        }  // end loop over track collections
+      }    // end loop over diff plots
+
+    }  // end loop over pt cuts
+  }    // end loop over tracks
+
+  ////////////////////////////////////////
+  // Floats/Ints to be filled for trees //
+  ////////////////////////////////////////
+
+  // Initialize var_trk arrays, SetBranchAddress
+  FltVecVec vars_trks(fNVars);        // first index is var, second is type of reco track
+  TBrRefVecVec vars_trks_br(fNVars);  // tbranch for each var
+  for (auto i = 0U; i < fNVars; i++)  // loop over vars index
+  {
+    const auto& var = fVars[i];
+    auto& var_trks = vars_trks[i];
+    auto& var_trks_br = vars_trks_br[i];
+
+    var_trks.resize(fNTrks);
+    var_trks_br.resize(fNTrks);
+
+    for (auto j = 0U; j < fNTrks; j++)  // loop over trks index
+    {
+      const auto& trk = fTrks[j];
+      auto& var_trk = var_trks[j];
+      auto& var_trk_br = var_trks_br[j];
+
+      // initialize var, branches
+      var_trk = 0.;
+      var_trk_br = 0;
+
+      //Set var+trk branch
+      frtree->SetBranchAddress(var + "_" + trk, &var_trk, &var_trk_br);
+    }  // end loop over tracks
+  }    // end loop over vars
+
+  // Initialize masks
+  IntVec refmask_trks(fNTrks);           // need to know if ref track associated to a given reco track type
+  TBrRefVec refmask_trks_br(fNTrks);     // tbranch for each trk
+  IntVec iTkMatches_trks(fNTrks);        // want which matched track!
+  TBrRefVec iTkMatches_trks_br(fNTrks);  // tbranch for each trk
+
+  // Initialize nhits_trk branches
+  IntVec nHits_trks(fNTrks);           // nHits / track
+  TBrRefVec nHits_trks_br(fNTrks);     // branch per track
+  FltVec fracHits_trks(fNTrks);        // fraction of hits matched (most) / track
+  TBrRefVec fracHits_trks_br(fNTrks);  // branch per track
+  IntVec score_trks(fNTrks);           // track score
+  TBrRefVec score_trks_br(fNTrks);     // branch per track
+
+  // Initialize diff branches
+  FltVec nLayers_ref_trks(fNTrks);  // sim/cmssw nUnique layers
+  TBrRefVec nLayers_ref_trks_br(fNTrks);
+  FltVec pt_ref_trks(fNTrks);  // sim/cmssw pt
+  TBrRefVec pt_ref_trks_br(fNTrks);
+  FltVec eta_ref_trks(fNTrks);  // cmssw eta
+  TBrRefVec eta_ref_trks_br(fNTrks);
+  FltVec dphi_trks(fNTrks);  // dphi between reco track and sim/cmssw (computed during matching --> not 100% ideal)
+  TBrRefVec dphi_trks_br(fNTrks);
+
+  // Set branches for tracks
+  for (auto j = 0U; j < fNTrks; j++)  // loop over trks index
+  {
+    const auto& trk = fTrks[j];
+    auto& refmask_trk = refmask_trks[j];
+    auto& refmask_trk_br = refmask_trks_br[j];
+    auto& iTkMatches_trk = iTkMatches_trks[j];
+    auto& iTkMatches_trk_br = iTkMatches_trks_br[j];
+    auto& nHits_trk = nHits_trks[j];
+    auto& nHits_trk_br = nHits_trks_br[j];
+    auto& fracHits_trk = fracHits_trks[j];
+    auto& fracHits_trk_br = fracHits_trks_br[j];
+    auto& score_trk = score_trks[j];
+    auto& score_trk_br = score_trks_br[j];
+    auto& nLayers_ref_trk = nLayers_ref_trks[j];
+    auto& nLayers_ref_trk_br = nLayers_ref_trks_br[j];
+    auto& pt_ref_trk = pt_ref_trks[j];
+    auto& pt_ref_trk_br = pt_ref_trks_br[j];
+    auto& eta_ref_trk = eta_ref_trks[j];
+    auto& eta_ref_trk_br = eta_ref_trks_br[j];
+    auto& dphi_trk = dphi_trks[j];
+    auto& dphi_trk_br = dphi_trks_br[j];
+
+    // initialize masks, branches
+    refmask_trk = 0;
+    refmask_trk_br = 0;
+    iTkMatches_trk = 0;
+    iTkMatches_trk_br = 0;
+
+    // initialize nHits, branches
+    nHits_trk = 0;
+    nHits_trk_br = 0;
+    fracHits_trk = 0.f;
+    fracHits_trk_br = 0;
+    score_trk = 0;
+    score_trk_br = 0;
+
+    // initialize diff branches
+    nLayers_ref_trk = 0;
+    nLayers_ref_trk_br = 0;
+    pt_ref_trk = 0.f;
+    pt_ref_trk_br = 0;
+    eta_ref_trk = 0.f;
+    eta_ref_trk_br = 0;
+    dphi_trk = 0.f;
+    dphi_trk_br = 0;
+
+    // Set Branches
+    frtree->SetBranchAddress(fSRefMask + "mask_" + trk, &refmask_trk, &refmask_trk_br);
+    frtree->SetBranchAddress("iTkMatches_" + trk, &iTkMatches_trk, &iTkMatches_trk_br);
+
+    frtree->SetBranchAddress("nHits_" + trk, &nHits_trk, &nHits_trk_br);
+    frtree->SetBranchAddress("fracHitsMatched_" + trk, &fracHits_trk, &fracHits_trk_br);
+    frtree->SetBranchAddress("score_" + trk, &score_trk, &score_trk_br);
+
+    frtree->SetBranchAddress("nLayers_" + fSRefVarTrk + "_" + trk, &nLayers_ref_trk, &nLayers_ref_trk_br);
+    frtree->SetBranchAddress("pt_" + fSRefVarTrk + "_" + trk, &pt_ref_trk, &pt_ref_trk_br);
+    frtree->SetBranchAddress("eta_" + fSRefVarTrk + "_" + trk, &eta_ref_trk, &eta_ref_trk_br);
+    frtree->SetBranchAddress("dphi_" + trk, &dphi_trk, &dphi_trk_br);
+  }
+
+  ///////////////////////////////////////////////////
+  // Fill histos, compute rates from tree branches //
+  ///////////////////////////////////////////////////
+
+  // loop over entries
+  const UInt_t nentries = frtree->GetEntries();
+  for (auto e = 0U; e < nentries; e++) {
+    // get branches
+    for (auto i = 0U; i < fNVars; i++) {
+      auto& var_trks_br = vars_trks_br[i];
+      for (auto j = 0U; j < fNTrks; j++) {
+        auto& var_trk_br = var_trks_br[j];
+
+        var_trk_br->GetEntry(e);
+      }
+    }
+    for (auto j = 0U; j < fNTrks; j++) {
+      auto& refmask_trk_br = refmask_trks_br[j];
+      auto& iTkMatches_trk_br = iTkMatches_trks_br[j];
+      auto& nHits_trk_br = nHits_trks_br[j];
+      auto& fracHits_trk_br = fracHits_trks_br[j];
+      auto& score_trk_br = score_trks_br[j];
+      auto& nLayers_ref_trk_br = nLayers_ref_trks_br[j];
+      auto& pt_ref_trk_br = pt_ref_trks_br[j];
+      auto& eta_ref_trk_br = eta_ref_trks_br[j];
+      auto& dphi_trk_br = dphi_trks_br[j];
+
+      refmask_trk_br->GetEntry(e);
+      iTkMatches_trk_br->GetEntry(e);
+
+      nHits_trk_br->GetEntry(e);
+      fracHits_trk_br->GetEntry(e);
+      score_trk_br->GetEntry(e);
+
+      nLayers_ref_trk_br->GetEntry(e);
+      pt_ref_trk_br->GetEntry(e);
+      eta_ref_trk_br->GetEntry(e);
+      dphi_trk_br->GetEntry(e);
+    }
+
+    // loop over plot indices
+    for (auto j = 0U; j < fNTrks; j++)  // loop over trks index
+    {
+      const auto pt_trk = vars_trks[0][j];
+      const auto eta_trk = vars_trks[1][j];
+      const auto phi_trk = vars_trks[2][j];
+
+      const auto refmask_trk = refmask_trks[j];
+      const auto iTkMatches_trk = iTkMatches_trks[j];
+      const auto nHits_trk = nHits_trks[j];
+      const auto fracHits_trk = fracHits_trks[j];
+      const auto score_trk = score_trks[j];
+
+      const auto nLayers_ref_trk = nLayers_ref_trks[j];
+      const auto pt_ref_trk = pt_ref_trks[j];
+      const auto eta_ref_trk = eta_ref_trks[j];
+      const auto dphi_trk = dphi_trks[j];
+
+      for (auto k = 0U; k < fNPtCuts; k++)  // loop over pt cuts
+      {
+        const auto ptcut = fPtCuts[k];
+
+        if (pt_trk < ptcut)
+          continue;  // cut on tracks with a low pt
+
+        // fill rate plots
+        for (auto i = 0U; i < fNVars; i++)  // loop over vars index
+        {
+          const auto var_trk = vars_trks[i][j];
+
+          // plot key
+          const TString plotkey = Form("%i_%i_%i", i, j, k);
+
+          // can include masks of 1,0,2 to enter denominator
+          if (refmask_trk >= 0)
+            plots[plotkey]->Fill((refmask_trk == 0), var_trk);  // only completely unassociated reco tracks enter FR
+        }                                                       // end loop over vars
+
+        // base hist key
+        const TString basekey = Form("%i_%i", j, k);  // hist key
+
+        // key strings
+        const TString nhitkey = Form("%s_0", basekey.Data());
+        const TString frackey = Form("%s_1", basekey.Data());
+        const TString scorekey = Form("%s_2", basekey.Data());
+
+        const TString dnhitkey = Form("%s_d_0", basekey.Data());
+        const TString dinvptkey = Form("%s_d_1", basekey.Data());
+        const TString detakey = Form("%s_d_2", basekey.Data());
+        const TString dphikey = Form("%s_d_3", basekey.Data());
+
+        // all reco
+        hists[Form("%s_0", nhitkey.Data())]->Fill(nHits_trk);
+        hists[Form("%s_0", frackey.Data())]->Fill(fracHits_trk);
+        hists[Form("%s_0", scorekey.Data())]->Fill(score_trk);
+
+        if (refmask_trk == 0)  // all fakes
+        {
+          hists[Form("%s_1", nhitkey.Data())]->Fill(nHits_trk);
+          hists[Form("%s_1", frackey.Data())]->Fill(fracHits_trk);
+          hists[Form("%s_1", scorekey.Data())]->Fill(score_trk);
+        } else if (refmask_trk == 1)  // all matches
+        {
+          hists[Form("%s_2", nhitkey.Data())]->Fill(nHits_trk);
+          hists[Form("%s_2", frackey.Data())]->Fill(fracHits_trk);
+          hists[Form("%s_2", scorekey.Data())]->Fill(score_trk);
+
+          hists[Form("%s_2", dnhitkey.Data())]->Fill(nHits_trk - (Int_t)nLayers_ref_trk);
+          hists[Form("%s_2", dinvptkey.Data())]->Fill(1.f / pt_trk - 1.f / pt_ref_trk);
+          hists[Form("%s_2", detakey.Data())]->Fill(eta_trk - eta_ref_trk);
+          hists[Form("%s_2", dphikey.Data())]->Fill(dphi_trk);
+
+          if (iTkMatches_trk == 0)  // best matches only
+          {
+            hists[Form("%s_3", nhitkey.Data())]->Fill(nHits_trk);
+            hists[Form("%s_3", frackey.Data())]->Fill(fracHits_trk);
+            hists[Form("%s_3", scorekey.Data())]->Fill(score_trk);
+
+            hists[Form("%s_3", dnhitkey.Data())]->Fill(nHits_trk - (Int_t)nLayers_ref_trk);
+            hists[Form("%s_3", dinvptkey.Data())]->Fill(1.f / pt_trk - 1.f / pt_ref_trk);
+            hists[Form("%s_3", detakey.Data())]->Fill(eta_trk - eta_ref_trk);
+            hists[Form("%s_3", dphikey.Data())]->Fill(dphi_trk);
+          }  // end check over best matches
+        }    // end check over all matches
+      }      // end loop over pt cuts
+    }        // end loop over trks
+  }          // end loop over entry in tree
+
+  /////////////////
+  // Make output //
+  /////////////////
+
+  // make subdirs
+  TStrVec dirnames = {"fakerate", "quality", "kindiffs"};
+  for (auto& dirname : dirnames)
+    dirname += fSRefDir;
+  const UInt_t ndirs = dirnames.size();
+
+  TDirRefVec subdirs(ndirs);
+  for (auto q = 0U; q < ndirs; q++)
+    subdirs[q] = PlotValidation::MakeSubDirs(dirnames[q]);
+
+  // Draw, divide, and save fake rate plots --> then delete!
+  for (auto j = 0U; j < fNTrks; j++)  // loop over trks
+  {
+    for (auto k = 0U; k < fNPtCuts; k++)  // loop over pt cuts
+    {
+      // fake rate plots
+      for (auto i = 0U; i < fNVars; i++)  // loop over vars
+      {
+        const Int_t diridx = 0;
+        const TString plotkey = Form("%i_%i_%i", i, j, k);
+        PlotValidation::DrawWriteSavePlot(plots[plotkey], subdirs[diridx], dirnames[diridx], "AP");
+        delete plots[plotkey];
+      }
+
+      // track quality plots
+      for (auto n = 0U; n < fNTrkQual; n++)  // loop over track quality vars
+      {
+        for (auto o = 0U; o < ncolls; o++)  // loop over collection of tracks
+        {
+          const Int_t diridx = 1;
+          const TString histkey = Form("%i_%i_%i_%i", j, k, n, o);
+          PlotValidation::DrawWriteSavePlot(hists[histkey], subdirs[diridx], dirnames[diridx], "");
+          delete hists[histkey];
+        }  // end loop over track collections
+      }    // end loop over hit vars
+
+      // kinematic diff plots
+      for (auto p = 0U; p < ndvars; p++)  // loop over diff vars
+      {
+        for (auto o = 2U; o < ncolls; o++)  // loop over collection of tracks for only matched tracks
+        {
+          const Int_t diridx = 2;
+          const TString histkey = Form("%i_%i_d_%i_%i", j, k, p, o);
+          PlotValidation::DrawWriteSavePlot(hists[histkey], subdirs[diridx], dirnames[diridx], "");
+          delete hists[histkey];
+        }  // end loop over track collections
+      }    // end loop over diff plots
+
+    }  // end loop over pt cuts
+  }    // end loop over tracks
+}
+
+void PlotValidation::PrintTotals(int algo) {  // algo is not referenced anywhere in this body
+  ///////////////////////////////////////////////
+  // Get number of events and number of tracks //
+  ///////////////////////////////////////////////
+
+  Int_t Nevents = 0;  // ends up as the largest evtID found in efftree
+  Int_t evtID = 0;
+  TBranch* b_evtID = 0;
+  efftree->SetBranchAddress("evtID", &evtID, &b_evtID);
+  const UInt_t nentries = efftree->GetEntries();
+  for (auto e = 0U; e < nentries; e++) {
+    b_evtID->GetEntry(e);
+    if (evtID > Nevents)
+      Nevents = evtID;
+  }
+
+  const Int_t NtracksMC = efftree->GetEntries();  // every efftree entry is counted as one reference track
+  const Float_t ntkspevMC = Float_t(NtracksMC) / Float_t(Nevents);  // NOTE(review): no guard for Nevents == 0
+  const Int_t NtracksReco = frtree->GetEntries();  // every frtree entry is counted as one reco track
+  const Float_t ntkspevReco = Float_t(NtracksReco) / Float_t(Nevents);
+
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////
+  // Print out totals of nHits, frac of Hits shared, track score, eff, FR, DR rate of seeds, build, fit //
+  //                --> numer/denom plots for phi, know it will be in the bounds.                       //
+  ////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+  const TStrVec rates = {"eff", "fr", "dr"};  // name prefixes of the TEfficiency objects read back below
+  const TStrVec srates = {"Efficiency", "Fake Rate", "Duplicate Rate"};
+  const TStrVec dirnames = {"efficiency", "fakerate", "duplicaterate"};  // fSRefDir is appended when used
+  const TStrVec types = (fCmsswComp ? TStrVec{"cmssw", "reco", "cmssw"}
+                                    : TStrVec{"sim", "reco", "sim"});  // types will be same size as rates!
+  const UInt_t nrates = rates.size();
+
+  const TStrVec snumers = {
+      fSRefTitle + " Tracks Matched", "Unmatched Reco Tracks", fSRefTitle + " Tracks Matched (nTimes>1)"};
+  const TStrVec sdenoms = {
+      "Eligible " + fSRefTitle + " Tracks", "Eligible Reco Tracks", "Eligible " + fSRefTitle + " Tracks"};
+
+  TEffRefMap plots;  // keyed by "<track idx>_<pt cut idx>_<rate idx>"
+  for (auto j = 0U; j < fNTrks; j++) {
+    const auto& trk = fTrks[j];
+
+    for (auto k = 0U; k < fNPtCuts; k++) {
+      const auto& hptcut = fHPtCuts[k];
+
+      for (auto l = 0U; l < nrates; l++) {
+        const auto& rate = rates[l];
+        const auto& type = types[l];
+        const auto& dirname = dirnames[l];
+
+        const TString plotkey = Form("%i_%i_%i", j, k, l);
+        const TString plotname = dirname + fSRefDir + "/" + rate + "_" + type + "_phi_" + trk + "_pt" + hptcut;
+        plots[plotkey] = (TEfficiency*)fOutRoot->Get(plotname.Data());  // NOTE(review): result is not null-checked
+      }
+    }
+  }
+
+  // want nHits plots for (nearly) all types of tracks
+  const TStrVec colls = {"allreco", "fake", "bestmatch"};  // "allmatch" is not read back here
+  const TStrVec scolls = {"All Reco", "Fake", "Best Match"};
+  const UInt_t ncolls = colls.size();
+
+  TH1FRefMap hists;  // keyed by "<track idx>_<pt cut idx>_<quality var idx>_<index into colls above>"
+  for (auto j = 0U; j < fNTrks; j++) {
+    const auto& trk = fTrks[j];
+
+    for (auto k = 0U; k < fNPtCuts; k++) {
+      const auto& hptcut = fHPtCuts[k];
+
+      for (auto n = 0U; n < fNTrkQual; n++) {
+        const auto& trkqual = fTrkQual[n];
+
+        for (auto o = 0U; o < ncolls; o++) {
+          const auto& coll = colls[o];
+
+          const TString histkey = Form("%i_%i_%i_%i", j, k, n, o);
+          const TString histname = "quality" + fSRefDir + "/h_" + trkqual + "_" + coll + "_" + trk + "_pt" + hptcut;
+          hists[histkey] = (TH1F*)fOutRoot->Get(histname.Data());  // NOTE(review): result is not null-checked
+        }
+      }
+    }
+  }
+
+  // setup output stream: the text file receives the same lines that go to std::cout
+  const TString outfilename = fOutName + "/totals_" + fOutName + fSRefOut + ".txt";
+  std::ofstream totalsout(outfilename.Data());  // NOTE(review): open failure is not checked
+
+  std::cout << "--------Track Reconstruction Summary--------" << std::endl;
+  std::cout << "nEvents: " << Nevents << Form(" n%sTracks/evt: ", fSRefTitle.Data()) << ntkspevMC
+            << " nRecoTracks/evt: " << ntkspevReco << std::endl;
+  std::cout << "++++++++++++++++++++++++++++++++++++++++++++" << std::endl;
+  std::cout << std::endl;
+
+  totalsout << "--------Track Reconstruction Summary--------" << std::endl;
+  totalsout << "nEvents: " << Nevents << Form(" n%sTracks/evt: ", fSRefTitle.Data()) << ntkspevMC
+            << " nRecoTracks/evt: " << ntkspevReco << std::endl;
+  totalsout << "++++++++++++++++++++++++++++++++++++++++++++" << std::endl;
+  totalsout << std::endl;
+
+  for (auto k = 0U; k < fNPtCuts; k++) {  // one section per pt cut
+    const auto& ptcut = fPtCuts[k];
+
+    std::cout << Form("xxxxxxxxxx Track pT > %3.1f Cut xxxxxxxxxx", ptcut) << std::endl;
+    std::cout << std::endl;
+
+    totalsout << Form("xxxxxxxxxx Track pT > %3.1f Cut xxxxxxxxxx", ptcut) << std::endl;
+    totalsout << std::endl;
+
+    for (auto j = 0U; j < fNTrks; j++) {  // one sub-section per track collection
+      const auto& strk = fSTrks[j];
+
+      std::cout << strk.Data() << " Tracks" << std::endl;
+      std::cout << "++++++++++++++++++++++++++++++++++++++++++" << std::endl << std::endl;
+      std::cout << "Quality Info for " << strk.Data() << " Track Collections" << std::endl;
+      std::cout << "==========================================" << std::endl;
+
+      totalsout << strk.Data() << " Tracks" << std::endl;
+      totalsout << "++++++++++++++++++++++++++++++++++++++++++" << std::endl << std::endl;
+      totalsout << "Quality Info for " << strk.Data() << " Track Collections" << std::endl;
+      totalsout << "==========================================" << std::endl;
+      for (auto o = 0U; o < ncolls; o++) {  // third key field below is the quality var: 0 nHits, 1 frac, 2 score
+        const auto& scoll = scolls[o];
+
+        const Float_t nHits_mean = hists[Form("%i_%i_0_%i", j, k, o)]->GetMean(1);           // 1 is mean of x-axis
+        const Float_t nHits_mean_unc = hists[Form("%i_%i_0_%i", j, k, o)]->GetMeanError(1);  // 1 is mean of x-axis
+        const Float_t fracHits_mean = hists[Form("%i_%i_1_%i", j, k, o)]->GetMean(1);
+        const Float_t fracHits_mean_unc = hists[Form("%i_%i_1_%i", j, k, o)]->GetMeanError(1);
+        const Float_t score_mean = hists[Form("%i_%i_2_%i", j, k, o)]->GetMean(1);
+        const Float_t score_mean_unc = hists[Form("%i_%i_2_%i", j, k, o)]->GetMeanError(1);
+
+        std::cout << scoll.Data() << " Tracks" << std::endl;
+        std::cout << "Mean nHits / Track = " << nHits_mean << " +/- " << nHits_mean_unc << std::endl;
+        std::cout << "Mean Shared Hits / Track = " << fracHits_mean << " +/- " << fracHits_mean_unc << std::endl;
+        std::cout << "Mean Track Score = " << score_mean << " +/- " << score_mean_unc << std::endl;
+        std::cout << "------------------------------------------" << std::endl;
+
+        totalsout << scoll.Data() << " Tracks" << std::endl;
+        totalsout << "Mean nHits / Track = " << nHits_mean << " +/- " << nHits_mean_unc << std::endl;
+        totalsout << "Mean Shared Hits / Track = " << fracHits_mean << " +/- " << fracHits_mean_unc << std::endl;
+        totalsout << "Mean Track Score = " << score_mean << " +/- " << score_mean_unc << std::endl;
+        totalsout << "------------------------------------------" << std::endl;
+      }
+
+      std::cout << std::endl << "Rates for " << strk.Data() << " Tracks" << std::endl;
+      std::cout << "==========================================" << std::endl;
+
+      totalsout << std::endl << "Rates for " << strk.Data() << " Tracks" << std::endl;
+      totalsout << "==========================================" << std::endl;
+      for (auto l = 0U; l < nrates; l++) {  // efficiency, fake rate, duplicate rate
+        const auto& snumer = snumers[l];
+        const auto& sdenom = sdenoms[l];
+        const auto& srate = srates[l];
+
+        EffStruct effs;  // filled by GetTotalEfficiency from the passed/total histograms of the plot
+        PlotValidation::GetTotalEfficiency(plots[Form("%i_%i_%i", j, k, l)], effs);
+
+        std::cout << snumer.Data() << ": " << effs.passed_ << std::endl;
+        std::cout << sdenom.Data() << ": " << effs.total_ << std::endl;
+        std::cout << "------------------------------------------" << std::endl;
+        std::cout << srate.Data() << ": " << effs.eff_ << ", -" << effs.elow_ << ", +" << effs.eup_ << std::endl;
+        std::cout << "------------------------------------------" << std::endl;
+
+        totalsout << snumer.Data() << ": " << effs.passed_ << std::endl;
+        totalsout << sdenom.Data() << ": " << effs.total_ << std::endl;
+        totalsout << "------------------------------------------" << std::endl;
+        totalsout << srate.Data() << ": " << effs.eff_ << ", -" << effs.elow_ << ", +" << effs.eup_ << std::endl;
+        totalsout << "------------------------------------------" << std::endl;
+      }
+      std::cout << std::endl << std::endl;
+      totalsout << std::endl << std::endl;
+    }
+  }
+
+  // delete everything that was fetched from fOutRoot above
+  for (auto& hist : hists)
+    delete hist.second;
+  for (auto& plot : plots)
+    delete plot.second;
+}
+
+template <typename T>
+void PlotValidation::DrawWriteSavePlot(T*& plot, TDirectory*& subdir, const TString& subdirname, const TString& option) {
+  // make subdir the current directory, attach the plot to it, and write the plot under its own name
+  subdir->cd();
+  plot->SetDirectory(subdir);
+  plot->Write(plot->GetName(), TObject::kWriteDelete);
+
+  // images are only produced on request
+  if (!fSaveAs)
+    return;
+
+  TCanvas* const canvas = new TCanvas();  // scratch canvas, deleted at the end
+  canvas->cd();
+  plot->Draw(option.Data());
+
+  // image paths: <fOutName>/<subdirname>/{log,lin}/<plot name>.<fOutType>; the log-y version is saved first
+  const TString logfile = Form("%s/%s/log/%s.%s", fOutName.Data(), subdirname.Data(), plot->GetName(), fOutType.Data());
+  const TString linfile = Form("%s/%s/lin/%s.%s", fOutName.Data(), subdirname.Data(), plot->GetName(), fOutType.Data());
+  canvas->SetLogy(1);
+  canvas->SaveAs(logfile.Data());
+  canvas->SetLogy(0);
+  canvas->SaveAs(linfile.Data());
+  delete canvas;
+}
+
+void PlotValidation::GetTotalEfficiency(const TEfficiency* eff, EffStruct& effs) {
+  // overall counts: Integral() of the total and passed histograms of the input
+  effs.total_ = eff->GetTotalHistogram()->Integral();
+  effs.passed_ = eff->GetPassedHistogram()->Integral();
+
+  // single-bin helper loaded with those counts; the total is set before the passed count
+  TEfficiency oneBin("tmp_eff", "tmp_eff", 1, 0, 1);
+  oneBin.SetTotalEvents(1, effs.total_);
+  oneBin.SetPassedEvents(1, effs.passed_);
+
+  effs.eup_ = oneBin.GetEfficiencyErrorUp(1);    // upper uncertainty of bin 1
+  effs.elow_ = oneBin.GetEfficiencyErrorLow(1);  // lower uncertainty of bin 1
+  effs.eff_ = oneBin.GetEfficiency(1);           // efficiency of bin 1
+}
+
+void PlotValidation::MakeOutDir(const TString& outdirname) {
+  // create outdirname (with missing parents) if the path is not there yet; no shell command is built from the name
+  FileStat_t dummyFileStat;
+  if (gSystem->GetPathInfo(outdirname.Data(), dummyFileStat) == 1) {
+    if (gSystem->mkdir(outdirname.Data(), kTRUE) != 0)  // recursive mkdir; non-zero return means failure
+      std::cerr << "Could not create output directory " << outdirname.Data() << std::endl;
+  }
+}
+
+void PlotValidation::MoveInput() {
+  // runs "mv <fInName> <fOutName>" through the shell
+  gSystem->Exec(("mv " + fInName + " " + fOutName).Data());
+}
+
+TDirectory* PlotValidation::MakeSubDirs(const TString& subdirname) {
+  // on disk: <fOutName>/<subdirname> plus its lin and log children; in fOutRoot: a directory named subdirname
+  const TString base = fOutName + "/" + subdirname;
+  MakeOutDir(base);
+  for (const char* scale : {"/lin", "/log"})
+    MakeOutDir(base + scale);
+  return fOutRoot->mkdir(subdirname.Data());
+}
+
+void PlotValidation::SetupStyle() {
+  gROOT->Reset();
+  TStyle* const style = gStyle;  // every setting below is applied to the global style object
+  style->SetTitleFontSize(0.04);
+  style->SetOptStat("emou");  // statistics-box option string
+  style->SetOptFit(1011);     // fit-box option code
+  style->SetStatX(0.9);       // statistics-box X / Y / W / H settings
+  style->SetStatY(1.0);
+  style->SetStatW(0.1);
+  style->SetStatH(0.08);
+}
+
+void PlotValidation::SetupBins() {
+  // pt bins: variable-width edges, strictly increasing (a repeated edge would create a zero-width bin)
+  PlotValidation::SetupVariableBins(
+      "0 0.25 0.5 0.75 1 1.25 1.5 1.75 2 2.5 3 3.5 4 4.5 5 6 7 8 9 10 15 20 25 30 40 50 100 200 500 1000", fPtBins);
+
+  // eta bins: 60 bins of width 0.1 over [-3, 3]
+  PlotValidation::SetupFixedBins(60, -3, 3, fEtaBins);
+
+  // phi bins: 70 bins of width 0.1 over [-3.5, 3.5]
+  PlotValidation::SetupFixedBins(70, -3.5, 3.5, fPhiBins);
+
+  // nLayers bins: 26 unit-width bins centered on the integers 0..25
+  PlotValidation::SetupFixedBins(26, -0.5, 25.5, fNLayersBins);
+
+  // nHits bins: 40 unit-width bins over [0, 40]
+  PlotValidation::SetupFixedBins(40, 0, 40, fNHitsBins);
+
+  // fraction hits matched bins: 110 bins of width 0.01 over [0, 1.1]
+  PlotValidation::SetupFixedBins(110, 0, 1.1, fFracHitsBins);
+
+  // track score bins: 50 bins of width 110 over [-500, 5000]
+  PlotValidation::SetupFixedBins(50, -500, 5000, fScoreBins);
+
+  // dNhits: 40 unit-width bins over [-20, 20]
+  PlotValidation::SetupFixedBins(40, -20, 20, fDNHitsBins);
+
+  // dinvpt: 45 bins over [-1, 1]
+  PlotValidation::SetupFixedBins(45, -1.0, 1.0, fDInvPtBins);
+
+  // dphi: 45 bins over [-0.1, 0.1]
+  PlotValidation::SetupFixedBins(45, -0.1, 0.1, fDPhiBins);
+
+  // deta: 45 bins over [-0.1, 0.1]
+  PlotValidation::SetupFixedBins(45, -0.1, 0.1, fDEtaBins);
+}
+
+void PlotValidation::SetupVariableBins(const std::string& s_bins, DblVec& bins) {
+  // append every whitespace-separated number in s_bins to bins; reading stops at the first token that fails to parse
+  std::istringstream edges(s_bins);
+  for (Double_t edge = 0; edges >> edge;)
+    bins.push_back(edge);
+}
+
+void PlotValidation::SetupFixedBins(const UInt_t nBins, const Double_t low, const Double_t high, DblVec& bins) {
+  const Double_t step = (high - low) / nBins;
+  // append the nBins + 1 equally spaced edges, starting at low
+  for (UInt_t edge = 0; edge <= nBins; ++edge)
+    bins.push_back(low + edge * step);
+}
+
+void PlotValidation::SetupCommonVars() {
+  // kinematic variables for the rate plots: name stems, axis labels, and units (one entry per variable)
+  fVars = {"pt", "eta", "phi", "nLayers"};
+  fSVars = {"p_{T}", "#eta", "#phi", "Number of layers"};
+  fSUnits = {"GeV/c", "", "", ""};
+  fNVars = fVars.size();
+
+  fSVarPt = fSVars[0];
+  fSUnitPt = fSUnits[0];  // bare pt unit: copied before the brackets are added below
+
+  // wrap every non-empty unit as " [unit]"
+  for (auto& unit : fSUnits) {
+    if (!unit.EqualTo(""))
+      unit = " [" + unit + "]";
+  }
+
+  // binning per rate variable, in the same order as fVars (copies the current contents of the bin vectors)
+  fVarBins = {fPtBins, fEtaBins, fPhiBins, fNLayersBins};
+
+  // track collections and their labels: there is no seed collection when fCmsswComp is set
+  if (fCmsswComp) {
+    fTrks = {"build", "fit"};
+    fSTrks = {"Build", "Fit"};
+  } else {
+    fTrks = {"seed", "build", "fit"};
+    fSTrks = {"Seed", "Build", "Fit"};
+  }
+  fNTrks = fTrks.size();
+
+  // pt cuts: numeric value, "%3.1f" text for titles, and the same text with "." replaced by "p" for object names
+  fPtCuts = {0.f, 0.9f, 2.f};
+  for (const auto cut : fPtCuts) {
+    TString text = Form("%3.1f", cut);
+    fSPtCuts.emplace_back(text);
+    text.ReplaceAll(".", "p");
+    fHPtCuts.emplace_back(text);
+  }
+  fNPtCuts = fPtCuts.size();
+
+  // track quality variables and their labels
+  fTrkQual = {"nHits", "fracHitsMatched", "score"};
+  fSTrkQual = {"nHits / Track", "Highest Fraction of Matched Hits / Track", "Track Score"};
+  fNTrkQual = fTrkQual.size();
+
+  // strings that depend on the reference collection, selected by fCmsswComp
+  fSRefTitle = fCmsswComp ? "CMSSW" : "Sim";
+  fSRefVar = fCmsswComp ? "cmssw" : "mc_gen";
+  fSRefMask = fCmsswComp ? "cmssw" : "mc";
+  fSRefVarTrk = fCmsswComp ? "cmssw" : "mc";
+  fSRefDir = fCmsswComp ? "_cmssw" : "";
+  fSRefOut = fCmsswComp ? "_cmssw" : "";
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotValidation.hh b/RecoTracker/MkFitCore/standalone/plotting/PlotValidation.hh
new file mode 100644
index 0000000000000..456b2e0ac5fa0
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotValidation.hh
@@ -0,0 +1,160 @@
+#ifndef _PlotValidation_
+#define _PlotValidation_
+
+#include "TFile.h"
+#include "TTree.h"
+#include "TBranch.h"
+#include "TDirectory.h"
+#include "TString.h"
+#include "TEfficiency.h"
+#include "TH1F.h"
+#include "TCanvas.h"
+#include "TROOT.h"
+#include "TSystem.h"
+#include "TStyle.h"
+
+#include <string>
+#include <vector>
+#include <map>
+#include <iomanip>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <cmath>
+
+typedef std::vector<Float_t> FltVec;
+typedef std::vector<FltVec> FltVecVec;
+typedef std::vector<Double_t> DblVec;
+typedef std::vector<DblVec> DblVecVec;
+typedef std::vector<Int_t> IntVec;
+typedef std::vector<TString> TStrVec;
+
+typedef std::vector<TBranch*> TBrRefVec;
+typedef std::vector<TBrRefVec> TBrRefVecVec;
+typedef std::vector<TDirectory*> TDirRefVec;
+
+typedef std::map<TString, TH1F*> TH1FRefMap;
+typedef std::map<TString, TEfficiency*> TEffRefMap;
+
+struct EffStruct {
+  // Counts and the efficiency derived from them. Every field starts at zero, so a
+  // default-constructed EffStruct never holds indeterminate values.
+
+  Float_t passed_ = 0.f;  // numerator count
+  Float_t total_ = 0.f;   // denominator count
+
+  Float_t eff_ = 0.f;   // efficiency value
+  Float_t elow_ = 0.f;  // lower uncertainty on eff_
+  Float_t eup_ = 0.f;   // upper uncertainty on eff_
+};
+
+class PlotValidation {
+public:
+  PlotValidation(const TString& inName,
+                 const TString& outName,
+                 const Bool_t cmsswComp,
+                 const int algo,
+                 const Bool_t mvInput,
+                 const Bool_t rmSuffix,
+                 const Bool_t saveAs,
+                 const TString& outType);
+  ~PlotValidation();
+
+  // setup functions
+  void SetupStyle();  // applies gROOT / gStyle settings
+  void SetupBins();   // fills all bin-edge vectors declared below
+  void SetupVariableBins(const std::string& s_bins, DblVec& bins);  // appends edges parsed from a whitespace-separated list
+  void SetupFixedBins(const UInt_t nBins, const Double_t low, const Double_t high, DblVec& bins);  // appends nBins + 1 edges
+  void SetupCommonVars();  // fills the variable, track, pt-cut, quality and reference members below
+
+  // main call
+  void Validation(int algo = 0);
+  void PlotEffTree(int algo = 0);
+  void PlotFRTree(int algo = 0);   // fills fake-rate, quality and kinematic-diff plots from frtree
+  void PrintTotals(int algo = 0);  // prints the summary to std::cout and to a totals text file
+
+  // output functions
+  template <typename T>  // writes plot into subdir; if fSaveAs is set, also saves log and lin images
+  void DrawWriteSavePlot(T*& plot, TDirectory*& subdir, const TString& subdirname, const TString& option);
+
+  // helper functions
+  void MakeOutDir(const TString& outdirname);  // creates the directory if the path does not exist
+  void GetTotalEfficiency(const TEfficiency* eff, EffStruct& effs);  // integrated counts and efficiency into effs
+  TDirectory* MakeSubDirs(const TString& subdirname);  // disk dirs (plus lin, log) and the fOutRoot directory
+  void MoveInput();  // runs "mv fInName fOutName"
+
+private:
+  // input+output config
+  const TString fInName;
+  const Bool_t fCmsswComp;  // selects the "cmssw" vs "sim"/"mc" names and the track collection list
+  const Bool_t fMvInput;
+  const Bool_t fRmSuffix;
+  const Bool_t fSaveAs;     // when true, DrawWriteSavePlot also saves images
+  const TString fOutType;   // file extension of the saved images
+
+  const int fAlgo;
+
+  // main input
+  TFile* fInRoot;
+  TTree* efftree;
+  TTree* frtree;
+
+  // binning for rate plots
+  DblVec fPtBins;
+  DblVec fEtaBins;
+  DblVec fPhiBins;
+  DblVec fNLayersBins;
+
+  // binning for track quality hists
+  DblVec fNHitsBins;
+  DblVec fFracHitsBins;
+  DblVec fScoreBins;
+
+  // binning for diff hists
+  DblVec fDNHitsBins;
+  DblVec fDInvPtBins;
+  DblVec fDPhiBins;
+  DblVec fDEtaBins;
+
+  // rate vars
+  TStrVec fVars;    // name stems: pt, eta, phi, nLayers
+  TStrVec fSVars;   // axis labels, same order as fVars
+  TStrVec fSUnits;  // units, each non-empty one stored as " [unit]"
+  UInt_t fNVars;
+
+  TString fSVarPt;   // pt label (fSVars[0])
+  TString fSUnitPt;  // pt unit without brackets
+
+  // rate bins
+  DblVecVec fVarBins;  // one edge vector per entry of fVars
+
+  // track collections
+  TStrVec fTrks;   // collection names used in branch and plot names
+  TStrVec fSTrks;  // collection labels used in titles and printouts
+  UInt_t fNTrks;
+
+  // pt cuts
+  FltVec fPtCuts;    // numeric cut values
+  TStrVec fSPtCuts;  // "%3.1f" text of each cut
+  TStrVec fHPtCuts;  // same text with "." replaced by "p"
+  UInt_t fNPtCuts;
+
+  // track quality plots
+  TStrVec fTrkQual;   // nHits, fracHitsMatched, score
+  TStrVec fSTrkQual;  // labels, same order as fTrkQual
+  UInt_t fNTrkQual;
+
+  // reference related strings
+  TString fSRefTitle;
+  TString fSRefVar;
+  TString fSRefMask;
+  TString fSRefVarTrk;
+  TString fSRefDir;
+  TString fSRefOut;
+
+  // output variables
+  TString fOutName;  // output directory on disk; also part of the totals file name
+  TFile* fOutRoot;   // ROOT file in which the plot subdirectories are made
+};
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.cpp b/RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.cpp
new file mode 100644
index 0000000000000..782c4299a0f38
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.cpp
@@ -0,0 +1,102 @@
+#include "PlotsFromDump.hh"
+
+// Configure style, suite, builds, tests and plots, and resolve the label of the requested build routine.
+PlotsFromDump::PlotsFromDump(const TString& sample, const TString& build, const TString& suite, const int useARCH)
+    : sample(sample), build(build), suite(suite), useARCH(useARCH) {
+  // setup style for plotting
+  setupStyle();
+  // setup suite enum
+  setupSUITEEnum(suite);
+  // setup build options : true for isBenchmark-type plots, false for no CMSSW
+  setupBuilds(true, false);
+
+  // get the right build label; the iterator is only dereferenced when the build name was actually found
+  const auto match =
+      std::find_if(builds.begin(), builds.end(), [&](const auto& ibuild) { return build.EqualTo(ibuild.name); });
+  if (match != builds.end())
+    label = match->label;
+  if (label == "") {
+    std::cerr << build.Data() << " build routine not specified in list of builds! Exiting..." << std::endl;
+    exit(1);
+  }
+
+  // Setup test opts
+  setupTests(useARCH);
+  // Setup plot opts
+  setupPlots();
+}
+
+PlotsFromDump::~PlotsFromDump() = default;  // no explicit cleanup is performed on destruction
+
+// Overlay the normalized histograms of every test for each configured plot and save one PNG per plot.
+void PlotsFromDump::RunPlotsFromDump() {
+  // Open ROOT files first; an unreadable file is fatal here instead of a later null-pointer dereference
+  std::vector<TFile*> files(ntests);
+  for (auto t = 0U; t < ntests; t++) {
+    const auto& test = tests[t];
+    const TString fname = "test_" + test.arch + "_" + sample + "_" + build + "_" + test.suffix + ".root";
+    files[t] = TFile::Open(fname);
+    if (files[t] == nullptr || files[t]->IsZombie()) {
+      std::cerr << "Could not open " << fname.Data() << "! Exiting..." << std::endl;
+      exit(1);
+    }
+  }
+
+  // Outer loop over all overplots
+  for (auto p = 0U; p < nplots; p++) {
+    const auto& plot = plots[p];
+    // declare standard stuff: the MXPHI and MXETA plots use a linear y axis, all others are logarithmic
+    const Bool_t isLogy =
+        !(plot.name.Contains("MXPHI", TString::kExact) || plot.name.Contains("MXETA", TString::kExact));
+    auto canv = new TCanvas();
+    canv->cd();
+    canv->SetLogy(isLogy);
+    auto leg = new TLegend(0.7, 0.68, 0.98, 0.92);
+    Double_t min = 1e9;
+    Double_t max = -1e9;
+
+    std::vector<TH1F*> hists(ntests);
+    for (auto t = 0U; t < ntests; t++) {
+      const auto& test = tests[t];
+      auto& hist = hists[t];
+      hist = (TH1F*)files[t]->Get(plot.name + "_" + test.suffix);
+      if (hist == nullptr) {  // fail with a message instead of dereferencing a null pointer
+        std::cerr << plot.name.Data() << "_" << test.suffix.Data() << " not found in input! Exiting..." << std::endl;
+        exit(1);
+      }
+      const TString title = hist->GetTitle();
+      hist->SetTitle(title + " [" + label + " - " + sample + "]");
+      hist->GetXaxis()->SetTitle(plot.xtitle.Data());
+      hist->GetYaxis()->SetTitle(plot.ytitle.Data());
+      hist->SetLineColor(test.color);
+      hist->SetMarkerColor(test.color);
+      hist->SetMarkerStyle(test.marker);
+      // normalize to unit area; an empty hist is left as is rather than scaled by 1/0
+      const auto integral = hist->Integral();
+      if (integral != 0)
+        hist->Scale(1.f / integral);
+      GetMinMaxHist(hist, min, max);
+    }
+
+    for (auto t = 0U; t < ntests; t++) {
+      const auto& test = tests[t];
+      auto& hist = hists[t];
+      SetMinMaxHist(hist, min, max, isLogy);
+      hist->Draw(t > 0 ? "P SAME" : "P");
+      const TString mean = Form("%4.1f", hist->GetMean());
+      leg->AddEntry(hist, test.arch + " " + test.suffix + " [#mu = " + mean + "]", "p");
+    }
+
+    // draw legend and save plot
+    leg->Draw("SAME");
+    canv->SaveAs(sample + "_" + build + "_" + plot.outname + ".png");
+    // delete temps
+    for (auto& hist : hists)
+      delete hist;
+    delete leg;
+    delete canv;
+  }
+  // delete files
+  for (auto& file : files)
+    delete file;
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.hh b/RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.hh
new file mode 100644
index 0000000000000..80b6a1a3aea47
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/PlotsFromDump.hh
@@ -0,0 +1,93 @@
+#ifndef _PlotsFromDump_
+#define _PlotsFromDump_
+
+#include "Common.hh"
+
+// Options describing one benchmark test whose dumped hists are overlaid: architecture, VU/TH suffix, and style.
+struct TestOpts {
+  TestOpts() {}
+  TestOpts(const TString& arch, const TString& suffix, const Color_t color, const Marker_t marker)
+      : arch(arch), suffix(suffix), color(color), marker(marker) {}
+  TString arch;         // architecture label, e.g. "SKL-SP"
+  TString suffix;       // vector-unit / thread suffix, e.g. "NVU1_NTH1"
+  Color_t color = 0;    // line and marker color; zero by default so a default-constructed object is well defined
+  Marker_t marker = 0;  // marker style; zero by default for the same reason
+};
+typedef std::vector<TestOpts> TOVec;
+
+namespace {
+  TOVec tests;    // test configurations, filled by setupTests()
+  UInt_t ntests;  // number of entries in tests
+  // Fill tests with the entries selected by useARCH; the list is rebuilt from scratch on every call.
+  void setupTests(const int useARCH) {
+    // N.B.: Consult ./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh for info on which VU and TH tests were used for making text dumps
+    tests.clear();  // avoid accumulating duplicate entries if this is called more than once
+    if (useARCH == 0 or useARCH == 2 or useARCH == 3 or useARCH == 4) {
+      tests.emplace_back("SKL-SP", "NVU1_NTH1", kRed + 1, kOpenTriangleUp);
+      tests.emplace_back("SKL-SP", "NVU16int_NTH64", kMagenta + 1, kOpenTriangleDown);
+    }
+    if (useARCH == 3 or useARCH == 4) {
+      tests.emplace_back("SNB", "NVU1_NTH1", kBlue, kOpenDiamond);
+      tests.emplace_back("SNB", "NVU8int_NTH24", kBlack, kOpenCross);
+      tests.emplace_back("KNL", "NVU1_NTH1", kGreen + 1, kOpenTriangleUp);
+      tests.emplace_back("KNL", "NVU16int_NTH256", kOrange + 1, kOpenTriangleDown);
+    }
+    if (useARCH == 1 or useARCH == 2 or useARCH == 4) {
+      tests.emplace_back("LNX-G", "NVU1_NTH1", 7, 40);
+      tests.emplace_back("LNX-G", "NVU16int_NTH64", 8, 42);
+      tests.emplace_back("LNX-S", "NVU1_NTH1", 46, 49);
+      tests.emplace_back("LNX-S", "NVU16int_NTH64", 30, 48);
+    }
+    ntests = tests.size();  // set ntests after tests is set up
+  }
+};  // namespace
+
+// Bundle of strings describing one overlaid plot: input hist name, axis titles, and output file tag.
+struct PlotOpts {
+  PlotOpts() = default;
+  PlotOpts(const TString& name, const TString& xtitle, const TString& ytitle, const TString& outname)
+      : name{name}, xtitle{xtitle}, ytitle{ytitle}, outname{outname} {}
+  TString name;     // hist name prefix looked up in the input files
+  TString xtitle;   // x-axis title
+  TString ytitle;   // y-axis title
+  TString outname;  // tag used in the name of the saved PNG
+};
+using POVec = std::vector<PlotOpts>;
+
+namespace {
+  POVec plots;    // plot configurations, filled by setupPlots()
+  UInt_t nplots;  // number of entries in plots
+  // Fill plots with the mkFit (MX*) and mkFit-minus-CMSSW (DC*) hist definitions; rebuilt on every call.
+  void setupPlots() {
+    // N.B. Consult plotting/makePlotsFromDump.py for info on hist names
+    plots.clear();  // avoid accumulating duplicate entries if this is called more than once
+    plots.emplace_back("h_MXNH", "Number of Hits Found", "Fraction of Tracks", "nHits");
+    plots.emplace_back("h_MXPT", "p_{T}^{mkFit}", "Fraction of Tracks", "pt");
+    plots.emplace_back("h_MXPHI", "#phi^{mkFit}", "Fraction of Tracks", "phi");
+    plots.emplace_back("h_MXETA", "#eta^{mkFit}", "Fraction of Tracks", "eta");
+
+    plots.emplace_back("h_DCNH", "nHits^{mkFit}-nHits^{CMSSW}", "Fraction of Tracks", "dnHits");
+    plots.emplace_back("h_DCPT", "p_{T}^{mkFit}-p_{T}^{CMSSW}", "Fraction of Tracks", "dpt");
+    plots.emplace_back("h_DCPHI", "#phi^{mkFit}-#phi^{CMSSW}", "Fraction of Tracks", "dphi");
+    plots.emplace_back("h_DCETA", "#eta^{mkFit}-#eta^{CMSSW}", "Fraction of Tracks", "deta");
+
+    nplots = plots.size();  // set nplots after plots are set
+  }
+};  // namespace
+
+class PlotsFromDump {  // overlays hists read from the per-test files "test_<arch>_<sample>_<build>_<suffix>.root"
+public:
+  PlotsFromDump(const TString& sample, const TString& build, const TString& suite, const int useARCH);
+  ~PlotsFromDump();
+  void RunPlotsFromDump();  // makes and saves one PNG per entry of plots
+
+private:
+  const TString sample;  // sample name; part of the input and output file names
+  const TString build;   // build routine name; matched against builds to obtain label
+  const TString suite;   // passed to setupSUITEEnum() in the constructor
+  const int useARCH;     // passed to setupTests() to select the registered tests
+
+  TString label;  // label of the matched build, used in the hist titles
+};
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/plotting/StackValidation.cpp b/RecoTracker/MkFitCore/standalone/plotting/StackValidation.cpp
new file mode 100644
index 0000000000000..465d8e8de1686
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/StackValidation.cpp
@@ -0,0 +1,303 @@
+#include "StackValidation.hh"
+
+// Set up style, suite, builds, legend height, input files, reference, rates and pt cuts for the stacks.
+StackValidation::StackValidation(const TString& label,
+                                 const TString& extra,
+                                 const Bool_t cmsswComp,
+                                 const TString& suite)
+    : label(label), extra(extra), cmsswComp(cmsswComp), suite(suite) {
+  // setup style for plotting
+  setupStyle();
+  // setup suite enum
+  setupSUITEEnum(suite);
+
+  // setup build options : true for isBenchmark-type plots, and include cmssw in plots if simval
+  setupBuilds(false, !cmsswComp);
+
+  // set legend y height
+  y1 = 0.80;
+  y2 = y1 + nbuilds * 0.04;  // full + CMSSW = 0.04*5 + 0.8 = 1.0
+
+  // open the files; an unreadable input is fatal here instead of a null-pointer dereference while plotting
+  files.resize(nbuilds);
+  for (auto b = 0U; b < nbuilds; b++) {
+    const TString fname = "validation_" + label + "_" + builds[b].name + extra + "/plots.root";
+    files[b] = TFile::Open(fname);
+    if (files[b] == nullptr || files[b]->IsZombie()) {
+      std::cerr << "Could not open " << fname.Data() << "! Exiting..." << std::endl;
+      exit(1);
+    }
+  }
+
+  // setup ref
+  setupRef(cmsswComp);
+  // setup rates
+  setupRates(cmsswComp);
+  // setup ptcuts
+  setupPtCuts();
+}
+
+StackValidation::~StackValidation() {  // delete every TFile held in files
+  for (auto b = 0U; b < files.size(); b++)
+    delete files[b];
+}
+
+// Produce all stacks: always for "build" tracks, and additionally for "fit" tracks when cmsswComp is set.
+void StackValidation::MakeValidationStacks() {
+  std::vector<TString> trks = {"build"};
+  if (cmsswComp)
+    trks.emplace_back("fit");
+  for (const auto& trk : trks) {
+    MakeRatioStacks(trk);
+    MakeKinematicDiffStacks(trk);
+    MakeQualityStacks(trk);
+  }
+}
+
+void StackValidation::MakeRatioStacks(const TString& trk) {
+  // kinematic variables to plot: one canvas per (rate, pt cut, variable), with the graphs of all builds overlaid
+  std::vector<TString> vars = {"pt", "eta", "phi", "nLayers"};
+  const UInt_t nvars = vars.size();
+
+  // indices for loops match PlotValidation.cpp
+  for (auto l = 0U; l < nrates; l++) {
+    const auto& rate = rates[l];
+
+    for (auto k = 0U; k < nptcuts; k++) {
+      const auto& ptcut = ptcuts[k];
+
+      for (auto i = 0U; i < nvars; i++) {
+        const auto& var = vars[i];
+
+        auto canv = new TCanvas();
+        canv->cd();
+
+        auto leg = new TLegend(0.85, y1, 1.0, y2);
+
+        // tmp axis titles, not sure why ROOT is deleting them
+        TString xtitle = "";
+        TString ytitle = "";
+
+        std::vector<TGraphAsymmErrors*> graphs(nbuilds);
+
+        for (auto b = 0U; b < nbuilds; b++) {
+          const auto& build = builds[b];
+          auto& file = files[b];
+          auto& graph = graphs[b];
+          // NOTE(review): neither file nor the object returned by Get() is checked for null before being used
+          graph = ((TEfficiency*)file->Get(rate.dir + refdir + "/" + rate.rate + "_" + rate.sORr + "_" + var + "_" +
+                                           trk + "_pt" + ptcut))
+                      ->CreateGraph();
+          graph->SetLineColor(build.color);
+          graph->SetMarkerColor(build.color);
+
+          // store tmp titles (taken from the first build only; reapplied in the logx block below)
+          if (b == 0) {
+            xtitle = graph->GetXaxis()->GetTitle();
+            ytitle = graph->GetYaxis()->GetTitle();
+          }
+
+          graph->Draw(b > 0 ? "PZ SAME" : "APZ");  // the first graph also draws the axes ("A")
+
+          // y range: 0-1.05 by default, 0-0.25 for rates whose name contains "ineff" or "dr"
+          if (!rate.rate.Contains("ineff", TString::kExact) && !rate.rate.Contains("dr", TString::kExact))
+            graph->GetYaxis()->SetRangeUser(0.0, 1.05);
+          else
+            graph->GetYaxis()->SetRangeUser(0.0, 0.25);
+
+          leg->AddEntry(graph, build.label.Data(), "LEP");
+        }
+
+        // print standard plot for every rate/variable
+        leg->Draw("SAME");
+        canv->SaveAs(label + "_" + rate.rate + "_" + var + "_" + trk + "_pt" + ptcut + extra + ".png");
+
+        // zoom in on pt range (i == 0 is "pt" in vars): clones are drawn with the x range set to 0-10
+        if (i == 0) {
+          std::vector<TGraphAsymmErrors*> zoomgraphs(nbuilds);
+          for (auto b = 0U; b < nbuilds; b++) {
+            auto& graph = graphs[b];
+            auto& zoomgraph = zoomgraphs[b];
+
+            zoomgraph = (TGraphAsymmErrors*)graph->Clone(Form("%s_zoom", graph->GetName()));
+            zoomgraph->GetXaxis()->SetRangeUser(0, 10);
+            zoomgraph->Draw(b > 0 ? "PZ SAME" : "APZ");
+          }
+
+          leg->Draw("SAME");
+          canv->SaveAs(label + "_" + rate.rate + "_" + var + "_zoom_" + trk + "_pt" + ptcut + extra + ".png");
+
+          for (auto& zoomgraph : zoomgraphs)
+            delete zoomgraph;
+        }
+
+        // make logx plots for pt: causes LOTS of weird effects... workarounds for now
+        if (i == 0) {
+          canv->SetLogx(1);
+
+          // apparently logx removes titles and ranges???
+          for (auto b = 0U; b < nbuilds; b++) {
+            auto& graph = graphs[b];
+            graph->GetXaxis()->SetRangeUser(0.01, graph->GetXaxis()->GetBinUpEdge(graph->GetXaxis()->GetNbins()));
+
+            if (!rate.rate.Contains("ineff", TString::kExact) && !rate.rate.Contains("dr", TString::kExact))
+              graph->GetYaxis()->SetRangeUser(0.0, 1.05);
+            else
+              graph->GetYaxis()->SetRangeUser(0.0, 0.25);
+
+            graph->GetXaxis()->SetTitle(xtitle);  // reapply the titles saved from the first build
+            graph->GetYaxis()->SetTitle(ytitle);
+
+            graph->Draw(b > 0 ? "PZ SAME" : "APZ");
+          }
+
+          leg->Draw("SAME");
+          canv->SaveAs(label + "_" + rate.rate + "_" + var + "_logx_" + trk + "_pt" + ptcut + extra + ".png");
+        }
+
+        delete leg;
+        for (auto& graph : graphs)
+          delete graph;
+        delete canv;
+      }
+    }
+  }
+}
+
+// Overlay, for every collection, variable and pt cut, the unit-normalized kinematic-difference hists of all builds.
+void StackValidation::MakeKinematicDiffStacks(const TString& trk) {
+  // variables to plot
+  std::vector<TString> diffs = {"nHits", "invpt", "eta", "phi"};
+  const UInt_t ndiffs = diffs.size();
+
+  // different reco collections
+  std::vector<TString> colls = {"allmatch", "bestmatch"};
+  const UInt_t ncolls = colls.size();
+
+  // indices for loops match PlotValidation.cpp
+  for (auto o = 0U; o < ncolls; o++) {
+    const auto& coll = colls[o];
+    for (auto p = 0U; p < ndiffs; p++) {
+      const auto& diff = diffs[p];
+      for (auto k = 0U; k < nptcuts; k++) {
+        const auto& ptcut = ptcuts[k];
+        const Bool_t isLogy = true;
+        auto canv = new TCanvas();
+        canv->cd();
+        canv->SetLogy(isLogy);
+
+        auto leg = new TLegend(0.85, y1, 1.0, y2);
+
+        // tmp min/max
+        Double_t min = 1e9;
+        Double_t max = -1e9;
+
+        std::vector<TH1F*> hists(nbuilds);
+        for (auto b = 0U; b < nbuilds; b++) {
+          const auto& build = builds[b];
+          auto& hist = hists[b];
+          const TString hname = "kindiffs" + refdir + "/h_d" + diff + "_" + coll + "_" + trk + "_pt" + ptcut;
+          hist = (files[b] != nullptr ? (TH1F*)files[b]->Get(hname) : nullptr);
+          if (hist == nullptr) {  // fail with a message instead of dereferencing a null pointer
+            std::cerr << hname.Data() << " not found for build " << build.name.Data() << "! Exiting..." << std::endl;
+            exit(1);
+          }
+          hist->SetLineColor(build.color);
+          hist->SetMarkerColor(build.color);
+          // normalize to unit area; an empty hist is left as is rather than scaled by 1/0
+          const auto integral = hist->Integral();
+          if (integral != 0)
+            hist->Scale(1.f / integral);
+          hist->GetYaxis()->SetTitle("Fraction of Tracks");
+          GetMinMaxHist(hist, min, max);
+        }
+
+        for (auto b = 0U; b < nbuilds; b++) {
+          const auto& build = builds[b];
+          auto& hist = hists[b];
+          SetMinMaxHist(hist, min, max, isLogy);
+          hist->Draw(b > 0 ? "EP SAME" : "EP");
+          const TString mean = Form("%4.1f", hist->GetMean());
+          leg->AddEntry(hist, build.label + " " + " [#mu = " + mean + "]", "LEP");
+        }
+
+        leg->Draw("SAME");
+        canv->SaveAs(label + "_" + coll + "_d" + diff + "_" + trk + "_pt" + ptcut + extra + ".png");
+
+        delete leg;
+        for (auto& hist : hists)
+          delete hist;
+        delete canv;
+      }  // end pt cut loop
+    }    // end var loop
+  }      // end coll loop
+}
+
+// Overlay, for every collection, pt cut and quality variable, the unit-normalized quality hists of all builds.
+void StackValidation::MakeQualityStacks(const TString& trk) {
+  // different reco collections
+  std::vector<TString> colls = {"allreco", "fake", "allmatch", "bestmatch"};
+  const UInt_t ncolls = colls.size();
+
+  // quality plots to use: nHits/track and track score
+  std::vector<TString> quals = {"nHits", "score"};
+  const UInt_t nquals = quals.size();
+
+  // indices for loops match PlotValidation.cpp
+  for (auto o = 0U; o < ncolls; o++) {
+    const auto& coll = colls[o];
+    for (auto k = 0U; k < nptcuts; k++) {
+      const auto& ptcut = ptcuts[k];
+      for (auto n = 0U; n < nquals; n++) {
+        const auto& qual = quals[n];
+        const Bool_t isLogy = true;
+        auto canv = new TCanvas();
+        canv->cd();
+        canv->SetLogy(isLogy);
+
+        auto leg = new TLegend(0.85, y1, 1.0, y2);
+
+        // tmp min/max
+        Double_t min = 1e9;
+        Double_t max = -1e9;
+
+        std::vector<TH1F*> hists(nbuilds);
+        for (auto b = 0U; b < nbuilds; b++) {
+          const auto& build = builds[b];
+          auto& hist = hists[b];
+          const TString hname = "quality" + refdir + "/h_" + qual + "_" + coll + "_" + trk + "_pt" + ptcut;
+          hist = (files[b] != nullptr ? (TH1F*)files[b]->Get(hname) : nullptr);
+          if (hist == nullptr) {  // fail with a message instead of dereferencing a null pointer
+            std::cerr << hname.Data() << " not found for build " << build.name.Data() << "! Exiting..." << std::endl;
+            exit(1);
+          }
+          hist->SetLineColor(build.color);
+          hist->SetMarkerColor(build.color);
+          // normalize to unit area; an empty hist is left as is rather than scaled by 1/0
+          const auto integral = hist->Integral();
+          if (integral != 0)
+            hist->Scale(1.f / integral);
+          hist->GetYaxis()->SetTitle("Fraction of Tracks");
+          GetMinMaxHist(hist, min, max);
+        }
+
+        for (auto b = 0U; b < nbuilds; b++) {
+          const auto& build = builds[b];
+          auto& hist = hists[b];
+          SetMinMaxHist(hist, min, max, isLogy);
+          hist->Draw(b > 0 ? "EP SAME" : "EP");
+          const TString mean = Form("%4.1f", hist->GetMean());
+          leg->AddEntry(hist, build.label + " " + " [#mu = " + mean + "]", "LEP");
+        }
+
+        leg->Draw("SAME");
+        canv->SaveAs(label + "_" + coll + "_" + qual + "_" + trk + "_pt" + ptcut + extra + ".png");
+
+        delete leg;
+        for (auto& hist : hists)
+          delete hist;
+        delete canv;
+
+      }  // end loop over quality variable
+    }    // end pt cut loop
+  }      // end coll loop
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/StackValidation.hh b/RecoTracker/MkFitCore/standalone/plotting/StackValidation.hh
new file mode 100644
index 0000000000000..8d90602104e57
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/StackValidation.hh
@@ -0,0 +1,79 @@
+#ifndef _StackValidation_
+#define _StackValidation_
+
+#include "Common.hh"
+
+#include "TEfficiency.h"
+#include "TGraphAsymmErrors.h"
+
+// Options identifying one rate plot: input directory, reference collection tag, and rate name.
+struct RateOpts {
+  RateOpts() = default;
+  RateOpts(const TString& dir, const TString& sORr, const TString& rate) : dir{dir}, sORr{sORr}, rate{rate} {}
+  TString dir;   // directory prefix of the rate inside plots.root
+  TString sORr;  // sim or reco
+  TString rate;  // short rate name, e.g. "eff" or "fr"
+};
+using ROVec = std::vector<RateOpts>;
+
+namespace {
+  TString ref;     // reference collection tag: "cmssw" or "sim"
+  TString refdir;  // directory suffix of the reference: "_cmssw" or ""
+  void setupRef(const Bool_t cmsswComp) {
+    ref = (cmsswComp ? "cmssw" : "sim");
+    refdir = (cmsswComp ? "_cmssw" : "");
+  }
+
+  ROVec rates;    // rate definitions, filled by setupRates()
+  UInt_t nrates;  // number of entries in rates
+  // Rebuild the list of rates. Reads ref, so setupRef() has to run first; cmsswComp itself is not used.
+  void setupRates(const Bool_t cmsswComp) {
+    rates.clear();  // avoid accumulating duplicate entries if this is called more than once
+    rates.emplace_back("efficiency", ref, "eff");
+    rates.emplace_back("inefficiency", ref, "ineff_brl");
+    rates.emplace_back("inefficiency", ref, "ineff_trans");
+    rates.emplace_back("inefficiency", ref, "ineff_ec");
+    rates.emplace_back("fakerate", "reco", "fr");
+    rates.emplace_back("duplicaterate", ref, "dr");
+    nrates = rates.size();  // set nrates after rates is set
+  }
+
+  std::vector<TString> ptcuts;
+  UInt_t nptcuts;
+  // Rebuild the list of pt-cut labels: each cut is formatted with "%3.1f" and "." is replaced by "p".
+  void setupPtCuts() {
+    ptcuts.clear();  // avoid accumulating duplicate entries if this is called more than once
+    std::vector<Float_t> tmp_ptcuts = {0.f, 0.9f, 2.f};
+
+    for (const auto tmp_ptcut : tmp_ptcuts) {
+      TString ptcut = Form("%3.1f", tmp_ptcut);
+      ptcut.ReplaceAll(".", "p");
+      ptcuts.emplace_back(ptcut);
+    }
+    nptcuts = ptcuts.size();  // set nptcuts once ptcuts is set
+  }
+};  // namespace
+
+class StackValidation {  // overlays validation plots of all builds, read from "validation_<label>_<build><extra>/plots.root"
+public:
+  StackValidation(const TString& label, const TString& extra, const Bool_t cmsswComp, const TString& suite);
+  ~StackValidation();
+  void MakeValidationStacks();  // runs the three Make*Stacks() for "build" tracks, and for "fit" tracks if cmsswComp
+  void MakeRatioStacks(const TString& trk);          // rate graphs vs pt, eta, phi and nLayers
+  void MakeKinematicDiffStacks(const TString& trk);  // nHits, invpt, eta and phi difference hists
+  void MakeQualityStacks(const TString& trk);        // nHits and score hists
+
+private:
+  const TString label;     // first part of the input directory and output file names
+  const TString extra;     // suffix appended to the input directory and output file names
+  const Bool_t cmsswComp;  // true: CMSSW is the reference; false: sim is the reference
+  const TString suite;     // passed to setupSUITEEnum() in the constructor
+
+  // legend height
+  Double_t y1;
+  Double_t y2;
+
+  std::vector<TFile*> files;  // one input file per build; deleted in the destructor
+};
+
+#endif
diff --git a/RecoTracker/MkFitCore/standalone/plotting/benchmarkPlots.sh b/RecoTracker/MkFitCore/standalone/plotting/benchmarkPlots.sh
new file mode 100755
index 0000000000000..8b86c765a97d6
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/benchmarkPlots.sh
@@ -0,0 +1,41 @@
+#! /bin/bash
+# Make the benchmark plots for every architecture and build; paths are relative, so run where xeon_scripts/ and plotting/ live.
+## input
+suite=${1:-"forPR"}
+useARCH=${2:-0}
+lnxuser=${3:-${USER}}
+
+## In case this is run separately from the main script
+source xeon_scripts/common-variables.sh "${suite}" "${useARCH}" "${lnxuser}"
+source xeon_scripts/init-env.sh
+
+for archV in "${arch_array_benchmark[@]}"
+do echo "${archV}" | while read -r archN archO
+    do
+        for build in "${ben_builds[@]}"
+        do echo "${!build}" | while read -r bN bO
+            do
+                # see if a test was run for this build routine
+                vu_check=$( CheckIfVU "${build}" )
+                th_check=$( CheckIfTH "${build}" )
+
+                echo "Extract benchmarking results for ${bN} on ${archN}"
+                python plotting/makeBenchmarkPlots.py "${archN}" "${sample}" "${bN}" "${vu_check}" "${th_check}"
+            done
+        done
+
+        echo "Make final plot comparing different build options for ${archN}"
+        root -b -q -l "plotting/makeBenchmarkPlots.C(\"${archN}\",\"${sample}\",\"${suite}\")"
+
+        for build in "${meif_builds[@]}"
+        do echo "${!build}" | while read -r bN bO
+            do
+                echo "Extract multiple events in flight benchmark results for ${bN} on ${archN}"
+                python plotting/makeMEIFBenchmarkPlots.py "${archN}" "${sample}" "${bN}"
+
+                echo "Make final plot comparing multiple events in flight for ${bN} on ${archN}"
+                root -b -q -l "plotting/makeMEIFBenchmarkPlots.C(\"${archN}\",\"${sample}\",\"${bN}\")"
+            done
+        done
+    done
+done
diff --git a/RecoTracker/MkFitCore/standalone/plotting/compareTotals.py b/RecoTracker/MkFitCore/standalone/plotting/compareTotals.py
new file mode 100644
index 0000000000000..5de264e8684c8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/compareTotals.py
@@ -0,0 +1,531 @@
+import os,sys
+import ROOT
+import copy
+
+doNorm = True
+
+def getCanvasMainPad( logY ):
+  # Upper pad for the main plot; its y axis is set to log scale when logY is true.
+  mainPad = ROOT.TPad("pad1", "pad1", 0, 0.2, 1, 1)
+  mainPad.SetBottomMargin(0.15)
+  if logY: mainPad.SetLogy()
+  return mainPad
+
+def getCanvasRatioPad( logY ):  # lower pad for the ratio plot; logY is accepted but not used
+  ratioPad = ROOT.TPad("pad2", "pad2", 0, 0, 1, 0.21)
+  ratioPad.SetTopMargin(0.05)
+  ratioPad.SetBottomMargin(0.1)
+  return ratioPad
+
+def getRatioAxes( xMin, xMax, yMin, yMax ):
+  # Empty 2D frame used as the axes of the ratio pad: x labels get size 0, the y axis is titled "Ratio".
+  frame = ROOT.TH2D("axes_ratio", "", 10, xMin, xMax, 10, yMin, yMax )
+  frame.SetStats(0)
+  xAxis, yAxis = frame.GetXaxis(), frame.GetYaxis()
+  xAxis.SetLabelSize(0.00); xAxis.SetTickLength(0.09)
+  yAxis.SetNdivisions(5,5,0)
+  yAxis.SetTitleSize(0.13); yAxis.SetTitleOffset(0.37)
+  yAxis.SetLabelSize(0.13)
+  yAxis.SetTitle("Ratio")
+  return frame
+
+
+# Input directories given on the command line:
+#   sys.argv[1] = original (1), sys.argv[2] = updated (2)
+dirnames = [sys.argv[1], sys.argv[2]]
+
+# Output directory; when it is newly created, web/index.php is copied into it
+outdir = sys.argv[3]
+if not os.path.exists(outdir):
+  os.makedirs(outdir)
+  os.system("cp web/index.php %s"%(outdir))
+
+# Legend names: defaults, the "vsCMSSW" preset, or two explicit names from the command line
+names = ["Reference", "Other"]
+if len(sys.argv)==5 and sys.argv[4]=="vsCMSSW":
+  names = ["CMSSW", "mkFit"]
+elif len(sys.argv)==6:
+  names = [sys.argv[4], sys.argv[5]]
+
+# Line and marker colors of the two inputs
+colors = [1,2]
+
+# Open plots.root of each input directory
+fnames = [dirname+"/plots.root" for dirname in dirnames]
+fs = [ROOT.TFile.Open(fname) for fname in fnames]
+
+subdirs=[]
+
+eff_obj   = []
+eff_pass  = []
+eff_tot   = []
+eff_ratio = []
+#
+hist      = []
+#
+hist_xratio = []
+eff_xratio  = []
+
+for d in range(0, len(fnames)):
+  # Pass over input file d. The list of sub-directories is taken from the first file only;
+  # for each of them the TH1 and TEfficiency objects found are styled and collected.
+  fs[d].cd()
+
+  for dkey in ROOT.gDirectory.GetListOfKeys():
+    if not dkey.IsFolder():
+      continue
+    if d<=0:
+      subdirs.append(dkey.GetName())
+
+  # per-file containers, one entry per sub-directory
+  eff_obj_s   = []
+  eff_pass_s  = []
+  eff_tot_s   = []
+  eff_ratio_s = []
+  hist_s      = []
+
+  for subdir in subdirs:
+    # parenthesized print: same output as a Python 2 statement and as a Python 3 function call
+    print("In subdir %s:"%subdir)
+    fs[d].cd(subdir)
+
+    # per-sub-directory containers
+    thiseff_obj  = []
+    thiseff_pass = []
+    thiseff_tot  = []
+    thiseff      = []
+    thishist     = []
+
+    for key in ROOT.gDirectory.GetListOfKeys():
+      obj = key.ReadObj()
+
+      # histograms: call Sumw2() and apply the style of input d
+      if obj.IsA().InheritsFrom("TH1"):
+        h = obj
+        #print("Found TH1 %s"%h.GetName())
+
+        h.Sumw2()
+        h.SetLineColor(colors[d])
+        h.SetMarkerColor(colors[d])
+        h.SetMarkerSize(0.3)
+        h.SetMarkerStyle(20)
+        h.SetStats(0)
+        thishist.append(h)
+
+      # efficiencies: keep the object itself, its passed/total histograms and a rate histogram
+      if obj.IsA().InheritsFrom("TEfficiency"):
+        e = obj
+        #print("Found TEfficiency %s"%e.GetName())
+
+        e.SetLineColor(colors[d])
+        e.SetMarkerColor(colors[d])
+        e.SetMarkerSize(0.3)
+        e.SetMarkerStyle(20)
+        thiseff_obj.append(e)
+
+        # passed and total histograms of this efficiency
+        hpass = e.GetPassedHistogram()
+        htot  = e.GetTotalHistogram()
+        hpass.Sumw2()
+        htot.Sumw2()
+        thiseff_pass.append(hpass)
+        thiseff_tot.append(htot)
+
+        # rate histogram: passed/total, with Divide() called with option "B"
+        auxeff = hpass.Clone("%s_rate"%(hpass.GetName()))
+        auxeff.Divide(hpass,htot,1.0,1.0,"B")
+        auxeff.SetLineColor(colors[d])
+        auxeff.SetMarkerColor(colors[d])
+        auxeff.SetMarkerSize(0.3)
+        auxeff.SetMarkerStyle(20)
+        auxeff.SetStats(0)
+        thiseff.append(auxeff)
+
+    hist_s     .append(thishist)
+    eff_ratio_s.append(thiseff)
+    eff_pass_s .append(thiseff_pass)
+    eff_tot_s  .append(thiseff_tot)
+    eff_obj_s  .append(thiseff_obj)
+
+  # hist and hist_xratio (likewise eff_ratio and eff_xratio) receive the very same list objects
+  hist       .append(hist_s)
+  hist_xratio.append(hist_s)
+
+  eff_ratio .append(eff_ratio_s)
+  eff_xratio.append(eff_ratio_s)
+  eff_pass  .append(eff_pass_s)
+  eff_tot   .append(eff_tot_s)
+  eff_obj   .append(eff_obj_s)
+  
+ratios_hist = []  # ratios_hist[subdir][plot]: updated/original ratio of the normalized histograms
+ratios_eff  = []  # ratios_eff[subdir][plot]: updated/original ratio of the rate histograms
+for dd in range(len(subdirs)):
+  # histogram ratios: built from clones, so the stored histograms are not modified here
+  thisratio = []
+  for r in range(len(hist_xratio[0][dd])):
+    auxratio = hist_xratio[1][dd][r].Clone("num")
+    auxden   = hist_xratio[0][dd][r].Clone("den")
+    intnum   = auxratio.Integral(0,-1)
+    intden   = auxden  .Integral(0,-1)
+    if intnum>0:
+      auxratio.Scale(1.0/intnum)
+    if intden>0:  # the division is skipped altogether when the original's integral is not positive
+      auxden.Scale(1.0/intden)
+      auxratio.Divide(auxden)
+    auxratio.SetName("ratio")
+    thisratio.append(auxratio)
+    thisratio[r].GetYaxis().SetTitle("Ratio")
+    thisratio[r].SetLineColor(colors[1])
+    thisratio[r].SetMarkerColor(colors[1])
+    thisratio[r].SetMarkerSize(0)
+    thisratio[r].SetStats(0)
+  ratios_hist.append(thisratio)
+  # rate ratios: the rate histograms are divided directly, without normalization
+  thisratio = []
+  for r in range(len(eff_xratio[0][dd])):
+    auxratio = eff_xratio[1][dd][r].Clone(  "numerator")
+    auxden   = eff_xratio[0][dd][r].Clone("denominator")
+    auxratio.Divide(auxden)
+    auxratio.SetName("ratio")
+    thisratio.append(auxratio)
+    thisratio[r].GetYaxis().SetTitle("Ratio")
+    thisratio[r].SetLineColor(colors[1])
+    thisratio[r].SetMarkerColor(colors[1])
+    thisratio[r].SetMarkerSize(0)
+    thisratio[r].SetStats(0)
+  ratios_eff.append(thisratio)
+
+### Drawing
+ROOT.gStyle.SetOptStat(0)
+
+outsubdir = []
+for ns,subdir in enumerate(subdirs):
+  thisdir = "%s/%s"%(outdir,subdir)
+  outsubdir.append(thisdir)
+  if not os.path.exists(thisdir):
+    os.mkdir(thisdir)
+    os.system("cp web/index.php %s"%(thisdir))
+
+  for r in range(len(eff_xratio[0][ns])): 
+      
+    outname = eff_obj[0][ns][r].GetName()
+    
+    can = ROOT.TCanvas("can_%s"%outname, "", 600, 600)
+    can.cd()
+    
+    pad1 = getCanvasMainPad(0)
+    pad1.SetTickx()
+    pad1.SetTicky()
+    
+    pad2 = getCanvasRatioPad(0)
+    pad2.SetTickx()
+    pad2.SetTicky()
+    
+    can.cd()
+    pad1.Draw()
+    pad1.cd()
+    
+    ttitle = eff_obj[0][ns][r].GetTitle()
+    xmin = ratios_eff[ns][r].GetXaxis().GetBinLowEdge(1)
+    xmax = ratios_eff[ns][r].GetXaxis().GetBinUpEdge(ratios_eff[ns][r].GetNbinsX())
+    yminM = 0.0
+    ymaxM = 1.2
+    #if "dr" in outname or "ineff" in outname:
+    #  ymaxM = 0.50
+    xtitle = ratios_eff[ns][r].GetXaxis().GetTitle()
+    ytitle = "Efficiency"
+    if "dr" in outname:
+      ytitle = "Duplicate rate"
+    elif "fr" in outname:
+      ytitle = "Fake rate"
+    elif "ineff" in outname:
+      ytitle = "Inefficiency"
+      
+    haxisMain  = ROOT.TH2D("haxisMain" ,ttitle,1,xmin ,xmax,1,yminM,ymaxM)
+    
+    haxisMain.GetXaxis().SetTitle(xtitle)
+    haxisMain.GetXaxis().SetTitleOffset(1.2)
+    haxisMain.GetYaxis().SetTitle(ytitle)
+    haxisMain.GetYaxis().SetTitleOffset(1.4)
+    
+    haxisMain.Draw()
+    eff_obj[0][ns][r].Draw("PE,same")
+    eff_obj[1][ns][r].Draw("PE,same")
+    
+    legend = ROOT.TLegend(0.7,0.7, 0.87, 0.87);
+    legend.SetLineColor(0)
+    legend.SetFillColor(0)
+    legend.AddEntry(eff_obj[0][ns][r], names[0], "PL")
+    legend.AddEntry(eff_obj[1][ns][r], names[1], "PL")
+    legend.Draw("same")
+    
+    can.cd()
+    pad2.Draw()
+    pad2.cd()
+    
+    ymin = 0.9*ratios_eff[ns][r].GetMinimum()
+    ymax = 1.1*ratios_eff[ns][r].GetMaximum()
+    
+    if ymin==0:
+      ymin=0.75
+      if ymax<=ymin:
+        ymin=0.75*ymax
+        ymax=1.25*ymax
+        
+    if ymax<=ymin:
+      ymin=0.0
+      ymax=2.0
+    
+    hraxes = getRatioAxes(xmin,xmax,ymin,ymax)
+    
+    line = ROOT.TLine(xmin,1.0,xmax,1.0)
+    line.SetLineColor(1)
+    line.SetLineStyle(2)
+    
+    hraxes.Draw("")
+    ratios_eff[ns][r].Draw("PE,same")
+    line.Draw("same")
+    
+    can.cd()
+    pad1.Draw()
+    pad2.Draw()
+    
+    can.SaveAs("%s/%s.png"%(thisdir,outname));
+    can.SaveAs("%s/%s.pdf"%(thisdir,outname));
+    
+    can.Update()
+    can.Clear()
+
+    # Integrated totals, passing counts, rates and rate ratios of both inputs for the .log file
+    tot        = [eff_tot[0][ns][r].Integral(), eff_tot[1][ns][r].Integral()]
+    passing    = [eff_pass[0][ns][r].Integral(), eff_pass[1][ns][r].Integral()]
+    efficiency = []
+    reldiff    = []
+    for d in range(0,len(tot)):
+      if tot[d]>0:
+        efficiency.append(passing[d]/tot[d])
+      else:
+        efficiency.append(0.0)
+      # ratio with respect to the first input; 0.0 when the first rate is not positive
+      if efficiency[0]>0:
+        reldiff.append(efficiency[d]/efficiency[0])
+      else:
+        reldiff.append(0.0)
+
+    # The with-block closes (and thereby flushes) the log file; it used to be left open,
+    # one handle per plot. The text written is unchanged.
+    with open("%s/%s.log"%(thisdir,outname),"w+") as fo:
+      fo.write( "Totals:" )
+      fo.write( "".join(" %d " % int(val) for val in tot) )
+      fo.write( "\nPassing:" )
+      fo.write( "".join(" %d " % int(val) for val in passing) )
+      fo.write( "\nRate:" )
+      fo.write( "".join(" %0.4f " % val for val in efficiency) )
+      fo.write( "\nRatio(/reference):" )
+      fo.write( "".join(" %0.4f " % val for val in reldiff) )
+      fo.write( "\n" )
+
+
+    if "_pt_" in outname:
+      outname = outname+"_logx"
+      
+      can = ROOT.TCanvas("can_%s"%outname, "", 600, 600)
+      can.cd()
+      
+      pad1 = getCanvasMainPad(0)
+      pad1.SetTickx()
+      pad1.SetTicky()
+      pad1.SetLogx()
+      
+      pad2 = getCanvasRatioPad(0)
+      pad2.SetTickx()
+      pad2.SetTicky()
+      pad2.SetLogx()
+      
+      can.cd()
+      pad1.Draw()
+      pad1.cd()
+      
+      ttitle = eff_obj[0][ns][r].GetTitle()
+      xmin = 0.1
+      xmax = ratios_eff[ns][r].GetXaxis().GetBinUpEdge(ratios_eff[ns][r].GetNbinsX())
+      yminM = 0.0
+      ymaxM = 1.2
+      #if "dr" in outname or "ineff" in outname:
+      #  ymaxM = 0.50
+      xtitle = ratios_eff[ns][r].GetXaxis().GetTitle()
+      ytitle = "Efficiency"
+      if "dr" in outname:
+        ytitle = "Duplicate rate"
+      elif "fr" in outname:
+        ytitle = "Fake rate"
+      elif "ineff" in outname:
+        ytitle = "Inefficiency"
+        
+      haxisMain  = ROOT.TH2D("haxisMain" ,ttitle,1,xmin,xmax,1,yminM,ymaxM)
+      
+      haxisMain.GetXaxis().SetTitle(xtitle)
+      haxisMain.GetXaxis().SetTitleOffset(1.2)
+      haxisMain.GetYaxis().SetTitle(ytitle)
+      haxisMain.GetYaxis().SetTitleOffset(1.4)
+      
+      haxisMain.Draw()
+      eff_obj[0][ns][r].Draw("PE,same")
+      eff_obj[1][ns][r].Draw("PE,same")
+      
+      legend = ROOT.TLegend(0.7, 0.7, 0.87, 0.87);
+      legend.SetLineColor(0)
+      legend.SetFillColor(0)
+      legend.AddEntry(eff_obj[0][ns][r], names[0], "PL")
+      legend.AddEntry(eff_obj[1][ns][r], names[1], "PL")
+      legend.Draw("same")
+      
+      can.cd()
+      pad2.Draw()
+      pad2.cd()
+      
+      ymin = 0.9*ratios_eff[ns][r].GetMinimum()
+      ymax = 1.1*ratios_eff[ns][r].GetMaximum()
+      
+      if ymin==0:
+        ymin=0.75
+        if ymax<=ymin:
+          ymin=0.75*ymax
+          ymax=1.25*ymax
+        
+      if ymax<=ymin:
+        ymin=0.0
+        ymax=2.0
+        
+      hraxes = getRatioAxes(xmin,xmax,ymin,ymax)
+      
+      line = ROOT.TLine(xmin,1.0,xmax,1.0)
+      line.SetLineColor(1)
+      line.SetLineStyle(2)
+      
+      hraxes.Draw("")
+      ratios_eff[ns][r].Draw("PE,same")
+      line.Draw("same")
+      
+      can.cd()
+      pad1.Draw()
+      pad2.Draw()
+      
+      can.SaveAs("%s/%s.png"%(thisdir,outname));
+      can.SaveAs("%s/%s.pdf"%(thisdir,outname));
+
+      can.Update()
+      can.Clear()
+      
+    del haxisMain
+    del hraxes
+    del pad1
+    del pad2
+    del can
+
+  ###
+
+  for r in range(len(hist_xratio[0][ns])): 
+      
+    outname = hist[0][ns][r].GetName()
+    
+    can = ROOT.TCanvas("can_%s"%outname, "", 600, 600)
+    can.cd()
+    
+    pad1 = getCanvasMainPad(0)
+    pad1.SetTickx()
+    pad1.SetTicky()
+    
+    pad2 = getCanvasRatioPad(0)
+    pad2.SetTickx()
+    pad2.SetTicky()
+    
+    can.cd()
+    pad1.Draw()
+    pad1.cd()
+    
+    int0   = hist[0][ns][r].Integral(0,-1)
+    int1   = hist[1][ns][r].Integral(0,-1)
+    if int0>0 and doNorm:
+      hist[0][ns][r].Scale(1.0/int0)
+    if int1>0 and doNorm:
+      hist[1][ns][r].Scale(1.0/int1)
+    
+    means = [hist[0][ns][r].GetMean(),hist[1][ns][r].GetMean()]
+
+    ttitle = hist[0][ns][r].GetTitle()
+    xmin = ratios_hist[ns][r].GetXaxis().GetBinLowEdge(1)
+    xmax = ratios_hist[ns][r].GetXaxis().GetBinUpEdge(ratios_hist[ns][r].GetNbinsX())
+    yminM = 0.0
+    ymaxM = hist[0][ns][r].GetMaximum()
+    if hist[1][ns][r].GetMaximum() > ymaxM:
+      ymaxM = hist[1][ns][r].GetMaximum()
+    ymaxM=1.5*ymaxM
+    if ymaxM<=yminM:
+      ymaxM = 1.0
+    xtitle = hist[0][ns][r].GetXaxis().GetTitle()
+    ytitle = "Fraction of tracks"
+    if not doNorm:
+      ytitle = "Number of tracks"
+
+    haxisMain  = ROOT.TH2D("haxisMain" ,ttitle,1,xmin ,xmax,1,yminM,ymaxM)
+    
+    haxisMain.GetXaxis().SetTitle(xtitle)
+    haxisMain.GetXaxis().SetTitleOffset(1.2)
+    haxisMain.GetYaxis().SetTitle(ytitle)
+    haxisMain.GetYaxis().SetTitleOffset(1.4)
+
+    haxisMain.Draw()
+    hist[0][ns][r].Draw("PE,same")
+    hist[1][ns][r].Draw("PE,same")
+    
+    legend = ROOT.TLegend(0.6, 0.7, 0.87, 0.87);
+    legend.SetLineColor(0)
+    legend.SetFillColor(0)
+    legend.AddEntry(hist[0][ns][r], "%s [#mu=%.2f]"%(names[0],means[0]), "PL")
+    legend.AddEntry(hist[1][ns][r], "%s [#mu=%.2f]"%(names[1],means[1]), "PL")
+    legend.Draw("same")
+    
+    can.cd()
+    pad2.Draw()
+    pad2.cd()
+    
+    ymin = 0.9*ratios_hist[ns][r].GetMinimum()
+    ymax = 1.1*ratios_hist[ns][r].GetMaximum()
+    
+    if ymin==0:
+      ymin=0.75
+      if ymax<=ymin:
+        ymin=0.75*ymax
+        ymax=1.25*ymax
+        
+    if ymax<=ymin:
+      ymin=0.0
+      ymax=2.0
+    
+    hraxes = getRatioAxes(xmin,xmax,ymin,ymax)
+    
+    line = ROOT.TLine(xmin,1.0,xmax,1.0)
+    line.SetLineColor(1)
+    line.SetLineStyle(2)
+    
+    hraxes.Draw("")
+    ratios_hist[ns][r].Draw("PE,same")
+    line.Draw("same")
+    
+    can.cd()
+    pad1.Draw()
+    pad2.Draw()
+    
+    can.SaveAs("%s/%s.png"%(thisdir,outname));
+    can.SaveAs("%s/%s.pdf"%(thisdir,outname));
+    
+    can.Update()
+    can.Clear()
+
+    del haxisMain
+    del hraxes
+    del pad1
+    del pad2
+    del can
+
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.C b/RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.C
new file mode 100644
index 0000000000000..4e9031a8293d5
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.C
@@ -0,0 +1,6 @@
+#include "plotting/PlotBenchmarks.cpp+"
+
+void makeBenchmarkPlots(const TString& arch, const TString& sample, const TString& suite) {
+  // ROOT macro entry point: hand the arguments to PlotBenchmarks and run its RunBenchmarkPlots().
+  PlotBenchmarks(arch, sample, suite).RunBenchmarkPlots();
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.py b/RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.py
new file mode 100644
index 0000000000000..0fb60aa57b631
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeBenchmarkPlots.py
@@ -0,0 +1,201 @@
+import os.path, glob, sys
+import ROOT
+import array
+import math
+
+# N.B.: Consult ./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh for info on nTHs, nVUs, and text file names
+
+def run():
+    # command line input
+    arch   = sys.argv[1] # SNB, KNL, SKL-SP
+    sample = sys.argv[2] 
+    build  = sys.argv[3] # BH, STD, CE, FV
+    isVU   = sys.argv[4] # 'true' or 'false': if no argument passed, will not do VU plots
+    isTH   = sys.argv[5] # 'true' or 'false': if no argument passed, will not do TH plots
+
+    # reopen file for writing
+    g = ROOT.TFile('benchmark_'+arch+'_'+sample+'.root','update')
+
+    # Vectorization data points
+    vuvals = ['1','2','4','8']
+    nth = '1'
+    
+    if  arch == 'KNL' or arch == 'SKL-SP' or arch == 'LNX-G' or arch == 'LNX-S':
+        vuvals.append('16')
+        vuvals.append('16int')
+    elif arch == 'SNB' :
+        vuvals.append('8int')
+    else :
+        print arch,'is not a valid architecture! Exiting...'
+        sys.exit(0)
+
+    # call the make plots function
+    if isVU == 'true' :
+        makeplots(arch,sample,build,vuvals,nth,'VU')
+
+    # Parallelization datapoints
+    if arch == 'KNL' :
+        nvu = '16int'
+        thvals = ['1','2','4','8','16','32','64','96','128','160','192','224','256']
+    elif arch == 'SNB' :
+        nvu = '8int'
+        thvals = ['1','2','4','6','8','12','16','20','24']
+    elif arch == 'SKL-SP' :
+        nvu = '16int'
+        thvals = ['1','2','4','8','16','32','48','64']
+    elif arch == 'LNX-G' :
+        nvu = '16int'
+        thvals = ['1','2','4','8','16','32','48','64']
+    elif arch == 'LNX-S' :
+        nvu = '16int'
+        thvals = ['1','2','4','8','16','32','48','64']
+    else :
+        print arch,'is not a valid architecture! Exiting...'
+        sys.exit(0)
+    
+    # call the make plots function
+    if isTH == 'true' :
+        makeplots(arch,sample,build,thvals,nvu,'TH')
+
+    g.Write()
+    g.Close()
+
+def makeplots(arch,sample,build,vals,nC,text):
+    # position in logs
+    if   build == 'BH'  : pos = 8  
+    elif build == 'STD' : pos = 11  
+    elif build == 'CE'  : pos = 14 
+    elif build == 'FV'  : pos = 17
+    else :
+        print build,'is not a valid test! Exiting...'
+        sys.exit(0)
+
+    # time    
+    print arch,sample,build,text
+
+    # define tgraphs vs absolute time and speedup
+    g_time    = ROOT.TGraphErrors(len(vals)-1)
+    g_speedup = ROOT.TGraphErrors(len(vals)-1)
+
+    # make separate plot for intrinsics measurement
+    if text is 'VU' :
+        g_time_int    = ROOT.TGraphErrors(1)
+        g_speedup_int = ROOT.TGraphErrors(1)
+
+    point = 0
+    for val in vals :
+        if    val is '16int': xval = 16.0
+        elif  val is '8int' : xval = 8.0
+        else                : xval = float(val)
+
+        # array of time values
+        yvals = array.array('d');
+
+        # always skip the first event
+        firstFound = False
+
+        # open the correct log file, store times into temp file
+        if   text is 'VU' : os.system('grep Matriplex log_'+arch+'_'+sample+'_'+build+'_NVU'+val+'_NTH'+nC +'.txt >& log_'+arch+'_'+sample+'_'+build+'_'+text+'.txt')
+        elif text is 'TH' : os.system('grep Matriplex log_'+arch+'_'+sample+'_'+build+'_NVU'+nC +'_NTH'+val+'.txt >& log_'+arch+'_'+sample+'_'+build+'_'+text+'.txt')
+        else :
+            print 'VU or TH are the only options for extra text! Exiting...'
+            exit
+
+        # open temp file, store event times into yvals
+        with open('log_'+arch+'_'+sample+'_'+build+'_'+text+'.txt') as f :
+            for line in f :
+                if 'Matriplex' not in line : continue
+                if 'Total' in line : continue
+                if not firstFound :
+                    firstFound = True
+                    continue
+                lsplit = line.split()
+                yvals.append(float(lsplit[pos]))
+
+        # Compute mean and uncertainty on mean from yvals
+        sum = 0.;
+        for yval in range(0,len(yvals)):
+            sum = sum + yvals[yval]
+        if len(yvals) > 0 :
+            mean = sum/len(yvals)
+        else :
+            mean = 0
+        emean = 0.;
+        for yval in range(0,len(yvals)):
+            emean = emean + ((yvals[yval] - mean) * (yvals[yval] - mean))
+        if len(yvals) > 1 :
+            emean = math.sqrt(emean / (len(yvals) - 1))
+            emean = emean/math.sqrt(len(yvals))
+        else :
+            emean = 0
+
+        # Printout value for good measure
+        print val,mean,'+/-',emean
+
+        # store intrinsics val into separate plot
+        if 'int' not in val :
+            g_time.SetPoint(point,xval,mean)
+            g_time.SetPointError(point,0,emean)
+            point = point+1
+        else :
+            g_time_int.SetPoint(0,xval,mean)
+            g_time_int.SetPointError(0,0,emean)
+
+    # always write out the standard plot
+    g_time.Write('g_'+build+'_'+text+'_time')
+
+    # write out separate intrinsics plot
+    if text is 'VU' :
+        g_time_int.Write('g_'+build+'_'+text+'_time_int')
+
+    # Speedup calculation
+    xval0 = array.array('d',[0])
+    yval0 = array.array('d',[0])
+    yerr0 = array.array('d',[0])
+
+    # Get first point to divide by
+    g_time.GetPoint(0,xval0,yval0)
+    yerr0.append(g_time.GetErrorY(0))
+
+    point = 0
+    for val in vals :
+        # set up inputs
+        xval = array.array('d',[0])
+        yval = array.array('d',[0])
+        yerr = array.array('d',[0])
+
+        # get standard plots from standard plot
+        if 'int' not in val :
+            g_time.GetPoint(point,xval,yval)
+            yerr.append(g_time.GetErrorY(point))
+        else :
+            g_time_int.GetPoint(0,xval,yval)
+            yerr.append(g_time_int.GetErrorY(0))
+
+        speedup  = 0.
+        espeedup = 0.
+        if yval[0] > 0. and yval0[0] > 0. : 
+            speedup  = yval0[0]/yval[0]
+            espeedup = speedup * math.sqrt(math.pow(yerr0[0]/yval0[0],2) + math.pow(yerr[0]/yval[0],2))
+
+        # store in the correct plot
+        if 'int' not in val :
+            g_speedup.SetPoint(point,xval[0],speedup)
+            g_speedup.SetPointError(point,0,espeedup)
+            point = point+1
+        else :
+            g_speedup_int.SetPoint(0,xval[0],speedup)
+            g_speedup_int.SetPointError(0,0,espeedup)
+
+    # always write out the standard plot
+    g_speedup.Write('g_'+build+'_'+text+'_speedup')
+
+    # write out separate intrinsics plot
+    if text is 'VU' :
+        g_speedup_int.Write('g_'+build+'_'+text+'_speedup_int')
+
+    # all done
+    return
+
+if __name__ == "__main__":
+    run()
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.C b/RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.C
new file mode 100644
index 0000000000000..24e790636f9d8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.C
@@ -0,0 +1,6 @@
+#include "plotting/PlotMEIFBenchmarks.cpp+"
+
+void makeMEIFBenchmarkPlots(const TString& arch, const TString& sample, const TString& build) {
+  // ROOT macro entry point: hand the arguments to PlotMEIFBenchmarks and run its RunMEIFBenchmarkPlots().
+  PlotMEIFBenchmarks(arch, sample, build).RunMEIFBenchmarkPlots();
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.py b/RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.py
new file mode 100644
index 0000000000000..1ec202a8c797b
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeMEIFBenchmarkPlots.py
@@ -0,0 +1,121 @@
+import os.path, glob, sys
+import ROOT
+import array
+import math
+
+# N.B.: Consult ./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh for info on nTHs, nVUs, and text file names
+
+arch   = sys.argv[1] # SNB, KNL, SKL-SP
+sample = sys.argv[2]
+build  = sys.argv[3] # CE, FV
+
+g = ROOT.TFile('benchmarkMEIF_'+arch+'_'+sample+'_'+build+'.root','recreate')
+
+# Parallelization datapoints
+if arch == 'KNL' :
+    nvu = '16int'
+    thvals = ['1','2','4','8','16','32','64','96','128','160','192','224','256']
+    evvals = ['1','2','4','8','16','32','64','128']
+elif arch == 'SNB' :
+    nvu = '8int'
+    thvals = ['1','2','4','6','8','12','16','20','24']
+    evvals = ['1','2','4','8','12']
+elif arch == 'SKL-SP' :
+    nvu = '16int'
+    thvals = ['1','2','4','8','16','32','48','64']
+    evvals = ['1','2','4','8','16','32','64']
+elif arch == 'LNX-G' :
+    nvu = '16int'
+    thvals = ['1','2','4','8','16','32','48','64']
+    evvals = ['1','2','4','8','16','32','64']
+elif arch == 'LNX-S' :
+    nvu = '16int'
+    thvals = ['1','2','4','8','16','32','48','64']
+    evvals = ['1','2','4','8','16','32','64']
+else :
+    print arch,"is not a valid architecture! Exiting..."
+    sys.exit(0)
+
+# extra text label
+text = 'MEIF'
+
+# text for grepping
+grepnEV  = '=== TOTAL for'
+grepTime = 'Total event loop time'
+
+# needed for speedups
+xval0 = array.array('d',[0])
+yval0 = array.array('d',[0])
+
+# time    
+for evval in evvals :
+    print arch,sample,build,"nEV:",evval
+    
+    # define event float
+    ev = float(evval)
+        
+    # define tgraphs vs absolute time and speedup
+    g_time    = ROOT.TGraph()
+    g_speedup = ROOT.TGraph()
+
+    point = 0
+    for thval in thvals :
+        xval = float(thval)
+        if ev > xval: continue;
+            
+        # extracted time
+        yval = float(0)
+        nev  = float(1)
+
+        # open log file, grep for relevant lines
+        with open('log_'+arch+'_'+sample+'_'+build+'_NVU'+nvu+'_NTH'+thval+'_NEV'+evval+'.txt') as f :
+            for line in f :
+                if grepnEV in line :
+                    lsplit = line.split()                
+                    nev  = float(lsplit[3])
+                elif grepTime in line :
+                    lsplit = line.split()                
+                    yval = float(lsplit[4])
+
+        yval /= nev
+
+        # Printout value for good measure
+        print xval,yval
+
+        # store val
+        g_time.SetPoint(point,xval,yval)
+        point = point+1
+
+    # write out the plot
+    g_time.Write('g_'+build+'_'+text+'_nEV'+evval+'_time')
+
+    # needed for speedup calculation
+    if evval is '1' :
+        g_time.GetPoint(0,xval0,yval0)        
+        
+    # speedup plots
+    point = 0
+    for thval in thvals :
+        xval = float(thval)
+        if ev > xval: continue;
+
+        # set up inputs
+        xval = array.array('d',[0])
+        yval = array.array('d',[0])
+        
+        # get point from time
+        g_time.GetPoint(point,xval,yval)
+
+        speedup  = 0.
+        if yval[0] > 0. : 
+            speedup  = yval0[0]/yval[0]
+                
+        # store in speedup plot
+        g_speedup.SetPoint(point,xval[0],speedup)
+        point = point+1
+
+    # always write out speedup
+    g_speedup.Write('g_'+build+'_'+text+'_nEV'+evval+'_speedup')
+
+g.Write()
+g.Close()
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.C b/RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.C
new file mode 100644
index 0000000000000..28c2cb4efcec2
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.C
@@ -0,0 +1,6 @@
+#include "plotting/PlotsFromDump.cpp+"
+
+void makePlotsFromDump(const TString& sample, const TString& build, const TString& suite, const int useARCH) {
+  // ROOT macro entry point: hand the arguments to PlotsFromDump and run its RunPlotsFromDump().
+  PlotsFromDump(sample, build, suite, useARCH).RunPlotsFromDump();
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.py b/RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.py
new file mode 100644
index 0000000000000..79b62e3bfb7b9
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makePlotsFromDump.py
@@ -0,0 +1,64 @@
+import os.path, glob, sys
+import ROOT
+
+arch   = sys.argv[1]
+sample = sys.argv[2]
+build  = sys.argv[3]
+suffix = sys.argv[4]
+
+g = ROOT.TFile("test_"+arch+"_"+sample+"_"+build+"_"+suffix+".root","recreate")
+
+# declare hists: reco only
+h_MXNH  = ROOT.TH1F("h_MXNH_"+suffix, "nHits/Track", 35, 0, 35)
+h_MXPT  = ROOT.TH1F("h_MXPT_"+suffix, "p_{T}^{mkFit}", 100, 0, 100)
+h_MXETA = ROOT.TH1F("h_MXETA_"+suffix, "#eta^{mkFit}", 25, -2.5, 2.5)
+h_MXPHI = ROOT.TH1F("h_MXPHI_"+suffix, "#phi^{mkFit}", 32, -3.2, 3.2)
+
+h_MXNH.Sumw2()
+h_MXPT.Sumw2()
+h_MXETA.Sumw2()
+h_MXPHI.Sumw2()
+
+# declare hists: diffs 
+h_DCNH  = ROOT.TH1F("h_DCNH_"+suffix, "#DeltanHits(mkFit,CMSSW)", 46, -20.5, 25.5)
+h_DCPT  = ROOT.TH1F("h_DCPT_"+suffix, "#Deltap_{T}(mkFit,CMSSW)", 63, -2.5, 2.5)
+h_DCETA = ROOT.TH1F("h_DCETA_"+suffix, "#Delta#eta(mkFit,CMSSW)", 45, -0.5, 0.5)
+h_DCPHI = ROOT.TH1F("h_DCPHI_"+suffix, "#Delta#phi(mkFit,CMSSW)", 45, -0.5, 0.5)
+
+h_DCNH.Sumw2()
+h_DCPT.Sumw2()
+h_DCETA.Sumw2()
+h_DCPHI.Sumw2()
+
+with open('log_'+arch+'_'+sample+'_'+build+'_'+suffix+'_DumpForPlots.txt') as f :
+    for line in f :
+        if "MX - found track with chi2" in line :
+            lsplit = line.split()
+
+            NH = float(lsplit[8])
+            h_MXNH.Fill(NH)
+
+            PT = float(lsplit[10])
+            h_MXPT.Fill(PT)
+
+            ETA = float(lsplit[12])
+            h_MXETA.Fill(ETA)
+
+            PHI = float(lsplit[14])
+            h_MXPHI.Fill(PHI)
+
+            NHC = float(lsplit[24])
+            if NHC > 0 :
+                h_DCNH.Fill(NH-NHC)
+
+                PTC = float(lsplit[26])
+                h_DCPT.Fill(PT-PTC)
+                
+                ETAC = float(lsplit[28])
+                h_DCETA.Fill(ETA-ETAC)
+                
+                PHIC = float(lsplit[30])
+                h_DCPHI.Fill(PHI-PHIC)
+
+g.Write()
+g.Close()
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeStressPlot.sh b/RecoTracker/MkFitCore/standalone/plotting/makeStressPlot.sh
new file mode 100755
index 0000000000000..383986c90f40e
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeStressPlot.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+## source environment (get ROOT)
+source xeon_scripts/init-env.sh
+
+## Command line config
+infile_name=${1:-"stress_test_SKL-SP_results.txt"}
+graph_label=${2:-"[Turbo=OFF(Long)]"}
+outfile_name=${3:-"noturbo1_long.pdf"}
+
+## reduce stress test results to results used in macro only (unique, initially empty tmp file)
+tmp_infile_name=$(mktemp tmp_results.XXXXXX) || exit 1
+trap 'rm -f -- "${tmp_infile_name}"' EXIT
+
+grep "SSE3" "${infile_name}" >> "${tmp_infile_name}"
+grep "AVX2" "${infile_name}" >> "${tmp_infile_name}"
+grep "AVX512" "${infile_name}" >> "${tmp_infile_name}"
+
+## Run little macro; the whole call is a single quoted word so that brackets or spaces in
+## the label are neither glob-expanded nor split by the shell
+root -l -b -q "plotting/plotStress.C(\"${tmp_infile_name}\",\"${graph_label}\",\"${outfile_name}\")"
+
+## the tmp file is removed by the EXIT trap set above
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeThroughputPlots.sh b/RecoTracker/MkFitCore/standalone/plotting/makeThroughputPlots.sh
new file mode 100755
index 0000000000000..0a239c0c49ee1
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeThroughputPlots.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+## source environment (get ROOT)
+source xeon_scripts/init-env.sh
+
+## Command line config
+infile_name=${1:-"benchmark2_SKL-SP_results.txt"}
+outfile_name=${2:-"sklsp"}
+graph_label=${3:-""}
+
+## Run little macro (quoted: the script unpacks exactly three arguments, so an empty label must still be passed)
+./plotting/plotThroughput.py "${infile_name}" "${outfile_name}" "${graph_label}"
diff --git a/RecoTracker/MkFitCore/standalone/plotting/makeValidation.C b/RecoTracker/MkFitCore/standalone/plotting/makeValidation.C
new file mode 100644
index 0000000000000..29918d45e2402
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/makeValidation.C
@@ -0,0 +1,9 @@
+#include "plotting/StackValidation.cpp+"
+
+void makeValidation(const TString& label = "",
+                    const TString& extra = "",
+                    const Bool_t cmsswComp = false,
+                    const TString& suite = "forPR") {
+  // ROOT macro entry point: hand the arguments to StackValidation and run its MakeValidationStacks().
+  StackValidation(label, extra, cmsswComp, suite).MakeValidationStacks();
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/plotStress.C b/RecoTracker/MkFitCore/standalone/plotting/plotStress.C
new file mode 100644
index 0000000000000..7da47a3d9ac01
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/plotStress.C
@@ -0,0 +1,150 @@
+#include "TString.h"
+#include "TColor.h"
+#include "TStyle.h"
+#include "TGraph.h"
+#include "TLegend.h"
+#include "TCanvas.h"
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+///////////////////////
+// Structs for macro //
+///////////////////////
+
+struct setStruct {
+  setStruct() {}
+  setStruct(const TString& label, const Double_t x) : label(label), x(x) {}
+
+  TString label;  // instruction-set name searched for in the input labels
+  UInt_t x = 0;   // x coordinate used for this set's points; zero unless set by the constructor
+};
+
+struct xyStruct {
+  xyStruct() {}  // members fall back to the zero initializers below
+  xyStruct(const Double_t x, const Double_t y) : x(x), y(y) {}
+
+  Double_t x = 0.;
+  Double_t y = 0.;
+};
+
+struct testStruct {
+  testStruct() {}
+  testStruct(const TString& label, const Color_t color) : label(label), color(color) {}
+
+  TString label;      // test name searched for in the input labels
+  Color_t color = 0;  // line/marker color of this test's graph
+
+  std::vector<xyStruct> xyPoints;
+  TGraph* graph = nullptr;  // created and deleted by plotStress(); null until then so delete is always safe
+};
+
+////////////////
+// Main Macro //
+////////////////
+
+void plotStress(const TString& infile_name, const TString& graph_label, const TString& outfile_name) {
+  // Reads "<label> <value>" pairs from infile_name, builds one graph per test below, saves them to outfile_name.
+  // no stats boxes
+  gStyle->SetOptStat(0);
+
+  // which tests to plot
+  std::vector<testStruct> tests = {{"nTH1_nEV1", kBlue},
+                                   {"nTH16_nEV16", kRed + 1},
+                                   {"nTH32_nEV16", kGreen + 1},
+                                   {"nTH32_nEV32", kMagenta},
+                                   {"nTH64_nEV32", kOrange + 1},
+                                   {"nTH64_nEV64", kBlack},
+                                   {"nJOB32", kViolet - 1},
+                                   {"nJOB64", kAzure + 10}};
+
+  // which instruction sets (nVU) to use
+  std::vector<setStruct> sets = {{"SSE3", 4}, {"AVX2", 8}, {"AVX512", 16}};
+
+  // make label for x-axis
+  const auto nset = sets.size();
+  TString set_label;
+  for (auto iset = 0U; iset < nset; iset++) {
+    const auto& set = sets[iset];
+    set_label += Form(" %s (x=%u)%s", set.label.Data(), set.x, (iset + 1 != nset ? "," : ""));
+  }
+
+  // read input file, fill testStruct vector; stop early instead of plotting empty graphs
+  std::ifstream input(infile_name.Data(), std::ios::in);
+  if (!input.is_open()) {
+    std::cerr << "plotStress: could not open input file " << infile_name.Data() << std::endl;
+    return;
+  }
+  TString test_set_label;
+  Double_t y;
+
+  // hacky read-in, but sufficient for small number of tests
+  while (input >> test_set_label >> y) {
+    for (auto& test : tests) {
+      if (test_set_label.Contains(test.label)) {
+        for (const auto& set : sets) {
+          if (test_set_label.Contains(set.label)) {
+            test.xyPoints.emplace_back(set.x, y);
+            break;
+          }  // end check over input label contains given instruction set label
+        }    // end loop over instruction set labels
+      }      // end check over input label contains given test label
+    }        // end loop over instruction test labels
+  }          // end loop over reading input file
+
+  // setup canvas
+  auto canv = new TCanvas();
+  canv->cd();
+  canv->SetTickx(1);
+  canv->SetTicky(1);
+  canv->SetGridy(1);
+
+  // setup legend
+  auto leg = new TLegend(0.77, 0.8, 0.99, 0.99);
+  leg->SetNColumns(2);
+
+  // loop tests, fill graphs, add to canvas + legend
+  for (auto itest = 0U; itest < tests.size(); itest++) {
+    // get test result and its info (points, label, color, graph)
+    auto& test = tests[itest];
+    const auto& xyPoints = test.xyPoints;
+    const auto& label = test.label;
+    const auto color = test.color;
+    auto& graph = test.graph;
+
+    // make new graph, set style
+    graph = new TGraph(xyPoints.size());
+    graph->SetTitle("Time vs ISA Ext " + graph_label);
+    graph->SetLineColor(color);
+    graph->SetMarkerColor(color);
+    graph->SetMarkerStyle(kFullCircle);
+    graph->SetMarkerSize(1);
+
+    // add graph points
+    for (auto ixyPoint = 0U; ixyPoint < xyPoints.size(); ixyPoint++) {
+      const auto& xyPoint = xyPoints[ixyPoint];
+      graph->SetPoint(ixyPoint, xyPoint.x, xyPoint.y);
+    }
+    // draw graph
+    graph->Draw(itest > 0 ? "CP SAME" : "ACP");
+    // graphs can only set x-y axis info after being drawn
+    graph->GetXaxis()->SetRangeUser(0, 20);
+    graph->GetYaxis()->SetRangeUser(0, 0.2);
+    graph->GetXaxis()->SetTitle("Floats in 1 vector [ISA Extensions: " + set_label + "]");
+    graph->GetYaxis()->SetTitle("Time / evt / physical core [s]");
+    // add graph to leg
+    leg->AddEntry(graph, label.Data(), "lp");
+  }
+
+  // draw leg
+  leg->Draw("same");
+
+  // save it
+  canv->SaveAs(outfile_name.Data());
+
+  // delete it all
+  for (auto& test : tests)
+    delete test.graph;
+  delete leg;
+  delete canv;
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/plotThroughput.py b/RecoTracker/MkFitCore/standalone/plotting/plotThroughput.py
new file mode 100755
index 0000000000000..08ab6fbe679fe
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/plotThroughput.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+import re
+import sys
+import array
+
+import ROOT
+ROOT.gROOT.SetBatch(True)
+ROOT.PyConfig.IgnoreCommandLineOptions = True
+
+
+colors = [
+    ROOT.kBlue,
+    ROOT.kRed+1,
+    ROOT.kBlack
+]
+
+def findBounds(x, ys, xmin=None, ymin=None, xmax=None, ymax=None):
+    if xmin is None:
+        xmin = min(x)
+    if xmax is None:
+        xmax = max(x)
+    if ymin is None:
+        ymin = min([min(y) for y in ys])
+    if ymax is None:
+        ymax = max([max(y) for y in ys]) * 1.1
+
+    return (xmin, ymin, xmax, ymax)
+
+
+def makePlot(name, x, ys, ytitle,
+             title=None,
+             legends=None,
+             ideal1=None,
+             bounds={},
+             legendYmax=0.99
+         ):
+    canv = ROOT.TCanvas()
+    canv.cd()
+    canv.SetTickx(1)
+    canv.SetTicky(1)
+    canv.SetGridy(1)
+
+    bounds = findBounds(x, ys, **bounds)
+    frame = canv.DrawFrame(*bounds)
+
+    frame.GetXaxis().SetTitle("Number of threads")
+    frame.GetYaxis().SetTitle(ytitle)
+    if title is not None:
+        frame.SetTitle(title)
+    frame.Draw("")
+
+    leg = None
+    if legends is not None:
+        leg = ROOT.TLegend(0.77,legendYmax-0.19,0.99,legendYmax)
+
+    graphs = []
+
+    if ideal1 is not None:
+        ymax = bounds[3]
+        ideal_y = [ideal1, ymax]
+        ideal_x = [1, ymax/ideal1]
+        gr = ROOT.TGraph(2, array.array("d", ideal_x), array.array("d", ideal_y))
+        gr.SetLineColor(ROOT.kBlack)
+        gr.SetLineStyle(3)
+        gr.Draw("same")
+        if leg:
+            leg.AddEntry(gr, "Ideal scaling", "l")
+        graphs.append(gr)
+
+    for i, y in enumerate(ys):
+        gr = ROOT.TGraph(len(x), array.array("d", x), array.array("d", y))
+        color = colors[i]
+        gr.SetLineColor(color)
+        gr.SetMarkerColor(color)
+        gr.SetMarkerStyle(ROOT.kFullCircle)
+        gr.SetMarkerSize(1)
+
+        gr.Draw("LP SAME")
+        if leg:
+            leg.AddEntry(gr, legends[i], "lp")
+
+        graphs.append(gr)
+
+    if leg:
+        leg.Draw("same")
+
+    canv.SaveAs(name+".png")
+    canv.SaveAs(name+".pdf")
+
+
+def main(argv):
+    (inputfile, outputfile, graph_label) = argv[1:4]
+
+    re_mt = re.compile("nTH(?P<th>\d+)_nEV(?P<ev>\d+)")
+    re_mp = re.compile("nJOB(?P<job>\d+)")
+
+    mt = {}
+    mp = {}
+
+    f = open(inputfile)
+    for line in f:
+        if not "AVX512" in line:
+            continue
+        comp = line.split(" ")
+        m = re_mt.search(comp[0])
+        if m:
+            if m.group("th") != m.group("ev"):
+                raise Exception("Can't handle yet different numbers of threads (%s) and events (%s)" % (m.group("th"), m.group("ev")))
+            mt[int(m.group("th"))] = float(comp[1])
+            continue
+        m = re_mp.search(comp[0])
+        if m:
+            mp[int(m.group("job"))] = float(comp[1])
+    f.close()
+
+    ncores = sorted(list(set(mt.keys() + mp.keys())))
+    mt_y = [mt[n] for n in ncores]
+    mp_y = [mp[n] for n in ncores]
+    ideal1 = mt_y[0]/ncores[0]
+    ideal1_mp = mp_y[0]/ncores[0]
+
+    makePlot(outputfile+"_throughput", ncores,
+             [mt_y, mp_y],
+             "Throughput (events/s)",
+             title=graph_label,
+             legends=["Multithreading", "Multiprocessing"],
+             ideal1=ideal1,
+             bounds=dict(ymin=0, xmin=0),
+             legendYmax=0.5
+    )
+
+    eff = [mt_y[i]/mp_y[i] for i in xrange(0, len(ncores))]
+    makePlot(outputfile+"_efficiency", ncores,
+             [eff],
+             "Multithreading efficiency (MT/MP)",
+             title=graph_label,
+             bounds=dict(ymin=0.9, ymax=1.1)
+    )
+
+    eff_vs_ideal_mt = [mt_y[i]/(ideal1*n) for i, n in enumerate(ncores)]
+    eff_vs_ideal_mp = [mp_y[i]/(ideal1*n) for i, n in enumerate(ncores)]
+    makePlot(outputfile+"_efficiency_ideal", ncores,
+             [eff_vs_ideal_mt, eff_vs_ideal_mp],
+             "Efficiency wrt. ideal",
+             title=graph_label,
+             legends=["Multithreading", "Multiprocessing"],
+             bounds=dict(ymin=0.8, ymax=1.01, xmax=65),
+             legendYmax=0.9
+    )
+
+    speedup_mt = [mt_y[i]/ideal1 for i in xrange(0, len(ncores))]
+    speedup_mp = [mp_y[i]/ideal1 for i in xrange(0, len(ncores))]
+    makePlot(outputfile+"_speedup", ncores,
+             [speedup_mt, speedup_mp],
+             "Speedup wrt. 1 thread",
+             title=graph_label,
+             legends=["Multithreading", "Multiprocessing"],
+             ideal1=1,
+             bounds=dict(ymin=0, xmin=0),
+             legendYmax=0.5
+    )
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/RecoTracker/MkFitCore/standalone/plotting/runValidation.C b/RecoTracker/MkFitCore/standalone/plotting/runValidation.C
new file mode 100644
index 0000000000000..fe6e86fa21c6b
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/runValidation.C
@@ -0,0 +1,27 @@
+#include "plotting/PlotValidation.cpp+"
+
+void runValidation(const TString& test = "",
+                   const Bool_t cmsswComp = false,
+                   const int algo = 0,
+                   const Bool_t mvInput = true,
+                   const Bool_t rmSuffix = true,
+                   const Bool_t saveAs = false,
+                   const TString& image = "pdf") {
+  // PlotValidation arguments, in the order passed below:
+  //  - input root file name "valtree<test>.root" and output directory name "validation<test>"
+  //  - cmsswComp: do the special CMSSW validation
+  //  - algo: NOTE(review) meaning not visible here; it is also passed on to Validation() -- confirm in PlotValidation
+  //  - mvInput: true moves the input root file to the output directory, false keeps it where it is
+  //  - rmSuffix: NOTE(review) not described by the original comment -- confirm in PlotValidation
+  //  - saveAs: save the image files; image: output type of the plots
+
+  PlotValidation Val(Form("valtree%s.root", test.Data()),
+                     Form("validation%s", test.Data()),
+                     cmsswComp,
+                     algo,
+                     mvInput,
+                     rmSuffix,
+                     saveAs,
+                     image);
+  Val.Validation(algo);
+}
diff --git a/RecoTracker/MkFitCore/standalone/plotting/textDumpPlots.sh b/RecoTracker/MkFitCore/standalone/plotting/textDumpPlots.sh
new file mode 100755
index 0000000000000..23cb00037ae0c
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/plotting/textDumpPlots.sh
@@ -0,0 +1,28 @@
+#! /bin/bash
+
+## input
+suite=${1:-"forPR"}
+useARCH=${2:-0}
+lnxuser=${3:-${USER}}
+
+## In case this is run separately from the main script
+source xeon_scripts/common-variables.sh "${suite}" "${useARCH}" "${lnxuser}"
+source xeon_scripts/init-env.sh
+
+##### Make plots of track properties (kinematics, nHits, etc) from text files, comparing different machine configurations #####
+for build in "${text_builds[@]}"
+do echo "${!build}" | while read -r bN bO
+    do
+        echo "Making plots from text files for ${sample} : ${bN}"
+        for archV in "${arch_array_textdump[@]}"
+        do echo "${archV}" | while read -r archN archO
+            do
+                echo "Extracting plots from dump for ${archN} ${archO}"
+                python plotting/makePlotsFromDump.py "${archN}" "${sample}" "${bN}" "${archO}"
+            done
+        done
+
+        echo "Making comparison plots from dump for ${sample} : ${bN}"
+        root -b -q -l "plotting/makePlotsFromDump.C(\"${sample}\",\"${bN}\",\"${suite}\",${useARCH})"
+    done
+done
diff --git a/RecoTracker/MkFitCore/standalone/test/CylCowWLids.C b/RecoTracker/MkFitCore/standalone/test/CylCowWLids.C
new file mode 100644
index 0000000000000..5a07e2fd7332f
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/CylCowWLids.C
@@ -0,0 +1,77 @@
+// To be used in compiled mode
+
+#include "../CylCowWLids.cc"
+
+#include "TCanvas.h"
+#include "TLine.h"
+
+TrackerInfo g_tracker_info;
+
+//------------------------------------------------------------------------------
+
+void print_etas(LayerInfo &li, float dz) {
+  // Prints one line for the layer: its id followed by getEta() evaluated at z - dz, z and z + dz.
+  // Barrel layers are evaluated at (r_mean, zmax). Other layers are evaluated at (rout, z_mean)
+  // and get a second triplet, separated by " - ", evaluated at rin.
+  const bool barrel = li.is_barrel();
+  const float r_edge = barrel ? li.r_mean() : li.rout();
+  const float z = barrel ? li.zmax() : li.z_mean();
+
+  // first triplet, printed for every layer
+  printf("%2d %6.4f %6.4f %6.4f", li.layer_id(), getEta(r_edge, z - dz), getEta(r_edge, z), getEta(r_edge, z + dz));
+
+  // second triplet, only for non-barrel layers
+  if (!barrel) {
+    const float r_in = li.rin();
+    printf("  -  %6.4f %6.4f %6.4f", getEta(r_in, z - dz), getEta(r_in, z), getEta(r_in, z + dz));
+  }
+
+  // terminate the line
+  printf("\n");
+}
+
+//------------------------------------------------------------------------------
+
+void CylCowWLids() {
+  // Draws the m_barrel and m_ecap_pos layers as lines in a z-r frame and prints eta values for each of them.
+  Create_TrackerInfo(g_tracker_info, true);
+  float zM = 120;
+  float rM = 100;
+
+  float cScale = 6;
+  TCanvas *c = new TCanvas("cvs", "", cScale * zM, cScale * rM);
+  TPad *p = new TPad("pad", "", 0, 0, 1, 1);
+  p->Draw();
+  p->Update();
+  p->cd();
+
+  p->DrawFrame(0, 0, zM, rM);
+
+  printf("Eta coordinates of edges for z0 (-3, 0, +3) cm\n");
+  printf("----------------------------------------------\n");
+
+  for (auto i : g_tracker_info.m_barrel) {
+    LayerInfo &li = g_tracker_info.layer(i);  // non-const like the loop below: print_etas() takes LayerInfo&
+
+    TLine *l = new TLine(0, li.r_mean(), li.zmax(), li.r_mean());
+    l->SetLineColor(kBlue);
+    l->SetLineWidth(2);
+    l->Draw();
+
+    print_etas(li, 3);
+  }
+
+  for (auto i : g_tracker_info.m_ecap_pos) {
+    LayerInfo &li = g_tracker_info.layer(i);
+
+    TLine *l = new TLine(li.z_mean(), li.rin(), li.z_mean(), li.rout());
+    l->SetLineColor(kMagenta + 3);
+    l->SetLineWidth(2);
+    l->Draw();
+
+    print_etas(li, 3);
+  }
+
+  p->Modified();
+  p->Update();
+}
diff --git a/RecoTracker/MkFitCore/standalone/test/DumpHitSearchStats.icc b/RecoTracker/MkFitCore/standalone/test/DumpHitSearchStats.icc
new file mode 100644
index 0000000000000..eba77b0ece983
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/DumpHitSearchStats.icc
@@ -0,0 +1,88 @@
+// Debug fragment, to be #included inside a function body after SelectHitRange / Indices().
+// Expects mkfp, m_event, m_event_of_hits, ilay, itrack and end to be in scope at the include site.
+#define MKFP(_V_) mkfp->_V_.ConstAt(mi, 0, 0)
+#define MKFPI(_V_, _i_, _j_) mkfp->_V_[iI].ConstAt(mi, _i_, _j_)
+#define MKFPAR(_i_) mkfp->Par[iI].ConstAt(mi, _i_, 0)
+#define MKFERR(_i_, _j_) mkfp->Err[iI].ConstAt(mi, _i_, _j_)
+// The shorthands above read slot 'mi' of mkfp's members; iI selects which Par/Err entry is used.
+const int iI = MkFitter::iP;
+TrackVec &recseeds = m_event->seedTracks_;
+int mi = 0;  // mplex index
+for (int ti = itrack; ti < end; ++ti, ++mi) {
+  int label = MKFP(Label);
+  int seed = MKFP(SeedIdx);
+  // Position is clamped from above at 1000; dphi2 is the phi variance from linear error propagation.
+  float x = std::min(1000.f, MKFPAR(0)), y = std::min(1000.f, MKFPAR(1)), z = std::min(1000.f, MKFPAR(2));
+  float r2 = x * x + y * y, r = std::sqrt(r2), phi = getPhi(x, y);
+  float dphidx = -y / r2, dphidy = x / r2;
+  float dphi2 = dphidx * dphidx * MKFERR(0, 0) + dphidy * dphidy * MKFERR(1, 1) + 2 * dphidx * dphidy * MKFERR(0, 1);
+  float Dphi = dphi2 >= 0 ? 3 * std::sqrt(dphi2) : dphi2;
+  float Dz = MKFERR(2, 2) >= 0 ? 3 * std::sqrt(MKFERR(2, 2)) : -1;
+  // Windows are 3 sigma; a negative variance is passed through as-is (Dphi) or reported as -1 (Dz).
+  float px = std::min(1000.f, MKFPAR(3)), py = std::min(1000.f, MKFPAR(4));
+
+  std::vector<int> indices;
+  m_event_of_hits.m_layers_of_hits[ilay].SelectHitIndices(z, phi, Dz, Dphi, indices, false);
+  int nshi = indices.size();
+
+  // Disabled cross-check of nshi against XHitSize (nshi is not used anywhere else in this fragment):
+  // printf("Select hits: %2d %2d\n", MKFP(XHitSize), nshi);
+  // if (MKFP(XHitSize) != nshi)
+  // {
+  //   // printf("Select hits: %2d %2d\n", MKFP(XHitSize), nshi);
+  //   m_event_of_hits.m_layers_of_hits[ilay].SelectHitIndices(z, phi, Dz, Dphi, indices, false, true);
+  //   printf("\n");
+  // }
+  // Print the column descriptor once, before the first data row.
+  static bool first = true;
+  if (first) {
+    printf(
+        "ZZZ_ERR event/I:label/I:mc_pt/F:seed/I:seed_pt/F:seed_chi/F:cand/I:layer/I:chi2/F:Nh/I:"
+        "pT/F:r/F:z/F:phi/F:eta/F:Dphi/F:Dz/F:"
+        "etam/F:etaM/F:Nh2p/I:"
+        "err00/F:err11/F:err22/F:err33/F:err44/F:err55/F"
+        "\n");
+    first = false;
+  }
+
+  // ./mkFit | perl -ne 'if (/^ZZZ_ERR/) { s/^ZZZ_ERR //og; print; }' xxx.rtt
+  // One row per track slot; the values follow the order of the descriptor printed above.
+  printf(
+      "ZZZ_ERR %d %d %f "
+      "%d %f %f %d "
+      "%d %f %d "
+      "%f %f %f %f %f %f %f "
+      "%f %f %d "
+      "%f %f %f %f %f %f\n",
+      m_event->evtID(),
+      label,
+      m_event->simTracks_[label].pT(),
+      seed,
+      recseeds[seed].pT(),
+      recseeds[seed].chi2(),
+      MKFP(CandIdx),
+      ilay,
+      MKFP(Chi2),
+      mkfp->countValidHits(mi),
+      std::hypot(px, py),
+      r,
+      MKFPAR(2),
+      phi,
+      getEta(r, MKFPAR(2)),
+      Dphi,
+      Dz,
+      getEta(r, z - Dz),
+      getEta(r, z + Dz),
+      MKFP(XHitSize),
+      MKFERR(0, 0),
+      MKFERR(1, 1),
+      MKFERR(2, 2),
+      MKFERR(3, 3),
+      MKFERR(4, 4),
+      MKFERR(5, 5));
+}
+
+#undef MKFP
+#undef MKFPI
+#undef MKFPAR
+#undef MKFERR
diff --git a/RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.cxx b/RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.cxx
new file mode 100644
index 0000000000000..0f389a2a1eb92
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.cxx
@@ -0,0 +1,227 @@
+#include "Math/SMatrix.h"
+
+#include "MatriplexSym.h"
+
+#include <random>
+
+/*
+# Generate .ah files (make sure DIM, DOM and pattern match):
+  ./GMtest.pl
+# Compile:
+  icc -std=gnu++11 -openmp -mavx -O3 -I.. -I../.. GMtest.cxx -o GMtest
+*/
+
+typedef long long long64;
+
+const int N   = 16;
+
+const int DIM =  3;
+const int DOM =  6;
+
+#ifdef MPLEX_INTRINSICS
+# if defined(__AVX512F__)
+#   warning "MPLEX_INTRINSICS CMP_EPS = 2e-7 --> 3e-7"
+const float CMP_EPS = 3e-7;
+# elif defined(__AVX__)
+#   warning "MPLEX_INTRINSICS CMP_EPS = 2e-7 --> 5e-7"
+const float CMP_EPS = 5e-7;
+# else
+#   warning "MPLEX_INTRINSICS CMP_EPS = 2e-7"
+const float CMP_EPS = 2e-7;
+# endif
+#else
+# if defined(__AVX512F__)
+#   warning "NO MPLEX_INTRINSICS CMP_EPS = 4e-7"
+const float CMP_EPS = 4e-7;
+# else
+#   warning "NO MPLEX_INTRINSICS CMP_EPS = 4e-7 --> 5e-7"
+const float CMP_EPS = 5e-7;
+# endif
+#endif
+
+typedef ROOT::Math::SMatrix<float, DIM, DOM>                                     SMatX;
+typedef ROOT::Math::SMatrix<float, DOM, DIM>                                     SMatXT;
+typedef ROOT::Math::SMatrix<float, DIM, DIM, ROOT::Math::MatRepSym<float, DIM> > SMatS;
+
+typedef Matriplex::Matriplex   <float, DIM, DOM, N>   MPlexX;
+typedef Matriplex::Matriplex   <float, DOM, DIM, N>   MPlexXT;
+typedef Matriplex::MatriplexSym<float, DIM,      N>   MPlexS;
+
+void Multify(const MPlexS& A, const MPlexX& B, MPlexX& C)
+{
+   // C = A * B
+   // The multiplication itself is the generated code in multify.ah (written by GMtest.pl).
+   typedef float T;
+
+   const T *a = A.fArray; __assume_aligned(a, 64);
+   const T *b = B.fArray; __assume_aligned(b, 64);
+         T *c = C.fArray; __assume_aligned(c, 64);
+   // NOTE(review): the included code presumably accesses the arrays via the names a, b, c -- confirm.
+#include "multify.ah"
+}
+
+void MultifyTranspose(const MPlexS& A, const MPlexX& B, MPlexXT& C)
+{
+   // C = BT * A;
+   // The multiplication itself is the generated code in multify-transpose.ah (written by GMtest.pl).
+   typedef float T;
+
+   const T *a = A.fArray; __assume_aligned(a, 64);
+   const T *b = B.fArray; __assume_aligned(b, 64);
+         T *c = C.fArray; __assume_aligned(c, 64);
+   // NOTE(review): the included code presumably accesses the arrays via the names a, b, c -- confirm.
+#include "multify-transpose.ah"
+}
+
+int main()
+{
+  // Endless consistency check: fills N random matrix pairs, multiplies them with SMatrix
+  // (reference) and with the generated Matriplex kernels, and prints elements differing by > CMP_EPS.
+  SMatS   a[N];
+  SMatX   b[N],  c[N];
+  SMatXT  bt[N], ct[N];
+  MPlexS  A;
+  MPlexX  B, C;
+  MPlexXT CT;
+
+  std::default_random_engine      gen(0xbeef0133);
+  std::normal_distribution<float> dis(1.0, 0.05);
+
+  long64 count = 1;
+
+init:
+  for (int m = 0; m < N; ++m)
+  {
+    for (int i = 0; i < DIM; ++i)
+    {
+      for (int j = i; j < DOM; ++j)
+      {
+        if (j < DIM)  a[m](i,j) = dis(gen);
+
+        b[m](i,j) = dis(gen);
+      }
+    }
+
+    // Enforce pattern from GMtest.pl
+    a[m](1, 1) = 1;
+    b[m](0, 4) = 0;
+    b[m](1, 1) = 1;
+    b[m](1, 3) = 1;
+    b[m](1, 4) = 0;
+    b[m](2, 4) = 0;
+
+    A.CopyIn(m, a[m].Array());
+    B.CopyIn(m, b[m].Array());
+
+    c[m]  = a[m] * b[m];
+
+    bt[m] = ROOT::Math::Transpose(b[m]);
+    ct[m] = bt[m] * a[m];
+  }
+
+  Multify(A, B, C);
+  MultifyTranspose(A, B, CT);
+
+  for (int m = 0; m < N; ++m)
+  {
+    bool dump = false;
+
+    for (int j = 0; j < DIM; ++j)
+    {
+      for (int k = 0; k < DOM; ++k)
+      {
+        // There are occasional diffs up to 4.768372e-07 on host, very very
+        // rarely on MIC. Apparently this is a rounding difference between AVX
+        // and normal maths. On MIC it might be usage of FMA?
+        // The above was for 3x3.
+        // For 6x6 practically all elements differ by 4.768372e-07, some
+        // by 9.536743e-07.
+        if (std::abs(c[m](j,k) - C.At(m, j, k)) > CMP_EPS)
+        {
+          dump = true;
+          printf("MULTIFY   M=%d  %d,%d d=%e (count = %lld)\n", m, j, k, c[m](j,k) - C.At(m, j, k), count);
+        }
+      }
+    }
+
+    if (dump && false)
+    {
+      printf("\n");
+      for (int i = 0; i < DIM; ++i)
+      {
+        for (int j = 0; j < DOM; ++j)
+          printf("%8f ", c[m](i,j));
+        printf("\n");
+      }
+      printf("\n");
+
+      for (int i = 0; i < DIM; ++i)
+      {
+        for (int j = 0; j < DOM; ++j)
+          printf("%8f ", C.At(m, i, j));
+        printf("\n");
+      }
+      printf("\n");
+    }
+    if (dump)
+    {
+      printf("\n");
+    }
+  }
+
+  // Shameless cut-n-paste of above dump for transpose check with minor changes.
+  // Should make a function, I know ... but ... no time to lose.
+
+  for (int m = 0; m < N; ++m)
+  {
+    bool dump = false;
+
+    for (int j = 0; j < DOM; ++j)
+    {
+      for (int k = 0; k < DIM; ++k)
+      {
+        // There are occasional diffs up to 4.768372e-07 on host, very very
+        // rarely on MIC. Apparently this is a rounding difference between AVX
+        // and normal maths. On MIC it might be usage of FMA?
+        // The above was for 3x3.
+        // For 6x6 practically all elements differ by 4.768372e-07, some
+        // by 9.536743e-07.
+        if (std::abs(ct[m](j,k) - CT.At(m, j, k)) > CMP_EPS)
+        {
+          dump = true;
+          printf("TRANSPOSE M=%d  %d,%d d=%e (count = %lld)\n", m, j, k, ct[m](j,k) - CT.At(m, j, k), count);
+        }
+      }
+    }
+
+    if (dump && false)
+    {
+      printf("\n");
+      for (int i = 0; i < DOM; ++i)
+      {
+        for (int j = 0; j < DIM; ++j)
+          printf("%8f ", ct[m](i,j));
+        printf("\n");
+      }
+      printf("\n");
+
+      for (int i = 0; i < DOM; ++i)
+      {
+        for (int j = 0; j < DIM; ++j)
+          printf("%8f ", CT.At(m, i, j));
+        printf("\n");
+      }
+      printf("\n");
+    }
+    if (dump)
+    {
+      printf("\n");
+    }
+  }
+  // Repeat forever with fresh random input; the return statement below is never reached.
+
+  ++count;
+  goto init;
+
+  return 0;
+}
diff --git a/RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.pl b/RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.pl
new file mode 100755
index 0000000000000..e1c4d2baaf867
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/Matriplex/GMtest.pl
@@ -0,0 +1,65 @@
+#!/usr/bin/perl
+
+use lib "..";
+
+use GenMul;
+
+### If you're going to run GMtest.cxx and you do some changes here
+### you *MUST* bring DIM, DOM and pattern assumptions in sync!
+
+my $DIM = 3;
+my $DOM = 6;
+
+$a = new GenMul::MatrixSym('name'=>'a', 'M'=>$DIM);
+$a->set_pattern(<<"FNORD");
+x
+x 1 
+x x x
+FNORD
+
+$b = new GenMul::Matrix('name'=>'b', 'M'=>$DIM, 'N'=>$DOM);
+$b->set_pattern(<<"FNORD");
+x x x x 0 x
+x 1 x 1 0 x
+x x x x 0 x
+FNORD
+
+$c = new GenMul::Matrix('name'=>'c', 'M'=>$DIM, 'N'=>$DOM);
+
+# Transposed view of $b; its info and pattern are printed when this script runs.
+$bt = new GenMul::MatrixTranspose($b);
+$bt->print_info();
+$bt->print_pattern();
+# Output of bt * a. It reuses the name 'c' -- GMtest.cxx declares its output pointer as 'c' too.
+$ct = new GenMul::Matrix('name'=>'c', 'M'=>$DOM, 'N'=>$DIM);
+
+# ----------------------------------------------------------------------
+
+# E.g. to skip matrix size check:
+#   $m = new GenMul::Multiply('no_size_check'=>1);
+# Note that the matrix dimensions that you pass into the auto-generated
+# function still have to match the matrix dimensions set here.
+
+$m = new GenMul::Multiply;
+# Generates multify.ah for c = a * b (included by Multify() in GMtest.cxx).
+$m->dump_multiply_std_and_intrinsic("multify.ah", $a, $b, $c);
+# Generates multify-transpose.ah for ct = bt * a (included by MultifyTranspose() in GMtest.cxx).
+$m->dump_multiply_std_and_intrinsic("multify-transpose.ah", $bt, $a, $ct);
+
+# To separate outputs of each function:
+#
+# open STD, ">multify.ah";
+# select STD;
+
+# $m->multiply_standard($a, $b, $c);
+
+# close STD;
+
+# # print "\n", '-' x 80, "\n\n";
+
+# open INT, ">multify_intr.ah";
+# select INT;
+
+# $m->multiply_intrinsic($a, $b, $c);
+
+# close INT;
diff --git a/RecoTracker/MkFitCore/standalone/test/Matriplex/m512_test.cxx b/RecoTracker/MkFitCore/standalone/test/Matriplex/m512_test.cxx
new file mode 100644
index 0000000000000..c8e9eb8bcedea
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/Matriplex/m512_test.cxx
@@ -0,0 +1,44 @@
+#include "immintrin.h"
+#include <cstdio>
+#include <cstdlib>
+
+const int NN = 64;
+
+#define LD(a, i)      _mm512_load_ps(&a[i*16])
+#define ADD(a, b)     _mm512_add_ps(a, b) 
+#define MUL(a, b)     _mm512_mul_ps(a, b)
+#define FMA(a, b, v)  _mm512_fmadd_ps(a, b, v)
+#define ST(a, i, r)   _mm512_store_ps(&a[i*16], r)
+
+// Can even be global!
+__m512 all_ones = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+
+int main()
+{
+  // Smoke test for AVX-512 intrinsics: q[0..15] = p[0..15] + 1, then print p and q side by side.
+  float *p = (float*) std::aligned_alloc(64, NN*sizeof(float));
+  float *q = (float*) std::aligned_alloc(64, NN*sizeof(float));
+  if (!p || !q) { std::free(p); std::free(q); return 1; }  // allocation failed
+
+  for (int i = 0; i < NN; ++i)
+  {
+    p[i] = i;
+  }
+
+  __m512 a = LD(p, 0);
+  __m512 b = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 };//LD(p, 1);
+  b = all_ones;  // overrides the brace-initialized value above
+
+  __m512 c = ADD(a, b);
+  ST(q, 0, c);
+
+  for (int i = 0; i < 16; ++i)
+  {
+    printf("%2d %4.0f %4.0f %4.0f\n", i, p[i], p[i+16], q[i]);
+  }
+
+  std::free(p);
+  std::free(q);
+
+  return 0;
+}
diff --git a/RecoTracker/MkFitCore/standalone/test/binnor_demo.cxx b/RecoTracker/MkFitCore/standalone/test/binnor_demo.cxx
new file mode 100644
index 0000000000000..d325fcb409154
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/binnor_demo.cxx
@@ -0,0 +1,381 @@
+#include "binnor.h"
+#include <random>
+
+// build as:
+// c++ -o binnor_demo -std=c++17 binnor_demo.cxx
+
+using namespace mkfit;
+
+int main()
+{
+    // Demo: bin NN random (phi, eta) tracks with binnor, then query a single bin and a bin range.
+    constexpr float    PI    = 3.14159265358979323846;
+    constexpr float TwoPI    = 6.28318530717958647692;
+    constexpr float PIOver2  = PI / 2.0f;
+    constexpr float PIOver4  = PI / 4.0f;
+    axis_pow2_u1<float, unsigned short, 16, 8> phi(-PI, PI);
+
+    printf("Axis phi: M-bits=%d, N-bits=%d  Masks M:0x%x N:0x%x\n",
+           phi.c_M, phi.c_N, phi.c_M_mask, phi.c_N_mask);
+
+    /*
+    for (float p = -TwoPI; p < TwoPI; p += TwoPI / 15.4f) {
+        printf("  phi=%-9f m=%5d n=%3d m2n=%3d n_safe=%3d\n", p,
+               phi.R_to_M_bin(p), phi.R_to_N_bin(p),
+               phi.M_bin_to_N_bin( phi.R_to_M_bin(p) ),
+               phi.R_to_N_bin_safe(p) );
+    }
+    */
+
+    axis<float, unsigned short, 12, 6> eta(-2.6, 2.6, 20u);
+
+    printf("Axis eta: M-bits=%d, N-bits=%d    n_bins=%d\n",
+           eta.c_M, eta.c_N, eta.m_n_bins);
+
+    binnor<unsigned int, decltype(phi), decltype(eta), 24, 8> b(phi, eta);
+    // sizeof() yields std::size_t, so it has to be printed with %zu (not %d).
+    // typedef typeof(b) type_b;
+    printf("Have binnor, size of vec = %zu, sizeof(C_pair) = %zu\n",
+           b.m_bins.size(), sizeof( decltype(b)::C_pair) );
+
+    std::mt19937 rnd(std::random_device{}());
+    std::uniform_real_distribution<float> d_phi(-PI, PI);
+    std::uniform_real_distribution<float> d_eta(-2.55, 2.55);
+
+    const int NN = 100000;
+
+    struct track { float phi, eta; };
+    std::vector<track> tracks;
+    tracks.reserve(NN);
+
+    b.begin_registration(NN); // optional, reserves construction vector
+
+    for (int i = 0; i < NN; ++i)
+    {
+        tracks.push_back( { d_phi(rnd), d_eta(rnd) } );
+        b.register_entry(tracks.back().phi, tracks.back().eta);
+        // printf("made track %3d:  phi=%f  eta=%f\n", i, tracks.back().phi, tracks.back().eta);
+    }
+
+    b.finalize_registration();
+
+    // for (int i = 0; i < NN; ++i)
+    // {
+    //     const track &t = tracks[ b.m_ranks[i] ];
+    //     printf("%3d  %3d  phi=%f  eta=%f\n", i, b.m_ranks[i], t.phi, t.eta);
+    // }
+
+    printf("\n\n--- Single bin access:\n\n");
+    auto nbin = b.get_n_bin(0.f, 0.f);
+    auto cbin = b.get_content(0.f, 0.f);
+    printf("For (phi 0, eta 0; %u, %u) got first %d, count %d\n", nbin.bin1, nbin.bin2, cbin.first, cbin.count);
+    for (auto i = cbin.first; i < cbin.first + cbin.count; ++i) {
+        const track &t = tracks[ b.m_ranks[i] ];
+        printf("%3d  %3d  phi=%f  eta=%f\n", i, b.m_ranks[i], t.phi, t.eta);
+    }
+
+    printf("\n\n--- Range access:\n\n");
+    auto phi_rng = phi.Rrdr_to_N_bins(-PI+0.02, 0.1);
+    auto eta_rng = eta.Rrdr_to_N_bins(1.3, .2);
+    printf("phi bin range: %u, %u; eta %u, %u\n", phi_rng.begin, phi_rng.end, eta_rng.begin, eta_rng.end);
+    for (auto i_phi = phi_rng.begin; i_phi != phi_rng.end; i_phi = phi.next_N_bin(i_phi))
+    {
+        for (auto i_eta = eta_rng.begin; i_eta != eta_rng.end; i_eta = eta.next_N_bin(i_eta))
+        {
+            printf(" at i_phi=%u, i_eta=%u\n", i_phi, i_eta);
+            auto cbin = b.get_content(i_phi, i_eta);
+            for (auto i = cbin.first; i < cbin.first + cbin.count; ++i) {
+                const track &t = tracks[ b.m_ranks[i] ];
+                printf("   %3d  %3d  phi=%f  eta=%f\n", i, b.m_ranks[i], t.phi, t.eta);
+            }
+        }
+    }
+
+
+    b.reset_contents();
+
+    return 0;
+}
+
+
+
+// buildtestMPlex.cc::runBtpCe_MultiIter(), loop over seed cleaning multiple times to measure time:
+/*
+    if ( itconf.m_requires_dupclean_tight ) {
+      double t0 = dtime();
+      TrackVec xxx; int n_comp;
+      for (int i=0;i<1000;++i) {
+      xxx = seeds;
+      n_comp = StdSeq::clean_cms_seedtracks_iter(&xxx, itconf, eoh.m_beam_spot);
+      }
+      printf("Seedacleena of %d seeds, out_seeds %d, 1000 times, N_comparisons=%d, took %.5fs\n",
+             (int)seeds.size(), (int)xxx.size(), n_comp, dtime() - t0);
+      seeds = xxx;
+    }
+*/
+
+// Example clean seeds using binnor.
+// Further enhancements possible by moving the iteration into binnor class (+iterator),
+// doing pre-selection on m_cons fine m-bins.
+// Perf notes: https://gist.github.com/osschar/2dcd2b01e7c15cc25aa6489f3b242ccb
+/*
+//=========================================================================
+// Seed cleaning (multi-iter)
+//=========================================================================
+int clean_cms_seedtracks_iter(TrackVec *seed_ptr, const IterationConfig& itrcfg, const BeamSpot &bspot)
+{
+  const float etamax_brl = Config::c_etamax_brl;
+  const float dpt_common = Config::c_dpt_common;
+
+  const float dzmax_bh = itrcfg.m_params.c_dzmax_bh;
+  const float drmax_bh = itrcfg.m_params.c_drmax_bh;
+  const float dzmax_eh = itrcfg.m_params.c_dzmax_eh;
+  const float drmax_eh = itrcfg.m_params.c_drmax_eh;
+  const float dzmax_bl = itrcfg.m_params.c_dzmax_bl;
+  const float drmax_bl = itrcfg.m_params.c_drmax_bl;
+  const float dzmax_el = itrcfg.m_params.c_dzmax_el;
+  const float drmax_el = itrcfg.m_params.c_drmax_el;
+
+  const float ptmin_hpt  = itrcfg.m_params.c_ptthr_hpt;
+
+  const float dzmax2_inv_bh = 1.f/(dzmax_bh*dzmax_bh);
+  const float drmax2_inv_bh = 1.f/(drmax_bh*drmax_bh);
+  const float dzmax2_inv_eh = 1.f/(dzmax_eh*dzmax_eh);
+  const float drmax2_inv_eh = 1.f/(drmax_eh*drmax_eh);
+  const float dzmax2_inv_bl = 1.f/(dzmax_bl*dzmax_bl);
+  const float drmax2_inv_bl = 1.f/(drmax_bl*drmax_bl);
+  const float dzmax2_inv_el = 1.f/(dzmax_el*dzmax_el);
+  const float drmax2_inv_el = 1.f/(drmax_el*drmax_el);
+
+  // Merge hits from overlapping seeds?
+  // For now always true, we require extra hits after seed.
+  const bool  merge_hits = true; // itrcfg.merge_seed_hits_during_cleaning();
+
+  if (seed_ptr == nullptr) return 0;
+  TrackVec &seeds = *seed_ptr;
+
+  const int ns = seeds.size();
+  #ifdef DEBUG
+   std::cout << "before seed cleaning "<< seeds.size()<<std::endl;
+  #endif
+  TrackVec cleanSeedTracks;
+  cleanSeedTracks.reserve(ns);
+  std::vector<bool> writetrack(ns, true);
+
+  const float invR1GeV = 1.f/Config::track1GeVradius;
+
+  std::vector<int>    nHits(ns);
+  std::vector<int>    charge(ns);
+  std::vector<float>  oldPhi(ns);
+  std::vector<float>  pos2(ns);
+  std::vector<float>  eta(ns);
+  std::vector<float>  ctheta(ns);
+  std::vector<float>  invptq(ns);
+  std::vector<float>  pt(ns);
+  std::vector<float>  x(ns);
+  std::vector<float>  y(ns);
+  std::vector<float>  z(ns);
+  std::vector<float>  d0(ns);
+  int i1,i2; //for the sorting
+
+  axis_pow2_u1<float, unsigned short, 16, 8> ax_phi(-Config::PI, Config::PI);
+  axis<float, unsigned short, 8, 8>         ax_eta(-2.6, 2.6, 30u);
+
+  binnor<unsigned int, decltype(ax_phi), decltype(ax_eta), 24, 8> b(ax_phi, ax_eta);
+  b.begin_registration(ns);
+
+  for(int ts=0; ts<ns; ts++){
+    const Track & tk = seeds[ts];
+    nHits[ts] = tk.nFoundHits();
+    charge[ts] = tk.charge();
+    oldPhi[ts] = tk.momPhi();
+    pos2[ts] = std::pow(tk.x(), 2) + std::pow(tk.y(), 2);
+    eta[ts] = tk.momEta();
+    ctheta[ts] = 1.f/std::tan(tk.theta());
+    invptq[ts] = tk.charge()*tk.invpT();
+    pt[ts] = tk.pT();
+    x[ts] = tk.x();
+    y[ts] = tk.y();
+    z[ts] = tk.z();
+    d0[ts] = tk.d0BeamSpot(bspot.x,bspot.y);
+
+    // If one is sure values are *within* axis ranges:
+    // b.register_entry(oldPhi[ts], eta[ts]);
+    b.register_entry_safe(oldPhi[ts], eta[ts]);
+  }
+
+  b.finalize_registration();
+
+  int n_comparisons = 0;
+
+  // for(int ts=0; ts<ns; ts++){
+  for(int sorted_ts=0; sorted_ts<ns; sorted_ts++){
+    int ts = b.m_ranks[sorted_ts];
+
+    // printf("Checking sorted_ts=%d ts=%d wwrite=%d\n", sorted_ts, ts, (int) writetrack[ts]);
+    if (not writetrack[ts]) continue;//FIXME: this speed up prevents transitive masking; check build cost!
+
+    const float oldPhi1 = oldPhi[ts];
+    const float pos2_first = pos2[ts];
+    const float Eta1 = eta[ts];
+    const float Pt1 = pt[ts];
+    const float invptq_first = invptq[ts];
+
+    // To study some more details -- need EventOfHits for this
+    int  n_ovlp_hits_added = 0;
+    // int  n_ovlp_hits_same_module = 0;
+    // int  n_ovlp_hits_shared = 0;
+    // int  n_ovlp_tracks = 0;
+
+    auto phi_rng = ax_phi.Rrdr_to_N_bins(oldPhi[ts], 0.08);
+    auto eta_rng = ax_eta.Rrdr_to_N_bins(eta[ts], .1);
+    // printf("sorted_ts=%d ts=%d -- phi bin range: %u, %u; eta %u, %u\n", sorted_ts, ts, phi_rng.begin, phi_rng.end, eta_rng.begin, eta_rng.end);
+    for (auto i_phi = phi_rng.begin; i_phi != phi_rng.end; i_phi = ax_phi.next_N_bin(i_phi))
+    {
+    for (auto i_eta = eta_rng.begin; i_eta != eta_rng.end; i_eta = ax_eta.next_N_bin(i_eta))
+    {
+    // printf(" at i_phi=%u, i_eta=%u\n", i_phi, i_eta);
+    const auto cbin = b.get_content(i_phi, i_eta);
+    for (auto i = cbin.first; i < cbin.end(); ++i)
+    {
+    //#pragma simd // Vectorization via simd had issues with icc
+    // for (int tss= ts+1; tss<ns; tss++)
+    //for (int sorted_tss= sorted_ts+1; sorted_tss<ns; sorted_tss++)
+    // {
+      int tss = b.m_ranks[i];
+      if (tss <= ts) continue;
+
+      const float Pt2 = pt[tss];
+
+      ////// Always require charge consistency. If different charge is assigned, do not remove seed-track
+      if(charge[tss] != charge[ts])
+        continue;
+
+      const float thisDPt = std::abs(Pt2-Pt1);
+      ////// Require pT consistency between seeds. If dpT is large, do not remove seed-track.
+      if( thisDPt > dpt_common*(Pt1) )
+        // continue;
+        break; // following seeds will only be farther away in pT
+
+      ++n_comparisons;
+
+      const float Eta2 = eta[tss];
+      const float deta2 = std::pow(Eta1-Eta2, 2);
+
+      const float oldPhi2 = oldPhi[tss];
+
+      const float pos2_second = pos2[tss];
+      const float thisDXYSign05 = pos2_second > pos2_first ? -0.5f : 0.5f;
+
+      const float thisDXY = thisDXYSign05*sqrt( std::pow(x[ts]-x[tss], 2) + std::pow(y[ts]-y[tss], 2) );
+
+      const float invptq_second = invptq[tss];
+
+      const float newPhi1 = oldPhi1-thisDXY*invR1GeV*invptq_first;
+      const float newPhi2 = oldPhi2+thisDXY*invR1GeV*invptq_second;
+
+      const float dphi = cdist(std::abs(newPhi1-newPhi2));
+
+      const float dr2 = deta2+dphi*dphi;
+
+      const float thisDZ = z[ts]-z[tss]-thisDXY*(ctheta[ts]+ctheta[tss]);
+      const float dz2 = thisDZ*thisDZ;
+
+      ////// Reject tracks within dR-dz elliptical window.
+      ////// Adaptive thresholds, based on observation that duplicates are more abundant at large pseudo-rapidity and low track pT
+      bool overlapping = false;
+      if(std::abs(Eta1)<etamax_brl){
+        if(Pt1>ptmin_hpt){if(dz2*dzmax2_inv_bh+dr2*drmax2_inv_bh<1.0f) overlapping=true; }
+        else{if(dz2*dzmax2_inv_bl+dr2*drmax2_inv_bl<1.0f) overlapping=true; }
+      }
+      else {
+        if(Pt1>ptmin_hpt){if(dz2*dzmax2_inv_eh+dr2*drmax2_inv_eh<1.0f) overlapping=true; }
+        else{if(dz2*dzmax2_inv_el+dr2*drmax2_inv_el<1.0f) overlapping=true; }
+      }
+
+      if(overlapping){
+        //Mark tss as a duplicate
+        i1=ts;
+        i2=tss;
+        if (d0[tss]>d0[ts])
+          writetrack[tss] = false;
+        else {
+          writetrack[ts] = false;
+          i2 = ts;
+          i1 = tss;
+        }
+        // Add hits from tk2 to the seed we are keeping.
+        // NOTE: We only have 3 bits in Track::Status for number of seed hits.
+        //       There is a check at entry and after adding of a new hit.
+        Track &tk = seeds[i1];
+        if (merge_hits && tk.nTotalHits() < 15)
+        {
+          const Track &tk2 = seeds[i2];
+          //We are not actually fitting to the extra hits; use chi2 of 0
+          float fakeChi2 = 0.0;
+
+          for (int j = 0; j < tk2.nTotalHits(); ++j)
+          {
+            int hitidx = tk2.getHitIdx(j);
+            int hitlyr = tk2.getHitLyr(j);
+            if (hitidx >= 0)
+            {
+              bool unique = true;
+              for (int i = 0; i < tk.nTotalHits(); ++i)
+              {
+                if ((hitidx == tk.getHitIdx(i)) && (hitlyr == tk.getHitLyr(i))) {
+                  unique = false;
+                  break;
+                }
+              }
+              if (unique) {
+                tk.addHitIdx(tk2.getHitIdx(j), tk2.getHitLyr(j), fakeChi2);
+                ++n_ovlp_hits_added;
+                if (tk.nTotalHits() >= 15)
+                  break;
+              }
+            }
+          }
+        }
+        if (n_ovlp_hits_added > 0) {
+           tk.sortHitsByLayer();
+           n_ovlp_hits_added = 0;
+        }
+
+        if ( ! writetrack[ts]) goto end_ts_loop;
+      }
+    } //end of inner loop over tss
+    }
+    }
+
+    if (writetrack[ts])
+    {
+      cleanSeedTracks.emplace_back(seeds[ts]);
+    }
+end_ts_loop: ;
+  }
+
+  seeds.swap(cleanSeedTracks);
+
+#ifdef DEBUG
+  {
+    const int ns2 = seeds.size();
+    printf("Number of CMS seeds before %d --> after %d cleaning\n", ns, ns2);
+
+    for (int it = 0; it < ns2; it++)
+    {
+      const Track& ss = seeds[it];
+      printf("  %3i q=%+i pT=%7.3f eta=% 7.3f nHits=%i label=% i\n",
+             it,ss.charge(),ss.pT(),ss.momEta(),ss.nFoundHits(),ss.label());
+    }
+  }
+#endif
+
+#ifdef DEBUG  
+  std::cout << "AFTER seed cleaning "<< seeds.size()<<std::endl;
+#endif
+
+  return n_comparisons; // seeds.size();
+}
+
+*/
diff --git a/RecoTracker/MkFitCore/standalone/test/config-parse/ConfigLinkDef.h b/RecoTracker/MkFitCore/standalone/test/config-parse/ConfigLinkDef.h
new file mode 100644
index 0000000000000..ac3f1dc030fa8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/config-parse/ConfigLinkDef.h
@@ -0,0 +1,5 @@
+#pragma link C++ class mkfit::IterationLayerConfig;
+#pragma link C++ class mkfit::IterationParams;
+// #pragma link C++ class mkfit::IterationSeedPartition;
+#pragma link C++ class mkfit::IterationConfig;
+#pragma link C++ class mkfit::IterationsInfo;
diff --git a/RecoTracker/MkFitCore/standalone/test/config-parse/Makefile b/RecoTracker/MkFitCore/standalone/test/config-parse/Makefile
new file mode 100644
index 0000000000000..7d2e2d8d1881b
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/config-parse/Makefile
@@ -0,0 +1,13 @@
+include ../../Makefile.config
+.PHONY: all clean # neither target produces a file of that name
+all: libConfigDict.so
+
+libConfigDict.so: ConfigDict.cc
+	${CXX} -I.. -I../.. -I$(shell root-config --incdir) ${CPPFLAGS} -fPIC -shared -o $@ $^
+
+ConfigDict.cc: ../IterationConfig.h ../SteeringParams.h ConfigLinkDef.h
+	rootcling -f -I=.. -I=../.. $@ $^
+
+clean:
+	rm -f libConfigDict.so ConfigDict.cc ConfigDict_rdict.pcm
+	find . -size 0 -delete # rootcling leaves some junk around
diff --git a/RecoTracker/MkFitCore/standalone/test/config-parse/dump_vars.C b/RecoTracker/MkFitCore/standalone/test/config-parse/dump_vars.C
new file mode 100644
index 0000000000000..7a52a4cc5cfde
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/config-parse/dump_vars.C
@@ -0,0 +1,51 @@
+#include "TClass.h"
+
+#include <string>
+#include <vector>
+
+// Begin AUTO code, some classes commented out.
+
+std::vector<std::string> classes = {
+    // "mkfit::IterationConfig",
+    "mkfit::IterationLayerConfig",
+    "mkfit::IterationParams",
+    // "mkfit::IterationSeedPartition",
+    "mkfit::IterationConfig",
+    "mkfit::IterationsInfo"};
+
+// End AUTO code.
+
+/*
+    1. When running for the first time, after changing of classes:
+       Review extracto.pl
+       Run: ./extracto.pl ../SteeringParams.h
+       Cut-n-paste code fragments above and into ConfigLinkDef.h
+
+    2. To run:
+         # setup root environment
+         make
+         root.exe dump_vars.C
+       Then cut-n-paste NLOHMANN defines into SteeringParams.cc 
+*/
+
+void dump_vars() {
+  // Prints one NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(...) stanza per entry of 'classes'.
+  gSystem->Load("libConfigDict.so");
+  for (auto &cls : classes) {
+    TClass *tc = TClass::GetClass(cls.c_str());
+    if (!tc) { fprintf(stderr, "dump_vars: no dictionary for class %s, skipping\n", cls.c_str()); continue; }
+    printf("NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(%s,\n", cls.c_str());
+    TList *ml = tc->GetListOfDataMembers();
+    TIter it(ml);
+    TDataMember *dm = (TDataMember *)it.Next();
+    while (dm) {
+      // dm->GetTypeName(), dm->GetFullTypeName(), dm->GetTrueTypeName(),
+      printf("  /* %s */   %s", dm->GetTypeName(), dm->GetName());
+      dm = (TDataMember *)it.Next();
+      if (dm)
+        printf(",");
+      printf("\n");
+    }
+    printf(")\n\n");
+  }
+}
diff --git a/RecoTracker/MkFitCore/standalone/test/config-parse/extracto.pl b/RecoTracker/MkFitCore/standalone/test/config-parse/extracto.pl
new file mode 100755
index 0000000000000..4b1fb3a74441d
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/config-parse/extracto.pl
@@ -0,0 +1,19 @@
+#!/usr/bin/perl -n
+
+if (m/^(class|struct)\s(\w+)/)  # a type definition starting in column 0
+{
+    my $soc = $1;  # keyword that matched: class or struct
+    my $cls = $2;  # name of the type
+    push @c, $cls if ($soc eq 'class' and $cls =~ m/Iteration/);  # quoted: no bareword string
+}
+
+END
+{
+    print "// For ConfigLinkDef.h\n";
+    print map { "#pragma link C++ class mkfit::$_;\n" } @c;
+
+    print "\n// For dictgen:\n";
+    print "std::vector<std::string> classes = {\n";
+    print join(",\n", map { "  \"mkfit::$_\"" } @c);
+    print "\n};\n";
+}
diff --git a/RecoTracker/MkFitCore/standalone/test/config-parse/test.json b/RecoTracker/MkFitCore/standalone/test/config-parse/test.json
new file mode 100644
index 0000000000000..1bef9cebd7249
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/test/config-parse/test.json
@@ -0,0 +1,29 @@
+{
+    "m_iterations/0": {
+        "m_layer_configs": [
+            {
+                "[4..10]": {
+                    "m_select_max_dphi": 0.04,
+                    "m_select_min_dphi": 0.02
+                }
+            },
+            {
+                "15/m_select_max_dq" : 14
+            }
+        ]
+
+    },
+    "m_iterations" : {
+        "[1..2]": [
+            {
+                "m_params/chi2Cut":  25
+            },
+            {
+                "m_params": {
+                    "c_drmax_bh": 0.01,
+                    "c_dzmax_bh": 0.01
+                }
+            }
+        ]
+    }
+}
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build-extrectracks.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build-extrectracks.sh
new file mode 100755
index 0000000000000..1abc0ed80ef71
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build-extrectracks.sh
@@ -0,0 +1,31 @@
+#! /bin/bash
+# Builds mkFit with ROOT, runs the CMSSW-track validation for BH/STD/CE building, then makes plots.
+make -j 32 WITH_ROOT:=1 || exit 1 # nothing below can work without a successful build
+
+dir=/data2/slava77/samples/2021
+subdir=10muPt0p2to10HS
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+fin10mu="${dir}/${subdir}/${file}"
+
+base=SNB_CMSSW_10mu
+
+for bV in "BH bh" "STD std" "CE ce"
+do echo "${bV}" | while read -r bN bO # bN: label used in file names, bO: --build-* option suffix
+    do
+        oBase="${base}_10muPt0p2to10HS_${bN}"
+        echo "${oBase}: validation [nTH:32, nVU:32]"
+        ./mkFit/mkFit --cmssw-n2seeds --cmssw-val-trkparam --input-file "${fin10mu}" "--build-${bO}" --num-thr 32 > "log_${oBase}_NVU32int_NTH32_cmsswval.txt" 2>&1
+        mv valtree.root "valtree_${oBase}.root"
+    done
+done
+
+make clean
+
+oBase="${base}_10muPt0p2to10HS"
+for build in BH STD CE
+do
+    root -b -q -l "plotting/runValidation.C(\"_${oBase}_${build}\",1)"
+done
+root -b -q -l "plotting/makeValidation.C(\"${oBase}\",\"\",1)"
+
+make distclean
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build.sh
new file mode 100755
index 0000000000000..1fae13f4e0968
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-10mu-fulldet-build.sh
@@ -0,0 +1,39 @@
+#! /bin/bash
+
+make -j 32 WITH_ROOT:=1
+
+dir=/data2/slava77/samples/2021/
+subdir=10muPt0p2to10HS/
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+fin10mu=${dir}/${subdir}/${file}
+
+base=SKL-SP_CMSSW_10mu
+
+for sV in "SimSeed --cmssw-simseeds" "CMSSeed --cmssw-n2seeds"
+do echo $sV | while read -r sN sO
+    do
+	for bV in "BH bh" "STD std" "CE ce"
+	do echo $bV | while read -r bN bO
+	    do
+		oBase=${base}_${sN}_10muPt0p2to10HS_${bN}
+		echo "${oBase}: validation [nTH:32, nVU:32]"
+		./mkFit/mkFit ${sO} --sim-val --input-file ${fin10mu} --build-${bO} --num-thr 32 >& log_${oBase}_NVU32int_NTH32_val.txt
+		mv valtree.root valtree_${oBase}.root
+	    done
+	done
+    done
+done
+
+make clean
+
+for seed in SimSeed CMSSeed
+do
+    oBase=${base}_${seed}_10muPt0p2to10HS
+    for build in BH STD CE
+    do
+    	root -b -q -l plotting/runValidation.C\(\"_${oBase}_${build}\"\)
+    done
+    root -b -q -l plotting/makeValidation.C\(\"${oBase}\"\)
+done
+
+make distclean
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks-multiiter.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks-multiiter.sh
new file mode 100755
index 0000000000000..b9eb25f6f8b13
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks-multiiter.sh
@@ -0,0 +1,237 @@
+#! /bin/bash
+
+###########
+## Input ##
+###########
+
+suite=${1:-"forConf"} # which set of benchmarks to run: full, forPR, forConf, val, valMT1
+style=${2:-"--mtv-like-val"} # option --mtv-like-val
+inputBin=${3:-"112X_TTbar_PU50_MULTI"}
+
+###################
+## Configuration ##
+###################
+
+source xeon_scripts/common-variables.sh ${suite}
+source xeon_scripts/init-env.sh
+export MIMI="CE mimi"
+declare -a val_builds=(MIMI)
+nevents=250
+
+## Common file setup
+case ${inputBin} in 
+"104XPU50CCC_MULTI")
+        echo "Inputs from 2018 initialStep/default PU 50 with CCC with multiple iterations and hit binary mask"
+        dir=/data2/slava77/analysis/CMSSW_10_4_0_patch1_mkFit/pass-df52fcc
+        subdir=/initialStep/default/11024.0_TTbar_13/AVE_50_BX01_25ns/RAW4NT  
+        file=/memoryFile.fv5.clean.writeAll.CCC1620.recT.allSeeds.masks.201023-64302e5.bin
+        ;;
+"112X_TTbar_PU50_MULTI")
+        echo "Inputs from 2021 TTbar (PU50) sample with multiple iterations and hit binary mask"
+        dir=/data2/slava77/samples/
+        subdir=2021/11834.0_TTbar_14TeV+2021/AVE_50_BX01_25ns/
+        file=memoryFile.fv6.default.211008-c6b7c67.bin
+        ;;
+"112X_10mu_MULTI")
+        echo "Inputs from 2021 10mu sample with multiple iterations and hit binary mask"
+        dir=/data2/slava77/samples
+        subdir=2021/10muPt0p2to1000HS
+        file=memoryFile.fv6.default.211008-c6b7c67.bin
+        nevents=20000
+        sample=10mu
+        ;;
+*)
+        echo "INPUT BIN IS UNKNOWN"
+        exit 12
+        ;;
+esac
+
+## Common executable setup
+maxth=64
+maxvu=16
+maxev=32
+if [[  "${suite}" == "valMT1" ]]
+then
+    maxth=1
+    maxev=1
+fi
+seeds="--cmssw-n2seeds"
+exe="./mkFit/mkFit --silent ${seeds} --num-thr ${maxth} --num-thr-ev ${maxev} --input-file ${dir}/${subdir}/${file} --num-events ${nevents} --remove-dup --use-dead-modules"
+
+## Common output setup
+tmpdir="tmp"
+base=${val_arch}_${sample}
+
+## flag to save sim info for matched tracks since track states not read in
+siminfo="--try-to-save-sim-info"
+
+## backward fit flag
+bkfit="--backward-fit"
+
+## validation options: SIMVAL == sim tracks as reference, CMSSWVAL == cmssw tracks as reference
+SIMVAL="SIMVAL --sim-val ${siminfo} ${bkfit} ${style} --num-iters-cmssw 10"
+SIMVAL_SEED="SIMVALSEED --sim-val ${siminfo} ${bkfit} --mtv-require-seeds --num-iters-cmssw 10"
+
+declare -a vals=(SIMVAL SIMVAL_SEED)
+
+## plotting options
+SIMPLOT="SIMVAL all 0 0 1"
+SIMPLOTSEED="SIMVALSEED all 0 0 1"
+SIMPLOT4="SIMVAL iter4 0 4 0"
+SIMPLOTSEED4="SIMVALSEED iter4 0 4 0" 
+SIMPLOT22="SIMVAL iter22 0 22 0"
+SIMPLOTSEED22="SIMVALSEED iter22 0 22 0"
+SIMPLOT23="SIMVAL iter23 0 23 0"
+SIMPLOTSEED23="SIMVALSEED iter23 0 23 0"
+SIMPLOT5="SIMVAL iter5 0 5 0"
+SIMPLOTSEED5="SIMVALSEED iter5 0 5 0"
+SIMPLOT24="SIMVAL iter24 0 24 0"
+SIMPLOTSEED24="SIMVALSEED iter24 0 24 0"
+SIMPLOT7="SIMVAL iter7 0 7 0"
+SIMPLOTSEED7="SIMVALSEED iter7 0 7 0"
+SIMPLOT8="SIMVAL iter8 0 8 0"
+SIMPLOTSEED8="SIMVALSEED iter8 0 8 0"
+SIMPLOT9="SIMVAL iter9 0 9 0"
+SIMPLOTSEED9="SIMVALSEED iter9 0 9 0"
+SIMPLOT10="SIMVAL iter10 0 10 0"
+SIMPLOTSEED10="SIMVALSEED iter10 0 10 0"
+SIMPLOT6="SIMVAL iter6 0 6 0"
+SIMPLOTSEED6="SIMVALSEED iter6 0 6 0"
+
+declare -a plots=(SIMPLOT4 SIMPLOTSEED4 SIMPLOT22 SIMPLOTSEED22 SIMPLOT23 SIMPLOTSEED23 SIMPLOT5 SIMPLOTSEED5 SIMPLOT24 SIMPLOTSEED24 SIMPLOT7 SIMPLOTSEED7 SIMPLOT8 SIMPLOTSEED8 SIMPLOT9 SIMPLOTSEED9 SIMPLOT10 SIMPLOTSEED10 SIMPLOT6 SIMPLOTSEED6)
+
+## special cmssw dummy build
+CMSSW="CMSSW cmssw SIMVAL --sim-val-for-cmssw ${siminfo} --read-cmssw-tracks ${style} --num-iters-cmssw 10"
+CMSSW2="CMSSW cmssw SIMVALSEED --sim-val-for-cmssw ${siminfo} --read-cmssw-tracks --mtv-require-seeds --num-iters-cmssw 10"
+
+###############
+## Functions ##
+###############
+
+## validation function
+function doVal()
+{
+    local bN=${1}
+    local bO=${2}
+    local vN=${3}
+    local vO=${4}
+
+    local oBase=${val_arch}_${sample}_${bN}
+    local bExe="${exe} ${vO} --build-${bO}"
+    
+    echo "${oBase}: ${vN} [nTH:${maxth}, nVU:${maxvu}int, nEV:${maxev}]"
+    ${bExe} >& log_${oBase}_NVU${maxvu}int_NTH${maxth}_NEV${maxev}_${vN}.txt || (echo "Crashed on CMD: "${bExe}; exit 2)
+    
+    if (( ${maxev} > 1 ))
+    then
+        # hadd output files from different threads for this test, then move to temporary directory
+        hadd -O valtree.root valtree_*.root
+        rm valtree_*.root
+    fi
+    mv valtree.root ${tmpdir}/valtree_${oBase}_${vN}.root
+}		
+
+## plotting function: run plotting/runValidation.C in batch ROOT for the tag _<base>_<bN>_<pN>
+function plotVal()
+{
+    local base=${1}    # leading part of the tag handed to runValidation.C
+    local bN=${2}      # build name, middle part of the tag
+    local pN=${3}      # plot/validation name, last part of the tag
+    local pO=${4}      # 2nd macro argument, forwarded unchanged
+    local iter=${5}    # 3rd macro argument, forwarded unchanged
+    local cancel=${6}  # 4th macro argument, forwarded unchanged
+    local rmsuff=${7}  # 5th macro argument, forwarded unchanged
+
+    echo "Computing observables for: ${base} ${bN} ${pN} ${pO} ${iter} ${cancel}"
+    local bExe="root -b -q -l plotting/runValidation.C(\"_${base}_${bN}_${pN}\",${pO},${iter},${cancel},${rmsuff})"
+    echo ${bExe}
+    # bExe must stay unquoted so it word-splits into command + arguments; the ( ) handler only sets the exit status
+    ${bExe} || (echo "Crashed on CMD: "${bExe}; exit 3)
+}
+
+########################
+## Run the validation ##
+########################
+
+## Compile once
+make clean
+mVal="-j 32 WITH_ROOT:=1 AVX_512:=1"
+make ${mVal}
+mkdir -p ${tmpdir}
+
+## Special simtrack validation vs cmssw tracks
+echo ${CMSSW} | while read -r bN bO vN vO
+do
+    doVal "${bN}" "${bO}" "${vN}" "${vO}"
+done
+## Special simtrack validation vs cmssw tracks
+echo ${CMSSW2} | while read -r bN bO vN vO
+do
+    doVal "${bN}" "${bO}" "${vN}" "${vO}"
+done
+
+## Run validation for standard build options
+for val in "${vals[@]}"
+do echo ${!val} | while read -r vN vO
+    do
+	for build in "${val_builds[@]}"
+	do echo ${!build} | while read -r bN bO
+	    do
+		doVal "${bN}" "${bO}" "${vN}" "${vO}"
+	    done
+	done
+    done
+done
+
+## clean up: drop build products, bring the merged trees back from the scratch dir, then remove it
+make clean ${mVal}
+mv ${tmpdir}/valtree_*.root .
+rm -rf ${tmpdir}
+
+
+
+## Compute observables and make images
+for plot in "${plots[@]}"
+do echo ${!plot} | while read -r pN suff pO iter cancel
+    do
+	rmsuff=0 # use iterX suffix for output directory
+        ## Compute observables for special dummy CMSSW
+	if [[ "${pN}" == "SIMVAL" || "${pN}" == "SIMVAL_"* ]]
+	then
+	    echo ${CMSSW} | while read -r bN bO val_extras
+	    do
+		plotVal "${base}" "${bN}" "${pN}" "${pO}" "${iter}" "${cancel}" "${rmsuff}"
+	    done
+	fi
+	if [[ "${pN}" == "SIMVALSEED"* ]]
+	then
+	    echo ${CMSSW2} | while read -r bN bO val_extras
+	    do
+		plotVal "${base}" "${bN}" "${pN}" "${pO}" "${iter}" "${cancel}" "${rmsuff}"
+	    done
+	fi
+
+	## Compute observables for builds chosen 
+	for build in "${val_builds[@]}"
+	do echo ${!build} | while read -r bN bO
+	    do
+		plotVal "${base}" "${bN}" "${pN}" "${pO}" "${iter}" "${cancel}" "${rmsuff}"
+	    done
+	done
+	
+	## overlay histograms for the plot set named by pN (suffix "all" means no per-iteration suffix)
+	echo "Overlaying histograms for: ${base} ${pN}"
+        if [[  "${suff}" == "all" ]]
+        then
+	    root -b -q -l plotting/makeValidation.C\(\"${base}\",\"_${pN}\",${pO},\"${suite}\"\)
+        else
+            root -b -q -l plotting/makeValidation.C\(\"${base}\",\"_${pN}_${suff}\",${pO},\"${suite}\"\)
+        fi
+    done
+done
+
+## Final cleanup
+make distclean ${mVal}
+
+## Final message
+echo "Finished physics validation!"
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks.sh
new file mode 100755
index 0000000000000..996b0c7402e99
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-benchmarks.sh
@@ -0,0 +1,255 @@
+#! /bin/bash
+
+###########
+## Input ##
+###########
+
+suite=${1:-"forPR"} # which set of benchmarks to run: full, forPR, forConf, val, valMT1
+style=${2:-"--mtv-like-val"} # option --mtv-like-val
+inputBin=${3:-"112X_TTbar_PU50_MULTI"}
+
+###################
+## Configuration ##
+###################
+
+source xeon_scripts/common-variables.sh ${suite}
+source xeon_scripts/init-env.sh
+
+nevents=500
+
+## Common file setup
+case ${inputBin} in 
+"91XPU70CCC")
+        echo "Inputs from 2017 initialStep PU 70 with CCC -- DO NOT WORK ANYMORE"
+        exit 1
+        dir=/data2/slava77/samples/2017/pass-c93773a/initialStep
+        subdir=PU70HS/10224.0_TTbar_13+TTbar_13TeV_TuneCUETP8M1_2017PU_GenSimFullINPUT+DigiFullPU_2017PU+RecoFullPU_2017PU+HARVESTFullPU_2017PU
+        file=memoryFile.fv3.clean.writeAll.CCC1620.recT.082418-25daeda.bin
+        ;;
+"104XPU50CCC")
+        echo "Inputs from 2018 initialStep/default PU 50 with CCC"
+        dir=/data2
+        subdir=
+        file=pu50-ccc-hs.bin
+        ;;
+"112X_TTbar_PU50_MULTI")
+        echo "Inputs from 2021 TTbar (PU50) sample with multiple iterations and hit binary mask"
+        dir=/data2/slava77/samples/
+        subdir=2021/11834.0_TTbar_14TeV+2021/AVE_50_BX01_25ns/
+        file=memoryFile.fv6.default.211008-c6b7c67.bin
+        ;;
+"104X10muCCC")
+        echo "Inputs from 2018 10mu large pt range using the offline initialStep seeds with CCC (phi3)"
+        dir=/data2/slava77/samples/2018/pass-925bb57
+        subdir=initialStep/default/10muPt0p2to1000HS
+        file=memoryFile.fv4.clean.writeAll.CCC1620.recT.191108-c41a0f2.bin
+        nevents=10000
+        sample=CMSSW_10mu
+        ;;
+"112X_10mu_MULTI")
+        echo "Inputs from 2021 10mu sample with multiple iterations and hit binary mask"
+        dir=/data2/slava77/samples
+        subdir=2021/10muPt0p2to1000HS
+        file=memoryFile.fv6.default.211008-c6b7c67.bin
+        nevents=20000
+        sample=10mu
+        ;;
+"104X10muHLT3CCC")
+        echo "Inputs from 2018 10mu large pt range using HLT iter0 seeds as triplets with CCC (phi3)"
+        dir=/data2/slava77/samples/2018/pass-2eaa1f7
+        subdir=hltIter0/default/triplet/10muPt0p2to1000HS
+        file=memoryFile.fv4.clean.writeAll.CCC1620.recT.200122-fcff8a8.bin
+        nevents=10000
+        sample=CMSSW_10mu_HLT3
+        ;;
+"104X10muHLT4CCC")
+        echo "Inputs from 2018 10mu large pt range using HLT iter0 seeds as quadruplets with CCC (phi3)"
+        dir=/data2/slava77/samples/2018/pass-2eaa1f7
+        subdir=hltIter0/default/quadruplet/10muPt0p2to1000HS
+        file=memoryFile.fv4.clean.writeAll.CCC1620.recT.200122-fcff8a8.bin
+        nevents=10000
+        sample=CMSSW_10mu_HLT4
+        ;;
+"104XPU50HLT3CCC")
+        echo "Inputs from 2018 ttbar PU50 using HLT iter0 seeds as triplets with CCC (phi3)"
+        dir=/data2/slava77/samples/2018/pass-2eaa1f7
+        subdir=hltIter0/default/triplet/11024.0_TTbar_13/AVE_50_BX01_25ns
+        file=memoryFile.fv4.clean.writeAll.CCC1620.recT.200122-fcff8a8.bin
+        sample=CMSSW_TTbar_PU50_HLT3
+        ;;
+"104XPU50HLT4CCC")
+        echo "Inputs from 2018 ttbar PU50 using HLT iter0 seeds as quadruplets with CCC (phi3)"
+        dir=/data2/slava77/samples/2018/pass-2eaa1f7
+        subdir=hltIter0/default/quadruplet/11024.0_TTbar_13/AVE_50_BX01_25ns
+        file=memoryFile.fv4.clean.writeAll.CCC1620.recT.200122-fcff8a8.bin
+        sample=CMSSW_TTbar_PU50_HLT4
+        ;;
+*)
+        echo "INPUT BIN IS UNKNOWN"
+        exit 12
+        ;;
+esac
+
+## Common executable setup
+maxth=64
+maxvu=16
+maxev=32
+if [[  "${suite}" == "valMT1" ]]
+then
+    maxth=1
+    maxev=1
+fi
+seeds="--cmssw-n2seeds"
+exe="./mkFit/mkFit --silent ${seeds} --num-thr ${maxth} --num-thr-ev ${maxev} --input-file ${dir}/${subdir}/${file} --num-events ${nevents} --remove-dup --use-dead-modules"
+
+## Common output setup
+tmpdir="tmp"
+base=${val_arch}_${sample}
+
+## flag to save sim info for matched tracks since track states not read in
+siminfo="--try-to-save-sim-info"
+
+## backward fit flag
+bkfit="--backward-fit"
+
+## validation options: SIMVAL == sim tracks as reference, CMSSWVAL == cmssw tracks as reference
+SIMVAL="SIMVAL --sim-val ${siminfo} ${bkfit} ${style}"
+SIMVAL_SEED="SIMVALSEED --sim-val ${siminfo} ${bkfit} --mtv-require-seeds"
+declare -a vals=(SIMVAL SIMVAL_SEED)
+
+## plotting options
+SIMPLOT="SIMVAL 0"
+SIMPLOTSEED="SIMVALSEED 0"
+declare -a plots=(SIMPLOT SIMPLOTSEED)
+
+## special cmssw dummy build
+CMSSW="CMSSW cmssw SIMVAL --sim-val-for-cmssw ${siminfo} --read-cmssw-tracks ${style} --num-iters-cmssw 1"
+CMSSW2="CMSSW cmssw SIMVALSEED --sim-val-for-cmssw ${siminfo} --read-cmssw-tracks --mtv-require-seeds --num-iters-cmssw 1"
+
+###############
+## Functions ##
+###############
+
+## validation function
+function doVal()
+{
+    local bN=${1}
+    local bO=${2}
+    local vN=${3}
+    local vO=${4}
+
+    local oBase=${val_arch}_${sample}_${bN}
+    local bExe="${exe} ${vO} --build-${bO}"
+    
+    echo "${oBase}: ${vN} [nTH:${maxth}, nVU:${maxvu}int, nEV:${maxev}]"
+    ${bExe} >& log_${oBase}_NVU${maxvu}int_NTH${maxth}_NEV${maxev}_${vN}.txt || (echo "Crashed on CMD: "${bExe}; exit 2)
+    
+    if (( ${maxev} > 1 ))
+    then
+        # hadd output files from different threads for this test, then move to temporary directory
+        hadd -O valtree.root valtree_*.root
+        rm valtree_*.root
+    fi
+    mv valtree.root ${tmpdir}/valtree_${oBase}_${vN}.root
+}		
+
+## plotting function
+function plotVal()
+{
+    local base=${1}
+    local bN=${2}
+    local pN=${3}
+    local pO=${4}
+    local iter=${5} # only initialStep
+    local cancel=${6}
+
+    echo "Computing observables for: ${base} ${bN} ${pN}"
+    bExe="root -b -q -l plotting/runValidation.C(\"_${base}_${bN}_${pN}\",${pO},${iter},${cancel})"
+    ${bExe} || (echo "Crashed on CMD: "${bExe}; exit 3)
+}
+
+########################
+## Run the validation ##
+########################
+
+## Compile once
+make clean
+mVal="-j 32 WITH_ROOT:=1 AVX_512:=1"
+make ${mVal}
+mkdir -p ${tmpdir}
+
+## Special simtrack validation vs cmssw tracks
+echo ${CMSSW} | while read -r bN bO vN vO
+do
+    doVal "${bN}" "${bO}" "${vN}" "${vO}"
+done
+## Special simtrack validation vs cmssw tracks
+echo ${CMSSW2} | while read -r bN bO vN vO
+do
+    doVal "${bN}" "${bO}" "${vN}" "${vO}"
+done
+
+## Run validation for standard build options
+for val in "${vals[@]}"
+do echo ${!val} | while read -r vN vO
+    do
+	for build in "${val_builds[@]}"
+	do echo ${!build} | while read -r bN bO
+	    do
+		doVal "${bN}" "${bO}" "${vN}" "${vO}"
+	    done
+	done
+    done
+done
+
+## clean up: drop build products, bring the merged trees back from the scratch dir, then remove it
+make clean ${mVal}
+mv ${tmpdir}/valtree_*.root .
+rm -rf ${tmpdir}
+
+## Compute observables and make images
+for plot in "${plots[@]}"
+do echo ${!plot} | while read -r pN pO
+    do
+        ## Compute observables for special dummy CMSSW
+	if [[ "${pN}" == "SIMVAL" ]]
+	then
+	    echo ${CMSSW} | while read -r bN bO val_extras
+	    do
+		iter=4 # only initialStep
+		cancel=1
+		plotVal "${base}" "${bN}" "${pN}" "${pO}" "${iter}" "${cancel}"
+	    done
+	fi
+	if [[ "${pN}" == "SIMVALSEED" ]]
+	then
+	    echo ${CMSSW2} | while read -r bN bO val_extras
+	    do
+		iter=4 # only initialStep
+		cancel=1
+		plotVal "${base}" "${bN}" "${pN}" "${pO}" "${iter}" "${cancel}"
+	    done
+	fi
+
+	## Compute observables for builds chosen 
+	for build in "${val_builds[@]}"
+	do echo ${!build} | while read -r bN bO
+	    do
+		iter=0
+		cancel=1
+		plotVal "${base}" "${bN}" "${pN}" "${pO}" "${iter}" "${cancel}"
+		#plotVal "${base}" "${bN}" "${pN}" "${pO}"
+	    done
+	done
+	
+	## overlay histograms for the plot set named by pN
+	echo "Overlaying histograms for: ${base} ${pN}"
+	root -b -q -l plotting/makeValidation.C\(\"${base}\",\"_${pN}\",${pO},\"${suite}\"\)
+    done
+done
+
+## Final cleanup
+make distclean ${mVal}
+
+## Final message
+echo "Finished physics validation!"
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build-extrectracks.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build-extrectracks.sh
new file mode 100755
index 0000000000000..cc891b208f549
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build-extrectracks.sh
@@ -0,0 +1,40 @@
+#! /bin/bash
+
+make -j 32 WITH_ROOT:=1
+
+dir=/data2/slava77/samples/2021/11834.0_TTbar_14TeV+2021/
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+
+NoPU=AVE_0_BX01_25ns/
+PU35=AVE_35_BX01_25ns/
+PU50=AVE_50_BX01_25ns/
+PU70=AVE_70_BX01_25ns/
+
+base=SKL-SP_CMSSW_TTbar
+
+for ttbar in NoPU PU35 PU50 PU70 
+do
+    for bV in "BH bh" "STD std" "CE ce"
+    do echo $bV | while read -r bN bO
+	do
+	    oBase=${base}_${ttbar}_${bN}
+	    echo "${oBase}: validation [nTH:32, nVU:32]"
+	    ./mkFit/mkFit --cmssw-n2seeds --cmssw-val-trkparam --input-file ${dir}/${!ttbar}/${file} --build-${bO} --num-thr 32 >& log_${oBase}_NVU32int_NTH32_cmsswval.txt
+	    mv valtree.root valtree_${oBase}.root
+	done
+    done
+done
+
+make clean
+
+for ttbar in NoPU PU35 PU50 PU70 
+do
+    tbase=${base}_${ttbar}
+    for build in BH STD CE
+    do
+	root -b -q -l plotting/runValidation.C\(\"_${tbase}_${build}\",1\)
+    done
+    root -b -q -l plotting/makeValidation.C\(\"${tbase}\",\"\",1\)
+done
+
+make distclean
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build.sh
new file mode 100755
index 0000000000000..a69896e247551
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-cmssw-ttbar-fulldet-build.sh
@@ -0,0 +1,48 @@
+#! /bin/bash
+
+make -j 32 WITH_ROOT:=1
+
+dir=/data2/slava77/samples/2021/11834.0_TTbar_14TeV+2021/
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+
+NoPU=AVE_0_BX01_25ns/
+PU35=AVE_35_BX01_25ns/
+PU50=AVE_50_BX01_25ns/
+PU70=AVE_70_BX01_25ns/
+
+base=SKL-SP_CMSSW_TTbar
+
+for ttbar in NoPU PU35 PU50 PU70 
+do
+    for sV in "SimSeed --cmssw-simseeds" "CMSSeed --cmssw-n2seeds"
+    do echo $sV | while read -r sN sO
+	do
+	    for bV in "BH bh" "STD std" "CE ce"
+	    do echo $bV | while read -r bN bO
+		do
+		    oBase=${base}_${ttbar}_${sN}_${bN}
+		    echo "${oBase}: validation [nTH:32, nVU:32]"
+		    ./mkFit/mkFit ${sO} --sim-val --input-file ${dir}/${!ttbar}/${file} --build-${bO} --num-thr 32 >& log_${oBase}_NVU32int_NTH32_val.txt
+		    mv valtree.root valtree_${oBase}.root
+		done
+	    done
+	done
+    done
+done
+
+make clean
+
+for ttbar in NoPU PU35 PU50 PU70 
+do
+    for seed in SimSeed CMSSeed
+    do
+	oBase=${base}_${ttbar}_${seed}
+	for build in BH STD CE
+	do
+	    root -b -q -l plotting/runValidation.C\(\"_${oBase}_${build}\"\)
+	done
+	root -b -q -l plotting/makeValidation.C\(\"${oBase}\"\)
+    done
+done
+
+make distclean
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validation-toymc-fulldet-build.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validation-toymc-fulldet-build.sh
new file mode 100755
index 0000000000000..37bd000c607c3
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validation-toymc-fulldet-build.sh
@@ -0,0 +1,26 @@
+#! /bin/bash
+
+make -j 32 WITH_ROOT:=1
+
+dir=/data2/scratch/toymc
+file=simtracks_fulldet_400x2p5k_val.bin
+
+base=SKL-SP_ToyMC_FullDet
+
+for bV in "BH bh" "STD std" "CE ce"
+do echo $bV | while read -r bN bO
+    do
+	oBase=${base}_${bN}
+	echo "${oBase}: validation [nTH:32, nVU:32]"
+	./mkFit/mkFit --sim-val --read-simtrack-states --seed-input sim --input-file ${dir}/${file} --build-${bO} --num-thr 32 >& log_${oBase}_NVU32int_NTH32_val.txt
+	mv valtree.root valtree_${oBase}.root
+    done
+done
+
+for build in BH STD CE
+do
+    root -b -q -l plotting/runValidation.C\(\"_${base}_${build}\"\)
+done
+root -b -q -l plotting/makeValidation.C\(\"${base}\"\)
+
+make clean
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-10mu.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-10mu.sh
new file mode 100755
index 0000000000000..f6431e6de72fa
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-10mu.sh
@@ -0,0 +1,49 @@
+#! /bin/bash
+
+
+[ -e "$BIN_DATA_PATH" ] || BIN_DATA_PATH=/data2/slava77/samples/2021/
+fin10mu=${BIN_DATA_PATH}/10muPt0p2to10HS/memoryFile.fv6.default.211008-c6b7c67.bin
+
+runValidation(){
+    for sV in "sim --cmssw-simseeds" "see --cmssw-stdseeds"; do echo $sV | while read -r sN sO; do
+	    if [ "${1}" == "1" ]; then
+		sO="--cmssw-n2seeds"
+	    fi
+	    for bV in "BH bh" "STD std" "CE ce"; do echo $bV | while read -r bN bO; do
+		    oBase=${base}_${sN}_10muPt0p2to10HS_${bN}
+		    nTH=8
+		    echo "${oBase}: validation [nTH:${nTH}, nVU:8]"
+		    ./mkFit/mkFit --sim-val --input-file ${fin10mu} --build-${bO} ${sO} --num-thr ${nTH} >& log_${oBase}_NVU8int_NTH${nTH}_val.txt
+		    mv valtree.root valtree_${oBase}.root
+		done
+	    done
+        done
+    done
+    
+    for opt in sim see
+    do
+	oBase=${base}_${opt}_10muPt0p2to10HS
+	for build in BH STD CE
+	do
+	    root -b -q -l plotting/runValidation.C+\(\"_${oBase}_${build}\"\)
+	done
+	root -b -q -l plotting/makeValidation.C+\(\"${oBase}\"\)
+    done
+}
+
+#cleanup first
+make clean
+make distclean
+make -j 12 WITH_ROOT:=1
+
+export base=SNB_CMSSW_10mu
+echo Run default with base = ${base}
+runValidation 0
+
+export base=SNB_CMSSW_10mu_cleanSeed
+echo Run CLEAN_SEEDS with base = ${base}
+runValidation 1
+
+make distclean
+
+unset base
diff --git a/RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-PU70.sh b/RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-PU70.sh
new file mode 100755
index 0000000000000..bd0a846856644
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/val_scripts/validationMIC-build-PU70.sh
@@ -0,0 +1,49 @@
+#! /bin/bash
+
+[ -e "$BIN_DATA_PATH" ] || BIN_DATA_PATH=/data2/slava77/samples/2021/11834.0_TTbar_14TeV+2021/
+fin=${BIN_DATA_PATH}/AVE_70_BX01_25ns/memoryFile.fv6.default.211008-c6b7c67.bin
+
+runValidation()
+{
+    for sV in "sim --cmssw-simseeds" "see --cmssw-stdseeds"; do echo $sV | while read -r sN sO; do
+	    if [ "${1}" == "1" ]; then
+		sO="--cmssw-n2seeds"
+	    fi
+            for bV in "BH bh" "STD std" "CE ce"; do echo $bV | while read -r bN bO; do
+		    oBase=${base}_${sN}_${bN}
+		    nTH=8
+		    echo "${oBase}: validation [nTH:${nTH}, nVU:8]"
+		    ./mkFit/mkFit --sim-val --input-file ${fin} --build-${bO} ${sO} --num-thr ${nTH} >& log_${oBase}_NVU8int_NTH${nTH}_val.txt
+		    mv valtree.root valtree_${oBase}.root
+                done
+            done
+        done
+    done
+        
+    for opt in sim see
+    do
+        oBase=${base}_${opt}
+        for build in BH STD CE
+        do
+	    root -b -q -l plotting/runValidation.C+\(\"_${oBase}_${build}\"\)
+        done
+        root -b -q -l plotting/makeValidation.C+\(\"${oBase}\"\)
+    done
+}
+
+#cleanup first
+make clean
+make distclean
+make -j 12 WITH_ROOT:=1
+
+export base=SNB_CMSSW_PU70_clean
+echo Run default build with base = ${base}
+runValidation 0
+
+export base=SNB_CMSSW_PU70_clean_cleanSeed
+echo Run CLEAN_SEEDS with base = ${base}
+runValidation 1
+
+make distclean
+
+unset base
diff --git a/RecoTracker/MkFitCore/standalone/validation-desc.txt b/RecoTracker/MkFitCore/standalone/validation-desc.txt
new file mode 100644
index 0000000000000..3cc06beb75e48
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/validation-desc.txt
@@ -0,0 +1,549 @@
+EDIT HISTORY
+** KPM 16/09/18: move id + label explanation to index-desc.txt
+** KPM 06/08/18: moved preface to README **
+** KPM 11/07/18: added a preface but still need to update this for newest methods/revisions... **
+** KPM 25/02/18: still need to update methods for setting mc/cmssw track id **
+
+PREFACE: This file is a compendium on how the validation runs within mkFit, which makes use of the TTreeValidation class and other supporting macros.  
+
+===================
+ Table of Contents
+===================
+
+A. Overview of code
+B. Outline of routine calls in mkFit
+C. Explanation of validation routines
+  I. Tracks and Extras Prep
+  II. Track Association Routines
+  III. TTree Filling
+D. Definitions of efficiency, fake rate, and duplicate rate
+E. Overview of scripts
+F. Hit map/remapping logic
+G. Extra info on ID and mask assignments
+H. Special note about duplicate rate
+
+=====================
+ A. Overview of code
+=====================
+
+TTreeValidation will only compile the necessary ROOT code with WITH_ROOT:=1 enabled (either manually editing Makefile.config, or at the command line). Always do a make clean before compiling with ROOT, as the code is ifdef'ed. To hide the heavy-duty functions from the main code, TTreeValidation inherits from the virtual class "Validation", and overrides the common functions.  The TTreeValidation object is created once per number of events in flight. The Event object obtains a reference to the validation object (stored as a data member "validation_"), so it is up to the Event object to reset some of the data members of the TTreeValidation object on every event.
+
+Three types of validation exist within TTreeValidation:
+[1.] "Building validation", enabled with Config::sim_val, via the command line option: --sim-val [--read-sim-trackstates, for pulls]
+[2.] "CMSSW external tracks building validation", enabled with Config::cmssw_val, via a minimum of the command line options: --cmssw-val --read-cmssw-tracks --geom CMS-2017 --seed-input cmssw [and potentially a seed cleaning --seed-cleaning <str>, and also specifying which cmssw matching --cmssw-matching <str>]
+[3.] "Fit validation", enabled with Config::fit_val, via the command line option: --fit-val
+
+We will ignore fit validation for the moment. The main idea behind the other two is that the validation routines are called outside of the standard timed sections, and as such, we do not care too much about performance, as long as it takes a reasonable amount of time to complete. Of course, the full wall clock time matters when running multiple events in flight, and because there is a lot of I/O as well as moves and stores that would hurt the performance with the validation enabled, these routines are ignored if the command line option "--silent" is enabled.
+
+The building validation takes advantage of filling two trees per event per track, namely:
+[1.] 
+  - efftree (filled once per sim track) 
+  - frtree (filled once per seed track)
+[2.] 
+  - cmsswefftree (filled once per cmssw external track)
+  - cmsswfrtree (filled once per mkFit build track)
+
+[1.] validation exists in the following combinations of geometry and seed source:
+  - ToyMC, with --seed-input ["sim", "find"]
+  - CMSSW, with --seed-input ["sim", "cmssw"] (+ --seed-cleaning <str> [<str>: "n2", "badlabel", "pure", "none"], + --cmssw-matching <str> [<str>: "trkparam", "hit", "label"])
+
+Upon instantiation of the TTreeValidation object, the respective ROOT trees are defined and allocated on the heap, along with setting the addresses of all the branches. After the building is completed in mkFit, we have to have the tracks in their standard event containers, namely: seedTracks_, candidateTracks_, and fitTracks_. In the standard combinatorial or clone engine, we have to copy out the built tracks from event of combined candidates into candidateTracks_ via: builder.quality_store_tracks() in mkFit/buildtestMPlex.cc.  Since we do not yet have fitting after building, we just set the fitTracks_ equal to the candidateTracks_.  For ease, I will from now on refer to the candidateTracks_ as buildTracks_.
+
+As a reminder, the sim tracks are stored in the Event.cc as simTracks_, while the CMSSW reco tracks are stored as cmsswTracks_. Each track collection has an associated TrackExtra collection, which is stored as {trackname}Extra_ inside the Event object.  It is indexed the same as the collection it references, i.e. track[0] has an associated extra extra[0]. The TrackExtra object contains the mcTrackID, seedID, and cmsswTrackID each mkFit track is associated to. The validation also makes use of simHitsInfo_ (container for storing mcTrackID for each hit), layerHits_, and simTrackStates_ (used for pulls).  See Section B and C for explanations on how the track matching is performed and track information is saved.  Essentially, we store two sets of maps, one which has a key that is an index to the reference track (MC or CMSSW) and a vector of indices for those that match it (for seeds, build tracks, and fit tracks), and the second map which maps the seed track index to its corresponding build and fit tracks.  The reason for having a sim match map for seeds, build tracks, and fit tracks is to keep track of how well the efficiency/fake rate/duplicate improves/degrades with potential cuts between them. And the same reason for having a map of seed to build as well as seed to fit. 
+
+Following each event, each of the track and extra objects are cleared. In addition, the association maps are cleared and reset. After the main loop over events expires, the ROOT file is written out with the TTrees saved via: val.saveTTrees() in mkFit.cc. The destructor for the validation then deletes the trees. The output is "valtree.root", appended by the thread number if using multiple events in flight.  From here, we then take advantage of the following files:
+
+- runValidation.C // macro used for turning TTrees into efficiency/fake rate/duplicate rate plots
+- PlotValidation.cpp/.hh // source code for doing calculations
+- makeValidation.C // plots on a single canvas results for Best Hit (BH), Standard Combinatorial (STD), and Clone Engine (CE)
+
+======================================
+ B. Outline of routine calls in mkFit
+======================================
+
+The following routines are then called after the building (MkBuilder.cc, Event.cc, TTreeValidation.cc, Track.cc):
+
+[1.] builder.sim_val()
+   : (actually run clean_cms_simtracks() when using CMS geom and using sim tracks as reference set)
+   : remap_seed_hits()
+   : remap_cand_hits()
+   : prep_recotracks()
+     : prep_tracks(seedtracks,seedextras)
+       : m_event->validation_.alignTracks(tracks,extras,false)
+     : prep_tracks(buildtracks,buildextras)
+       : m_event->validation_.alignTracks(tracks,extras,false)
+     : prep_tracks(fittracks,fitextras)
+       : m_event->validation_.alignTracks(tracks,extras,false)
+   : if (cmssw-seeds) m_event->clean_cms_simtracks() // label which simtracks are not findable: already set if using sim seeds
+   : m_event->Validate()
+     : validation_.setTrackExtras(*this) 
+       : if (sim seeds) extra.setMCTrackIDInfoByLabel() // Require 50% of found hits after seed to match label of seed/sim track
+         : modifyRecTrackID()
+       : if (--seed-input cmssw || find) extra.setMCTrackIDInfo() // Require 75% of found hits to match a single sim track
+         : modifyRecTrackID()
+     : validation_.makeSimTkToRecoTksMaps(*this)
+       : mapRefTkToRecoTks(seedtracks,seedextras,simToSeedMap) // map key = mcTrackID, map value = vector of seed labels
+       : mapRefTkToRecoTks(buildtracks,buildextras,simToBuildMap) // map key = mcTrackID, map value = vector of build labels
+       : mapRefTkToRecoTks(fittracks,fitextras,simToFitMap) // map key = mcTrackID, map value = vector of fit labels
+     : validation_.makeSeedTkToRecoTkMaps(*this)
+       : mapSeedTkToRecoTk(buildtracks,buildextras,seedToBuildMap) // map key = seedID, map value = build track label
+       : mapSeedTkToRecoTk(fittracks,fitextras,seedToFitMap) // map key = seedID, map value = fit track label
+     : validation_.fillEfficiencyTree(*this)
+     : validation_.fillFakeRateTree(*this)	
+
+[2.] builder.cmssw_val()
+   : (actually runs m_event->validation.makeSeedTkToCMSSWTkMap() from MkBuilder::prepare_seeds())
+   : (when using N^2 cleanings, Event::clean_cms_seedtracks(), or if not using N^2 cleaning, Event::use_seeds_from_cmsswtracks())
+   : remap_cand_hits()
+   : prep_recotracks()
+     : prep_tracks(buildtracks,buildextras)
+       : m_event->validation_.alignTracks(tracks,extras,false)
+   : prep_cmsswtracks()
+     : prep_tracks(cmsswtracks,cmsswextras)	
+       : m_event->validation_.alignTracks(tracks,extras,false)	
+   : m_event->Validate()
+     : validation_.setTrackExtras(*this)
+       : storeSeedAndMCID() 
+       : if (--cmssw-matching trkparam) extra.setCMSSWTrackIDInfoByTrkParams() // Chi2 and dphi matching (also includes option for nHits matching)
+    	 : modifyRecTrackID()		      
+       : else if (--cmssw-matching hit) extra.setCMSSWTrackIDInfoByHits() // Chi2 and dphi matching (also includes option for nHits matching)
+    	 : modifyRecTrackID()		      
+       : else if (--cmssw-matching label) extra.setCMSSWTrackIDInfoByLabel() // 50% hit sharing after seed
+    	 : modifyRecTrackID()		      
+     : validation_.makeCMSSWTkToRecoTksMaps(*this)
+       : mapRefTkToRecoTks(buildtracks,buildextras,cmsswToBuildMap)
+     : validation_.fillCMSSWEfficiencyTree(*this)	
+     : validation_.fillCMSSWFakeRateTree(*this)	
+
+=======================================
+ C. Explanation of validation routines
+=======================================
+
+- map/remap hit functions: see notes in section F. Essentially, validation needs all hit indices inside tracks to match the hit indices inside ev.layerHits_.
+
++++++++++++++++++++++++++++
+ I. Tracks and Extras Prep
++++++++++++++++++++++++++++
+
+- clean_cms_simtracks()
+  : loop over sim tracks
+    : mark sim track status not findable if (nLayers < [Config::cmsSelMinLayers == 8])
+    : tracks are not removed from collection, just have this bit set. this way the mcTrackID == position in vector == label
+
+- clean_cms_seedtracks()
+  : cmssw seed tracks are cleaned according to closeness in deta, dphi, dR to other cmssw seed tracks--> duplicate removal
+  : loop over cleaned seed tracks, and if label_ == -1, then incrementally decrease label (so second -1 seed is -2, third is -3)
+
+- prep_tracks(tracks,extras) 
+  : Loop over all track collections in consideration
+    : sort hits inside track by layer : needed for counting unique layers and for association routines
+    : emplace_back a track extra, initialized with the label of the track (which happens to be its seed ID) // if using sim seeds, we know that seed ID == sim ID
+  : m_event->validation_.alignTracks(tracks,extras,alignExtra)   
+
+- alignTracks(tracks,extras,alignExtra)
+  : if alignExtra == true // needed for when a reco track collection, which was previously labeled by its label() == seedID, created its track extra at the same time but the track collection has been moved or sorted
+    : create temporary track extra collection, size of track collection
+    : loop over tracks
+      : set tmp extra to the old track extra collection matching the track label
+    : set the old track extra to equal the new collection
+  : loop over tracks
+    : set the track label equal to the index inside the vector // needed for filling routines which rely on maps of indices between two track collections
+
+- prep_cmsswtracks()
+  : Standard prep_tracks()
+  : loop over cmssw tracks
+    : Count unique layers = nLayers
+    : set status of cmssw track to notFindable() if: (nUniqueLayers() < [Config::cmsSelMinLayers == 8]) // same criteria for "notFindable()" cmssw sim tracks used for seeds
+
+++++++++++++++++++++++++++++++++
+ II. Track Association Routines
+++++++++++++++++++++++++++++++++
+
+- setTrackExtras(&Event)    
+  : if [1.]
+    : loop over seed tracks
+      : setMCTrackIDInfo(true) : Require 75% of found hits to match a single sim track
+    : loop over build tracks
+       : if (sim seeds) setMCTrackIDInfoByLabel() : Require 50% of found hits after seed to match label of seed/sim track
+       : if (cms seeds) setMCTrackIDInfo(false) : Require 75% of found hits to match a single sim track
+    : loop over fit tracks 
+      : same options as build tracks   
+  : if [2.]
+    : setupCMSSWMatching()
+      : first loop over cmssw tracks
+        : create a vector of "reduced tracks" that stores 1./pt, eta, and associated covariances in reduced track states
+        : add cmssw label to a map of lyr, map of idx, vector of labels
+        : also include track momentum phi, and a list of hits inside a map. map key = layer, map value = vector of hit indices
+    : loop over build tracks
+      : setCMSSWTrackIDInfo() : require matching by chi2 and dphi
+    : storeMCandSeedID()
+
+- modifyRecTrackID() 
+  // Config::nMinFoundHits = 7, Config::nlayers_per_seed = 4 or 3 
+  // nCandHits = trk.nFoundHits() OR trk.nFoundHits()-Config::nlayers_per_seed (see calling function)
+  // nMinHits = Config::nMinFoundHits OR Config::nMinFoundHits-Config::nlayers_per_seed (see calling function)
+  : if track has been marked as a duplicate, mc/cmsswTrackID = -10
+  : else if (mc/cmsswTrackID >= 0) (i.e. the track has successfully matched)
+    : if mc/cmsswTrack is findable
+      : if nCandHits < nMinHits, mc/cmsswTrackID = -2
+    : else
+      : if nCandHits < nMinHits, mc/cmsswTrackID = -3 
+      : else mc/cmsswTrackID = -4 (track is long enough, matched, but that sim track is unfindable)
+  : else if (mc/cmsswTrackID == -1)
+    : if matching by label, and ref track exists
+      : if ref track is findable
+        : if nCandHits < nMinHits, ID = -5
+      : else 
+        : if nCandHits < nMinHits, ID = -6
+	: else, ID = -7
+    : else (not matching by label, or ref track does not exist)
+      : if nCandHits < nMinHits, ID = -8
+      : else, ID = -9
+  -->return potentially new ID assignment
+
+- setMCTrackIDInfoByLabel()
+  : Loop over found hits on build track after seed
+    : count the hits who have a mcTrackID == seedID_ (i.e. seedID == simTrack label == mcTrackID)
+  : if hits are found after seed
+    : if 50% are matched, mcTrackID == seedID_
+    : else, mcTrackID == -1
+  : mcTrackID = modifyRecTrackID() // nCandhits = nFoundHits-nlayers_per_seed, nMinHits = Config::nMinFoundHits - Config::nlayers_per_seed		      
+    
+- setMCTrackIDInfo(isSeedTrack)
+  : Loop over all found hits on build track (includes seed hits)
+    : count the mcTrackID that appears most from the hits
+  : if 75% of hits on reco track match a single sim track, mcTrackID == mcTrackID of single sim track
+  : else, mcTrackID == -1
+  : if (!isSeedTrack)
+    : modifyRecTrackID() // nCandHits = nFoundHits, nMinHits = Config::nMinFoundHits
+
+- setCMSSWTrackIDInfo()
+  : Loop over all cmssw "reduced" tracks
+    : if helix chi2 < [Config::minCMSSWMatchChi2 == 50]
+      : append label of cmssw track to a vector, along with chi2
+  : sort vector by chi2
+  : loop over label vector
+    : swim cmssw track momentum phi from phi0 to mkFit reco track
+    : if abs(wrapphi(dphi)) < [Config::minCMSSWMatchdPhi == 0.03]
+      : see if dphi < currently best stored mindphi, and if yes, then set this as the new mindphi + label as matched cmsswTrackID
+      : if using nHits matching, check for nHits matched --> currently not used nor tuned
+  : if no label is found, cmsswTrackID == -1
+  : modifyRecTrackID() // nCandHits and nMinHits same as setMCTrackIDInfo()
+
+- setCMSSWTrackIDInfoByLabel()
+  : want to match the hits on the reco track to those on the CMSSW track
+  : loop over hits on reco track after seed
+    : get hit idx and lyr
+      : if the cmssw track has this lyr, loop over hit indices on cmssw track with this layer
+        : if cmssw hit idx matches reco idx, increment nHitsMatched_
+  : follow same logic as setMCTrackIDInfoByLabel() for setting cmsswTrackID
+  : modifyRecTrackID() // nCandHits and nMinHits same as setMCTrackIDInfoByLabel()
+  
+- mapRefTkToRecoTks(tracks,extras,map)
+  : Loop over reco tracks
+    : get track extra for track
+    : if [1.], map[extra.mcTrackID()].push_back(track.label()) // reminder, label() now equals index inside track vector!
+    : if [2.], map[extra.cmsswTrackID()].push_back(track.label()) // reminder, label() now equals index inside track vector!
+  : Loop over pairs in map
+    : if vector of labels size == 1, get track extra for label, and set duplicate index == 0
+    : else
+      : make temp track vector from track labels, sort track vector by nHits (and sum hit chi2 if tracks have same nHits)
+      : set vector of labels to sorted tracks
+      : loop over vector labels
+      	: get track extra for label, and set duplicate index++ 
+
+- mapSeedTkToRecoTk(tracks,extras,map)
+  : loop over reco tracks
+    : map[extra.seedID()] = track.label()
+
+- makeSeedTkToCMSSWTkMap(event)
+  : this is run BEFORE seed cleaning AND BEFORE the seeds are sorted in eta in prepare_seeds()
+  : if seed track index in vector == cmssw track label(), store map key = seed track label(), map value = cmssw track label() in seedToCmsswMap (seedID of cmssw track)
+
+- storeMCandSeedID()
+  : reminder: both the candidate tracks and the cmssw tracks have had their labels reassigned, but their original labels were stored in their track extra seedIDs.  reminder, seedID of candidate track points to the label of the seed track.  label on seed track == sim track reference, if it exists!
+  : loop over candidate tracks
+    : set mcTrackID == seedID_ of track
+    : if seedToCmsswMap[cand.label()] exists, then set the seedID equal to the mapped value (i.e. the seedID of the cmssw track!)
+    : else, set seedID == -1
+  : After this is run, to get the matching CMSSW track, we then need to loop over the CMSSW track extras with an index based loop, popping out when the cmsswextra[i].seedID() == buildextra[j].seedID()
+
+++++++++++++++++++++
+ III. TTree Filling
+++++++++++++++++++++
+
+- fillEfficiencyTree()
+  : loop over simtracks
+    : get mcTrackID (i.e. simTrack.label())
+    : store sim track gen info
+    : if simToSeedMap[mcTrackID] has value
+      : mcmask == 1
+      : get first seed track matched (i.e. the one with the highest nHits --> or lowest sum hit chi2 as provided by sort from above)
+      : store seed track parameters
+      : store nHits, nlayers, last layer, chi2
+      : store duplicate info: nTrks_matched from size() of mapped vector of labels, and duplicateMask == seedtrack.isDuplicate()
+      : get last found hit index
+      	: store hit parameters
+	: if mcTrackID of hit == mcTrackID of sim track // ONLY for when simtrackstates are stored, i.e. in ToyMC only at the moment
+	  : store sim track state momentum info from this layer (from simTrackStates[mcHitID])
+	: else get sim track state of mcTrackID, then store momentum info
+    : else
+      : mcmask == 0, or == -1 if simtrack.isNotFindable()
+    : if simToBuildMap[mcTrackID] has value
+      : repeat as above
+    : if simToFitMap[mcTrackID] has value
+      : repeat as above
+    : fill efftree
+
+- fillFakeRateTree()
+  : loop over seed tracks
+    : get seedID of seed track from track extra
+    : fill seed track parameters + last hit info, nhits, etc
+    : assign mcmask info based on mcTrackID from track extra (see section D and G for explanation of mask assignments)
+    : if mcmask == 1
+      : store gen sim momentum parameters
+      : store nhits info, last layer
+      : store duplicate info: iTh track matched from seedtrack extra, duplicateMask == seedtrack.isDuplicate()
+      : if last hit found has a valid mcHitID
+      	: store sim track state momentum info from simTrackStates[mcHitID]
+    : if seedToBuildMap[seedID] has value
+      : fill build track parameters + last hit info, nhits, etc
+      : assign mcmask info based on mcTrackID from track extra (see section D and G for explanation of mask assignments)
+      : if mcmask == 1
+      	: store gen sim momentum parameters
+        : store nhits info, last layer, duplicate info as above
+        : if last hit found has a valid mcHitID
+      	  : store sim track state momentum info from simTrackStates[mcHitID]
+    : if seedToFitMap[seedID] has value
+      : same as above
+    : fill frtree
+
+- fillCMSSWEfficiencyTree()
+  : loop over cmsswtracks
+    : get label of cmsswtrack, seedID
+    : store cmssw track PCA parameters + nhits, nlayers, last layer
+    : if cmsswToBuildMap[cmsswtrack.label()] has value
+      : get first build track matched (i.e. the one with the highest nHits --> or lowest sum hit chi2 as provided by sort from above)
+      : store build track parameters + errors
+      : store nHits, nlayers, last layer, last hit parameters, hit and helix chi2, duplicate info, seedID
+      : swim cmssw phi to mkFit track, store it
+    : fill cmsswefftree
+
+- fillCMSSWFakeRateTree()
+  : loop over build tracks
+    : store build track parameters + errors
+    : store nHits, nlayers, last layer, last hit parameters, hit and helix chi2, duplicate info, seedID
+    : get cmsswTrackID, assign cmsswmask according to section D and G
+    : if cmsswmask == 1 
+      : store cmssw track PCA parameters + nhits, nlayers, last layer, seedID
+      : swim cmssw phi to mkFit track, store it
+    : fill cmsswfrtree
+
+=============================================================
+ D. Definitions of efficiency, fake rate, and duplicate rate
+=============================================================
+
+Use runValidation.C to create efficiency, fake rate, and duplicate rate vs. pT, phi, eta. This macro compiles PlotValidation.cpp/.hh. Efficiency uses sim track momentum info. Fake rate uses the reco track momentum. For [1.], plots are made for seed, build, and fit tracks. For [2.], the plots are only against the build tracks. See G. for more details on ID assignments.
+
+root -l -b -q runValidation.C\([arguments]\)
+
+Argument list: 
+First is additional input name of root file [def = ""]
+Second argument is boolean to compute momentum pulls: currently implemented only when sim track states are available (ToyMC validation only)! [def = false]
+Third argument is boolean to do special CMSSW validation [def = false]
+Fourth argument == true to move input root file to output directory, false to keep input file where it is. [def = true]
+Fifth argument is a bool to save the image files [def = false]
+Last argument is output type of plots [def = "pdf"]
+
+Efficiency [PlotValidation::PlotEfficiency()]
+  numerator:   sim tracks with at least one reco track with mcTrackID >= 0 (mcmask_[reco] == 1)
+  denominator: all findable sim tracks (mcmask_[reco] == 0 || == 1)
+  mcmask_[reco] == -1 excluded from both numerator and denominator because this sim track was not findable!
+
+Fake Rate (with only long reco tracks: Config::inclusiveShorts == false) [PlotValidation::PlotFakeRate()]
+  numerator:   reco tracks with mcTrackID == -1 || == -9
+  denominator: reco tracks with mcTrackID >=  0 || == -1 || == -9
+  mcTrackID | mcmask_[reco] 
+     >= 0   |     1
+    -1,-9   |     0
+     -10    |    -2
+     else   |    -1 // OR the seed track does produce a build/fit track as determined by the seedToBuild/FitMap
+
+N.B. In the MTV-Like SimVal: the requirement on minHits is removed, so all reco tracks are considered.
+ - For the efficiency: only simtracks from the hard scatter (with some quality cuts on d0, dz, and eta) are considered for the denominator and numerator. If a simtrack from the hard-scatter is unmatched, it will not enter the numerator.
+ - For the FR: all reco tracks (regardless of nHits) are in the denominator, and only those that are unmatched to any simtrack are in the numerator. Compared to the standard FR definition, we now allow reco tracks that are matched to any simtrack (regardless of quality of the simtrack, if its from PU, etc.) to enter the denominator. 
+- This means that tracks with mcTrackID == -4 will now have a mcmask_[reco] == 2 for MTV-Like simtrack validation. 
+
+Fake Rate (with all reco tracks: Config::inclusiveShorts == true, enabled with command line option: --inc-shorts) [PlotValidation::PlotFakeRate()]
+  numerator:   reco tracks with mcTrackID == -1 || == -5 || ==  -8 || ==  -9
+  denominator: reco tracks with mcTrackID >=  0 || == -2 || == -1 || == -5 || == -8 || == -9
+  mcTrackID  | mcmask_[reco] 
+    >= 0     |     1
+ -1,-5,-8,-9 |     0
+    -10      |    -2
+     -2      |     2   
+    else     |    -1 // OR the seed track does produce a build/fit track as determined by the seedToBuild/FitMap
+
+Duplicate Rate [PlotValidation::PlotDuplicateRate()], see special note in section H
+  numerator:   sim tracks with more than one reco track match (duplmask_[reco] == 1), or another way is nTrks_matched_[reco] > 1
+  denominator: sim tracks with at least one reco track with mcTrackID >= 0 (duplmask_[reco] != -1), or mcmask_[reco] == 1
+
+========================
+ E. Overview of scripts
+========================
+
+I. ./validation-snb-toymc-fulldet-build.sh
+Runs ToyMC full detector tracking for BH, STD, CE, for 400 events with nTracks/event = 2500. Sim seeds only.
+
+To move the images + text files and clean up directory:
+./web/move-toymcval.sh ${outdir name}
+
+II. ./validation-snb-cmssw-10mu-fulldet-build.sh
+Runs CMSSW full detector tracking for BH, STD, CE, for ~1000 events with 10 muons/event, with sim and cmssw seeds, using N^2 cleaning for cmssw seeds.
+Samples are split by eta region. Building is run for each region:
+- ECN2: -2.4 < eta < -1.7
+- ECN1: -1.75 < eta < -0.55
+- BRL: |eta| < 0.6
+- ECP1: 0.55 < eta < 1.75
+- ECP2: 1.7 < eta < 2.4
+
+Validation plots are produced for each sample (region), seeding source, and building routine. At the very end, validation trees are hadd'ed for each region in a given seed source + building routine. Plots are produced again to yield "full-detector" tracking.
+
+To move the images + text files and clean up directory:
+./web/move-cmsswval-10mu.sh ${outdir name}
+
+III. ./validation-snb-cmssw-10mu-fulldet-extrectracks.sh
+Same as II., but now only run with cmssw seeds (as we are comparing directly to cmssw output as the reference).
+
+To move the images + text files and clean up directory:
+./web/move-cmsswval-10mu-extrectracks.sh ${outdir name}
+
+IV. ./validation-snb-cmssw-ttbar-fulldet.sh
+Runs CMSSW full detector tracking for BH, STD, CE, for three different ttbar samples with 100 events each, with sim and cmssw seeds, using N^2 cleaning for cmssw seeds.
+TTbar samples:
+- No PU
+- PU 35
+- PU 70
+
+To move the images + text files and clean up directory:
+./web/move-cmsswval-ttbar.sh ${outdir name}
+
+V. ./validation-snb-cmssw-ttbar-fulldet.sh
+Same as IV., but now only run with cmssw seeds, using cmssw rec tracks as the reference set of tracks.
+
+To move the images + text files and clean up directory:
+./web/move-cmsswval-ttbar-extrectracks.sh ${outdir name}
+
+============================
+ F. Hit map/remapping logic
+============================
+
+*** Originally from mkFit/MkBuilder.cc ***
+
+All built candidate tracks have all hit indices pointing to m_event_of_hits.m_layers_of_hits[layer].m_hits (LOH)
+MC seeds (both CMSSW and toyMC),as well as CMSSW seeds, have seed hit indices pointing to global HitVec m_event->layerHits_[layer] (GLH)
+Found seeds from our code have all seed hit indices pointing to LOH.
+So.. to have universal seed fitting function --> have seed hits point to LOH no matter their origin.
+This means that all MC and CMSSW seeds must be "mapped" from GLH to LOH: map_seed_hits().
+Now InputTracksAndHits() for seed fit will use LOH instead of GLH.
+The output tracks of the seed fitting are now stored in m_event->seedTracks_.
+
+Then building proceeds as normal, using m_event->seedTracks_ as input no matter the choice of seeds. 
+
+For the validation, we can reuse the TrackExtra setMCTrackIDInfo() with a few tricks.
+Since setMCTrackIDInfo by necessity uses GLH, we then need ALL track collections (seed, candidate, fit) to have their hits point back to GLH.
+There are also two validation options: w/ or w/o ROOT.
+
+W/ ROOT uses the TTreeValidation class which needs seedTracks_, candidateTracks_, and fitTracks_ all stored in m_event.
+The fitTracks_ collection for now is just a copy of candidateTracks_ (eventually may have cuts and things that affect which tracks to fit).
+So... need to "remap" seedTracks_ hits from LOH to GLH with remap_seed_hits().
+And also copy in tracks from EtaBin* to candidateTracks_, and then remap hits from LOH to GLH with quality_store_tracks() and remap_cand_hits().
+W/ ROOT uses sim_val()
+
+W/O ROOT is a bit simpler... as we only need to do the copy out tracks from EtaBin* and then remap just candidateTracks_.
+This uses quality_output()
+
+N.B.1 Since fittestMPlex at the moment is not "end-to-end" with candidate tracks, we can still use the GLH version of InputTracksAndHits()
+N.B.2 Since we inflate LOH by 2% more than GLH, hit indices in building only go to GLH, so all loops are sized to GLH.
+
+==========================================
+ G. Extra info on ID and mask assignments
+==========================================
+
+*** Originally from Track.cc ***
+
+Three basic quantities determine the track ID: 
+ 1. matching criterion (50% after seed for *ByLabel(), 75% for other hit matching, or via chi2+dphi)
+ 2. nCandidateHits found compared to nMinHits
+ 3. findability of reference track (if applicable)
+
+Three outcomes exist for each quantity:
+ 1. matching criterion
+    a. reco track passed the matching criterion in set*TrackIDInfo*(): M
+    b. reco track failed the matching criterion in set*TrackIDInfo*(): N
+    c. reco track never made it past its seed, so matching selection by hit matching via reference track label does not exist in set*TrackIDInfoByLabel(): N/A
+ 2. nCandHits compared to nMinHits
+    a. reco track has greater than or equal to the min hits requirement (i.e. is long enough): L
+    b. reco track has less than the min hits requirement (i.e. short): S
+    c. reco track is a pure seed, and calling function is set*TrackIDInfoByLabel(): O, by definition then O also equals S
+ 3. findability of reference track
+    a. reference track is findable (nUniqueLayers >= 8 && pT > 0.5): isF
+    b. reference track is NOT findable (nUniqueLayers < 8 || pT < 0.5): unF
+    c. reference track does not exist in set*TrackIDInfoByLabel(), or we are using set*TrackIDInfo(): ?
+
+*** Originally from TTreeValidation.cc ***
+
+** Mask assignments **
+
+_[reco] = {seed,build,fit}
+
+Logic is as follows: any negative integer means that track is excluded from both the numerator and denominator. A mask with a value greater than 1 means that the track is included in the denominator, but not the numerator.
+
+--> mcmask_[reco] == 1,"associated" reco to sim track [possible duplmask_[reco] == 1,0] {eff and FR}, enter numer and denom of eff, enter denom only of FR
+--> mcmask_[reco] == 0,"unassociated" reco to sim track. by definition no duplicates (no reco to associate to sim tracks!) [possible duplmask_[reco] == -1 {eff and FR}], enter denom only of eff, enter numer and denom of FR
+--> mcmask_[reco] == -1, sim or reco track excluded from denominator (and therefore numerator) [possible duplmask_[reco] == -1] {eff and FR}
+--> mcmask_[reco] == -2, reco track excluded from denominator because it does not exist (and therefore numerator) [possible duplmask_[reco] == -2] {FR}
+--> mcmask_[reco] == 2, reco track included in denominator of FR, but will not enter numerator: for short "matched" tracks {FR only}
+
+--> nTkMatches_[reco] > 1,   n reco tracks associated to the same sim track ID {eff only}
+--> nTkMatches_[reco] == 1,  1 reco track associated to single sim track ID {eff only}
+--> nTkMatches_[reco] == -99, no reco to sim match {eff only}
+
+--> mcTSmask_[reco] == 1, reco track is associated to sim track, and sim track contains the same hit as the last hit on the reco track
+--> mcTSmask_[reco] == 0, reco track is associated to sim track, and either A) sim track does not contain the last hit found on the reco track or B) the sim trackstates were not read in (still save sim info from gen position via --try-to-save-sim-info)
+--> mcTSmask_[reco] == -1, reco track is unassociated to sim track
+--> mcTSmask_[reco] == -2, reco track is associated to sim track, and we fail == 1 and == 0
+--> mcTSmask_[reco] == -3, reco track is unassociated to seed track {FR only}
+
+excluding position variables, as position could be -99!
+--> reco var == -99, "unassociated" reco to sim track [possible mcmask_[reco] == 0,-1,2; possible duplmask_[reco] == -1] {eff only}
+--> sim  var == -99, "unassociated" reco to sim track [possible mcmask_[reco] == 0,-1,2; possible duplmask_[reco] == -1] {FR only}
+--> reco/sim var == -100, "no matching seed to build/fit" track, fill all reco/sim variables -100 [possible mcmask_[reco] == -1, possible duplmask_[reco] == -1] {FR only}
+--> sim  var == -101, reco track is "associated" to sim track, however, sim track does have a hit on the layer the reco track is on
+
+--> seedmask_[reco] == 1, matching seed to reco/fit track [possible mcmask_[reco] == 0,1,2; possible duplmask_[reco] == 0,1,-1] {FR only}
+--> seedmask_[reco] == 0, no matching seed to reco/fit track [possible mcmask_[reco] == -2; possible duplmask_[reco] == -2] {FR only}
+
+--> duplmask_[reco] == 0, only "associated" reco to sim track [possible mcmask_[reco] == 1] {eff and FR}
+--> duplmask_[reco] == 1, more than one "associated" reco to sim track [possible mcmask_[reco] == 1] {eff and FR}
+--> duplmask_[reco] == -1, no "associated" reco to sim track [possible mcmask_[reco] == 0,-1,-2] {eff and FR}
+--> duplmask_[reco] == -2, no matching built/fit track for given seed [possible mcmask_[reco] == -2] {FR only}
+
+--> reco var == -10, variable not yet implemented for given track object
+
+position reco variables
+--> layers_[reco]    ==  -1, reco unassociated to sim tk {eff only}
+--> reco pos+err var == -2000, reco tk is unassociated to sim tk {eff only}
+--> reco pos+err var == -3000, reco tk is unassociated to seed tk {FR only}
+
+======================================
+ H. Special note about duplicate rate
+======================================
+
+*** Originally from PlotValidation.cpp ***
+
+Currently, TEfficiency does not allow you to fill a weighted number in the numerator and NOT the denominator.
+In other words, we cannot fill numerator n-1 times sim track is matched, while denominator is just filled once.
+As a result, DR is simply if a sim track is duplicated once, and not how many times it is duplicated. 
+
+We can revert back to the n-1 filling for the numerator to weight by the amount of times a sim track is duplicated, but this would mean going back to the TH1Fs, and then using the binomial errors (or computing by hand the CP errors or something), in the case that the DR in any bin > 1... This would break the flow of the printouts as well as the stacking macro, but could be done with some mild pain.
diff --git a/RecoTracker/MkFitCore/standalone/web/README_WEBPLOTS.md b/RecoTracker/MkFitCore/standalone/web/README_WEBPLOTS.md
new file mode 100644
index 0000000000000..79e7b7a744103
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/README_WEBPLOTS.md
@@ -0,0 +1,133 @@
+# Website Setup
+
+Intro: Below is a short README on how to set up a CERN website to host plots produced by the benchmark and validation scripts. As this is a markdown file, it is best viewed from a web browser. However, please read the main README.md in the top-level directory prior to this README_WEBPLOTS.md! It has useful info on how to use the scripts within this directory, as well as info on how to run the benchmarking.
+
+Apologies on the ordered list style: GitHub Flavored Markdown ignores CSS/HTML style tags, so you cannot override the list settings. Many attempts at using indents also did not work. 
+
+## Outline
+
+1) Setting up a website with an EOS userspace
+   1) Intro
+   2) Step-by-step instructions for setting up your website
+   3) Setting permissions for your website
+   4) Special Notes
+      1) Disclaimer on email addresses
+      2) Notes on ```${webdir}```
+      3) Passwordless scp to LXPLUS
+2) DEPRECATED: Setting up a website with an AFS userspace
+
+## Section 1: Setting up a website with an EOS userspace
+### Section 1.i: Intro
+
+N.B. To access any of the links below, you first need to sign into CERN single login on your web browser! You will otherwise see strange errors when trying to access them.
+
+First, ensure you have an LXPLUS account. If so, a standard ```ssh``` login into LXPLUS will take you to your /afs home directory: ```/afs/cern.ch/user/${FIRST_LETTER}/${USERNAME}```, where the first letter of your LXPLUS username is the variable ```${FIRST_LETTER}```, e.g. Kevin's LXPLUS ```${USERNAME}``` is ```kmcdermo```, so ```${FIRST_LETTER}``` is ```k```. 
+
+Your corresponding /eos user space is: ```/eos/user/${FIRST_LETTER}/${USERNAME}```, which is allotted 1TB of storage. If you are a member of CMS, you may have additional storage on /eos at LXPLUS (through /eos/cms/store/user or /eos/cms/store/group) or the FNAL LPC. However, given the level of integration between CERNBox and the /eos/user space, it is recommended that you use the /eos/user space for storing files for the web. CERN provides now a bit more documentation on CERNBox and its connection to /eos/user space: https://cernbox-manual.web.cern.ch/cernbox-manual/en/.
+
+In case you cannot directly access your /eos/user space, you may need to request it from CERN, following the instructions from the link above. Additional info on EOS and how to work with it can be found here: https://cern.service-now.com/service-portal/article.do?n=KB0001998. Given that EOS is still in transition, anecdotally, it is not quite as stable as AFS, and experiences some strange glitches from time to time. Always check the CERN Service Desk for incidents and planned interventions: https://cern.service-now.com/service-portal/ssb.do. In case you experience problems, open a ticket for an "Incident" through the Service Portal.
+
+At this point, you have to determine if you are either i) looking to migrate your personal website at CERN from AFS to EOS, or ii) create a new personal website on EOS. In order to do option ii), you cannot already have a personal website with CERN. So by default, if you want to set up a website with an EOS space at CERN, and you already have an AFS website, you will need to choose option i).
+
+### Section 1.ii: Step-by-step instructions for setting up your website 
+
+1) Go to the instructions for setting up a website from the CERNBox documentation: https://cernbox-manual.web.cern.ch/cernbox-manual/en/web/. 
+2) Follow along the steps in section 10.2: "Personal website" up until "Create personal website (via Web Services)".  
+If for some reason the CERNBox documentation is down, go to this help page: https://cern.service-now.com/service-portal/article.do?n=KB0004096, and download the images on that page: eosuser-web-[1-4].png. Follow along in order of the images, as the instructions from these are equivalent to the CERNBox documentation.
+3) At this point, you will need to request a website from CERN. Follow the branches i) or ii) below
+   1) **Migrating your previous website from AFS to EOS**
+      1) First, have a look at this link and watch the video: https://cds.cern.ch/record/2286177?ln=en, or read this document (equivalent to the video): https://indico.cern.ch/event/661564/attachments/1512296/2358778/Migrate_Website_from_AFS_to_EOS.DOCX
+      2) Follow the instructions in the video, ensuring to read the text once you click on the button: "Migrate to EOS". You will have to copy your old files over to /eos/user if you want the transition to be seamless.
+   2) **Brand new personal site at CERN**
+       1) Continue with the CERNBox "Personal website" documentation with the section "Create personal website (via Web Services)". Or follow the instructions listed in eosuser-web-10.png from the backup help page.
+4) While waiting for the request, you will need to set up directory browsing and permissions. Some documentation on this is here: https://espace.cern.ch/webservices-help/websitemanagement/ConfiguringAFSSites/Pages/default.aspx. Please see the section below on what is recommended for restricting access to files.
+
+N.B. Your fancy new website will have the URL: ```https://${USERNAME}.web.cern.ch/${USERNAME}/```.
+
+### Section 1.iii: Setting permissions for your website
+
+There are a couple options here for how to properly configure permissions for your website. At a minimum, if you just want to get your website up and running after it has been approved by CERN, login into LXPLUS and go to your website directory, i.e. ```${webdir}``` == "www" if you followed the instructions from CERNBox exactly: 
+
+```
+cd /eos/user/${FIRST_LETTER}/${USERNAME}/${webdir}
+```
+
+From there, open the file ```.htaccess``` in your favorite editor and add the following text:
+
+```
+Options +Indexes
+```
+
+However, it is recommended that your top-level directory ```${webdir}``` require at least an authenticated user sign-in to access this directory. A minimal example of what your ```.htaccess``` file needs is the text below:
+
+```
+SSLRequireSSL
+AuthType shibboleth
+ShibRequireSession On
+ShibRequireAll On
+ShibExportAssertion Off
+
+Require valid-user
+Options +Indexes
+```
+
+Upon trying to access your website now via a web browser (or another user's website with similar permissions), you will be required to sign-in via CERN's single login. If you wish to further restrict access to only members of this group, add the following line to your ```.htaccess``` file: ```Require ADFS_GROUP mic-trk-rd```. This is now setting a permission such that only members that are subscribed to our mic-trk e-group can access this directory.
+
+**Some discussion on the ```.htaccess``` file**: If you would like to use your personal website for more than just this project, it is recommended that you create a subdirectory ```${mictrkdir}``` under ```${webdir}```. In ```${mictrkdir}```, you then can create another ```.htaccess``` file which includes the restricted access for only members of the mic-trk e-group using the line from above: ```Require ADFS_GROUP mic-trk-rd```. This line would then need to be removed in your top-level ```${webdir}/.htaccess```, in case you want others to have access to other subdirectories related to physics analysis, RECO convener duties, etc. 
+
+### Section 1.iv: Special Notes
+ 
+#### Section 1.iv.a: Disclaimer on email addresses
+ 
+It is imperative that you have your primary email address associated to your CERN account (go to CERN accounts to check this) be the same email used for sending+receiving emails from the mictrk e-group. Otherwise, the line ```Require ADFS_GROUP mic-trk-rd``` will lock you out of viewing your own website on a browser! Unless you have some special CERN account, your primary email for your CERN account is a ```@cern.ch``` Outlook address. 
+
+ 
+#### Section 1.iv.b: Notes on `${webdir}`
+ 
+- If ```${webdir} != "www"```, then you will have to modify the variable ```LXPLUS_OUTDIR``` in ```web/tarAndSendToLXPLUS.sh``` to match the name for ```${webdir}```. 
+- If you decided to make a subdirectory under ```${webdir}``` specifically for this project, then you may wish to make the following modifications to: ```web/tarAndSendToLXPLUS.sh```
+  1) Make a new variable ```LXPLUS_WEBDIR=${webdir}```, and set ```LXPLUS_OUTDIR=${mictrkdir}```.
+  2) Modify the ```scp``` to be: ```scp -r ${tarball} ${LXPLUS_HOST}:${LXPLUS_WORKDIR}/${LXPLUS_WEBDIR}/${LXPLUS_OUTDIR}```
+  3) Modify the ```cd``` to be: ```cd ${LXPLUS_WORKDIR}/${LXPLUS_WEBDIR}/${LXPLUS_OUTDIR}```
+  4) Add this line under the untar (i.e. ```tar -zxvf```): ```cd ${LXPLUS_WORKDIR}/${LXPLUS_WEBDIR}```
+ 
+#### Section 1.iv.c: Passwordless scp to LXPLUS
+ 
+Make sure to read Section 10.ii.b in the main README.md on how to take advantage of passwordless scp for transferring plots to LXPLUS via ```./web/move-benchmarks.sh ${plotdir}```.
+
+## Section 2: DEPRECATED: Setting up a website with an AFS userspace
+
+**Special note**: This may not even be an option anymore as CERN is trying to migrate away from AFS to EOS... Therefore, instructions for this section are "as-is".
+
+1) Request CERN website from websites.cern.ch 
+   1) set website to point to AFS directory
+   2) make website match username
+   3) set address to ```/afs/cern.ch/user/${FIRST_LETTER}/${USERNAME}/${dir}```
+   4) make sure ```${dir}``` exists!
+
+2) While waiting for the request, run the following commands from one directory above ```${dir}```
+   1) ```fs setacl ${dir} webserver:afs read```
+   2) ```afind ${dir} -t d -e "fs setacl -dir {} -acl webserver:afs read"```
+   3) ```cd ${dir}```
+   4) ```touch .htaccess```
+   5) open .htaccess in an editor and paste the following: ```Options +Indexes```
+
+3) Then copy the very useful ```index.php``` into ```${dir}``` (optional: it simply makes the top-level web GUI nicer)
+
+4) Once set up and website is live, copy plots and directories into ```${dir}```
+5) ```cd ${dir}```
+6) ```./makereadable.sh ${subdir}```, for every subdir. If launched from the top-level directory ```${subdir}```, it will handle the searching of subdirs.
+
+As an aside, there are two other directories on LXPLUS every user has access to:
+
+```
+/afs/cern.ch/ubackup/${FIRST_LETTER}/${USERNAME}
+```
+
+and 
+
+```
+/afs/cern.ch/work/${FIRST_LETTER}/${USERNAME}
+``` 
+
+```ubackup``` is a backup of 24h snapshots of ```user```, while ```work``` is not backed up but users can request up to 100 GB of space. The max for ```user``` directories is 10 GB upon request.
diff --git a/RecoTracker/MkFitCore/standalone/web/collectBenchmarks-multi.sh b/RecoTracker/MkFitCore/standalone/web/collectBenchmarks-multi.sh
new file mode 100755
index 0000000000000..320ae9ad23972
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/collectBenchmarks-multi.sh
@@ -0,0 +1,103 @@
+#! /bin/bash
+# Collect the multi-iteration SimTrack validation text dumps and plots from the current directory into ${dir}.
+###########
+## Input ##
+###########
+
+dir=${1:-"benchmarks"} # output directory that receives the collected files
+suite=${2:-"forConf"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${3:-0} # passed through to common-variables.sh
+whichcands=${4:-"build"} # tag that appears in the plot file names
+
+###################
+## Configuration ##
+###################
+source xeon_scripts/common-variables.sh "${suite}" "${useARCH}"
+source xeon_scripts/init-env.sh
+export MIMI="CE mimi"
+declare -a val_builds=(MIMI) # each entry names a variable whose value is read below as "<build name> <rest>"
+
+######################################
+## Move Physics Performance Results ##
+######################################
+
+# Make SimTrack Validation directories: files tagged simval[i] are moved into ${dir}/simdir[i]
+simdir=("SIMVAL_MTV_iter4" "SIMVAL_MTV_SEED_iter4" "SIMVAL_MTV_iter22" "SIMVAL_MTV_SEED_iter22" "SIMVAL_MTV_iter23" "SIMVAL_MTV_SEED_iter23" "SIMVAL_MTV_iter5" "SIMVAL_MTV_SEED_iter5" "SIMVAL_MTV_iter24" "SIMVAL_MTV_SEED_iter24" "SIMVAL_MTV_iter7" "SIMVAL_MTV_SEED_iter7" "SIMVAL_MTV_iter8" "SIMVAL_MTV_SEED_iter8" "SIMVAL_MTV_iter9" "SIMVAL_MTV_SEED_iter9" "SIMVAL_MTV_iter10" "SIMVAL_MTV_SEED_iter10" "SIMVAL_MTV_iter6" "SIMVAL_MTV_SEED_iter6" )
+simval=("SIMVAL_iter4" "SIMVALSEED_iter4" "SIMVAL_iter22" "SIMVALSEED_iter22" "SIMVAL_iter23" "SIMVALSEED_iter23" "SIMVAL_iter5" "SIMVALSEED_iter5" "SIMVAL_iter24" "SIMVALSEED_iter24" "SIMVAL_iter7" "SIMVALSEED_iter7" "SIMVAL_iter8" "SIMVALSEED_iter8" "SIMVAL_iter9" "SIMVALSEED_iter9" "SIMVAL_iter10" "SIMVALSEED_iter10" "SIMVAL_iter6" "SIMVALSEED_iter6" )
+
+for ((i = 0; i < ${#simdir[@]}; ++i)); do
+
+    mkdir -p "${dir}/${simdir[i]}"
+    mkdir -p "${dir}/${simdir[i]}/logx"
+    mkdir -p "${dir}/${simdir[i]}/diffs"
+    mkdir -p "${dir}/${simdir[i]}/nHits"
+    mkdir -p "${dir}/${simdir[i]}/score"
+
+    # Move text file dumps for SimTrack Validation (val_arch and sample are not set here; presumably they come from the sourced scripts)
+    for build in "${val_builds[@]}"
+    do echo "${!build}" | while read -r bN bO
+        do
+            vBase=${val_arch}_${sample}_${bN}
+            mv "validation_${vBase}_${simval[i]}/totals_validation_${vBase}_${simval[i]}.txt" "${dir}/${simdir[i]}"
+        done
+    done
+
+    # Move dummy CMSSW text file (SimTrack Validation)
+    vBase=${val_arch}_${sample}_CMSSW
+    mv "validation_${vBase}_${simval[i]}/totals_validation_${vBase}_${simval[i]}.txt" "${dir}/${simdir[i]}"
+
+    # Move rate plots for SimTrack Validation
+    for rate in eff ineff_brl ineff_trans ineff_ec dr fr
+    do
+        for pt in 0p0 0p9 2p0
+        do
+            for var in phi eta nLayers
+            do
+                mv "${val_arch}_${sample}_${rate}_${var}_${whichcands}_pt${pt}_${simval[i]}.png" "${dir}/${simdir[i]}"
+            done
+        done
+
+        # only copy pt > 0 for pt rate plots
+        for var in pt pt_zoom
+        do
+            mv "${val_arch}_${sample}_${rate}_${var}_${whichcands}_pt0p0_${simval[i]}.png" "${dir}/${simdir[i]}"
+        done
+
+        mv "${val_arch}_${sample}_${rate}_pt_logx_${whichcands}_pt0p0_${simval[i]}.png" "${dir}/${simdir[i]}/logx"
+    done
+
+    # Move kinematic diff plots for SimTrack Validation
+    for coll in bestmatch allmatch
+    do
+        for var in nHits invpt phi eta
+        do
+            for pt in 0p0 0p9 2p0
+            do
+                mv "${val_arch}_${sample}_${coll}_d${var}_${whichcands}_pt${pt}_${simval[i]}.png" "${dir}/${simdir[i]}/diffs"
+            done
+        done
+    done
+
+    # Move track quality plots for SimTrack Validation (nHits,score)
+    for coll in allreco fake bestmatch allmatch
+    do
+        for pt in 0p0 0p9 2p0
+        do
+            for qual in nHits score
+            do
+                mv "${val_arch}_${sample}_${coll}_${qual}_${whichcands}_pt${pt}_${simval[i]}.png" "${dir}/${simdir[i]}/${qual}"
+            done
+        done
+    done
+done
+
+# Final message
+echo "Finished collecting benchmark plots into ${dir}!"
+
+find "${dir}" -mindepth 0 -type d -exec cp web/index.php {} \;
+
+rm -rf log_*.txt
+rm -rf *.root
+rm -rf *.png
+rm -rf validation_*
+
diff --git a/RecoTracker/MkFitCore/standalone/web/collectBenchmarks.sh b/RecoTracker/MkFitCore/standalone/web/collectBenchmarks.sh
new file mode 100755
index 0000000000000..2d42b0c151d78
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/collectBenchmarks.sh
@@ -0,0 +1,153 @@
+#! /bin/bash
+
+###########
+## Input ##
+###########
+
+dir=${1:-"benchmarks"}
+suite=${2:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${3:-0}
+whichcands=${4:-"build"}
+
+###################
+## Configuration ##
+###################
+source xeon_scripts/common-variables.sh ${suite} ${useARCH}
+source xeon_scripts/init-env.sh
+
+######################################
+## Move Compute Performance Results ##
+######################################
+
+# Move subroutine build benchmarks
+builddir="Benchmarks"
+mkdir -p ${dir}/${builddir}
+mkdir -p ${dir}/${builddir}/logx
+
+for ben_arch in "${arch_array[@]}"
+do
+    for benchmark in TH VU
+    do
+	oBase=${ben_arch}_${sample}_${benchmark}
+
+	mv ${oBase}_"time".png ${dir}/${builddir}
+	mv ${oBase}_"speedup".png ${dir}/${builddir}
+
+	mv ${oBase}_"time_logx".png ${dir}/${builddir}/logx
+	mv ${oBase}_"speedup_logx".png ${dir}/${builddir}/logx
+    done
+done
+
+# Move multiple events in flight plots
+meifdir="MultEvInFlight"
+mkdir -p ${dir}/${meifdir}
+mkdir -p ${dir}/${meifdir}/logx
+
+for ben_arch in "${arch_array[@]}" 
+do
+    for build in "${meif_builds[@]}"
+    do echo ${!build} | while read -r bN bO
+	do
+            oBase=${ben_arch}_${sample}_${bN}_"MEIF"
+	    
+            mv ${oBase}_"time".png ${dir}/${meifdir}
+            mv ${oBase}_"speedup".png ${dir}/${meifdir}
+	    
+            mv ${oBase}_"time_logx".png ${dir}/${meifdir}/logx
+            mv ${oBase}_"speedup_logx".png ${dir}/${meifdir}/logx
+	done
+    done
+done
+
+# Move plots from text dump
+dumpdir="PlotsFromDump"
+mkdir -p ${dir}/${dumpdir}
+mkdir -p ${dir}/${dumpdir}/diffs
+
+for build in "${text_builds[@]}"
+do echo ${!build} | while read -r bN bO
+    do
+	for var in nHits pt eta phi
+	do
+	    mv ${sample}_${bN}_${var}.png ${dir}/${dumpdir}
+	    mv ${sample}_${bN}_"d"${var}.png ${dir}/${dumpdir}/diffs
+	done
+    done
+done
+
+######################################
+## Move Physics Performance Results ##
+######################################
+
+# Make SimTrack Validation directories
+simdir=("SIMVAL_MTV" "SIMVAL_MTV_SEED")
+simval=("SIMVAL" "SIMVALSEED")
+
+for((i=0;i<${#simdir[@]};++i));do
+
+mkdir -p ${dir}/${simdir[i]}
+mkdir -p ${dir}/${simdir[i]}/logx
+mkdir -p ${dir}/${simdir[i]}/diffs
+mkdir -p ${dir}/${simdir[i]}/nHits
+mkdir -p ${dir}/${simdir[i]}/score
+
+# Move text file dumps for SimTrack Validation
+for build in "${val_builds[@]}"
+do echo ${!build} | while read -r bN bO
+    do
+	vBase=${val_arch}_${sample}_${bN}
+	mv "validation"_${vBase}_${simval[i]}/"totals_validation"_${vBase}_${simval[i]}.txt ${dir}/${simdir[i]}
+    done
+done
+
+# Move dummy CMSSW text file (SimTrack Validation)
+vBase=${val_arch}_${sample}_CMSSW
+mv validation_${vBase}_${simval[i]}/totals_validation_${vBase}_${simval[i]}.txt ${dir}/${simdir[i]}
+
+# Move rate plots for SimTrack Validation
+for rate in eff ineff_brl ineff_trans ineff_ec dr fr
+do
+    for pt in 0p0 0p9 2p0
+    do
+	for var in phi eta nLayers
+	do 
+	    mv ${val_arch}_${sample}_${rate}_${var}_${whichcands}_"pt"${pt}_${simval[i]}.png ${dir}/${simdir[i]}
+	done
+    done
+
+    # only copy pt > 0 for pt rate plots
+    for var in pt pt_zoom
+    do 
+	mv ${val_arch}_${sample}_${rate}_${var}_${whichcands}_"pt0p0"_${simval[i]}.png ${dir}/${simdir[i]}
+    done
+
+    mv ${val_arch}_${sample}_${rate}_"pt_logx"_${whichcands}_"pt0p0"_${simval[i]}.png ${dir}/${simdir[i]}/logx
+done
+
+# Move kinematic diff plots for SimTrack Validation
+for coll in bestmatch allmatch
+do 
+    for var in nHits invpt phi eta
+    do
+	for pt in 0p0 0p9 2p0
+	do
+	    mv ${val_arch}_${sample}_${coll}_"d"${var}_${whichcands}_"pt"${pt}_${simval[i]}.png ${dir}/${simdir[i]}/diffs
+	done
+    done
+done
+
+# Move track quality plots for SimTrack Validation (nHits,score)
+for coll in allreco fake bestmatch allmatch
+do 
+    for pt in 0p0 0p9 2p0
+    do
+	for qual in nHits score
+	do
+	    mv ${val_arch}_${sample}_${coll}_${qual}_${whichcands}_"pt"${pt}_${simval[i]}.png ${dir}/${simdir[i]}/${qual}
+	done
+    done
+done
+done
+
+# Final message
+echo "Finished collecting benchmark plots into ${dir}!"
diff --git a/RecoTracker/MkFitCore/standalone/web/copyphp.sh b/RecoTracker/MkFitCore/standalone/web/copyphp.sh
new file mode 100755
index 0000000000000..6446edcf77aa3
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/copyphp.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+dir=${1}
+
+#cp index.php into all subdirectories
+find ${dir} -mindepth 0 -type d -exec cp web/index.php {} \;
diff --git a/RecoTracker/MkFitCore/standalone/web/index.php b/RecoTracker/MkFitCore/standalone/web/index.php
new file mode 100644
index 0000000000000..f12aa9ad544f6
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/index.php
@@ -0,0 +1,84 @@
+<html>
+<head>
+<title><?php echo getcwd(); ?></title>
+<style type='text/css'>
+body {
+    font-family: "Candara", sans-serif;
+    font-size: 9pt;
+    line-height: 10.5pt;
+}
+div.pic h3 { 
+    font-size: 11pt;
+    margin: 0.5em 1em 0.2em 1em;
+}
+div.pic p {
+    font-size: 11pt;
+    margin: 0.2em 1em 0.1em 1em;
+}
+div.pic {
+    display: block;
+    float: left;
+    background-color: white;
+    border: 1px solid #ccc;
+    padding: 2px;
+    text-align: center;
+    margin: 2px 10px 10px 2px;
+    -moz-box-shadow: 7px 5px 5px rgb(80,80,80);    /* Firefox 3.5 */
+    -webkit-box-shadow: 7px 5px 5px rgb(80,80,80); /* Chrome, Safari */
+    box-shadow: 7px 5px 5px rgb(80,80,80);         /* New browsers */  
+}
+a { text-decoration: none; color: rgb(80,0,0); }
+a:hover { text-decoration: underline; color: rgb(255,80,80); }
+</style>
+</head>
+<body>
+<h1><?php echo getcwd(); ?></h1>
+<h2><a name="plots">Plots</a></h2>
+<p><form>Filter: <input type="text" name="match" size="30" value="<?php if (isset($_GET['match'])) print htmlspecialchars($_GET['match']);  ?>" /><input type="Submit" value="Go" /></form></p>
+<div>
+<?php
+$displayed = array(); // names already rendered in the plot grid
+if (!empty($_GET['noplots'])) { // empty() tolerates a missing "noplots" key instead of raising an undefined-key notice
+    print "Plots will not be displayed.\n";
+} else {
+    $other_exts = array('.pdf', '.cxx', '.eps', '.root', '.txt'); // companion formats linked under each plot
+    $filenames = glob("*.png"); sort($filenames);
+    foreach ($filenames as $filename) {
+        if (isset($_GET['match']) && !fnmatch('*'.$_GET['match'].'*', $filename)) continue; // apply the Filter box
+        array_push($displayed, $filename);
+        print "<div class='pic'>\n";
+        print "<h3><a href=\"$filename\">$filename</a></h3>";
+        print "<a href=\"$filename\"><img src=\"$filename\" style=\"border: none; width: 300px; \"></a>";
+        $others = array();
+        foreach ($other_exts as $ex) {
+            $other_filename = str_replace('.png', $ex, $filename);
+            if (file_exists($other_filename)) {
+                array_push($others, "<a class=\"file\" href=\"$other_filename\">[" . $ex . "]</a>");
+                if ($ex != '.txt') array_push($displayed, $other_filename); // .txt companions are not marked as displayed
+            }
+        }
+        if ($others) print "<p>Also as ".implode(', ',$others)."</p>";
+        print "</div>";
+    }
+}
+?>
+</div>
+<div style="display: block; clear:both;">
+<h2><a name="files">Other files</a></h2>
+<ul>
+<?php
+foreach (glob("*") as $filename) { // list every entry not marked as displayed (or everything when plots are disabled)
+    if (!empty($_GET['noplots']) || !in_array($filename, $displayed)) { // empty() avoids an undefined-key notice when "noplots" is absent
+        if (isset($_GET['match']) && !fnmatch('*'.$_GET['match'].'*', $filename)) continue;
+        if (is_dir($filename)) {
+            print "<li>[DIR] <a href=\"$filename\">$filename</a></li>";
+        } else {
+            print "<li><a href=\"$filename\">$filename</a></li>";
+        }
+    }
+}
+?>
+</ul>
+</div>
+</body>
+</html>
diff --git a/RecoTracker/MkFitCore/standalone/web/makereadable.sh b/RecoTracker/MkFitCore/standalone/web/makereadable.sh
new file mode 100755
index 0000000000000..50411b17fcbd8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/makereadable.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+dir=${1}
+
+fs setacl ${dir} webserver:afs read
+afind ${dir} -t d -e "fs setacl -dir {} -acl webserver:afs read"
diff --git a/RecoTracker/MkFitCore/standalone/web/move-benchmarks.sh b/RecoTracker/MkFitCore/standalone/web/move-benchmarks.sh
new file mode 100755
index 0000000000000..9af48b47fa230
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/move-benchmarks.sh
@@ -0,0 +1,40 @@
+#! /bin/bash
+# Collect benchmark plots into ${dir}, add index.php, ship the directory to LXPLUS, then remove the local copies.
+# command line input
+dir=${1:-"benchmarks"} # Main output dir name
+suite=${2:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+afs_or_eos=${3:-"eos"} # which user space to use: afs or eos
+lxpuser=${4:-${USER}} # LXPLUS user name handed to tarAndSendToLXPLUS.sh
+useARCH=${5:-0} # passed through to common-variables.sh and the collect script
+multi=${6:-0} # > 0 selects collectBenchmarks-multi.sh and appends "-multi" to ${dir}
+
+collect=collectBenchmarks.sh
+if [[ "${multi}" -gt 0 ]]
+then
+    dir=${dir}"-multi"
+    collect=collectBenchmarks-multi.sh
+fi
+
+# source global variables
+source xeon_scripts/common-variables.sh "${suite}" "${useARCH}"
+source xeon_scripts/init-env.sh
+
+# First collect all plots and text files into common dir
+echo "Moving plots and text files locally to ${dir}"
+"./web/${collect}" "${dir}" "${suite}" "${useARCH}"
+
+# Next copy index.php into ouput dir
+echo "Copying index.php into ${dir}"
+./web/copyphp.sh "${dir}"
+
+# Then copy to lxplus; stop before the cleanup below if the helper reports failure through its exit status
+echo "Moving plots and text files remotely to lxplus"
+./web/tarAndSendToLXPLUS.sh "${dir}" "${suite}" "${afs_or_eos}" "${lxpuser}" || { echo "Transfer to LXPLUS failed; keeping local files in ${dir}" >&2; exit 1; }
+
+# Final cleanup of directory
+echo "Removing local files"
+./xeon_scripts/trashSKL-SP.sh "${useARCH}"
+rm -rf -- "${dir:?}"
+
+# Final message
+echo "Finished moving benchmark plots to LXPLUS!"
diff --git a/RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu-extrectracks.sh b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu-extrectracks.sh
new file mode 100755
index 0000000000000..cae7090430559
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu-extrectracks.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Stage the SKL-SP_CMSSW_10mu plots and *_cmssw.txt totals under ${dir}, copy ${dir} to ~/www on LXPLUS, then clean up locally.
+dir=${1:-plots} # local staging directory; removed at the end
+outdir=${dir}/cmsswval-10mu-extrectracks
+base=SKL-SP_CMSSW_10mu
+
+echo "Moving plots and text files locally to ${outdir}"
+for region in ECN2 ECN1 BRL ECP1 ECP2 FullDet
+do
+    fulldir=${outdir}/${region}
+    mkdir -p "${fulldir}"
+
+    mv "${base}_${region}"_*.png "${fulldir}"
+    for build in BH STD CE
+    do
+        vbase=validation_${base}_${region}_${build}
+        mv "${vbase}/totals_${vbase}_cmssw.txt" "${fulldir}"
+    done
+done
+
+mv "${outdir}"/FullDet/*png "${outdir}"/FullDet/*txt "${outdir}"
+rm -rf "${outdir}/FullDet"
+
+host=${2:-kmcdermo}@lxplus.cern.ch # optional 2nd argument overrides the LXPLUS user name
+whost=${host}":~/www"
+echo "Moving plots and text files remotely to ${whost}"
+scp -r "${dir}" "${whost}" || { echo "scp to ${whost} failed; keeping local files" >&2; exit 1; }
+
+echo "Executing remotely ./makereadable.sh ${outdir}"
+ssh "${host}" bash -c "'
+cd www || exit 1
+./makereadable.sh ${outdir}
+exit
+'"
+
+echo "Removing local files"
+for region in ECN2 ECN1 BRL ECP1 ECP2 FullDet
+do
+    for build in BH STD CE
+    do
+        testbase=${base}_${region}_${build}
+        rm -rf "validation_${testbase}"
+        rm -rf "log_${testbase}_NVU8int_NTH24_cmsswval.txt"
+    done
+done
+
+rm -rf -- "${dir:?}"
diff --git a/RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu.sh b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu.sh
new file mode 100755
index 0000000000000..521025f6ae818
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-10mu.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+dir=${1:-plots}
+outdir=${dir}/cmsswval-10mu
+base=SKL-SP_CMSSW_10mu
+
+echo "Moving plots and text files locally to ${outdir}"
+for seed in SimSeed CMSSeed
+do
+    for region in ECN2 ECN1 BRL ECP1 ECP2 FullDet
+    do
+	fulldir=${outdir}/${seed}/${region}
+	mkdir -p ${fulldir}
+ 
+	srbase=${seed}_${region}
+	mv ${base}_${srbase}_*.png ${fulldir}
+	for build in BH STD CE
+	do
+	    vbase=validation_${base}_${srbase}_${build}
+	    mv ${vbase}/totals_${vbase}.txt ${fulldir}
+	done
+    done
+    sdir=${outdir}/${seed}
+    mv ${sdir}/FullDet/*png ${sdir}/FullDet/*txt ${sdir}
+    rm -rf ${sdir}/FullDet
+done
+
+host=kmcdermo@lxplus.cern.ch
+whost=${host}":~/www"
+echo "Moving plots and text files remotely to ${whost}"
+scp -r ${dir} ${whost}
+
+echo "Executing remotely ./makereadable.sh ${outdir}"
+ssh ${host} bash -c "'
+cd www
+./makereadable.sh ${outdir}
+exit
+'"
+
+echo "Removing local files"
+for seed in SimSeed CMSSeed
+do
+    for region in ECN2 ECN1 BRL ECP1 ECP2 FullDet
+    do
+	srbase=${seed}_${region}
+	for build in BH STD CE
+	do
+	    testbase=${base}_${srbase}_${build}
+	    rm -rf validation_${testbase}
+	    rm -rf log_${testbase}_NVU8int_NTH24_val.txt 
+	done
+    done
+done
+
+rm -rf ${dir}
diff --git a/RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar-extrectracks.sh b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar-extrectracks.sh
new file mode 100755
index 0000000000000..e8005fed30ff8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar-extrectracks.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Stage the SKL-SP_CMSSW_TTbar plots and *_cmssw.txt totals under ${dir}, copy ${dir} to ~/www on LXPLUS, then clean up locally.
+dir=${1:-plots} # local staging directory; removed at the end
+outdir=${dir}/cmsswval-ttbar-extrectracks
+base=SKL-SP_CMSSW_TTbar
+
+echo "Moving plots and text files locally to ${outdir}"
+for ttbar in NoPU PU35 PU70
+do
+    fulldir=${outdir}/${ttbar}
+    mkdir -p "${fulldir}"
+
+    mv "${base}_${ttbar}"_*.png "${fulldir}"
+    for build in BH STD CE
+    do
+        vbase=validation_${base}_${ttbar}_${build}
+        mv "${vbase}/totals_${vbase}_cmssw.txt" "${fulldir}"
+    done
+done
+
+host=${2:-kmcdermo}@lxplus.cern.ch # optional 2nd argument overrides the LXPLUS user name
+whost=${host}":~/www"
+echo "Moving plots and text files remotely to ${whost}"
+scp -r "${dir}" "${whost}" || { echo "scp to ${whost} failed; keeping local files" >&2; exit 1; }
+
+echo "Executing remotely ./makereadable.sh ${outdir}"
+ssh "${host}" bash -c "'
+cd www || exit 1
+./makereadable.sh ${outdir}
+exit
+'"
+
+echo "Removing local files"
+for ttbar in NoPU PU35 PU70
+do
+    for build in BH STD CE
+    do
+        testbase=${base}_${ttbar}_${build}
+        rm -rf "validation_${testbase}"
+        rm -rf "log_${testbase}_NVU8int_NTH24_cmsswval.txt"
+    done
+done
+
+rm -rf -- "${dir:?}"
diff --git a/RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar.sh b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar.sh
new file mode 100755
index 0000000000000..ed86a414b795f
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/move-cmsswval-ttbar.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+dir=${1:-plots}
+outdir=${dir}/cmsswval-ttbar
+base=SKL-SP_CMSSW_TTbar
+
+echo "Moving plots and text files locally to ${outdir}"
+for ttbar in NoPU PU35 PU70
+do
+    for seed in SimSeed CMSSeed
+    do
+	fulldir=${outdir}/${ttbar}/${seed}
+	mkdir -p ${fulldir}
+	
+	mv ${base}_${ttbar}_${seed}_*.png ${fulldir}
+	for build in BH STD CE
+	do
+	    vbase=validation_${base}_${ttbar}_${seed}_${build}
+	    mv ${vbase}/totals_${vbase}.txt ${fulldir}
+	done
+    done
+done
+
+host=kmcdermo@lxplus.cern.ch
+whost=${host}":~/www"
+echo "Moving plots and text files remotely to ${whost}"
+scp -r ${dir} ${whost}
+
+echo "Executing remotely ./makereadable.sh ${outdir}"
+ssh ${host} bash -c "'
+cd www
+./makereadable.sh ${outdir}
+exit
+'"
+
+echo "Removing local files"
+for ttbar in NoPU PU35 PU70
+do
+    for seed in SimSeed CMSSeed
+    do
+	for build in BH STD CE
+	do
+	    testbase=${base}_${ttbar}_${seed}_${build}
+	    rm -rf validation_${testbase}
+	    rm -rf log_${testbase}_NVU8int_NTH24_val.txt 
+	done
+    done
+done
+
+rm -rf ${dir}
diff --git a/RecoTracker/MkFitCore/standalone/web/move-toymcval.sh b/RecoTracker/MkFitCore/standalone/web/move-toymcval.sh
new file mode 100755
index 0000000000000..ac5032ce2c626
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/move-toymcval.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Stage the SKL-SP_ToyMC_FullDet plots and totals under ${dir}, copy ${dir} to ~/www on LXPLUS, then clean up locally.
+dir=${1:-plots} # local staging directory; removed at the end
+outdir=${dir}/toymcval
+base=SKL-SP_ToyMC_FullDet
+
+echo "Moving plots and text files locally to ${outdir}"
+mkdir -p "${outdir}"
+mv "${base}"_*.png "${outdir}"
+for build in BH STD CE
+do
+    vbase=validation_${base}_${build}
+    mv "${vbase}/totals_${vbase}.txt" "${outdir}"
+done
+
+host=${2:-kmcdermo}@lxplus.cern.ch # optional 2nd argument overrides the LXPLUS user name
+whost=${host}":~/www"
+echo "Moving plots and text files remotely to ${whost}"
+scp -r "${dir}" "${whost}" || { echo "scp to ${whost} failed; keeping local files" >&2; exit 1; }
+
+echo "Executing remotely ./makereadable.sh ${outdir}"
+ssh "${host}" bash -c "'
+cd www || exit 1
+./makereadable.sh ${outdir}
+exit
+'"
+
+echo "Removing local files"
+for build in BH STD CE
+do
+    testbase=${base}_${build}
+    rm -rf "validation_${testbase}"
+    rm -rf "log_${testbase}_NVU8int_NTH24_val.txt"
+done
+
+rm -rf -- "${dir:?}"
diff --git a/RecoTracker/MkFitCore/standalone/web/tarAndSendToLXPLUS.sh b/RecoTracker/MkFitCore/standalone/web/tarAndSendToLXPLUS.sh
new file mode 100755
index 0000000000000..50eeb692da09b
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/web/tarAndSendToLXPLUS.sh
@@ -0,0 +1,63 @@
+#! /bin/bash
+
+# command line input
+dir=${1:-"benchmarks"} # Main output dir name
+suite=${2:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+afs_or_eos=${3:-"eos"} # which user space to use: afs or eos
+lxpuser=${4:=${USER}}
+
+# in case this is run alone
+source xeon_scripts/common-variables.sh ${suite}
+source xeon_scripts/init-env.sh
+
+# first tar the directory to be sent
+echo "Tarring plot directory"
+tarball=${dir}.tar.gz
+tar -zcvf ${tarball} ${dir}
+
+# vars for LXPLUS
+LXPLUS_HOST=${lxpuser}@lxplus.cern.ch
+LXPLUS_OUTDIR=www
+LXPLUS_WORKDIR=user/${lxpuser:0:1}/${lxpuser}
+
+if [[ "${afs_or_eos}" == "afs" ]]
+then
+    LXPLUS_WORKDIR=/afs/cern.ch/${LXPLUS_WORKDIR}
+elif [[ "${afs_or_eos}" == "eos" ]]
+then
+    LXPLUS_WORKDIR=/eos/${LXPLUS_WORKDIR}
+else
+    echo "${afs_or_eos} is not a valid option! Choose either 'afs' or 'eos'! Exiting..."
+    exit
+fi
+
+# then send it!
+scp -r ${tarball} ${LXPLUS_HOST}:${LXPLUS_WORKDIR}/${LXPLUS_OUTDIR}
+
+# Make outdir nice and pretty
+if [[ "${afs_or_eos}" == "afs" ]]
+then
+    echo "Unpacking tarball and executing remotely: ./makereadable.sh ${dir}"
+    SSHO ${LXPLUS_HOST} bash -c "'
+    cd ${LXPLUS_WORKDIR}/${LXPLUS_OUTDIR}
+    tar -zxvf ${tarball}
+    ./makereadable.sh ${dir}
+    rm -rf ${tarball}
+    exit
+    '"
+else
+    echo "Unpacking tarball"
+    SSHO ${LXPLUS_HOST} bash -c "'
+    cd ${LXPLUS_WORKDIR}/${LXPLUS_OUTDIR}
+    tar -zxvf ${tarball}
+    rm -rf ${tarball}
+    exit
+    '"
+fi
+
+# remove local tarball
+echo "Removing local tarball of plots"
+rm ${tarball}
+
+# Final message
+echo "Finished tarring and sending plots to LXPLUS!"
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh
new file mode 100755
index 0000000000000..0dede9bcb6318
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh
@@ -0,0 +1,62 @@
+#! /bin/bash
+
+########################
+## Command Line Input ##
+########################
+
+remote_arch=${1} # remote platform: SNB, KNL, LNX-G or LNX-S (the values accepted by the check below)
+suite=${2:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${3:-0} # passed through to common-variables.sh and to the remotely executed benchmark script
+lnxuser=${4:-${USER}} # passed through as well; defaults to the local ${USER}
+
+###################
+## Configuration ##
+###################
+
+source xeon_scripts/common-variables.sh ${suite} ${useARCH} ${lnxuser} 
+source xeon_scripts/init-env.sh
+
+# architecture dependent settings: remote host and scratch dir, built from the *_HOST/*_WORKDIR/*_TEMPDIR variables
+if [[ "${remote_arch}" == "SNB" ]]
+then
+    HOST=${SNB_HOST}
+    DIR=${SNB_WORKDIR}/${SNB_TEMPDIR}
+elif [[ "${remote_arch}" == "KNL" ]]
+then
+    HOST=${KNL_HOST}
+    DIR=${KNL_WORKDIR}/${KNL_TEMPDIR}
+elif [[ "${remote_arch}" == "LNX-G" ]]
+then
+    HOST=${LNXG_HOST}
+    DIR=${LNXG_WORKDIR}/${LNXG_TEMPDIR}
+elif [[ "${remote_arch}" == "LNX-S" ]]
+then
+    HOST=${LNXS_HOST}
+    DIR=${LNXS_WORKDIR}/${LNXS_TEMPDIR}
+else
+    echo ${remote_arch} "is not a valid architecture! Exiting..."
+    exit 1 # non-zero so a caller can detect the failure; a bare 'exit' returns echo's status (0)
+fi
+
+###################
+## Run The Tests ##
+###################
+
+# execute tests remotely
+echo "Executing ${remote_arch} tests remotely..."
+SSHO ${HOST} bash -c "'
+cd ${DIR}
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh ${remote_arch} ${suite} ${useARCH} ${lnxuser} 
+exit
+'"
+
+# copy logs back for plotting
+echo "Copying logs back from ${remote_arch} for plotting"
+scp ${HOST}:${DIR}/log_${remote_arch}_${sample}_*.txt .
+
+# destroy tmp files
+echo "Removing tmp dir on ${remote_arch} remotely"
+SSHO ${HOST} bash -c "'
+rm -rf ${DIR}
+exit
+'"
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh
new file mode 100755
index 0000000000000..046d05d5f2afa
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh
@@ -0,0 +1,162 @@
+#! /bin/bash
+
+###########
+## Input ##
+###########
+
+ben_arch=${1} # platform to benchmark: SNB, KNL, SKL-SP, LNX-G or LNX-S (see the settings chain below)
+suite=${2:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${3:-0} # passed through to common-variables.sh
+lnxuser=${4:-${USER}} # passed through to common-variables.sh; defaults to the local ${USER}
+
+###################
+## Configuration ##
+###################
+
+## Source environment and common variables
+source xeon_scripts/common-variables.sh ${suite} ${useARCH} ${lnxuser}
+source xeon_scripts/init-env.sh
+
+## Platform specific settings: make options, max threads / vector width, and the scan points used by the loops below
+if [[ "${ben_arch}" == "SNB" ]]
+then
+    mOpt="-j 12"
+    maxth=24
+    maxvu=8
+    declare -a nths=("1" "2" "4" "6" "8" "12" "16" "20" "24")
+    declare -a nvus=("1" "2" "4" "8")
+    declare -a nevs=("1" "2" "4" "8" "12")
+elif [[ "${ben_arch}" == "KNL" ]]
+then
+    mOpt="-j 64 AVX_512:=1"
+    maxth=256
+    maxvu=16
+    declare -a nths=("1" "2" "4" "8" "16" "32" "64" "96" "128" "160" "192" "224" "256")
+    declare -a nvus=("1" "2" "4" "8" "16")
+    declare -a nevs=("1" "2" "4" "8" "16" "32" "64" "128")
+elif [[ "${ben_arch}" == "SKL-SP" ]]
+then
+    mOpt="-j 32 AVX_512:=1"
+    maxth=64
+    maxvu=16
+    declare -a nths=("1" "2" "4" "8" "16" "32" "48" "64")
+    declare -a nvus=("1" "2" "4" "8" "16")
+    declare -a nevs=("1" "2" "4" "8" "16" "32" "64")
+elif [[ "${ben_arch}" == "LNX-G" ]]
+then
+    mOpt="-j 32 AVX_512:=1"
+    maxth=64
+    maxvu=16
+    declare -a nths=("1" "2" "4" "8" "16" "32" "48" "64")
+    declare -a nvus=("1" "2" "4" "8" "16")
+    declare -a nevs=("1" "2" "4" "8" "16" "32" "64")
+elif [[ "${ben_arch}" == "LNX-S" ]]
+then
+    mOpt="-j 32 AVX_512:=1"
+    maxth=64
+    maxvu=16
+    declare -a nths=("1" "2" "4" "8" "16" "32" "48" "64")
+    declare -a nvus=("1" "2" "4" "8" "16")
+    declare -a nevs=("1" "2" "4" "8" "16" "32" "64")
+else
+    echo ${ben_arch} "is not a valid architecture! Exiting..."
+    exit 1 # non-zero so a caller can detect the failure; a bare 'exit' returns echo's status (0)
+fi
+
+## Common file setup
+dir=/data2/slava77/samples/
+subdir=2021/11834.0_TTbar_14TeV+2021/AVE_50_BX01_25ns/
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+nevents=20
+
+## Common executable setup
+minth=1
+minvu=1
+seeds="--cmssw-n2seeds"
+exe="./mkFit/mkFit ${seeds} --input-file ${dir}/${subdir}/${file}"
+
+## Common output setup
+dump=DumpForPlots
+base=${ben_arch}_${sample}
+
+####################
+## Run Benchmarks ##
+####################
+
+## compile with appropriate options
+make distclean ${mOpt}
+make ${mOpt}
+
+## Parallelization Benchmarks
+for nth in "${nths[@]}"
+do
+    for build in "${th_builds[@]}"
+    do echo ${!build} | while read -r bN bO
+	do
+	    ## Base executable
+	    oBase=${base}_${bN}
+	    bExe="${exe} --build-${bO} --num-thr ${nth}"
+
+	    ## Building-only benchmark
+	    echo "${oBase}: Benchmark [nTH:${nth}, nVU:${maxvu}int]"
+	    ${bExe} --num-events ${nevents} >& log_${oBase}_NVU${maxvu}int_NTH${nth}.txt
+
+	    ## Multiple Events in Flight benchmark
+	    check_meif=$( CheckIfMEIF ${build} )
+	    if [[ "${check_meif}" == "true" ]]
+	    then
+		for nev in "${nevs[@]}"
+		do
+		    if (( ${nev} <= ${nth} ))
+		    then
+			nproc=$(( ${nevents} * ${nev} ))
+			echo "${oBase}: Benchmark [nTH:${nth}, nVU:${maxvu}int, nEV:${nev}]"
+			${bExe} --silent --num-thr-ev ${nev} --num-events ${nproc} --remove-dup --use-dead-modules --backward-fit >& log_${oBase}_NVU${maxvu}int_NTH${nth}_NEV${nev}.txt
+		    fi
+		done
+	    fi
+
+	    ## nHits validation
+	    check_text=$( CheckIfText ${build} )
+	    if (( ${nth} == ${maxth} )) && [[ "${check_text}" == "true" ]]
+	    then
+		echo "${oBase}: Text dump for plots [nTH:${nth}, nVU:${maxvu}int]"
+		${bExe} --dump-for-plots --quality-val --read-cmssw-tracks --num-events ${nevents} --remove-dup --use-dead-modules --backward-fit >& log_${oBase}_NVU${maxvu}int_NTH${nth}_${dump}.txt
+	    fi
+	done
+    done
+done
+
+## Vectorization Benchmarks
+for nvu in "${nvus[@]}"
+do
+    make clean ${mOpt}
+    make ${mOpt} USE_INTRINSICS:=-DMPT_SIZE=${nvu}
+
+    for build in "${vu_builds[@]}"
+    do echo ${!build} | while read -r bN bO
+	do
+	    ## Common base executable
+	    oBase=${base}_${bN}
+	    bExe="${exe} --build-${bO} --num-thr ${minth} --num-events ${nevents}"
+
+	    ## Building-only benchmark
+	    echo "${oBase}: Benchmark [nTH:${minth}, nVU:${nvu}]"
+	    ${bExe} >& log_${oBase}_NVU${nvu}_NTH${minth}.txt
+
+	    ## nHits validation
+	    check_text=$( CheckIfText ${build} )
+	    if (( ${nvu} == ${minvu} )) && [[ "${check_text}" == "true" ]]
+	    then
+		echo "${oBase}: Text dump for plots [nTH:${minth}, nVU:${nvu}]"
+		${bExe} --dump-for-plots --quality-val --read-cmssw-tracks >& log_${oBase}_NVU${nvu}_NTH${minth}_${dump}.txt
+	    fi
+	done
+    done
+done
+
+## Final cleanup
+make distclean ${mOpt}
+
+## Final message
+echo "Finished compute benchmarks on ${ben_arch}!"
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmarkMIC-build.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmarkMIC-build.sh
new file mode 100755
index 0000000000000..47c30618894f9
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/benchmarkMIC-build.sh
@@ -0,0 +1,83 @@
+#! /bin/bash
+
+[ -e "$BIN_DATA_PATH" ] || BIN_DATA_PATH=/data2/slava77/samples/2021/11834.0_TTbar_14TeV+2021/
+fin=${BIN_DATA_PATH}/AVE_70_BX01_25ns/memoryFile.fv6.default.211008-c6b7c67.bin
+
+runBenchmark()
+{
+    # ${1} == "1" swaps the listed seed option for --cmssw-n2seeds; disabled extra seed entry: "sim --cmssw-simseeds"
+    for sV in "see --cmssw-stdseeds"; do
+        echo $sV | while read -r seedTag seedOpt; do
+            [ "${1}" == "1" ] && seedOpt="--cmssw-n2seeds"
+            for bV in "BH bh" "STD std" "CE ce"; do
+                echo $bV | while read -r buildTag buildOpt; do
+                    logStem=${base}_${seedTag}_${buildTag}
+                    for nThreads in 1 4 8 16 32; do
+                        echo "${logStem}: benchmark [nTH:${nThreads}, nVU:8]"
+                        time ./mkFit/mkFit --input-file ${fin} --build-${buildOpt} ${seedOpt} --num-thr ${nThreads} >& log_${logStem}_NVU8int_NTH${nThreads}_benchmark.txt
+                    done
+                done
+            done
+        done
+    done
+}
+
+#cleanup first
+make clean
+make distclean
+
+make -j 12
+export base=SNB_CMSSW_PU70_clean
+echo Run default build with base = ${base}
+runBenchmark 0
+
+export base=SNB_CMSSW_PU70_clean_cleanSeed
+echo Run CLEAN_SEEDS build with base = ${base}
+runBenchmark 1
+make clean
+make distclean
+
+
+make -j 12 CPPUSERFLAGS+="-march=native -mtune=native" CXXUSERFLAGS+="-march=native -mtune=native"
+export base=SNB_CMSSW_PU70_clean_native
+echo Run native build with base = ${base}
+runBenchmark 0
+
+export base=SNB_CMSSW_PU70_clean_native_cleanSeed
+echo Run CLEAN_SEEDS build with base = ${base}
+runBenchmark 1
+make clean
+make distclean
+
+fin10mu=/data2/slava77/samples/2021/10muPt0p2to10HS/memoryFile.fv6.default.211008-c6b7c67.bin
+
+runBenchmark10mu()
+{
+    # one run per seed label and build type on ${fin10mu}, always with 8 threads; ${1} == "1" forces --cmssw-n2seeds
+    for sV in "sim --cmssw-seeds" "see --cmssw-stdseeds"; do
+        echo $sV | while read -r seedTag seedOpt; do
+            [ "${1}" == "1" ] && seedOpt="--cmssw-n2seeds"
+            for bV in "BH bh" "STD std" "CE ce"; do
+                echo $bV | while read -r buildTag buildOpt; do
+                    logStem=${base}_${seedTag}_10muPt0p2to10HS_${buildTag}
+                    nThreads=8
+                    echo "${logStem}: benchmark [nTH:${nThreads}, nVU:8]"
+                    time ./mkFit/mkFit --input-file ${fin10mu} --build-${buildOpt} ${seedOpt} --num-thr ${nThreads} >& log_${logStem}_NVU8int_NTH${nThreads}_benchmark.txt
+                done
+            done
+        done
+    done
+}
+
+#this part has a pretty limited value due to the tiny load in the muon samples
+make -j 12
+export base=SNB_CMSSW_10mu
+echo Run default build with base = ${base}
+runBenchmark10mu 1
+
+make clean
+make distclean
+
+
+unset base
+
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/check.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/check.sh
new file mode 100755
index 0000000000000..728e426b55e6c
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/check.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+echo $PATH
+echo "++++++++++++++++++++++++++++++++++"
+which gcc
+echo "----------------------------------"
+gcc --version
+echo "=================================="
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/common-variables.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/common-variables.sh
new file mode 100755
index 0000000000000..8fea7a43bfaef
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/common-variables.sh
@@ -0,0 +1,212 @@
+#! /bin/bash
+
+# command line input
+suite=${1:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${2:-0} # which computer cluster to run on. 0=phi3, 1=lnx, 2= phi3+lnx, 3=phi123, 4=phi123+lnx
+lnxuser=${3:-${USER}} #username for lnx computers
+
+# samples
+export sample=${sample:-"CMSSW_TTbar_PU50"}
+
+# Validation architecture
+export val_arch=SKL-SP
+
+# vars for KNL
+export KNL_HOST=${USER}@phi2.t2.ucsd.edu
+export KNL_WORKDIR=/data1/work/${USER}
+export KNL_TEMPDIR=tmp
+
+# vars for SNB
+export SNB_HOST=${USER}@phi1.t2.ucsd.edu
+export SNB_WORKDIR=/data2/nfsmic/${USER}
+export SNB_TEMPDIR=tmp
+
+# vars for LNX7188
+export LNXG_HOST=${lnxuser}@lnx7188.classe.cornell.edu
+export LNXG_WORKDIR=/home/${lnxuser}
+export LNXG_TEMPDIR=/tmp/tmp7188
+
+# vars for LNX4108
+export LNXS_HOST=${lnxuser}@lnx4108.classe.cornell.edu
+export LNXS_WORKDIR=/home/${lnxuser}
+export LNXS_TEMPDIR=/tmp/tmp4108
+
+# SSH wrapper: runs ssh with StrictHostKeyChecking=no and stdin taken from /dev/null, forwarding all arguments
+function SSHO()
+{
+    ssh -o StrictHostKeyChecking=no < /dev/null "$@"
+}
+export -f SSHO
+
+#################
+## Build Types ##
+#################
+
+export BH="BH bh"
+export STD="STD std"
+export CE="CE ce"
+
+# which set of builds to use based on input from command line
+if [[ "${suite}" == "full" ]]
+then
+    declare -a ben_builds=(BH STD CE)
+    declare -a val_builds=(BH STD CE)
+elif [[ "${suite}" == "forPR" ]]
+then
+    declare -a ben_builds=(BH CE)
+    declare -a val_builds=(STD CE)
+elif [[ "${suite}" == "forConf" ]]
+then
+    declare -a ben_builds=(CE)
+    declare -a val_builds=(CE)
+elif [[ "${suite}" == "val" || "${suite}" == "valMT1" ]]
+then
+    declare -a ben_builds=() # validation-only suites run no benchmark builds
+    declare -a val_builds=(STD CE)
+else
+    echo ${suite} "is not a valid benchmarking suite option! Exiting..."
+    exit 1 # when this file is sourced this also ends the sourcing script; non-zero marks it as a failure
+fi
+
+# set dependent arrays
+th_builds=("${ben_builds[@]}") ## for parallelization tests : all benchmarks!
+vu_builds=("${ben_builds[@]}") ## for vectorization tests : all benchmarks!
+meif_builds=() ## for multiple-events-in-flight tests : filled in the loop below
+text_builds=() ## for text dump comparison tests : filled later in this file
+
+# walk ben_builds to pick the meif builds : only do CE
+for ben_build in "${ben_builds[@]}"
+do
+    case "${ben_build}" in
+        CE)
+            meif_builds+=("${ben_build}")
+            ;;
+    esac
+done
+
+# NOTE(review): bash does not pass array values to child processes through
+# the environment, so this export presumably only matters inside this shell;
+# confirm before relying on it from a separately launched script.
+export ben_builds val_builds th_builds vu_builds meif_builds
+
+# th checking: echoes "true" if the build name ${1} is listed in th_builds, "false" otherwise
+function CheckIfTH ()
+{
+    local build=${1}
+    local verdict="false"
+
+    # note: th_build is not declared local, so it stays set in the calling shell
+    for th_build in "${th_builds[@]}"
+    do
+        # skip entries that do not match; stop at the first one that does
+        [[ "${th_build}" == "${build}" ]] || continue
+        verdict="true"
+        break
+    done
+
+    echo "${verdict}"
+}
+export -f CheckIfTH
+
+# vu checking: echoes "true" if the build name ${1} is listed in vu_builds, "false" otherwise
+function CheckIfVU ()
+{
+    local build=${1}
+    local verdict="false"
+
+    # note: vu_build is not declared local, so it stays set in the calling shell
+    for vu_build in "${vu_builds[@]}"
+    do
+        # skip entries that do not match; stop at the first one that does
+        [[ "${vu_build}" == "${build}" ]] || continue
+        verdict="true"
+        break
+    done
+
+    echo "${verdict}"
+}
+export -f CheckIfVU
+
+# meif checking: echoes "true" if the build name ${1} is listed in meif_builds, "false" otherwise
+function CheckIfMEIF ()
+{
+    local build=${1}
+    local verdict="false"
+
+    # note: meif_build is not declared local, so it stays set in the calling shell
+    for meif_build in "${meif_builds[@]}"
+    do
+        # skip entries that do not match; stop at the first one that does
+        [[ "${meif_build}" == "${build}" ]] || continue
+        verdict="true"
+        break
+    done
+
+    echo "${verdict}"
+}
+export -f CheckIfMEIF
+
+# set text dump builds: need builds matched in both TH and VU tests
+for ben_build in "${ben_builds[@]}"
+do
+    check_th=$( CheckIfTH ${ben_build} )
+    check_vu=$( CheckIfVU ${ben_build} )
+
+    # keep the build only when both membership checks printed "true"
+    [[ "${check_th}" == "true" && "${check_vu}" == "true" ]] || continue
+    text_builds+=("${ben_build}")
+    # (check_th / check_vu are plain globals and keep the values from the last pass)
+done
+
+export text_builds
+
+# text checking: echoes "true" if the build name ${1} is listed in text_builds, "false" otherwise
+function CheckIfText ()
+{
+    local build=${1}
+    local verdict="false"
+
+    # note: text_build is not declared local, so it stays set in the calling shell
+    for text_build in "${text_builds[@]}"
+    do
+        # skip entries that do not match; stop at the first one that does
+        [[ "${text_build}" == "${build}" ]] || continue
+        verdict="true"
+        break
+    done
+
+    echo "${verdict}"
+}
+export -f CheckIfText
+
+Base_Test="NVU1_NTH1" # text-dump tag paired with every architecture below, next to its NVU<..>int_NTH<..> tag
+if [[ ${useARCH} -eq 0 ]] # 0 = phi3 only
+then
+    arch_array=(SKL-SP)
+    arch_array_textdump=("SKL-SP ${Base_Test}" "SKL-SP NVU16int_NTH64")
+    arch_array_benchmark=("SKL-SP skl-sp")
+elif [[ ${useARCH} -eq 1 ]] # 1 = lnx only
+then
+    arch_array=(LNX-G LNX-S)
+    arch_array_textdump=("LNX-G ${Base_Test}" "LNX-G NVU16int_NTH64" "LNX-S ${Base_Test}" "LNX-S NVU16int_NTH64")
+    arch_array_benchmark=("LNX-G lnx-g" "LNX-S lnx-s")
+elif [[ ${useARCH} -eq 2 ]] # 2 = phi3 + lnx
+then
+    arch_array=(SKL-SP LNX-G LNX-S)
+    arch_array_textdump=("SKL-SP ${Base_Test}" "SKL-SP NVU16int_NTH64" "LNX-G ${Base_Test}" "LNX-G NVU16int_NTH64" "LNX-S ${Base_Test}" "LNX-S NVU16int_NTH64")
+    arch_array_benchmark=("SKL-SP skl-sp" "LNX-G lnx-g" "LNX-S lnx-s")
+elif [[ ${useARCH} -eq 3 ]] # 3 = phi123
+then
+    arch_array=(SNB KNL SKL-SP)
+    arch_array_textdump=("SNB ${Base_Test}" "SNB NVU8int_NTH24" "KNL ${Base_Test}" "KNL NVU16int_NTH256" "SKL-SP ${Base_Test}" "SKL-SP NVU16int_NTH64")
+    arch_array_benchmark=("SNB snb" "KNL knl" "SKL-SP skl-sp")
+elif [[ ${useARCH} -eq 4 ]] # 4 = phi123 + lnx
+then
+    arch_array=(SNB KNL SKL-SP LNX-G LNX-S)
+    arch_array_textdump=("SNB ${Base_Test}" "SNB NVU8int_NTH24" "KNL ${Base_Test}" "KNL NVU16int_NTH256" "SKL-SP ${Base_Test}" "SKL-SP NVU16int_NTH64" "LNX-G ${Base_Test}" "LNX-G NVU16int_NTH64" "LNX-S ${Base_Test}" "LNX-S NVU16int_NTH64")
+    arch_array_benchmark=("SNB snb" "KNL knl" "SKL-SP skl-sp" "LNX-G lnx-g" "LNX-S lnx-s")
+else
+    echo "${useARCH} is not a valid useARCH option! Exiting..."
+    exit 1 # when this file is sourced this also ends the sourcing script; non-zero marks it as a failure
+fi
+export arch_array arch_array_textdump arch_array_benchmark
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/data-dir-location.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/data-dir-location.sh
new file mode 100755
index 0000000000000..a77d6289c44de
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/data-dir-location.sh
@@ -0,0 +1,19 @@
+#! /bin/bash
+
+# To be sourced where needed
+
+host=$(hostname)
+
+# Pick the ToyMC data directory (dir) and the simulation thread count
+# (n_sim_thr) from the host name. Hosts that match none of the patterns
+# fall back to a per-user directory under /tmp.
+case "${host}" in
+  phi2.t2.*)
+    dir=/data1/scratch/toymc; n_sim_thr=128 ;;
+  phiphi.t2.*)
+    dir=/data/nfsmic/scratch/toymc; n_sim_thr=12 ;;
+  phi3.t2.*)
+    dir=/data2/scratch/toymc; n_sim_thr=64 ;;
+  *)
+    dir=/tmp/${USER}/toymc; n_sim_thr=8 ;;
+esac
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/debug-test.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/debug-test.sh
new file mode 100755
index 0000000000000..c15a6862a4ade
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/debug-test.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+## Use this script to turn auto debug printouts. Warning: debug statements are ifdef'ed and not maintained
+
+## initialize
+source xeon_scripts/common-variables.sh
+source xeon_scripts/init-env.sh
+
+
+## Common setup
+dir=/data2/slava77/samples/2021/
+subdir=10muPt0p2to10HS
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+
+## config for debug
+nevents=10
+maxth=1
+maxvu=1
+maxev=1
+
+## base executable
+exe="./mkFit/mkFit --cmssw-n2seeds --num-thr ${maxth} --num-thr-ev ${maxev} --input-file ${dir}/${subdir}/${file} --num-events ${nevents}"
+
+## Compile once
+mOpt="DEBUG:=1 WITH_ROOT:=1 USE_INTRINSICS:=-DMPT_SIZE=${maxvu} AVX_512:=1"
+make distclean ${mOpt}
+make -j 32 ${mOpt}
+
+## test each build routine to be sure it works! (each entry is "<log label> <--build-* suffix>")
+for bV in "BH bh" "STD std" "CE ce"
+do echo ${bV} | while read -r bN bO
+    do
+        oBase=${val_arch}_${sample}_${bN}
+        bExe="${exe} --build-${bO}"
+
+        echo "${oBase}: Debug [nTH:${maxth}, nVU:${maxvu}, nEV:${maxev}]" # literal label: this script defines no test-name variable
+        ${bExe} >& log_${oBase}_NVU${maxvu}_NTH${maxth}_NEV${maxev}_"DEBUG".txt
+    done
+done
+
+## clean up
+make distclean ${mOpt}
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/generateToyMCsamples.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/generateToyMCsamples.sh
new file mode 100755
index 0000000000000..bd8d475fcca48
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/generateToyMCsamples.sh
@@ -0,0 +1,32 @@
+#! /bin/bash
+
+. "$(dirname "${BASH_SOURCE[0]}")/data-dir-location.sh" # sibling file; a bare name would be looked up via PATH, then the cwd
+: "${dir:?data-dir-location.sh did not set dir}" # stop here rather than let mkdir/mv below act on an empty path
+mkdir -p ${dir}
+
+# Building test [1 event in flight]
+if [ ! -f ${dir}/simtracks_fulldet_100x2p5k.bin ]; then
+    echo "++++Generating 2.5k tracks/event * 100 events for ToyMC building tests with one event in flight++++"
+    make -j 12
+    ./mkFit/mkFit --num-thr-sim ${n_sim_thr} --num-events 100 --num-tracks 2500 --output-file simtracks_fulldet_100x2p5k.bin
+    mv simtracks_fulldet_100x2p5k.bin ${dir}/
+    make clean
+fi
+
+# Building test [n Events in flight]
+if [ ! -f ${dir}/simtracks_fulldet_5kx2p5k.bin ]; then
+    echo "++++Generating 2.5k tracks/event * 5k events for ToyMC building tests with nEvents in flight++++"
+    make -j 12
+    ./mkFit/mkFit --num-thr-sim ${n_sim_thr} --num-events 5000 --num-tracks 2500 --output-file simtracks_fulldet_5kx2p5k.bin
+    mv simtracks_fulldet_5kx2p5k.bin ${dir}/
+    make clean    
+fi
+
+# Validation tests
+if [ ! -f ${dir}/simtracks_fulldet_500x2p5k_val.bin ]; then
+    echo "++++Generating 2.5k tracks/event * 500 events for ToyMC validation tests++++"
+    make -j 12 WITH_ROOT:=1
+    ./mkFit/mkFit --num-thr-sim ${n_sim_thr} --sim-val --num-events 500 --num-tracks 2500 --output-file simtracks_fulldet_500x2p5k_val.bin
+    mv simtracks_fulldet_500x2p5k_val.bin ${dir}/
+    make clean
+fi
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/init-env.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/init-env.sh
new file mode 100644
index 0000000000000..7a963fc2264ba
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/init-env.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+source /cvmfs/cms.cern.ch/slc7_amd64_gcc820/lcg/root/6.18.04-bcolbf/etc/profile.d/init.sh
+export TBB_GCC=/cvmfs/cms.cern.ch/slc7_amd64_gcc820/external/tbb/2019_U9
+# workaround for https://github.com/cms-sw/cmsdist/issues/5574
+# remove when we switch to a ROOT build where that issue is fixed
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBJPEG_TURBO_ROOT/lib64
+source /opt/intel/bin/compilervars.sh intel64
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/init-gcc10-env.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/init-gcc10-env.sh
new file mode 100644
index 0000000000000..fe07dc9822fd8
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/init-gcc10-env.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+source /cvmfs/cms.cern.ch/slc7_amd64_gcc10/lcg/root/6.20.06-cms/etc/profile.d/init.sh
+export TBB_GCC=/cvmfs/cms.cern.ch/slc7_amd64_gcc10/external/tbb/2020_U2
+# workaround for https://github.com/cms-sw/cmsdist/issues/5574
+# remove when we switch to a ROOT build where that issue is fixed
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBJPEG_TURBO_ROOT/lib64
+### source /opt/intel/bin/compilervars.sh intel64
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/runBenchmark.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/runBenchmark.sh
new file mode 100755
index 0000000000000..14637c063276c
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/runBenchmark.sh
@@ -0,0 +1,117 @@
+#! /bin/bash
+
+##### Command Line Input #####
+suite=${1:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${2:-0}  # 0 phi3 only, 1 lnx only, 2 phi3 + lnx, 3 phi123, 4 phi123 + lnx
+lnxuser=${3:-${USER}}
+
+##### Initialize Benchmarks #####
+source xeon_scripts/common-variables.sh ${suite} ${useARCH} ${lnxuser}
+source xeon_scripts/init-env.sh
+make distclean
+
+##### Check Settings: print turbo / governor state and, if enabled, abort unless both are as required #####
+assert_settings=true # executed as a command by the 'if' below, so "false" skips the enforcement
+echo "--------Showing System Settings--------"
+echo "turbo status: "$(cat /sys/devices/system/cpu/intel_pstate/no_turbo)
+echo "scaling governor setting: "$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
+echo "--------End System Settings ------------"
+if ${assert_settings}
+then
+echo "Ensuring correct settings"
+if [[ $(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor) != "performance" ]] # only cpu0's governor is read
+then
+echo "performance mode is OFF. Exiting"
+exit 1
+fi
+if [[ $(cat /sys/devices/system/cpu/intel_pstate/no_turbo) == "0" ]] # no_turbo == 0 is treated as "turbo enabled"
+then
+echo "Turbo is ON. Exiting"
+exit 1
+fi
+fi
+sleep 3 ## so you can see the settings
+
+##### Launch Tests #####
+if [[ ${useARCH} -eq 1 ]] || [[ ${useARCH} -eq 2 ]] || [[ ${useARCH} -eq 4 ]]
+then
+echo "Tar and send to LNX7188"
+./xeon_scripts/tarAndSendToRemote.sh LNX-G ${suite} ${useARCH} ${lnxuser}
+if [ $? -eq 1 ]; then
+echo "lnx7188 has bad settings. Please fix them and try again"
+exit 1
+fi
+
+echo "Run benchmarking on LNX7188 concurrently with SKL-SP benchmarks" 
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh LNX-G ${suite} ${useARCH} ${lnxuser} >& benchmark_lnx-g_dump.txt &
+
+echo "Tar and send to LNX4108"
+./xeon_scripts/tarAndSendToRemote.sh LNX-S ${suite} ${useARCH} ${lnxuser}
+if [ $? -eq 1 ]; then
+echo "lnx4108 has bad settings. Please fix them and try again"
+exit 1
+fi
+
+echo "Run benchmarking on LNX4108 concurrently with SKL-SP benchmarks" 
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh LNX-S ${suite} ${useARCH} ${lnxuser} >& benchmark_lnx-s_dump.txt &
+fi
+
+if [[ ${useARCH} -eq 3 ]] || [[ ${useARCH} -eq 4 ]]
+then
+
+echo "Tar and send to KNL"
+./xeon_scripts/tarAndSendToRemote.sh KNL ${suite} ${useARCH} ${lnxuser}
+if [ $? -eq 1 ]; then
+echo "KNL has bad settings. Please fix them and try again"
+exit 1
+fi
+
+echo "Run benchmarking on KNL concurrently with SKL-SP benchmarks" 
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh KNL ${suite} ${useARCH} ${lnxuser} >& benchmark_knl_dump.txt &
+
+echo "Tar and send to SNB"
+./xeon_scripts/tarAndSendToRemote.sh SNB ${suite} ${useARCH} ${lnxuser}
+if [ $? -eq 1 ]; then
+echo "SNB has bad settings. Please fix them and try again"
+exit 1
+fi
+
+echo "Run benchmarking on SNB concurrently with SKL-SP benchmarks" 
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build-remote.sh SNB ${suite} ${useARCH} ${lnxuser} >& benchmark_snb_dump.txt &
+fi
+
+if [[ ${useARCH} -eq 0 ]] || [[ ${useARCH} -eq 2 ]] || [[ ${useARCH} -eq 3 ]] || [[ ${useARCH} -eq 4 ]]
+then
+echo "Run benchmarking on SKL-SP"
+./xeon_scripts/benchmark-cmssw-ttbar-fulldet-build.sh SKL-SP ${suite} ${useARCH} ${lnxuser} 
+fi
+
+##### Validation tests #####
+echo "Running ROOT based validation"
+./val_scripts/validation-cmssw-benchmarks.sh ${suite} --mtv-like-val
+
+if [[ ${useARCH} -eq 1 ]] || [[ ${useARCH} -eq 2 ]]
+then
+echo "Waiting for LNX-G and LNX-S"
+elif [[ ${useARCH} -eq 3 ]] 
+then
+echo "Waiting for KNL and SNB"
+elif  [[ ${useARCH} -eq 4 ]]
+then 
+echo "Waiting for LNX-G, LNX-S, KNL, and SNB"
+fi
+wait
+
+##### Benchmark Plots #####
+echo "Producing benchmarking plots"
+./plotting/benchmarkPlots.sh ${suite} ${useARCH} ${lnxuser} 
+
+##### Plots from Text Files #####
+echo "Producing plots from text files"
+./plotting/textDumpPlots.sh ${suite} ${useARCH} ${lnxuser} 
+
+##### Final cleanup #####
+make distclean
+
+##### Final message #####
+echo "Finished benchmarking and validation suite!"
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-common.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-common.sh
new file mode 100644
index 0000000000000..98c4514d3311a
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-common.sh
@@ -0,0 +1,144 @@
+#!/bin/bash
+
+##########################
+## Global Configuration ##
+##########################
+
+## Instruction sets defined with "make" command line settings
+export SSE3="CPPUSERFLAGS+=\"-march=core2\" CXXUSERFLAGS+=\"-march=core2\" VEC_GCC=\"-march=core2\" VEC_ICC=\"-march=core2\""
+export AVX=""
+export AVX2="AVX2:=1"
+export AVX512="AVX_512:=1"
+
+## Output options
+export tmp_ext="log"
+export ext="txt"
+
+## Tmp output labels
+export nth_label="nTH"
+export nev_label="nEV"
+export njob_label="nJOB"
+export ncore_label="nCORE"
+export nproc_label="nPROC"
+export nloop_label="nLOOP"
+
+######################
+## N Physical Cores ##
+######################
+
+function GetNCore ()
+{
+    local nth=${1}
+    local maxcore=${2}
+
+    # echo the smaller of nth and maxcore: start from the cap, lower it if nth fits
+    local ncore="${maxcore}"
+    if (( ${nth} <= ${maxcore} ))
+    then
+        ncore="${nth}"
+    fi
+
+    echo "${ncore}"
+}
+export -f GetNCore
+
+####################
+## Core Test Loop ##
+####################
+
+function MkFitLoop ()
+{
+    local min_duration=${1} # minimum run time; added to the epoch-seconds start time below
+    local test_exe=${2} # command line to launch; expanded unquoted, so it may carry its own options
+    local nproc=${3} # events per pass, divided across the jobs
+    local njob=${4} # number of background jobs launched per pass
+    
+    local start_time=$( date +"%s" )
+    local end_time=$(( ${start_time} + ${min_duration} ))
+    
+    ## compute number of events to process per job (integer division: a remainder is not assigned to any job)
+    local nproc_per_job=$(( ${nproc} / ${njob} ))
+
+    ## global variable to be read back in main loop to keep track of number of times processed
+    nloop=0
+
+    ## run stress test for at least min_duration with an emulated do-while loop (the pass itself sits in the while condition list): https://stackoverflow.com/a/16491478
+    while
+
+    ## launch jobs in parallel to background : let scheduler put jobs all around
+    for (( ijob = 0 ; ijob < ${njob} ; ijob++ ))
+    do
+	## want each mkFit job to process different events, so compute an offset
+	local start_event=$(( ${nproc_per_job} * ${ijob} ))
+
+        ## run the executable
+	${test_exe} --num-events ${nproc_per_job} --start-event ${start_event} &
+    done
+
+    ## wait for all background processes to finish --> non-ideal as we would rather "stream" jobs launching
+    wait
+    
+    ## increment nloop counter
+    ((nloop++))
+
+    ## perform check now to end loop : the status of the (( )) test is the loop condition, so another pass runs only while current time <= projected end time
+    local current_time=$( date +"%s" )
+    (( ${current_time} <= ${end_time} ))
+    do
+	continue
+    done
+}
+export -f MkFitLoop
+
+########################################
+## Dump Info about Test into Tmp File ##
+########################################
+
+function AppendTmpFile ()
+{
+    local tmp_output_file=${1}
+    local ncore=${2} nproc=${3} nloop=${4}
+    # append one "<label> <value>" line per quantity, using the exported *_label names
+    {
+        echo "${ncore_label} ${ncore}"
+        echo "${nproc_label} ${nproc}"
+        echo "${nloop_label} ${nloop}"
+    } >> "${tmp_output_file}"
+}
+export -f AppendTmpFile
+
+####################################
+## Dump Tmp Output into Main File ##
+####################################
+
+function DumpIntoFile ()
+{
+    local tmp_output_file=${1}
+    local output_file=${2}
+
+    ## bookkeeping values: second space-separated field of the lines matching each label
+    local nproc=$( grep "${nproc_label}" "${tmp_output_file}" | cut -d " " -f 2 )
+    local nloop=$( grep "${nloop_label}" "${tmp_output_file}" | cut -d " " -f 2 )
+    local ncore=$( grep "${ncore_label}" "${tmp_output_file}" | cut -d " " -f 2 )
+
+    ## wall-clock time: second word of the line matching "real", split at "m" and "s"
+    read -ra time_arr < <(grep "real" "${tmp_output_file}")
+    local wall=${time_arr[1]}
+    local wall_min=$( cut -d "m" -f 1 <<< "${wall}" )
+    local wall_sec=$( cut -d "m" -f 2 <<< "${wall}" | cut -d "s" -f 1 )
+
+    ## minutes and seconds combined into seconds
+    local wall_total=$( bc -l <<< "${wall_min} * 60 + ${wall_sec}" )
+
+    ## total events processed, then the share per physical core
+    local events_all=$(( ${nloop} * ${nproc} ))
+    local events_per_core=$( bc -l <<< "${events_all} / ${ncore}" )
+
+    ## divide time by total events processed per core
+    local time_per_event=$( bc -l <<< "${wall_total} / ${events_per_core}" )
+
+    ## dump result into final output file
+    ## (test_label is not a parameter: it is read from the calling scope)
+    echo "${test_label} ${time_per_event}" >> "${output_file}"
+}
+export -f DumpIntoFile
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-main.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-main.sh
new file mode 100755
index 0000000000000..c7729687f0064
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/stress-test-main.sh
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+###############################################################################
+##                                    README!                                ##
+##                                                                           ##
+## Stress test script to run on phiN, testing different thread/MEIF combos   ##
+## with different instruction set architecture extensions, using default     ## 
+## settings of benchmarking scripts for clone engine track finding + CMSSW   ##
+## n2-seeding, input sample ttbar PU70.                                      ##
+##                                                                           ##
+## Can vary thread/MEIF combos, input file, seeds, building algo by editing  ##
+## this script manually.                                                     ##
+##                                                                           ##
+## Command line inputs are which platform to stress (ben_arch), enable       ##
+## TurboBoost OFF/ON (no_turbo), the min time per test (min_duration), the   ##   
+## time between each test (sleep_time), and the number of events to process  ## 
+## per physical core (base_nevents).                                         ##
+##                                                                           ##
+## N.B.: base_nevents MUST be a number divisible by 4! This is because the   ##
+## max physical cores on KNL is 64, but the highest nTH/nJOB test is 256.    ##       
+##                                                                           ##
+## Output file lists stress test time per event processed per physical core. ##
+###############################################################################
+
+########################
+## Source Environment ##
+########################
+
+source xeon_scripts/init-env.sh
+source xeon_scripts/stress-test-common.sh
+
+###################
+## Configuration ##
+###################
+
+## Command line inputs
+ben_arch=${1} # SNB (phi1), KNL (phi2), SKL-SP (phi3)
+no_turbo=${2:-1} # Turbo OFF or ON --> default is OFF!
+min_duration=${3:-1800} # min time spent for each test [s]
+sleep_time=${4:-300} # sleep time between tests [s]
+base_nevents=${5:-120} # number of events to process per physical core, must be divisible by 4
+
+## enforce the divisible-by-4 rule up front, before anything is built or run
+if ! [[ "${base_nevents}" =~ ^[0-9]+$ ]] || (( 10#${base_nevents} % 4 != 0 )); then
+    echo "base_nevents=${base_nevents} is not a number divisible by 4! Exiting..." >&2; exit 1
+fi
+## platform specific settings: make parallelism, max physical cores, builds and tests
+if [[ "${ben_arch}" == "SNB" ]]; then
+    mOpt="-j 12"
+    maxcore=12
+    declare -a instruction_sets=(SSE3 AVX)
+    declare -a thread_combo_arr=("1 1" "6 6" "12 6" "12 12" "24 6" "24 12" "24 24")
+    declare -a njob_arr=("12" "24")
+elif [[ "${ben_arch}" == "KNL" ]]; then
+    mOpt="-j 64"
+    maxcore=64
+    declare -a instruction_sets=(SSE3 AVX AVX2 AVX512)
+    declare -a thread_combo_arr=("1 1" "32 32" "64 32" "64 64" "128 32" "128 64" "128 128" "256 32" "256 64" "256 128" "256 256")
+    declare -a njob_arr=("32" "64" "128" "256")
+elif [[ "${ben_arch}" == "SKL-SP" ]]; then
+    mOpt="-j 32"
+    maxcore=32
+    declare -a instruction_sets=(SSE3 AVX AVX2 AVX512)
+    declare -a thread_combo_arr=("1 1" "16 16" "32 16" "32 32" "48 16" "48 32" "64 16" "64 32" "64 64")
+    declare -a njob_arr=("32" "64")
+else # report on stderr and exit non-zero so that a calling script can notice
+    echo "${ben_arch} is not a valid architecture! Exiting..." >&2; exit 1
+fi
+
+## Common file setup
+dir=/data2/slava77/samples/
+subdir=2021/11834.0_TTbar_14TeV+2021/AVE_50_BX01_25ns/
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+
+## Common mkFit options
+seeds="--cmssw-n2seeds"
+algo="--build-ce"
+opts="--silent --remove-dup --use-dead-modules --backward-fit"
+base_exe="./mkFit/mkFit --input-file ${dir}/${subdir}/${file} ${seeds} ${algo} ${opts}"
+
+## Output options (ext is not set in this file -- presumably by a sourced script)
+base_outname="stress_test"
+output_file="${base_outname}_results.${ext}"
+
+## Set TurboBoost option: the requested value is written to intel_pstate/no_turbo
+echo "${no_turbo}" | PATH=/bin sudo /usr/bin/tee /sys/devices/system/cpu/intel_pstate/no_turbo > /dev/null 2>&1
+
+###############
+## Run tests ##
+###############
+
+## loop instruction sets (i.e. build minimally)
+for instruction_set in "${instruction_sets[@]}"
+do
+    ## fresh build for this instruction set: ${!instruction_set} yields the value of
+    ## the variable named after the set; like ${mOpt} it is expanded unquoted
+    make distclean
+    make ${mOpt} ${!instruction_set}
+
+    ## multi-threaded tests: every entry reads "<nThreads> <nEventsInFlight>"
+    for thread_combo in "${thread_combo_arr[@]}"
+    do echo "${thread_combo}" | while read -r nth nev
+        do
+            ## ncore as reported by GetNCore, and base_nevents events for each core
+            ncore=$( GetNCore "${nth}" "${maxcore}" ) 
+            nproc=$(( ${base_nevents} * ${ncore} ))
+
+            ## label identifying this test, announced on stdout
+            test_label="${instruction_set}_${nth_label}${nth}_${nev_label}${nev}"
+            echo "Running stress test for: ${test_label}..."
+
+            ## command line under test and tmp file for its timing
+            test_exe="${base_exe} --num-thr ${nth} --num-thr-ev ${nev}"
+            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+            ## the test's own output is dropped; what `time` prints on the stderr of
+            ## the group is kept in the tmp file: https://stackoverflow.com/a/2409214
+            { time MkFitLoop "${min_duration}" "${test_exe}" "${nproc}" "1" > /dev/null 2>&1 ; } 2> "${tmp_output_file}"
+
+            ## pause to let machine cool down between each test
+            sleep "${sleep_time}"
+
+            ## add ncore, nproc and nloop to the tmp file
+            AppendTmpFile "${tmp_output_file}" "${ncore}" "${nproc}" "${nloop}"
+        done # end loop over reading thread combo
+    done # end loop over thread combos
+
+    ## special tests: njob is passed to MkFitLoop as its last argument, and mkFit
+    ## itself is limited to one thread and one event in flight
+    for njob in "${njob_arr[@]}"
+    do
+        ## ncore as reported by GetNCore, and base_nevents events for each core
+        ncore=$( GetNCore "${njob}" "${maxcore}" ) 
+        nproc=$(( ${base_nevents} * ${ncore} ))
+
+        ## label identifying this test, announced on stdout
+        test_label="${instruction_set}_${njob_label}${njob}"
+        echo "Running stress test for: ${test_label}..."
+
+        ## command line under test and tmp file for its timing
+        test_exe="${base_exe} --num-thr 1 --num-thr-ev 1"
+        tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+        ## same capture of the `time` report as for the thread combos:
+        ## https://stackoverflow.com/a/2409214
+        { time MkFitLoop "${min_duration}" "${test_exe}" "${nproc}" "${njob}" > /dev/null 2>&1 ; } 2> "${tmp_output_file}"
+
+        ## add ncore, nproc and nloop to the tmp file
+        AppendTmpFile "${tmp_output_file}" "${ncore}" "${nproc}" "${nloop}"
+
+        ## pause to let machine cool down between each test
+        sleep "${sleep_time}"
+    done # end loop over njob for single thread
+
+done # end loop over instruction set
+
+#######################
+## Make Final Output ##
+#######################
+
+## init output file: the meta-data header replaces any previous content
+{
+    echo -e "Stress test meta-data\n"
+    echo "ben_arch: ${ben_arch}"
+    echo "no_turbo: ${no_turbo}"
+    echo "min_duration [s]: ${min_duration}"
+    echo "sleep_time [s]: ${sleep_time}"
+    echo "base_exe: ${base_exe}"
+    echo "base_nevents: ${base_nevents}"
+    echo -e "\nResults\n"
+} > "${output_file}"
+
+## loop over all tmp files in the order the tests ran, one results line each
+for instruction_set in "${instruction_sets[@]}"
+do
+    ## nThread/MEIF tests
+    for thread_combo in "${thread_combo_arr[@]}"
+    do echo "${thread_combo}" | while read -r nth nev
+        do
+            ## DumpIntoFile labels its results line with the global test_label
+            test_label="${instruction_set}_${nth_label}${nth}_${nev_label}${nev}"
+            echo "Computing time for: ${test_label}"
+
+            ## tmp file written for this test
+            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+            ## dump into output file
+            DumpIntoFile "${tmp_output_file}" "${output_file}"
+        done # end loop over reading thread combo
+    done # end loop over thread combos
+
+    ## single thread njob tests
+    for njob in "${njob_arr[@]}"
+    do
+        ## DumpIntoFile labels its results line with the global test_label
+        test_label="${instruction_set}_${njob_label}${njob}"
+        echo "Computing time for: ${test_label}"
+
+        ## tmp file written for this test
+        tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+        ## dump into output file
+        DumpIntoFile "${tmp_output_file}" "${output_file}"
+    done # end loop over njob array
+
+done # end loop over instruction set
+
+#########################################
+## Clean up and Restore Default Status ##
+#########################################
+
+make distclean
+## writing 1 to no_turbo turns TurboBoost off again, the default of this script
+printf '1\n' | PATH=/bin sudo /usr/bin/tee /sys/devices/system/cpu/intel_pstate/no_turbo > /dev/null 2>&1
+
+###################
+## Final Message ##
+###################
+printf '%s\n' "Finished stress test!"
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/tarAndSendToRemote.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/tarAndSendToRemote.sh
new file mode 100755
index 0000000000000..60e738a4d17f2
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/tarAndSendToRemote.sh
@@ -0,0 +1,103 @@
+#! /bin/bash
+
+########################
+## Command Line Input ##
+########################
+
+remote_arch=${1} # SNB, KNL, LNX-G, LNX-S
+suite=${2:-"forPR"} # which set of benchmarks to run: full, forPR, forConf
+useARCH=${3:-0}
+lnxuser=${4:-${USER}}
+
+###################
+## Configuration ##
+###################
+
+source xeon_scripts/common-variables.sh ${suite} ${useARCH} ${lnxuser}
+source xeon_scripts/init-env.sh
+
+# remote host and scratch directory per architecture (the *_HOST, *_WORKDIR and *_TEMPDIR values are not set here -- presumably by the sourced scripts)
+if [[ "${remote_arch}" == "SNB" ]]
+then
+    HOST=${SNB_HOST}
+    DIR=${SNB_WORKDIR}/${SNB_TEMPDIR}
+elif [[ "${remote_arch}" == "KNL" ]]
+then
+    HOST=${KNL_HOST}
+    DIR=${KNL_WORKDIR}/${KNL_TEMPDIR}
+elif [[ "${remote_arch}" == "LNX-G" ]]
+then
+    HOST=${LNXG_HOST}
+    DIR=${LNXG_WORKDIR}/${LNXG_TEMPDIR}
+elif [[ "${remote_arch}" == "LNX-S" ]]
+then
+    HOST=${LNXS_HOST}
+    DIR=${LNXS_WORKDIR}/${LNXS_TEMPDIR}
+else # nothing can be sent anywhere: report on stderr and fail with a non-zero status
+    echo "${remote_arch} is not a valid architecture! Exiting..." >&2
+    exit 1
+fi
+
+##################
+## Tar and Send ##
+##################
+
+assert_settings=true
+echo "--------Showing System Settings--------"
+# \$( ) and \" are escaped so that they reach ${remote_arch} untouched and the
+# probes run there; only ${assert_settings} is expanded by the local shell
+echo "Checking system settings on ${remote_arch} remotely"
+SSHO ${HOST} bash -c "'
+echo \"--------Showing System Settings--------\"
+echo \"turbo status: \"\$(cat /sys/devices/system/cpu/intel_pstate/no_turbo)
+echo \"scaling governor setting: \"\$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
+echo \"--------End System Settings ------------\"
+if ${assert_settings}; then
+echo \"Ensuring correct settings\"
+if [[ \$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor) != \"performance\" ]]
+then
+echo \"performance mode is OFF. Exiting\"
+exit 1
+fi
+if [[ \$(cat /sys/devices/system/cpu/intel_pstate/no_turbo) == \"0\" ]]
+then
+echo \"Turbo is ON. Exiting\"
+exit 1
+fi
+fi
+sleep 3 ## so you can see the settings
+'"
+check_status=$?
+# as before this relies on SSHO returning the remote status; any failure (check or connection) stops here
+if [ "${check_status}" -ne 0 ]; then
+echo "killed"
+exit 1
+fi
+
+# pack the working directory, leaving out VCS data, archives, validation output, ROOT files, logs, plots and objects
+echo "Tarring directory for ${remote_arch}... make sure it is clean!"
+repo=mictest.tar.gz
+tar_excludes=(--exclude-vcs --exclude='*.gz' --exclude='validation*' --exclude='*.root' --exclude='log_*' --exclude='*.png' --exclude='*.o' --exclude='*.om' --exclude='*.d' --exclude='*.optrpt')
+tar "${tar_excludes[@]}" -zcvf ${repo} *
+
+# create the destination directory on the remote host (${DIR} is expanded locally)
+echo "Making tmp dir on ${remote_arch} remotely"
+SSHO ${HOST} bash -c "'
+mkdir -p ${DIR}
+exit
+'"
+
+# ship the tarball into that directory
+echo "Copying tarball to ${remote_arch}"
+scp ${repo} ${HOST}:${DIR}
+
+# unpack it remotely and delete the remote copy of the tarball
+echo "Untarring repo on ${remote_arch} remotely"
+SSHO ${HOST} bash -c "'
+cd ${DIR}
+tar -zxvf ${repo}
+rm ${repo}
+'"
+# the local tarball is not needed any longer either
+echo "Remove local repo tarball"
+rm ${repo}
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-common.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-common.sh
new file mode 100644
index 0000000000000..920c0f0bfb741
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-common.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+source xeon_scripts/stress-test-common.sh
+
+####################
+## Core Test Loop ##
+####################
+
+function MkFitThroughput ()
+{
+    ## Run ${3} concurrent copies of the command line ${1}, each with --num-events ${2};
+    ## the `time` report of copy i goes to "${4}.i". Returns once all copies are done.
+    local test_exe=${1} nproc=${2} njob=${3} tmp_output_file=${4}
+    local ijob # keeps the loop counter out of the caller's scope
+
+    ## launch jobs in parallel to background : let scheduler put jobs all around
+    for (( ijob = 0 ; ijob < ${njob} ; ijob++ ))
+    do
+        ## test_exe is a whole command line and is therefore expanded unquoted
+        { time ${test_exe} --num-events "${nproc}" > /dev/null 2>&1 ; } 2> "${tmp_output_file}.${ijob}" &
+    done
+
+    ## wait for all background processes to finish --> non-ideal as we would rather "stream" jobs launching
+    wait
+}
+export -f MkFitThroughput
+
+####################################
+## Dump Tmp Output into Main File ##
+####################################
+
+function DumpIntoFileThroughput ()
+{
+    ## Append "<test_label> <events per second>" for the throughput test recorded in tmp
+    ## file ${1} to results file ${2}; returns 1 and appends nothing if ${1} is incomplete
+    local tmp_output_file=${1} output_file=${2}
+    local time_arr=() mins secs njob nproc throughput total_time=0
+    local num='^[0-9]+([.][0-9]+)?$' pos='^[1-9][0-9]*$'
+
+    ## get wall-clock time: sum of the "real <min>m<sec>s" entries of all jobs
+    while read -ra time_arr
+    do
+        mins=${time_arr[1]%%m*}
+        secs=${time_arr[1]#*m}; secs=${secs%%s*}
+        [[ ${mins} =~ ${num} && ${secs} =~ ${num} ]] || { total_time=0; break; }
+        total_time=$( bc -l <<< "${total_time} + ${mins} * 60 + ${secs}" )
+    done < <(grep -F "real" "${tmp_output_file}")
+    ## number of jobs (stored under the nloop label) and the recorded nproc value
+    njob=$( grep "${nloop_label}" "${tmp_output_file}" | cut -d " " -f 2 )
+    nproc=$( grep "${nproc_label}" "${tmp_output_file}" | cut -d " " -f 2 )
+    ## divide total events by time, unless a zero or missing value makes that meaningless
+    if ! [[ ${total_time} =~ [1-9] && ${njob} =~ ${pos} && ${nproc} =~ ${pos} ]]
+    then
+        echo "DumpIntoFileThroughput: skipping ${test_label}, incomplete data in ${tmp_output_file}" >&2
+        return 1
+    fi
+    throughput=$( bc -l <<< "(${njob} * ${nproc}) / ${total_time}" )
+    echo "${test_label} ${throughput}" >> "${output_file}"
+}
+export -f DumpIntoFileThroughput
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-main.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-main.sh
new file mode 100755
index 0000000000000..6362c8f19e41b
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/throughput-test-main.sh
@@ -0,0 +1,178 @@
+#!/bin/bash
+
+########################
+## Source Environment ##
+########################
+
+source xeon_scripts/init-env.sh
+source xeon_scripts/throughput-test-common.sh
+
+## Command line input: platform to benchmark -- KNL, SKL-SP, LNX-G or LNX-S
+ben_arch=${1}
+
+## platform specific settings: make parallelism, core count, instruction sets to
+## build and "<nThreads> <nEventsInFlight>" pairs to run
+case "${ben_arch}" in
+    KNL)
+        mOpt="-j 64"
+        maxcore=64
+        declare -a instruction_sets=(AVX512)
+        declare -a thread_combo_arr=("1 1" "2 2" "4 4" "8 8" "16 16" "32 32" "64 64" "128 128" "256 256")
+        ;;
+    SKL-SP | LNX-G | LNX-S)
+        ## these three platforms share one configuration
+        mOpt="-j 32"
+        maxcore=32
+        declare -a instruction_sets=(AVX512)
+        declare -a thread_combo_arr=("1 1" "2 2" "4 4" "8 8" "16 16" "32 32" "64 64")
+        ;;
+    *)
+        ## fail with a non-zero status so that a calling script can notice
+        echo "${ben_arch} is not a valid architecture! Exiting..." >&2
+        exit 1
+        ;;
+esac
+
+
+## Common file setup
+dir=/data2/slava77/samples/
+subdir=2021/11834.0_TTbar_14TeV+2021/AVE_50_BX01_25ns/
+file=memoryFile.fv6.default.211008-c6b7c67.bin
+## the run loop below derives its event counts from base_nevents
+#base_nevents=20 # 7/2 seconds
+base_nevents=2000 # 30/10 minutes
+
+## Common mkFit options
+seeds="--cmssw-n2seeds"
+algo="--build-ce"
+opts="--silent --loop-over-file --remove-dup --use-dead-modules --backward-fit"
+base_exe="./mkFit/mkFit --input-file ${dir}/${subdir}/${file} ${seeds} ${algo} ${opts}"
+
+## Output options (ext is not set in this file -- presumably by a sourced script)
+base_outname="throughput"
+output_file="${base_outname}_results.${ext}"
+
+###############
+## Run tests ##
+###############
+
+## loop instruction sets (i.e. build minimally)
+for instruction_set in "${instruction_sets[@]}"
+do
+    ## compile once, using settings for the given instruction set; ${!instruction_set}
+    ## yields the value of the variable named after the set
+    make distclean
+    make ${mOpt} ${!instruction_set}
+
+    ## read the input file four times over, discarding the data, so that it
+    ## is served from the memory caches during the tests
+    echo "Ensuring the input file is fully in the memory caches"
+    for _ in 1 2 3 4
+    do
+        dd if=${dir}/${subdir}/${file} of=/dev/null bs=10M
+    done
+
+    ## every entry of thread_combo_arr reads "<nThreads> <nEventsInFlight>"
+    for thread_combo in "${thread_combo_arr[@]}"
+    do echo "${thread_combo}" | while read -r nth nev
+        do
+            ##### first test: one job with nth threads and nev events in flight #####
+            ## compute total number of events to process
+            ncore=${nth}
+            nproc=$(( ${base_nevents} * ${ncore} ))
+
+            ## print out which test is being performed
+            test_label="${instruction_set}_${nth_label}${nth}_${nev_label}${nev}"
+            echo "$(date) Running throughput test for: ${test_label}..."
+
+            ## test executable and tmp file for its timing
+            test_exe="${base_exe} --num-thr ${nth} --num-thr-ev ${nev}"
+            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+            ## run the single job, then merge the per-job timing files into the tmp file
+            MkFitThroughput "${test_exe}" "${nproc}" "1" "${tmp_output_file}"
+            cat ${tmp_output_file}.* > ${tmp_output_file}
+
+            ## add other info about test to tmp file
+            AppendTmpFile "${tmp_output_file}" "${ncore}" "${nproc}" "1"
+
+            ##### second test: nth jobs, one thread and one event in flight each #####
+            njob=${nth}
+
+            ## print out which test is being performed
+            test_label="${instruction_set}_${njob_label}${njob}"
+            echo "$(date) Running throughput test for: ${test_label}..."
+
+            ## test executable and tmp file for its timing
+            test_exe="${base_exe} --num-thr 1 --num-thr-ev 1"
+            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+            ## run njob jobs of base_nevents events each, then merge their timing files
+            MkFitThroughput "${test_exe}" "${base_nevents}" "${njob}" "${tmp_output_file}"
+            cat ${tmp_output_file}.* > ${tmp_output_file}
+
+            ## add other info about test to tmp file; nproc still holds
+            ## base_nevents * nth from the first test
+            AppendTmpFile "${tmp_output_file}" "1" "${nproc}" "${njob}"
+
+        done # end loop over reading thread combo
+    done # end loop over thread combos
+
+done # end loop over instruction set
+
+#######################
+## Make Final Output ##
+#######################
+
+## init output file: the meta-data header replaces any previous content
+{
+    echo -e "Throughput test meta-data\n"
+    echo "ben_arch: ${ben_arch}"
+    echo "base_exe: ${base_exe}"
+    echo "base_nevents: ${base_nevents}"
+    echo -e "\nResults (events/s)\n"
+} > "${output_file}"
+
+## loop over all tmp files in the order the tests ran, one results line each
+for instruction_set in "${instruction_sets[@]}"
+do
+    ## every thread combo was run twice: as one nThread/MEIF job and as
+    ## njob = nth single-thread jobs
+    for thread_combo in "${thread_combo_arr[@]}"
+    do echo "${thread_combo}" | while read -r nth nev
+        do
+            ## DumpIntoFileThroughput labels its results line with the global test_label
+            test_label="${instruction_set}_${nth_label}${nth}_${nev_label}${nev}"
+            echo "Computing time for: ${test_label}"
+
+            ## tmp file written for this test
+            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+            ## dump into output file
+            DumpIntoFileThroughput "${tmp_output_file}" "${output_file}"
+
+            ## same steps for the single-thread njob test
+            njob=${nth}
+            test_label="${instruction_set}_${njob_label}${njob}"
+            echo "Computing time for: ${test_label}"
+
+            ## tmp file written for this test
+            tmp_output_file="${base_outname}_${test_label}.${tmp_ext}"
+
+            ## dump into output file
+            DumpIntoFileThroughput "${tmp_output_file}" "${output_file}"
+        done # end loop over reading thread combo
+    done # end loop over thread combos
+done # end loop over instruction set
+
+#########################################
+## Clean up and Restore Default Status ##
+#########################################
+
+make distclean
+
+###################
+## Final Message ##
+###################
+## time-stamped like the progress messages of the run loop
+printf '%s Finished throughput test!\n' "$(date)"
diff --git a/RecoTracker/MkFitCore/standalone/xeon_scripts/trashSKL-SP.sh b/RecoTracker/MkFitCore/standalone/xeon_scripts/trashSKL-SP.sh
new file mode 100755
index 0000000000000..a7c04a8410015
--- /dev/null
+++ b/RecoTracker/MkFitCore/standalone/xeon_scripts/trashSKL-SP.sh
@@ -0,0 +1,18 @@
+#! /bin/bash
+
+# Final cleanup script for benchmarks.
+# ${1} (default 4) selects which benchmark dump files are removed as well.
+useARCH=${1:-4}
+
+# 3 or 4: remove the KNL and SNB dumps
+if (( useARCH == 3 || useARCH == 4 ))
+then
+    rm -rf benchmark_knl_dump.txt benchmark_snb_dump.txt
+fi
+# 1, 2 or 4: remove the LNX-G and LNX-S dumps
+if (( useARCH == 1 || useARCH == 2 || useARCH == 4 ))
+then
+    rm -rf benchmark_lnx-g_dump.txt benchmark_lnx-s_dump.txt
+fi
+
+rm -rf log_*.txt *.root *.png validation_*