From bc1ea732cda70c9d733cb153e96b3c9b6f2077e7 Mon Sep 17 00:00:00 2001 From: Stephen Nicholas Swatman Date: Fri, 25 Oct 2024 15:36:19 +0200 Subject: [PATCH] Remove unneeded state from Runge-Kutta stepper Currently, the Runge-Kutta stepper holds a lot of state which is used only internally. The size of this state is 164 bytes, of which only 140 bytes are needed after the step. Keeping the remaining state between steps is therefore taking up a lot of memory. In this PR, I remove this internal state from the externally facing stepper state and turn it into an intermediate struct which is not stored persistently, saving 140 bytes on the size of the stepper state (and, with it, the propagator state). --- core/include/detray/propagator/rk_stepper.hpp | 52 +++++----- core/include/detray/propagator/rk_stepper.ipp | 97 ++++++++++--------- .../include/detray/test/utils/inspectors.hpp | 11 --- 3 files changed, 76 insertions(+), 84 deletions(-) diff --git a/core/include/detray/propagator/rk_stepper.hpp b/core/include/detray/propagator/rk_stepper.hpp index eab488a34..b17787f03 100644 --- a/core/include/detray/propagator/rk_stepper.hpp +++ b/core/include/detray/propagator/rk_stepper.hpp @@ -51,6 +51,20 @@ class rk_stepper final rk_stepper() = default; + struct intermediate_state { + vector3_type b_first{0.f, 0.f, 0.f}; + vector3_type b_middle{0.f, 0.f, 0.f}; + vector3_type b_last{0.f, 0.f, 0.f}; + // t = tangential direction = dr/ds + std::array t; + // q/p + std::array qop; + // dt/ds = d^2r/ds^2 = q/p ( t X B ) + std::array dtds; + // d(q/p)/ds + std::array dqopds; + }; + struct state : public base_type::state { friend rk_stepper; @@ -72,29 +86,27 @@ class rk_stepper final /// @returns the B-field view magnetic_field_type field() const { return m_magnetic_field; } - /// @returns access to the step data - const auto& step_data() const { return m_step_data; } - /// Update the track state by Runge-Kutta-Nystrom integration. DETRAY_HOST_DEVICE - void advance_track(); + void advance_track(const intermediate_state& sd); /// Update the jacobian transport from free propagation DETRAY_HOST_DEVICE - void advance_jacobian(const stepping::config& cfg = {}); + void advance_jacobian(const stepping::config& cfg, + const intermediate_state&); /// evaulate dqopds for a given step size and material DETRAY_HOST_DEVICE - scalar_type evaluate_dqopds(const std::size_t i, const scalar_type h, - const scalar_type dqopds_prev, - const detray::stepping::config& cfg); + detray::pair evaluate_dqopds( + const std::size_t i, const scalar_type h, + const scalar_type dqopds_prev, const detray::stepping::config& cfg); /// evaulate dtds for runge kutta stepping DETRAY_HOST_DEVICE - vector3_type evaluate_dtds(const vector3_type& b_field, - const std::size_t i, const scalar_type h, - const vector3_type& dtds_prev, - const scalar_type qop); + detray::pair evaluate_dtds( + const vector3_type& b_field, const std::size_t i, + const scalar_type h, const vector3_type& dtds_prev, + const scalar_type qop); DETRAY_HOST_DEVICE matrix_type<3, 3> evaluate_field_gradient(const point3_type& pos); @@ -128,20 +140,8 @@ class rk_stepper final } private: - /// stepping data required for RKN4 - struct { - vector3_type b_first{0.f, 0.f, 0.f}; - vector3_type b_middle{0.f, 0.f, 0.f}; - vector3_type b_last{0.f, 0.f, 0.f}; - // t = tangential direction = dr/ds - std::array t; - // q/p - std::array qop; - // dt/ds = d^2r/ds^2 = q/p ( t X B ) - std::array dtds; - // d(q/p)/ds - std::array dqopds; - } m_step_data; + vector3_type m_dtds_3; + scalar_type m_dqopds_3; /// Magnetic field view const magnetic_field_t m_magnetic_field; diff --git a/core/include/detray/propagator/rk_stepper.ipp b/core/include/detray/propagator/rk_stepper.ipp index 8a8e48b5a..0f6d7ed11 100644 --- a/core/include/detray/propagator/rk_stepper.ipp +++ b/core/include/detray/propagator/rk_stepper.ipp @@ -10,11 +10,13 @@ template -DETRAY_HOST_DEVICE inline void -detray::rk_stepper::state::advance_track() { +DETRAY_HOST_DEVICE inline void detray::rk_stepper< + magnetic_field_t, algebra_t, constraint_t, policy_t, inspector_t>::state:: + advance_track( + const detray::rk_stepper::intermediate_state& + sd) { - const auto& sd = m_step_data; const scalar_type h{this->step_size()}; const scalar_type h_6{h * static_cast(1. / 6.)}; auto& track = (*this)(); @@ -49,7 +51,8 @@ template DETRAY_HOST_DEVICE inline void detray::rk_stepper< magnetic_field_t, algebra_t, constraint_t, policy_t, - inspector_t>::state::advance_jacobian(const detray::stepping::config& cfg) { + inspector_t>::state::advance_jacobian(const detray::stepping::config& cfg, + const intermediate_state& sd) { /// The calculations are based on ATL-SOFT-PUB-2009-002. The update of the /// Jacobian matrix is requires only the calculation of eq. 17 and 18. /// Since the terms of eq. 18 are currently 0, this matrix is not needed @@ -73,7 +76,6 @@ DETRAY_HOST_DEVICE inline void detray::rk_stepper< //( JacTransport = D * JacTransport ) auto D = matrix_operator().template identity(); - const auto& sd = m_step_data; const scalar_type h{this->step_size()}; auto& track = (*this)(); @@ -334,32 +336,25 @@ DETRAY_HOST_DEVICE inline auto detray::rk_stepper< const scalar_type h, const scalar_type dqopds_prev, const detray::stepping::config& cfg) - -> scalar_type { + -> detray::pair { const auto& track = (*this)(); - const scalar_type qop = track.qop(); - auto& sd = m_step_data; if (!(this->volume_has_material())) { - sd.qop[i] = qop; - return 0.f; + const scalar_type qop = track.qop(); + return detray::make_pair(0.f, qop); + } else if (cfg.use_mean_loss && i != 0u) { + // qop_n is calculated recursively like the direction of + // evaluate_dtds. + // + // https://doi.org/10.1016/0029-554X(81)90063-X says: + // "For y we have similar formulae as for x, for y' and + // \lambda similar formulae as for x'" + const scalar_type qop = track.qop() + h * dqopds_prev; + return detray::make_pair(this->dqopds(qop), qop); } else { - - if (cfg.use_mean_loss) { - if (i == 0u) { - sd.qop[i] = qop; - } else { - - // qop_n is calculated recursively like the direction of - // evaluate_dtds. - // - // https://doi.org/10.1016/0029-554X(81)90063-X says: - // "For y we have similar formulae as for x, for y' and - // \lambda similar formulae as for x'" - sd.qop[i] = qop + h * dqopds_prev; - } - } - return this->dqopds(sd.qop[i]); + const scalar_type qop = track.qop(); + return detray::make_pair(this->dqopds(qop), qop); } } @@ -370,20 +365,22 @@ DETRAY_HOST_DEVICE inline auto detray::rk_stepper< inspector_t>::state::evaluate_dtds(const vector3_type& b_field, const std::size_t i, const scalar_type h, const vector3_type& dtds_prev, - const scalar_type qop) -> vector3_type { + const scalar_type qop) + -> detray::pair { auto& track = (*this)(); const auto dir = track.dir(); - auto& sd = m_step_data; + + vector3_type t; if (i == 0u) { - sd.t[i] = dir; + t = dir; } else { // Eq (84) of https://doi.org/10.1016/0029-554X(81)90063-X - sd.t[i] = dir + h * dtds_prev; + t = dir + h * dtds_prev; } // dtds = qop * (t X B) from Lorentz force - return qop * vector::cross(sd.t[i], b_field); + return detray::make_pair(vector3_type{qop * vector::cross(t, b_field)}, t); } template dqopds((*this)().qop()); } - return m_step_data.dqopds[3u]; + return m_dqopds_3; } template = 0.f ? step::direction::e_forward @@ -680,11 +683,11 @@ DETRAY_HOST_DEVICE inline bool detray::rk_stepper< } // Advance track state - stepping.advance_track(); + stepping.advance_track(sd); // Advance jacobian transport if (cfg.do_covariance_transport) { - stepping.advance_jacobian(cfg); + stepping.advance_jacobian(cfg, sd); } // Save the current step size diff --git a/tests/include/detray/test/utils/inspectors.hpp b/tests/include/detray/test/utils/inspectors.hpp index 3a4cdd514..4e480758a 100644 --- a/tests/include/detray/test/utils/inspectors.hpp +++ b/tests/include/detray/test/utils/inspectors.hpp @@ -450,17 +450,6 @@ struct print_inspector { debug_stream << "Step size scale factor" << "\t\t" << step_scalor << std::endl; - debug_stream << "Bfield points:" << std::endl; - const auto &f = state.step_data().b_first; - debug_stream << "\tfirst:" << tabs << f[0] << ", " << f[1] << ", " - << f[2] << std::endl; - const auto &m = state.step_data().b_middle; - debug_stream << "\tmiddle:" << tabs << m[0] << ", " << m[1] << ", " - << m[2] << std::endl; - const auto &l = state.step_data().b_last; - debug_stream << "\tlast:" << tabs << l[0] << ", " << l[1] << ", " - << l[2] << std::endl; - debug_stream << std::endl; }