From 8bdf0163ce54ba44fc617900c4ebfc9c40cffc30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 1 Feb 2024 14:31:01 +0100 Subject: [PATCH 1/7] Fix compiler errors --- .../walberla_kernels/templates/Boundary.tmpl.h | 6 +++--- src/core/electrostatics/p3m.cpp | 2 ++ .../electrostatics/CoulombScafacos.hpp | 1 + .../reactions/EKReactionImplIndexed.cpp | 2 +- .../Dynamic_UBB_double_precision.h | 17 +++++++++-------- .../Dynamic_UBB_single_precision.h | 17 +++++++++-------- 6 files changed, 25 insertions(+), 20 deletions(-) diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h index 6bda8f86e0..5079dcc5a6 100644 --- a/maintainer/walberla_kernels/templates/Boundary.tmpl.h +++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h @@ -41,7 +41,7 @@ #include #include -#include +#include #include {% for header in interface_spec.headers %} @@ -122,7 +122,7 @@ class {{class_name}} {%- endif %} }; - {{class_name}}( const shared_ptr & blocks, + {{class_name}}( const std::shared_ptr & blocks, {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}}) :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} { @@ -177,7 +177,7 @@ class {{class_name}} } template - void fillFromFlagField( const shared_ptr & blocks, ConstBlockDataID flagFieldID, + void fillFromFlagField( const std::shared_ptr & blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index 7bb20f4a27..d8ec633abd 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -599,9 +599,11 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, } } energy *= 
prefactor; +#ifdef NPT if (npt_flag) { npt_add_virial_contribution(energy); } +#endif if (not energy_flag) { energy = 0.; } diff --git a/src/script_interface/electrostatics/CoulombScafacos.hpp b/src/script_interface/electrostatics/CoulombScafacos.hpp index 558264315b..827942a38b 100644 --- a/src/script_interface/electrostatics/CoulombScafacos.hpp +++ b/src/script_interface/electrostatics/CoulombScafacos.hpp @@ -31,6 +31,7 @@ #include "script_interface/get_value.hpp" #include "script_interface/scafacos/scafacos.hpp" +#include #include #include #include diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp index 90c14b9069..a02b084d83 100644 --- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp +++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp @@ -105,7 +105,7 @@ void fillFromFlagField(IBlock *block, BlockDataID indexVectorID, } template -void fillFromFlagField(const shared_ptr &blocks, +void fillFromFlagField(const std::shared_ptr &blocks, BlockDataID indexVectorID, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h index f2a93a9f94..38e68af361 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h @@ -31,7 +31,8 @@ #include "field/FlagField.h" #include "field/GhostLayerField.h" -#include +#include +#include #include #ifdef __GNUC__ @@ -85,10 +86,10 @@ class Dynamic_UBB_double_precision { }; Dynamic_UBB_double_precision( - const shared_ptr &blocks, 
BlockDataID pdfsID_, - std::function(const Cell &, - const shared_ptr &, - IBlock &)> &velocityCallback) + const std::shared_ptr &blocks, BlockDataID pdfsID_, + std::function( + const Cell &, const std::shared_ptr &, + IBlock &)> &velocityCallback) : elementInitaliser(velocityCallback), pdfsID(pdfsID_) { auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) { return new IndexVectors(); @@ -118,7 +119,7 @@ class Dynamic_UBB_double_precision { } template - void fillFromFlagField(const shared_ptr &blocks, + void fillFromFlagField(const std::shared_ptr &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) @@ -127,7 +128,7 @@ class Dynamic_UBB_double_precision { } template - void fillFromFlagField(const shared_ptr &blocks, + void fillFromFlagField(const std::shared_ptr &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { auto *indexVectors = block->getData(indexVectorID); @@ -558,7 +559,7 @@ class Dynamic_UBB_double_precision { BlockDataID indexVectorID; std::function( - const Cell &, const shared_ptr &, IBlock &)> + const Cell &, const std::shared_ptr &, IBlock &)> elementInitaliser; public: diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h index 847d63b9ff..e4175e74f1 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h @@ -31,7 +31,8 @@ #include "field/FlagField.h" #include "field/GhostLayerField.h" -#include +#include +#include #include #ifdef __GNUC__ @@ -85,10 +86,10 @@ class Dynamic_UBB_single_precision { }; Dynamic_UBB_single_precision( - const shared_ptr &blocks, BlockDataID pdfsID_, - 
std::function(const Cell &, - const shared_ptr &, - IBlock &)> &velocityCallback) + const std::shared_ptr &blocks, BlockDataID pdfsID_, + std::function( + const Cell &, const std::shared_ptr &, + IBlock &)> &velocityCallback) : elementInitaliser(velocityCallback), pdfsID(pdfsID_) { auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) { return new IndexVectors(); @@ -118,7 +119,7 @@ class Dynamic_UBB_single_precision { } template - void fillFromFlagField(const shared_ptr &blocks, + void fillFromFlagField(const std::shared_ptr &blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) @@ -127,7 +128,7 @@ class Dynamic_UBB_single_precision { } template - void fillFromFlagField(const shared_ptr &blocks, + void fillFromFlagField(const std::shared_ptr &blocks, IBlock *block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { auto *indexVectors = block->getData(indexVectorID); @@ -558,7 +559,7 @@ class Dynamic_UBB_single_precision { BlockDataID indexVectorID; std::function( - const Cell &, const shared_ptr &, IBlock &)> + const Cell &, const std::shared_ptr &, IBlock &)> elementInitaliser; public: From 855474f1a7c3f18f637a8f37123b43dc7e204520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 5 Feb 2024 16:01:05 +0100 Subject: [PATCH 2/7] Remove unused code --- .../walberla_kernels/generate_ek_kernels.py | 6 - src/core/ek/ek_reactions.cpp | 43 - src/python/espressomd/lb.py | 22 - .../DensityPackInfo_double_precision.cpp | 1484 ----------------- .../DensityPackInfo_double_precision.h | 67 - .../DensityPackInfo_single_precision.cpp | 1484 ----------------- .../DensityPackInfo_single_precision.h | 67 - 7 files changed, 3173 deletions(-) delete mode 100644 src/core/ek/ek_reactions.cpp delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp 
delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h diff --git a/maintainer/walberla_kernels/generate_ek_kernels.py b/maintainer/walberla_kernels/generate_ek_kernels.py index fbf9dc747f..e54f19023f 100644 --- a/maintainer/walberla_kernels/generate_ek_kernels.py +++ b/maintainer/walberla_kernels/generate_ek_kernels.py @@ -186,12 +186,6 @@ def replace_getData_with_uncheckedFastGetData(filename: str) -> None: index_shape=density_field.index_shape, target=target) - pystencils_walberla.generate_pack_info_from_kernel( - ctx, - f"DensityPackInfo_{precision_suffix}", - ek_electrostatic.continuity(), - target=target) - # ek reactions for i in range(1, max_num_reactants + 1): assignments = list(reaction_obj.generate_reaction(num_reactants=i)) diff --git a/src/core/ek/ek_reactions.cpp b/src/core/ek/ek_reactions.cpp deleted file mode 100644 index c6689e3ed5..0000000000 --- a/src/core/ek/ek_reactions.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "config/config.hpp" - -#ifdef WALBERLA - -#include "ek/ek_reactions.hpp" - -#include - -namespace EK { - -EKReactions ek_reactions; - -void perform_reactions() { - if (ek_reactions.empty()) { - return; - } - - std::for_each(ek_reactions.begin(), ek_reactions.end(), - [](auto const &reaction) { reaction->perform_reaction(); }); -} - -} // namespace EK - -#endif // WALBERLA diff --git a/src/python/espressomd/lb.py b/src/python/espressomd/lb.py index 24afb8bb37..b805a0307f 100644 --- a/src/python/espressomd/lb.py +++ b/src/python/espressomd/lb.py @@ -53,20 +53,6 @@ def __getitem__(self, key): def __str__(self): return f"{self.__class__.__name__}({self.get_params()})" - def _activate(self): - self._activate_method() - - def _deactivate(self): - self._deactivate_method() - - def _activate_method(self): - self.call_method("activate") - utils.handle_errors("HydrodynamicInteraction activation failed") - - def _deactivate_method(self): - self.call_method("deactivate") - utils.handle_errors("HydrodynamicInteraction deactivation failed") - def validate_params(self, params): pass @@ -342,13 +328,6 @@ class LBFluidNodeWalberla(ScriptInterfaceHelper): def required_keys(self): return {"parent_sip", "index"} - def __init__(self, *args, **kwargs): - if "sip" not in kwargs: - super().__init__(*args, **kwargs) - utils.handle_errors("LBFluidNode instantiation failed") - else: - super().__init__(**kwargs) - def __reduce__(self): raise NotImplementedError("Cannot serialize LB fluid node objects") @@ -494,7 +473,6 @@ def __init__(self, *args, **kwargs): slice_range, grid_size) node = LBFluidNodeWalberla(index=np.array([0, 0, 0]), **kwargs) super().__init__(*args, node_sip=node, **kwargs, **extra_kwargs) - utils.handle_errors("LBFluidSliceWalberla instantiation failed") def __iter__(self): lower, upper = self.call_method("get_slice_ranges") diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp 
b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp deleted file mode 100644 index 39b73e77a9..0000000000 --- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp +++ /dev/null @@ -1,1484 +0,0 @@ - -// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a - -#include "DensityPackInfo_double_precision.h" -#include "core/DataTypes.h" -#include "core/cell/CellInterval.h" -#include "stencil/Directions.h" - -#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wfloat-equal" -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wconversion" -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif - -namespace walberla { -namespace pystencils { - -using walberla::cell::CellInterval; -using walberla::stencil::Direction; - -namespace internal_pack_BSW { -static FUNC_PREFIX void pack_BSW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BSW - -namespace internal_pack_SW { -static FUNC_PREFIX void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const 
_size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_SW - -namespace internal_pack_TSW { -static FUNC_PREFIX void pack_TSW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = 
_data_j_20_310_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TSW - -namespace internal_pack_BW { -static FUNC_PREFIX void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BW - -namespace internal_pack_W { -static FUNC_PREFIX void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_33 = _data_j + 
_stride_j_2 * ctr_2 + 3 * _stride_j_3; - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2; - double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30; - double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3] = _data_j_20_35_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 
* ctr_1 + 9 * ctr_0 + 4] = _data_j_20_30_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5] = _data_j_20_36_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7] = _data_j_20_34_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_W - -namespace internal_pack_TW { -static FUNC_PREFIX void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * 
ctr_0]; - } - } - } -} -} // namespace internal_pack_TW - -namespace internal_pack_BNW { -static FUNC_PREFIX void pack_BNW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BNW - -namespace internal_pack_NW { -static FUNC_PREFIX void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * 
_size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_NW - -namespace internal_pack_TNW { -static FUNC_PREFIX void pack_TNW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TNW - -namespace internal_pack_BS { -static FUNC_PREFIX void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * 
ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_37_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BS - -namespace internal_pack_S { -static FUNC_PREFIX void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3; - double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31; - double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 
+ 6 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4] = _data_j_20_31_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5] = _data_j_20_38_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_S - -namespace internal_pack_TS { -static FUNC_PREFIX void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_38_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TS - -namespace internal_pack_B { -static FUNC_PREFIX void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double 
*RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4] = _data_j_20_32_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_B - -namespace internal_pack_T { -static FUNC_PREFIX void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT 
_data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0]; - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3] = _data_j_20_38_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_T - -namespace internal_pack_BN { -static FUNC_PREFIX void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * 
_size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BN - -namespace internal_pack_N { -static FUNC_PREFIX void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_N - -namespace internal_pack_TN { -static FUNC_PREFIX void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * 
_stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TN - -namespace internal_unpack_BSW { -static FUNC_PREFIX void unpack_BSW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_BSW - -namespace internal_unpack_SW { -static FUNC_PREFIX void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_33_10 = 
_stride_j_1 * ctr_1 + _data_j_20_33; - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_SW - -namespace internal_unpack_TSW { -static FUNC_PREFIX void unpack_TSW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_TSW - -namespace internal_unpack_BW { -static FUNC_PREFIX void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - double 
*RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_BW - -namespace internal_unpack_W { -static FUNC_PREFIX void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2; - double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t 
ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30; - double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0]; - _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1]; - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2]; - _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3]; - _data_j_20_30_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4]; - _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5]; - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6]; - _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8]; - } - } - } -} -} // namespace 
internal_unpack_W - -namespace internal_unpack_TW { -static FUNC_PREFIX void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_TW - -namespace internal_unpack_BNW { -static FUNC_PREFIX void unpack_BNW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = 
_stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_BNW - -namespace internal_unpack_NW { -static FUNC_PREFIX void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_NW - -namespace internal_unpack_TNW { -static FUNC_PREFIX void unpack_TNW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const 
_stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_TNW - -namespace internal_unpack_BS { -static FUNC_PREFIX void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0]; - _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1]; - } - } - } -} -} // namespace internal_unpack_BS - -namespace internal_unpack_S { -static FUNC_PREFIX void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < 
_size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3; - double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31; - double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0]; - _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1]; - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2]; - _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3]; - _data_j_20_31_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4]; - _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5]; - } - } - } -} -} // namespace internal_unpack_S - -namespace internal_unpack_TS { -static FUNC_PREFIX void 
unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0]; - _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1]; - } - } - } -} -} // namespace internal_unpack_TS - -namespace internal_unpack_B { -static FUNC_PREFIX void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - 
double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0]; - _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1]; - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2]; - _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3]; - _data_j_20_32_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4]; - } - } - } -} -} // namespace internal_unpack_B - -namespace internal_unpack_T { -static FUNC_PREFIX void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - double 
*RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0]; - _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2]; - _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3]; - } - } - } -} -} // namespace internal_unpack_T - -namespace internal_unpack_BN { -static FUNC_PREFIX void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_BN - -namespace internal_unpack_N { -static FUNC_PREFIX void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT 
_data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_N - -namespace internal_unpack_TN { -static FUNC_PREFIX void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_TN - -void DensityPackInfo_double_precision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { - double *buffer = 
reinterpret_cast(byte_buffer); - - auto j = block->getData>(jID); - - CellInterval ci; - j->getSliceBeforeGhostLayer(dir, ci, 1, false); - - switch (dir) { - case stencil::BSW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BSW::pack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::SW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), 
int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_SW::pack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TSW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TSW::pack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), 
-int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BW::pack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::W: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 
= int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_W::pack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TW::pack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BNW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - 
WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BNW::pack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::NW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_NW::pack_NW(_data_buffer, _data_j, _size_j_0, 
_size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TNW: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TNW::pack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BS: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = 
int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BS::pack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::S: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_S::pack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TS: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), 
-int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TS::pack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::B: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = 
int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_B::pack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::T: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_T::pack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BN: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t 
_size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BN::pack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::N: { - double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_N::pack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TN: { - 
double *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TN::pack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - default: - WALBERLA_ASSERT(false); - } -} - -void DensityPackInfo_double_precision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { - double *buffer = reinterpret_cast(byte_buffer); - - auto j = block->getData>(jID); - - CellInterval ci; - j->getGhostRegion(dir, ci, 1, false); - auto communciationDirection = stencil::inverseDir[dir]; - - switch (communciationDirection) { - case stencil::BSW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - 
WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BSW::unpack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::SW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_SW::unpack_SW(_data_buffer, _data_j, 
_size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TSW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TSW::unpack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = 
int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BW::unpack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::W: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_W::unpack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), 
-int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TW::unpack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BNW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = 
int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BNW::unpack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::NW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_NW::unpack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TNW: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - 
const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TNW::unpack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BS: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BS::unpack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - 
- case stencil::S: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_S::unpack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TS: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), 
int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TS::unpack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::B: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_B::unpack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::T: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), 
-int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_T::unpack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BN: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t 
_stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BN::unpack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::N: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_N::unpack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TN: { - double *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - 
WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TN::unpack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - default: - WALBERLA_ASSERT(false); - } -} - -uint_t DensityPackInfo_double_precision::size(stencil::Direction dir, const IBlock *block) const { - auto j = block->getData>(jID); - - CellInterval ci; - j->getGhostRegion(dir, ci, 1, false); - - uint_t elementsPerCell = 0; - - switch (dir) { - case stencil::BSW: - elementsPerCell = 1; - break; - - case stencil::SW: - elementsPerCell = 3; - break; - - case stencil::TSW: - elementsPerCell = 1; - break; - - case stencil::BW: - elementsPerCell = 3; - break; - - case stencil::W: - elementsPerCell = 9; - break; - - case stencil::TW: - elementsPerCell = 3; - break; - - case stencil::BNW: - elementsPerCell = 1; - break; - - case stencil::NW: - elementsPerCell = 3; - break; - - case stencil::TNW: - elementsPerCell = 1; - break; - - case stencil::BS: - elementsPerCell = 2; - break; - - case stencil::S: - elementsPerCell = 6; - break; - - case stencil::TS: - elementsPerCell = 2; - break; - - case stencil::B: - elementsPerCell = 5; - break; - - case stencil::T: - elementsPerCell = 4; - break; - - case stencil::BN: - elementsPerCell = 1; - break; - - case stencil::N: - elementsPerCell = 3; - break; - - case stencil::TN: - elementsPerCell = 1; - break; - - default: - elementsPerCell = 0; - } - return ci.numCells() * elementsPerCell * 
sizeof(double); -} - -} // namespace pystencils -} // namespace walberla \ No newline at end of file diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h deleted file mode 100644 index d5cb19678b..0000000000 --- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h +++ /dev/null @@ -1,67 +0,0 @@ - -// kernel generated with pystencils v1.2, lbmpy v1.2, -// lbmpy_walberla/pystencils_walberla from waLBerla commit ref: -// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a - -#pragma once -#include "communication/UniformPackInfo.h" -#include "core/DataTypes.h" -#include "core/cell/CellInterval.h" -#include "domain_decomposition/IBlock.h" -#include "field/GhostLayerField.h" -#include "stencil/Directions.h" - -#define FUNC_PREFIX - -#ifdef __GNUC__ -#define RESTRICT __restrict__ -#elif _MSC_VER -#define RESTRICT __restrict -#else -#define RESTRICT -#endif - -namespace walberla { -namespace pystencils { - -class DensityPackInfo_double_precision - : public ::walberla::communication::UniformPackInfo { -public: - DensityPackInfo_double_precision(BlockDataID jID_) : jID(jID_){}; - virtual ~DensityPackInfo_double_precision() {} - - bool constantDataExchange() const { return true; } - bool threadsafeReceiving() const { return true; } - - void unpackData(IBlock *receiver, stencil::Direction dir, - mpi::RecvBuffer &buffer) { - const auto dataSize = size(dir, receiver); - unpack(dir, buffer.skip(dataSize), receiver); - } - - void communicateLocal(const IBlock *sender, IBlock *receiver, - stencil::Direction dir) { - // TODO: optimize by generating kernel for this case - mpi::SendBuffer sBuffer; - packData(sender, dir, sBuffer); - mpi::RecvBuffer rBuffer(sBuffer); - unpackData(receiver, stencil::inverseDir[dir], rBuffer); - } - - void packDataImpl(const IBlock *sender, stencil::Direction dir, - 
mpi::SendBuffer &outBuffer) const { - const auto dataSize = size(dir, sender); - pack(dir, outBuffer.forward(dataSize), const_cast(sender)); - } - - void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; - void unpack(stencil::Direction dir, unsigned char *buffer, - IBlock *block) const; - uint_t size(stencil::Direction dir, const IBlock *block) const; - -private: - BlockDataID jID; -}; - -} // namespace pystencils -} // namespace walberla \ No newline at end of file diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp deleted file mode 100644 index fab1ca3023..0000000000 --- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp +++ /dev/null @@ -1,1484 +0,0 @@ - -// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a - -#include "DensityPackInfo_single_precision.h" -#include "core/DataTypes.h" -#include "core/cell/CellInterval.h" -#include "stencil/Directions.h" - -#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wfloat-equal" -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wconversion" -#pragma GCC diagnostic ignored "-Wunused-variable" -#endif - -namespace walberla { -namespace pystencils { - -using walberla::cell::CellInterval; -using walberla::stencil::Direction; - -namespace internal_pack_BSW { -static FUNC_PREFIX void pack_BSW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; 
ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BSW - -namespace internal_pack_SW { -static FUNC_PREFIX void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_SW - -namespace internal_pack_TSW { -static FUNC_PREFIX void pack_TSW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const 
_size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TSW - -namespace internal_pack_BW { -static FUNC_PREFIX void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = 
_data_j_20_311_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BW - -namespace internal_pack_W { -static FUNC_PREFIX void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2; - float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30; - float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[9 * 
_size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3] = _data_j_20_35_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4] = _data_j_20_30_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5] = _data_j_20_36_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7] = _data_j_20_34_10[_stride_j_0 * ctr_0]; - _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_W - -namespace internal_pack_TW { -static FUNC_PREFIX void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_36_10 
= _stride_j_1 * ctr_1 + _data_j_20_36; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TW - -namespace internal_pack_BNW { -static FUNC_PREFIX void pack_BNW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BNW - -namespace internal_pack_NW { -static FUNC_PREFIX void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - float *RESTRICT _data_j_20_312 
= _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_NW - -namespace internal_pack_TNW { -static FUNC_PREFIX void pack_TNW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TNW - -namespace internal_pack_BS { -static FUNC_PREFIX void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < 
_size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_37_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BS - -namespace internal_pack_S { -static FUNC_PREFIX void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3; - float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - float 
*RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31; - float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4] = _data_j_20_31_10[_stride_j_0 * ctr_0]; - _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5] = _data_j_20_38_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_S - -namespace internal_pack_TS { -static FUNC_PREFIX void pack_TS(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[2 * _size_j_0 * 
_size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_38_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TS - -namespace internal_pack_B { -static FUNC_PREFIX void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0]; - _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * 
_size_j_0 * ctr_1 + 5 * ctr_0 + 4] = _data_j_20_32_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_B - -namespace internal_pack_T { -static FUNC_PREFIX void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0]; - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0]; - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3] = _data_j_20_38_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_T - -namespace internal_pack_BN { -static FUNC_PREFIX void pack_BN(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t 
const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_BN - -namespace internal_pack_N { -static FUNC_PREFIX void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0]; - _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} 
-} // namespace internal_pack_N - -namespace internal_pack_TN { -static FUNC_PREFIX void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0]; - } - } - } -} -} // namespace internal_pack_TN - -namespace internal_unpack_BSW { -static FUNC_PREFIX void unpack_BSW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_BSW - -namespace internal_unpack_SW { -static FUNC_PREFIX void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 
0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_SW - -namespace internal_unpack_TSW { -static FUNC_PREFIX void unpack_TSW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_TSW - -namespace internal_unpack_BW { -static FUNC_PREFIX void unpack_BW(float *RESTRICT const _data_buffer, 
float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_BW - -namespace internal_unpack_W { -static FUNC_PREFIX void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * 
_stride_j_3; - float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2; - float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30; - float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0]; - _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1]; - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2]; - _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3]; - _data_j_20_30_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4]; - _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5]; - 
_data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6]; - _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8]; - } - } - } -} -} // namespace internal_unpack_W - -namespace internal_unpack_TW { -static FUNC_PREFIX void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_TW - -namespace internal_unpack_BNW { -static FUNC_PREFIX void unpack_BNW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const 
_size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_BNW - -namespace internal_unpack_NW { -static FUNC_PREFIX void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 
* _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_NW - -namespace internal_unpack_TNW { -static FUNC_PREFIX void unpack_TNW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_TNW - -namespace internal_unpack_BS { -static FUNC_PREFIX void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0]; - _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1]; - } - } - } -} -} // 
namespace internal_unpack_BS - -namespace internal_unpack_S { -static FUNC_PREFIX void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3; - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3; - float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33; - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31; - float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0]; - _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1]; - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2]; - _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3]; - 
_data_j_20_31_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4]; - _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5]; - } - } - } -} -} // namespace internal_unpack_S - -namespace internal_unpack_TS { -static FUNC_PREFIX void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0]; - _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1]; - } - } - } -} -} // namespace internal_unpack_TS - -namespace internal_unpack_B { -static FUNC_PREFIX void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3; - float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3; - float *RESTRICT _data_j_20_311 
= _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3; - float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39; - float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35; - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37; - float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0]; - _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1]; - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2]; - _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3]; - _data_j_20_32_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4]; - } - } - } -} -} // namespace internal_unpack_B - -namespace internal_unpack_T { -static FUNC_PREFIX void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3; - float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + 
_stride_j_2 * ctr_2 + 12 * _stride_j_3; - float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310; - float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0]; - _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2]; - _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3]; - } - } - } -} -} // namespace internal_unpack_T - -namespace internal_unpack_BN { -static FUNC_PREFIX void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_BN - -namespace internal_unpack_N { -static FUNC_PREFIX void 
unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3; - float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3; - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311; - float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34; - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) { - _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0]; - _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1]; - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2]; - } - } - } -} -} // namespace internal_unpack_N - -namespace internal_unpack_TN { -static FUNC_PREFIX void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) { - for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) { - float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3; - for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) { - float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312; - for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 
1) { - _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0]; - } - } - } -} -} // namespace internal_unpack_TN - -void DensityPackInfo_single_precision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { - float *buffer = reinterpret_cast(byte_buffer); - - auto j = block->getData>(jID); - - CellInterval ci; - j->getSliceBeforeGhostLayer(dir, ci, 1, false); - - switch (dir) { - case stencil::BSW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BSW::pack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::SW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const 
_data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_SW::pack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TSW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - 
internal_pack_TSW::pack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BW::pack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::W: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); 
- const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_W::pack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TW::pack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BNW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - 
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BNW::pack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::NW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const 
int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_NW::pack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TNW: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TNW::pack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BS: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) 
+ 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BS::pack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::S: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_S::pack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - 
case stencil::TS: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TS::pack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::B: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) 
+ 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_B::pack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::T: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_T::pack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BN: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = 
j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_BN::pack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::N: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - 
internal_pack_N::pack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TN: { - float *RESTRICT _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_pack_TN::pack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - default: - WALBERLA_ASSERT(false); - } -} - -void DensityPackInfo_single_precision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { - float *buffer = reinterpret_cast(byte_buffer); - - auto j = block->getData>(jID); - - CellInterval ci; - j->getGhostRegion(dir, ci, 1, false); - auto communciationDirection = stencil::inverseDir[dir]; - - switch (communciationDirection) { - case stencil::BSW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - 
WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BSW::unpack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::SW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = 
int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_SW::unpack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TSW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TSW::unpack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) 
+ 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BW::unpack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::W: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_W::unpack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TW: { - float *RESTRICT const _data_buffer = 
buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TW::unpack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BNW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = 
int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BNW::unpack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::NW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_NW::unpack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TNW: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), 
ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TNW::unpack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BS: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - 
internal_unpack_BS::unpack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::S: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_S::unpack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TS: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 
0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TS::unpack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::B: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_B::unpack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::T: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - 
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_T::unpack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::BN: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const 
int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_BN::unpack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::N: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_N::unpack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - case stencil::TN: { - float *RESTRICT const _data_buffer = buffer; - WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers())); - WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers())); - float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), 
int64_t(cell_idx_c(ci.xSize()) + 0)); - const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0)); - const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0); - WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0)); - const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0); - const int64_t _stride_j_0 = int64_t(j->xStride()); - const int64_t _stride_j_1 = int64_t(j->yStride()); - const int64_t _stride_j_2 = int64_t(j->zStride()); - const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride())); - internal_unpack_TN::unpack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3); - break; - } - - default: - WALBERLA_ASSERT(false); - } -} - -uint_t DensityPackInfo_single_precision::size(stencil::Direction dir, const IBlock *block) const { - auto j = block->getData>(jID); - - CellInterval ci; - j->getGhostRegion(dir, ci, 1, false); - - uint_t elementsPerCell = 0; - - switch (dir) { - case stencil::BSW: - elementsPerCell = 1; - break; - - case stencil::SW: - elementsPerCell = 3; - break; - - case stencil::TSW: - elementsPerCell = 1; - break; - - case stencil::BW: - elementsPerCell = 3; - break; - - case stencil::W: - elementsPerCell = 9; - break; - - case stencil::TW: - elementsPerCell = 3; - break; - - case stencil::BNW: - elementsPerCell = 1; - break; - - case stencil::NW: - elementsPerCell = 3; - break; - - case stencil::TNW: - elementsPerCell = 1; - break; - - case stencil::BS: - elementsPerCell = 2; - break; - - case stencil::S: - elementsPerCell = 6; - break; - - case stencil::TS: - elementsPerCell = 2; - break; - - case stencil::B: - elementsPerCell = 5; - break; - - case stencil::T: - elementsPerCell = 4; - break; - - case stencil::BN: - elementsPerCell = 1; - break; - - case stencil::N: - elementsPerCell = 3; - break; - - case stencil::TN: - elementsPerCell = 
1; - break; - - default: - elementsPerCell = 0; - } - return ci.numCells() * elementsPerCell * sizeof(float); -} - -} // namespace pystencils -} // namespace walberla \ No newline at end of file diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h deleted file mode 100644 index 08ea0c0988..0000000000 --- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h +++ /dev/null @@ -1,67 +0,0 @@ - -// kernel generated with pystencils v1.2, lbmpy v1.2, -// lbmpy_walberla/pystencils_walberla from waLBerla commit ref: -// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a - -#pragma once -#include "communication/UniformPackInfo.h" -#include "core/DataTypes.h" -#include "core/cell/CellInterval.h" -#include "domain_decomposition/IBlock.h" -#include "field/GhostLayerField.h" -#include "stencil/Directions.h" - -#define FUNC_PREFIX - -#ifdef __GNUC__ -#define RESTRICT __restrict__ -#elif _MSC_VER -#define RESTRICT __restrict -#else -#define RESTRICT -#endif - -namespace walberla { -namespace pystencils { - -class DensityPackInfo_single_precision - : public ::walberla::communication::UniformPackInfo { -public: - DensityPackInfo_single_precision(BlockDataID jID_) : jID(jID_){}; - virtual ~DensityPackInfo_single_precision() {} - - bool constantDataExchange() const { return true; } - bool threadsafeReceiving() const { return true; } - - void unpackData(IBlock *receiver, stencil::Direction dir, - mpi::RecvBuffer &buffer) { - const auto dataSize = size(dir, receiver); - unpack(dir, buffer.skip(dataSize), receiver); - } - - void communicateLocal(const IBlock *sender, IBlock *receiver, - stencil::Direction dir) { - // TODO: optimize by generating kernel for this case - mpi::SendBuffer sBuffer; - packData(sender, dir, sBuffer); - mpi::RecvBuffer rBuffer(sBuffer); - unpackData(receiver, stencil::inverseDir[dir], 
rBuffer); - } - - void packDataImpl(const IBlock *sender, stencil::Direction dir, - mpi::SendBuffer &outBuffer) const { - const auto dataSize = size(dir, sender); - pack(dir, outBuffer.forward(dataSize), const_cast(sender)); - } - - void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; - void unpack(stencil::Direction dir, unsigned char *buffer, - IBlock *block) const; - uint_t size(stencil::Direction dir, const IBlock *block) const; - -private: - BlockDataID jID; -}; - -} // namespace pystencils -} // namespace walberla \ No newline at end of file From 52aaef9e047b612a95b2d179d6192b0661b6632c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 7 Feb 2024 16:40:07 +0100 Subject: [PATCH 3/7] Fix tests and benchmarks --- maintainer/benchmarks/lb.py | 7 ++++--- testsuite/python/ek_interface.py | 1 + testsuite/python/lb_stats.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py index 979a7811c3..7131e62ff4 100644 --- a/maintainer/benchmarks/lb.py +++ b/maintainer/benchmarks/lb.py @@ -53,11 +53,11 @@ # process and check arguments n_iterations = 30 -assert args.volume_fraction > 0, "volume_fraction must be a positive number" +assert args.volume_fraction > 0, "--volume_fraction must be a positive number" assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \ - "volume_fraction exceeds the physical limit of sphere packing (~0.74)" + "--volume_fraction exceeds the physical limit of sphere packing (~0.74)" assert "box_l" not in args or args.particles_per_core == 0, \ - "Argument box_l requires particles_per_core=0" + "Argument --box_l requires --particles_per_core=0" required_features = ["LENNARD_JONES", "WALBERLA"] if args.gpu: @@ -85,6 +85,7 @@ if n_part == 0: box_l = args.box_l agrid = 1. 
+ lb_grid = args.box_l measurement_steps = 80 else: # volume of N spheres with radius r: N * (4/3*pi*r^3) diff --git a/testsuite/python/ek_interface.py b/testsuite/python/ek_interface.py index e439edea90..136ea769f2 100644 --- a/testsuite/python/ek_interface.py +++ b/testsuite/python/ek_interface.py @@ -170,6 +170,7 @@ def test_ek_fft_solver(self): ek_solver = espressomd.electrokinetics.EKFFT( lattice=self.lattice, permittivity=0.01, single_precision=self.ek_params["single_precision"]) + self.assertEqual(ek_solver.lattice, self.lattice) self.assertEqual( ek_solver.single_precision, self.ek_params["single_precision"]) diff --git a/testsuite/python/lb_stats.py b/testsuite/python/lb_stats.py index 1f5044a0ec..11c2c51b88 100644 --- a/testsuite/python/lb_stats.py +++ b/testsuite/python/lb_stats.py @@ -58,7 +58,7 @@ def test_mass_momentum_thermostat(self): type=self.n_col_part // 2 * [0, 1], pos=np.random.random( (self.n_col_part, 3)) * self.system.box_l[0]) if espressomd.has_features("MASS"): - particles.mass = 0.1 + np.random.random( + particles.mass = 0.5 + np.random.random( len(self.system.part)) self.system.thermostat.turn_off() @@ -134,7 +134,7 @@ def test_mass_momentum_thermostat(self): # scale=np.std(all_temp_particle,ddof=1))[1] - self.params["temp"] # temp_prec_fluid = scipy.stats.norm.interval(0.95, loc=self.params["temp"], # scale=np.std(all_temp_fluid,ddof=1))[1] -self.params["temp"] - temp_prec_particle = 0.05 * self.params["temp"] + temp_prec_particle = 0.08 * self.params["temp"] temp_prec_fluid = 0.05 * self.params["temp"] self.assertAlmostEqual( From 639a556485cfb851afd38e1ba99ce7274c053152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Mon, 12 Feb 2024 17:14:39 +0100 Subject: [PATCH 4/7] Refactor waLBerla bridge --- .../templates/Boundary.tmpl.h | 5 ++-- src/script_interface/walberla/LBFluidNode.cpp | 3 +- .../walberla_bridge/utils/boundary_utils.hpp | 25 ++++++++-------- src/walberla_bridge/src/BoundaryHandling.hpp | 25 
+++++++++------- .../src/electrokinetics/EKinWalberlaImpl.hpp | 21 ++++++++------ .../reactions/EKReactionImplIndexed.cpp | 6 ++-- .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 29 +++++++++---------- .../Dynamic_UBB_double_precision.h | 6 ++-- .../Dynamic_UBB_single_precision.h | 6 ++-- .../tests/LBWalberlaImpl_unit_tests.cpp | 2 +- 10 files changed, 67 insertions(+), 61 deletions(-) diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h index 5079dcc5a6..bdeaf57c06 100644 --- a/maintainer/walberla_kernels/templates/Boundary.tmpl.h +++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -197,8 +198,8 @@ class {{class_name}} auto * flagField = block->getData< FlagField_T > ( flagFieldID ); {{additional_data_handler.additional_field_data|indent(4)}} - if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) )) - return; + assert(flagField->flagExists(boundaryFlagUID) and + flagField->flagExists(domainFlagUID)); auto boundaryFlag = flagField->getFlag(boundaryFlagUID); auto domainFlag = flagField->getFlag(domainFlagUID); diff --git a/src/script_interface/walberla/LBFluidNode.cpp b/src/script_interface/walberla/LBFluidNode.cpp index 4cab29bb23..af9ff8b678 100644 --- a/src/script_interface/walberla/LBFluidNode.cpp +++ b/src/script_interface/walberla/LBFluidNode.cpp @@ -47,13 +47,12 @@ Variant LBFluidNode::do_call_method(std::string const &name, if (name == "set_velocity_at_boundary") { if (is_none(params.at("value"))) { m_lb_fluid->remove_node_from_boundary(m_index); - m_lb_fluid->ghost_communication(); } else { auto const u = get_value(params, "value") * m_conv_velocity; m_lb_fluid->set_node_velocity_at_boundary(m_index, u); - m_lb_fluid->ghost_communication(); } + m_lb_fluid->ghost_communication(); return {}; } if (name == "get_velocity_at_boundary") { diff --git 
a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp b/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp index f91a6c833a..8c1558ee72 100644 --- a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp +++ b/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp @@ -21,6 +21,8 @@ #include "walberla_utils.hpp" +#include + #include #include @@ -81,30 +83,29 @@ void set_boundary_from_grid(BoundaryModel &boundary, std::vector const &raster_flat, std::vector const &data_flat) { + auto const &conv = es2walberla; auto const grid_size = lattice.get_grid_dimensions(); auto const offset = lattice.get_local_grid_range().first; auto const gl = static_cast(lattice.get_ghost_layers()); assert(raster_flat.size() == Utils::product(grid_size)); auto const n_y = grid_size[1]; auto const n_z = grid_size[2]; - auto const off_i = offset[0]; - auto const off_j = offset[1]; - auto const off_k = offset[2]; - auto const &blocks = lattice.get_blocks(); - for (auto block = blocks->begin(); block != blocks->end(); ++block) { - auto const [size_i, size_j, size_k] = boundary.block_dims(*block); + for (auto const &block : *lattice.get_blocks()) { + auto const [size_i, size_j, size_k] = boundary.block_dims(block); // Get field data which knows about the indices - // In the loop, x,y,z are in block-local coordinates - for (int i = off_i - gl; i < size_i + off_i + gl; ++i) { - for (int j = off_j - gl; j < size_j + off_j + gl; ++j) { - for (int k = off_k - gl; k < size_k + off_k + gl; ++k) { - auto const node = Utils::Vector3i{{i, j, k}}; + // In the loop, i,j,k are in block-local coordinates + for (int i = -gl; i < size_i + gl; ++i) { + for (int j = -gl; j < size_j + gl; ++j) { + for (int k = -gl; k < size_k + gl; ++k) { + auto const node = offset + Utils::Vector3i{{i, j, k}}; auto const idx = (node + grid_size) % grid_size; auto const index = idx[0] * n_y * n_z + idx[1] * n_z + idx[2]; if (raster_flat[index]) { + auto const &value = 
data_flat[index]; auto const bc = get_block_and_cell(lattice, node, true); - boundary.set_node_value_at_boundary(node, data_flat[index], *bc); + assert(bc.has_value()); + boundary.set_node_value_at_boundary(node, conv(value), *bc); } } } diff --git a/src/walberla_bridge/src/BoundaryHandling.hpp b/src/walberla_bridge/src/BoundaryHandling.hpp index 4d004de9f8..86c2053888 100644 --- a/src/walberla_bridge/src/BoundaryHandling.hpp +++ b/src/walberla_bridge/src/BoundaryHandling.hpp @@ -23,7 +23,10 @@ #include #include +#include +#include #include +#include #include @@ -57,10 +60,9 @@ template class BoundaryHandling { return get_value(global); } - template - void set_node_boundary_value(Utils::Vector3i const &node, U const &val) { + void set_node_boundary_value(Utils::Vector3i const &node, T const &val) { auto const global = Cell(node[0], node[1], node[2]); - (*m_value_boundary)[global] = es2walberla(val); + (*m_value_boundary)[global] = val; } void unset_node_boundary_value(Utils::Vector3i const &node) { @@ -72,7 +74,7 @@ template class BoundaryHandling { [[nodiscard]] auto get_node_boundary_value(Utils::Vector3i const &node) const { auto const global = Cell(node[0], node[1], node[2]); - return walberla2es(get_value(global)); + return get_value(global); } bool node_is_boundary(Utils::Vector3i const &node) const { @@ -105,9 +107,8 @@ template class BoundaryHandling { BoundaryHandling(std::shared_ptr blocks, BlockDataID value_field_id, BlockDataID flag_field_id) - : m_blocks(std::move(blocks)), m_value_field_id(value_field_id), - m_flag_field_id(flag_field_id), m_callback(DynamicValueCallback()), - m_pending_changes(false) { + : m_blocks(std::move(blocks)), m_flag_field_id(flag_field_id), + m_callback(DynamicValueCallback()), m_pending_changes(false) { // reinitialize the flag field for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) { flag_reset_kernel(&*b); @@ -115,7 +116,7 @@ template class BoundaryHandling { // instantiate the boundary sweep std::function 
callback = m_callback; m_boundary = - std::make_shared(m_blocks, m_value_field_id, callback); + std::make_shared(m_blocks, value_field_id, callback); } void operator()(IBlock *block) { (*m_boundary)(block); } @@ -129,8 +130,7 @@ template class BoundaryHandling { return m_callback.get_node_boundary_value(node); } - template - void set_node_value_at_boundary(Utils::Vector3i const &node, U const &v, + void set_node_value_at_boundary(Utils::Vector3i const &node, T const &v, BlockAndCell const &bc) { auto [flag_field, boundary_flag] = get_flag_field_and_flag(bc.block); m_callback.set_node_boundary_value(node, v); @@ -138,6 +138,10 @@ template class BoundaryHandling { m_pending_changes = true; } + void unpack_node(Utils::Vector3i const &node, T const &v) { + m_callback.set_node_boundary_value(node, v); + } + void remove_node_from_boundary(Utils::Vector3i const &node, BlockAndCell const &bc) { auto [flag_field, boundary_flag] = get_flag_field_and_flag(bc.block); @@ -163,7 +167,6 @@ template class BoundaryHandling { private: std::shared_ptr m_blocks; - BlockDataID m_value_field_id; BlockDataID m_flag_field_id; DynamicValueCallback m_callback; std::shared_ptr m_boundary; diff --git a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp index b60bfa935a..9fc4019aa6 100644 --- a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp +++ b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp @@ -195,7 +195,7 @@ class EKinWalberlaImpl : public EKinWalberlaBase { m_density_field_id)); // Synchronize ghost layers - (*m_full_communication)(); + ghost_communication(); } // Global parameters @@ -453,7 +453,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase { if (!bc) return false; - m_boundary_flux->set_node_value_at_boundary(node, flux, *bc); + m_boundary_flux->set_node_value_at_boundary( + node, to_vector3(flux), *bc); return true; } @@ -465,7 +466,7 @@ class EKinWalberlaImpl : public 
EKinWalberlaBase { if (!bc or !m_boundary_flux->node_is_boundary(node)) return std::nullopt; - return {m_boundary_flux->get_node_value_at_boundary(node)}; + return {to_vector3d(m_boundary_flux->get_node_value_at_boundary(node))}; } bool remove_node_from_flux_boundary(Utils::Vector3i const &node) override { @@ -517,7 +518,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase { auto const bc = get_block_and_cell(lattice, node, false); auto const &opt = *it; if (opt) { - m_boundary_density->set_node_value_at_boundary(node, *opt, *bc); + m_boundary_density->set_node_value_at_boundary( + node, FloatType_c(*opt), *bc); } else { m_boundary_density->remove_node_from_boundary(node, *bc); } @@ -545,8 +547,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase { for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { auto const node = local_offset + Utils::Vector3i{{x, y, z}}; if (m_boundary_density->node_is_boundary(node)) { - out.emplace_back( - m_boundary_density->get_node_value_at_boundary(node)); + out.emplace_back(double_c( + m_boundary_density->get_node_value_at_boundary(node))); } else { out.emplace_back(std::nullopt); } @@ -575,7 +577,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase { auto const bc = get_block_and_cell(lattice, node, false); auto const &opt = *it; if (opt) { - m_boundary_flux->set_node_value_at_boundary(node, *opt, *bc); + m_boundary_flux->set_node_value_at_boundary( + node, to_vector3(*opt), *bc); } else { m_boundary_flux->remove_node_from_boundary(node, *bc); } @@ -603,8 +606,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase { for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { auto const node = local_offset + Utils::Vector3i{{x, y, z}}; if (m_boundary_flux->node_is_boundary(node)) { - out.emplace_back( - m_boundary_flux->get_node_value_at_boundary(node)); + out.emplace_back(to_vector3d( + m_boundary_flux->get_node_value_at_boundary(node))); } else { out.emplace_back(std::nullopt); } diff --git 
a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp index a02b084d83..c3f5643fd9 100644 --- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp +++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -68,9 +69,8 @@ void fillFromFlagField(IBlock *block, BlockDataID indexVectorID, auto *flagField = block->getData(flagFieldID); - if (!(flagField->flagExists(boundaryFlagUID) && - flagField->flagExists(domainFlagUID))) - return; + assert(flagField->flagExists(boundaryFlagUID) and + flagField->flagExists(domainFlagUID)); auto boundaryFlag = flagField->getFlag(boundaryFlagUID); auto domainFlag = flagField->getFlag(domainFlagUID); diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index f4a6e2560e..0e144abb47 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -973,18 +973,17 @@ class LBWalberlaImpl : public LBWalberlaBase { if (!bc or !m_boundary->node_is_boundary(node)) return std::nullopt; - return {m_boundary->get_node_value_at_boundary(node)}; + return {to_vector3d(m_boundary->get_node_value_at_boundary(node))}; } bool set_node_velocity_at_boundary(Utils::Vector3i const &node, Utils::Vector3d const &velocity) override { auto bc = get_block_and_cell(get_lattice(), node, true); - if (!bc) - return false; - - m_boundary->set_node_value_at_boundary(node, velocity, *bc); - - return true; + if (bc) { + m_boundary->set_node_value_at_boundary( + node, to_vector3(velocity), *bc); + } + return bc.has_value(); } std::vector> get_slice_velocity_at_boundary( @@ -1003,7 +1002,8 @@ class LBWalberlaImpl : public LBWalberlaBase { for (auto z = lower_cell.z(); z <= upper_cell.z(); 
++z) { auto const node = local_offset + Utils::Vector3i{{x, y, z}}; if (m_boundary->node_is_boundary(node)) { - out.emplace_back(m_boundary->get_node_value_at_boundary(node)); + out.emplace_back( + to_vector3d(m_boundary->get_node_value_at_boundary(node))); } else { out.emplace_back(std::nullopt); } @@ -1032,7 +1032,8 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const bc = get_block_and_cell(lattice, node, false); auto const &opt = *it; if (opt) { - m_boundary->set_node_value_at_boundary(node, *opt, *bc); + m_boundary->set_node_value_at_boundary( + node, to_vector3(*opt), *bc); } else { m_boundary->remove_node_from_boundary(node, *bc); } @@ -1054,12 +1055,10 @@ class LBWalberlaImpl : public LBWalberlaBase { bool remove_node_from_boundary(Utils::Vector3i const &node) override { auto bc = get_block_and_cell(get_lattice(), node, true); - if (!bc) - return false; - - m_boundary->remove_node_from_boundary(node, *bc); - - return true; + if (bc) { + m_boundary->remove_node_from_boundary(node, *bc); + } + return bc.has_value(); } std::optional diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h index 38e68af361..7c9a644ac8 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h @@ -31,6 +31,7 @@ #include "field/FlagField.h" #include "field/GhostLayerField.h" +#include #include #include #include @@ -138,9 +139,8 @@ class Dynamic_UBB_double_precision { auto *flagField = block->getData(flagFieldID); - if (!(flagField->flagExists(boundaryFlagUID) && - flagField->flagExists(domainFlagUID))) - return; + assert(flagField->flagExists(boundaryFlagUID) and + flagField->flagExists(domainFlagUID)); auto boundaryFlag = flagField->getFlag(boundaryFlagUID); auto domainFlag = 
flagField->getFlag(domainFlagUID); diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h index e4175e74f1..ab7f27e111 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h @@ -31,6 +31,7 @@ #include "field/FlagField.h" #include "field/GhostLayerField.h" +#include #include #include #include @@ -138,9 +139,8 @@ class Dynamic_UBB_single_precision { auto *flagField = block->getData(flagFieldID); - if (!(flagField->flagExists(boundaryFlagUID) && - flagField->flagExists(domainFlagUID))) - return; + assert(flagField->flagExists(boundaryFlagUID) and + flagField->flagExists(domainFlagUID)); auto boundaryFlag = flagField->getFlag(boundaryFlagUID); auto domainFlag = flagField->getFlag(domainFlagUID); diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp index 3f8f9a5a7e..516d9ab242 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp @@ -435,7 +435,7 @@ BOOST_DATA_TEST_CASE(forces_interpolation, bdata::make(all_lbs()), // todo: check a less symmetrical situation, where the force is applied not // in the middle between the nodes - for (Vector3i n : all_nodes_incl_ghosts(lb->get_lattice())) { + for (auto const &n : all_nodes_incl_ghosts(lb->get_lattice())) { if (lb->get_lattice().node_in_local_halo(n)) { auto const pos = 1. 
* n; // Mid point between nodes auto const f = Vector3d{{1., 2., -3.5}}; From d0c9154261f79964487d110a1e84e60e9d23c37b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 14 Feb 2024 19:32:29 +0100 Subject: [PATCH 5/7] Rewrite waLBerla MPI communication Split LB ghost communicator from LB boundaries communicator. Always communicate bounce-back velocities from the ghost layer. This fixes the regression introduced by 3fd170980bed30c430a9b0264e9504632b4b7326. --- samples/lb_circular_couette.py | 2 +- src/script_interface/walberla/LBFluid.cpp | 3 +- src/script_interface/walberla/LBFluidNode.cpp | 1 + .../walberla/LBFluidSlice.cpp | 6 +- src/walberla_bridge/src/BoundaryPackInfo.hpp | 147 ++++++++++++++++++ .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 63 +++++--- .../tests/LBWalberlaImpl_unit_tests.cpp | 1 + testsuite/python/CMakeLists.txt | 1 + testsuite/python/lb_boundary_ghost_layer.py | 103 ++++++++++++ testsuite/python/lb_circular_couette.py | 4 +- .../samples/test_lb_circular_couette.py | 7 +- 11 files changed, 306 insertions(+), 32 deletions(-) create mode 100644 src/walberla_bridge/src/BoundaryPackInfo.hpp create mode 100644 testsuite/python/lb_boundary_ghost_layer.py diff --git a/samples/lb_circular_couette.py b/samples/lb_circular_couette.py index 0d915237ec..81a621b2ce 100644 --- a/samples/lb_circular_couette.py +++ b/samples/lb_circular_couette.py @@ -57,7 +57,7 @@ cyl_center = agrid * (grid_size // 2 + 0.5) * [1, 1, 0] cylinder_in = espressomd.shapes.Cylinder( center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2], - radius=8.1 * agrid, direction=1) + radius=8.6 * agrid, direction=1) cylinder_out = espressomd.shapes.Cylinder( center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2], radius=14.5 * agrid, direction=-1) diff --git a/src/script_interface/walberla/LBFluid.cpp b/src/script_interface/walberla/LBFluid.cpp index 44c5bbcd69..b66f954ef4 100644 --- a/src/script_interface/walberla/LBFluid.cpp +++ 
b/src/script_interface/walberla/LBFluid.cpp @@ -108,7 +108,6 @@ Variant LBFluid::do_call_method(std::string const &name, } if (name == "clear_boundaries") { m_instance->clear_boundaries(); - m_instance->ghost_communication(); ::System::get_system().on_lb_boundary_conditions_change(); return {}; } @@ -269,8 +268,8 @@ void LBFluid::load_checkpoint(std::string const &filename, int mode) { }; auto const on_success = [&lb_obj]() { - lb_obj.reallocate_ubb_field(); lb_obj.ghost_communication(); + lb_obj.reallocate_ubb_field(); }; load_checkpoint_common(*context(), "LB", filename, mode, read_metadata, diff --git a/src/script_interface/walberla/LBFluidNode.cpp b/src/script_interface/walberla/LBFluidNode.cpp index af9ff8b678..f30e26a395 100644 --- a/src/script_interface/walberla/LBFluidNode.cpp +++ b/src/script_interface/walberla/LBFluidNode.cpp @@ -53,6 +53,7 @@ Variant LBFluidNode::do_call_method(std::string const &name, m_lb_fluid->set_node_velocity_at_boundary(m_index, u); } m_lb_fluid->ghost_communication(); + m_lb_fluid->reallocate_ubb_field(); return {}; } if (name == "get_velocity_at_boundary") { diff --git a/src/script_interface/walberla/LBFluidSlice.cpp b/src/script_interface/walberla/LBFluidSlice.cpp index 1aee5f49fd..cb6bc905b1 100644 --- a/src/script_interface/walberla/LBFluidSlice.cpp +++ b/src/script_interface/walberla/LBFluidSlice.cpp @@ -99,8 +99,10 @@ Variant LBFluidSlice::do_call_method(std::string const &name, 1. 
/ m_conv_velocity); } if (name == "set_velocity_at_boundary") { - return call(&LatticeModel::set_slice_velocity_at_boundary, {1}, - m_conv_velocity); + auto const retval = call(&LatticeModel::set_slice_velocity_at_boundary, {1}, + m_conv_velocity); + m_lb_fluid->reallocate_ubb_field(); + return retval; } if (name == "get_pressure_tensor") { return call(&LatticeModel::get_slice_pressure_tensor, {3, 3}, diff --git a/src/walberla_bridge/src/BoundaryPackInfo.hpp b/src/walberla_bridge/src/BoundaryPackInfo.hpp new file mode 100644 index 0000000000..3055b3ebe0 --- /dev/null +++ b/src/walberla_bridge/src/BoundaryPackInfo.hpp @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2024 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace walberla { +namespace field { +namespace communication { + +template +class BoundaryPackInfo : public PackInfo { +protected: + using PackInfo::bdId_; + +public: + using PackInfo::PackInfo; + using PackInfo::numberOfGhostLayersToCommunicate; + + ~BoundaryPackInfo() override = default; + + void setup_boundary_handle(std::shared_ptr lattice, + std::shared_ptr boundary) { + m_lattice = std::move(lattice); + m_boundary = std::move(boundary); + } + + bool constantDataExchange() const override { return false; } + bool threadsafeReceiving() const override { return true; } + + void communicateLocal(IBlock const *sender, IBlock *receiver, + stencil::Direction dir) override { + mpi::SendBuffer sBuffer; + packDataImpl(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) override { + + auto *flag_field = receiver->getData(bdId_); + WALBERLA_ASSERT_NOT_NULLPTR(flag_field); + WALBERLA_ASSERT_NOT_NULLPTR(m_boundary); + WALBERLA_ASSERT_NOT_NULLPTR(m_lattice); + + auto const boundary_flag = flag_field->getFlag(Boundary_flag); + auto const gl = numberOfGhostLayersToCommunicate(flag_field); + auto const begin = [gl, dir](auto const *flag_field) { + return flag_field->beginGhostLayerOnly(gl, dir); + }; + +#ifndef NDEBUG + uint_t xSize, ySize, zSize, bSize; + buffer >> xSize >> ySize >> zSize >> bSize; + uint_t buf_size{0u}; + for (auto it = begin(flag_field); it != flag_field->end(); ++it) { + if (isFlagSet(it, boundary_flag)) { + ++buf_size; + } + } + WALBERLA_ASSERT_EQUAL(xSize, flag_field->xSize()); + WALBERLA_ASSERT_EQUAL(ySize, flag_field->ySize()); + WALBERLA_ASSERT_EQUAL(zSize, flag_field->zSize()); + WALBERLA_ASSERT_EQUAL(bSize, buf_size); +#endif + + auto const offset = 
std::get<0>(m_lattice->get_local_grid_range()); + typename Boundary_T::value_type value; + for (auto it = begin(flag_field); it != flag_field->end(); ++it) { + if (isFlagSet(it, boundary_flag)) { + auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}}; + buffer >> value; + m_boundary->unpack_node(node, value); + } + } + } + +protected: + void packDataImpl(IBlock const *sender, stencil::Direction dir, + mpi::SendBuffer &buffer) const override { + + auto const *flag_field = sender->getData(bdId_); + WALBERLA_ASSERT_NOT_NULLPTR(flag_field); + WALBERLA_ASSERT_NOT_NULLPTR(m_boundary); + WALBERLA_ASSERT_NOT_NULLPTR(m_lattice); + + auto const boundary_flag = flag_field->getFlag(Boundary_flag); + auto const gl = numberOfGhostLayersToCommunicate(flag_field); + auto const begin = [gl, dir](auto const *flag_field) { + return flag_field->beginSliceBeforeGhostLayer(dir, gl); + }; + +#ifndef NDEBUG + uint_t buf_size{0u}; + for (auto it = begin(flag_field); it != flag_field->end(); ++it) { + if (isFlagSet(it, boundary_flag)) { + ++buf_size; + } + } + buffer << flag_field->xSize() << flag_field->ySize() << flag_field->zSize() + << buf_size; +#endif + + auto const offset = std::get<0>(m_lattice->get_local_grid_range()); + for (auto it = begin(flag_field); it != flag_field->end(); ++it) { + if (isFlagSet(it, boundary_flag)) { + auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}}; + buffer << m_boundary->get_node_value_at_boundary(node); + } + } + } + +private: + std::shared_ptr m_lattice; + std::shared_ptr m_boundary; +}; + +} // namespace communication +} // namespace field +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 0e144abb47..ecf7e57a64 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -45,6 +45,7 @@ #include #include 
"../BoundaryHandling.hpp" +#include "../BoundaryPackInfo.hpp" #include "InterpolateAndShiftAtBoundary.hpp" #include "ResetForce.hpp" #include "lb_kernels.hpp" @@ -235,8 +236,9 @@ class LBWalberlaImpl : public LBWalberlaBase { typename FieldTrait::template PackInfo; // communicators - std::shared_ptr m_full_communication; - std::shared_ptr m_pdf_streaming_communication; + std::shared_ptr m_boundary_communicator; + std::shared_ptr m_pdf_full_communicator; + std::shared_ptr m_pdf_streaming_communicator; // ResetForce sweep + external force handling std::shared_ptr> m_reset_force; @@ -350,27 +352,33 @@ class LBWalberlaImpl : public LBWalberlaBase { reset_boundary_handling(); // Set up the communication and register fields - m_pdf_streaming_communication = + m_pdf_streaming_communicator = std::make_shared(blocks); - m_pdf_streaming_communication->addPackInfo( + m_pdf_streaming_communicator->addPackInfo( std::make_shared>(m_pdf_field_id, n_ghost_layers)); - m_pdf_streaming_communication->addPackInfo( + m_pdf_streaming_communicator->addPackInfo( std::make_shared>(m_last_applied_force_field_id, n_ghost_layers)); - m_pdf_streaming_communication->addPackInfo( - std::make_shared>( - m_flag_field_id, n_ghost_layers)); - m_full_communication = std::make_shared(blocks); - m_full_communication->addPackInfo( + m_pdf_full_communicator = std::make_shared(blocks); + m_pdf_full_communicator->addPackInfo( std::make_shared>(m_pdf_field_id, n_ghost_layers)); - m_full_communication->addPackInfo(std::make_shared>( - m_last_applied_force_field_id, n_ghost_layers)); - m_full_communication->addPackInfo(std::make_shared>( - m_velocity_field_id, n_ghost_layers)); - m_full_communication->addPackInfo( + m_pdf_full_communicator->addPackInfo( + std::make_shared>(m_last_applied_force_field_id, + n_ghost_layers)); + m_pdf_full_communicator->addPackInfo( + std::make_shared>(m_velocity_field_id, + n_ghost_layers)); + + m_boundary_communicator = std::make_shared(blocks); + 
m_boundary_communicator->addPackInfo( std::make_shared>( m_flag_field_id, n_ghost_layers)); + auto boundary_packinfo = std::make_shared< + field::communication::BoundaryPackInfo>( + m_flag_field_id, n_ghost_layers); + boundary_packinfo->setup_boundary_handle(m_lattice, m_boundary); + m_boundary_communicator->addPackInfo(boundary_packinfo); // Instantiate the sweep responsible for force double buffering and // external forces @@ -439,13 +447,13 @@ class LBWalberlaImpl : public LBWalberlaBase { integrate_reset_force(blocks); // LB collide integrate_collide(blocks); - m_pdf_streaming_communication->communicate(); + m_pdf_streaming_communicator->communicate(); // Handle boundaries integrate_boundaries(blocks); // LB stream integrate_stream(blocks); // Refresh ghost layers - m_full_communication->communicate(); + ghost_communication_pdfs(); } void integrate_pull_scheme() { @@ -458,7 +466,7 @@ class LBWalberlaImpl : public LBWalberlaBase { // LB collide integrate_collide(blocks); // Refresh ghost layers - ghost_communication(); + ghost_communication_pdfs(); } protected: @@ -474,7 +482,6 @@ class LBWalberlaImpl : public LBWalberlaBase { public: void integrate() override { - reallocate_ubb_field(); if (has_lees_edwards_bc()) { integrate_pull_scheme(); } else { @@ -485,7 +492,16 @@ class LBWalberlaImpl : public LBWalberlaBase { } void ghost_communication() override { - m_full_communication->communicate(); + ghost_communication_boundary(); + ghost_communication_pdfs(); + } + + void ghost_communication_boundary() { + m_boundary_communicator->communicate(); + } + + void ghost_communication_pdfs() { + m_pdf_full_communicator->communicate(); if (has_lees_edwards_bc()) { auto const &blocks = get_lattice().get_blocks(); apply_lees_edwards_pdf_interpolation(blocks); @@ -1097,7 +1113,10 @@ class LBWalberlaImpl : public LBWalberlaBase { void reallocate_ubb_field() override { m_boundary->boundary_update(); } - void clear_boundaries() override { reset_boundary_handling(); } + void 
clear_boundaries() override { + reset_boundary_handling(); + ghost_communication(); + } void update_boundary_from_shape(std::vector const &raster_flat, @@ -1105,6 +1124,8 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const grid_size = get_lattice().get_grid_dimensions(); auto const data = fill_3D_vector_array(data_flat, grid_size); set_boundary_from_grid(*m_boundary, get_lattice(), raster_flat, data); + ghost_communication(); + reallocate_ubb_field(); } // Pressure tensor diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp index 516d9ab242..94e90de862 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp @@ -202,6 +202,7 @@ BOOST_DATA_TEST_CASE(update_boundary_from_shape, bdata::make(all_lbs()), std::vector vel_flat(vel_3d.data(), vel_3d.data() + vel_3d.num_elements()); lb->update_boundary_from_shape(raster_flat, vel_flat); + lb->ghost_communication(); } for (auto const &node : nodes) { diff --git a/testsuite/python/CMakeLists.txt b/testsuite/python/CMakeLists.txt index ee457e27fc..0389f71ad7 100644 --- a/testsuite/python/CMakeLists.txt +++ b/testsuite/python/CMakeLists.txt @@ -334,6 +334,7 @@ python_test(FILE thole.py MAX_NUM_PROC 4) python_test(FILE lb_slice.py MAX_NUM_PROC 2) python_test(FILE lb_boundary_velocity.py MAX_NUM_PROC 1) # python_test(FILE lb_boundary_volume_force.py MAX_NUM_PROC 2) # TODO +python_test(FILE lb_boundary_ghost_layer.py MAX_NUM_PROC 2) python_test(FILE lb_circular_couette.py MAX_NUM_PROC 2 GPU_SLOTS 1) python_test(FILE lb_poiseuille.py MAX_NUM_PROC 4 GPU_SLOTS 1) python_test(FILE lb_poiseuille_cylinder.py MAX_NUM_PROC 2 GPU_SLOTS 1) diff --git a/testsuite/python/lb_boundary_ghost_layer.py b/testsuite/python/lb_boundary_ghost_layer.py new file mode 100644 index 0000000000..0c226d4e03 --- /dev/null +++ b/testsuite/python/lb_boundary_ghost_layer.py @@ -0,0 +1,103 @@ +# +# Copyright 
(C) 2024 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import unittest as ut +import unittest_decorators as utx +import numpy as np +import scipy.optimize + +import espressomd.lb +import espressomd.shapes + +AGRID = 0.5 +KINEMATIC_VISC = 2.7 +DENS = 1.7 +TIME_STEP = 0.07 +LB_PARAMS = {"agrid": AGRID, "tau": TIME_STEP, "density": DENS, + "kinematic_viscosity": KINEMATIC_VISC} + + +class TestCommon: + + system = espressomd.System(box_l=[16.0, 1.0, 1.0]) + system.time_step = TIME_STEP + system.cell_system.skin = 0.4 * AGRID + n_nodes = system.cell_system.get_state()["n_nodes"] + + def setUp(self): + self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params) + self.system.lb = self.lbf + self.ubb = espressomd.lb.VelocityBounceBack([0., 0., 1e-5]) + + def tearDown(self): + self.system.lb = None + + def get_profile(self): + xdata = np.arange(1, self.lbf.shape[0]) + ydata = [] + for x in xdata: + ydata.append(np.mean(self.lbf[x, :, :].velocity[:, :, 2])) + return xdata, np.array(ydata) + + def check_profile(self): + def quadratic(x, a, b, c): + return a * x**2 + b * x + c + + self.system.integrator.run(40) + xdata, ydata = self.get_profile() + popt_ref = (4e-8, -1e-6, 1e-5) + popt, _ = scipy.optimize.curve_fit( + quadratic, xdata, ydata, p0=popt_ref) + rtol = 0.3 if self.lbf.single_precision else 0.1 + np.testing.assert_allclose(popt, popt_ref, rtol=0.5, 
atol=0.) + np.testing.assert_allclose(ydata, quadratic(xdata, *popt), + rtol=rtol, atol=0.) + + def test_node_setter(self): + for i in (0, 1): + for j in (0, 1): + self.lbf[0, i, j].boundary = self.ubb + self.check_profile() + + def test_slice_setter(self): + self.lbf[0, :, :].boundary = self.ubb + self.check_profile() + + def test_shape_setter(self): + shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=AGRID) + self.lbf.add_boundary_from_shape(shape, velocity=self.ubb.velocity) + self.check_profile() + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks") +class LBPoiseuilleWalberlaSinglePrecision(TestCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": True} + + +@utx.skipIfMissingFeatures(["WALBERLA"]) +@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks") +class LBPoiseuilleWalberlaDoublePrecision(TestCommon, ut.TestCase): + lb_class = espressomd.lb.LBFluidWalberla + lb_params = {"single_precision": False} + + +if __name__ == "__main__": + ut.main() diff --git a/testsuite/python/lb_circular_couette.py b/testsuite/python/lb_circular_couette.py index 71f5836671..e27e52d6cf 100644 --- a/testsuite/python/lb_circular_couette.py +++ b/testsuite/python/lb_circular_couette.py @@ -143,8 +143,8 @@ def test_taylor_couette_flow(self): a_ref, b_ref = taylor_couette(slip_vel, 0.0, cyl1.radius, cyl2.radius) v_phi_ref = a_ref * r + b_ref / r v_phi_drift = np.mean(v_phi) - np.mean(v_phi_ref) - np.testing.assert_allclose(v_phi_drift, 0., atol=1.2e-4) - np.testing.assert_allclose(v_phi - v_phi_drift, v_phi_ref, atol=1e-4) + np.testing.assert_allclose(v_phi_drift, 0., atol=4e-4) + np.testing.assert_allclose(v_phi - v_phi_drift, v_phi_ref, atol=4e-4) @utx.skipIfMissingFeatures(["WALBERLA"]) diff --git a/testsuite/scripts/samples/test_lb_circular_couette.py b/testsuite/scripts/samples/test_lb_circular_couette.py index c91ca03242..5111ee9032 100644 --- 
a/testsuite/scripts/samples/test_lb_circular_couette.py +++ b/testsuite/scripts/samples/test_lb_circular_couette.py @@ -52,19 +52,18 @@ def test_taylor_couette_flow(self): np.testing.assert_allclose(v_phi[:7], 0., atol=1e-7) # check azimuthal velocity in the linear regime - self.assertGreater(v_phi[7], v_phi[6]) self.assertGreater(v_phi[8], v_phi[7]) self.assertGreater(v_phi[9], v_phi[8]) # check azimuthal velocity in the Couette regime - xdata = sample.profile_r[9:] - ydata = v_phi[9:] + xdata = sample.profile_r[9:-1] + ydata = v_phi[9:-1] a_ref, b_ref = taylor_couette( sample.velocity_magnitude, 0.0, sample.cylinder_in.radius, sample.cylinder_out.radius, sample.agrid) (a_sim, b_sim), _ = scipy.optimize.curve_fit( lambda x, a, b: a * x + b / x, xdata, ydata) - np.testing.assert_allclose([a_sim, b_sim], [a_ref, b_ref], atol=1e-3) + np.testing.assert_allclose([a_sim, b_sim], [a_ref, b_ref], rtol=0.05) if __name__ == "__main__": From 220aa61c155c42bbcf37eb88df5501ea4304a5ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 21 Feb 2024 20:05:17 +0100 Subject: [PATCH 6/7] Make waLBerla dependency private --- src/core/lb/particle_coupling.cpp | 1 + src/core/unit_tests/ek_interface_test.cpp | 4 +- src/script_interface/walberla/EKFFT.hpp | 4 +- src/script_interface/walberla/EKNone.hpp | 3 +- src/script_interface/walberla/EKReaction.hpp | 26 +-- src/script_interface/walberla/EKSpecies.cpp | 3 +- src/walberla_bridge/CMakeLists.txt | 9 +- .../electrokinetics/ek_poisson_fft_init.hpp | 7 +- .../electrokinetics/ek_poisson_none_init.hpp | 7 +- .../electrokinetics/ek_walberla_init.hpp | 16 ++ .../reactions/EKReactionBase.hpp | 15 +- .../reactions/EKReactionBaseIndexed.hpp} | 32 ++- src/walberla_bridge/src/BoundaryHandling.hpp | 16 +- src/walberla_bridge/src/BoundaryPackInfo.hpp | 5 + src/walberla_bridge/src/LatticeWalberla.cpp | 3 +- .../src/electrokinetics/EKinWalberlaImpl.hpp | 13 +- .../electrokinetics/ek_poisson_fft_init.cpp | 4 + 
.../electrokinetics/ek_poisson_none_init.cpp | 4 + .../src/electrokinetics/ek_walberla_init.cpp | 27 ++- .../electrokinetics/reactions/CMakeLists.txt | 3 - .../reactions/EKReactionImplBulk.hpp | 23 +- .../reactions/EKReactionImplIndexed.cpp | 211 ------------------ .../reactions/EKReactionImplIndexed.hpp | 169 ++++++++++++-- .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 23 +- .../src/lattice_boltzmann/ResetForce.hpp | 6 +- .../utils/boundary.hpp} | 2 +- .../utils/types_conversion.hpp} | 0 .../tests/lb_kernels_unit_tests.cpp | 3 +- 28 files changed, 312 insertions(+), 327 deletions(-) rename src/walberla_bridge/{src/electrokinetics/reactions/EKReactionImplBulk.cpp => include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp} (51%) delete mode 100644 src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp rename src/walberla_bridge/{include/walberla_bridge/utils/boundary_utils.hpp => src/utils/boundary.hpp} (99%) rename src/walberla_bridge/{include/walberla_bridge/utils/walberla_utils.hpp => src/utils/types_conversion.hpp} (100%) diff --git a/src/core/lb/particle_coupling.cpp b/src/core/lb/particle_coupling.cpp index cf10874730..48b1afa9f1 100644 --- a/src/core/lb/particle_coupling.cpp +++ b/src/core/lb/particle_coupling.cpp @@ -86,6 +86,7 @@ Utils::Vector3d lb_drag_force(LB::Solver const &lb, double lb_gamma, /** * @brief Check if a position is within the local box + halo. 
* + * @param local_box Local geometry * @param pos Position to check * @param halo Halo * diff --git a/src/core/unit_tests/ek_interface_test.cpp b/src/core/unit_tests/ek_interface_test.cpp index b2f5a9659f..bff055e845 100644 --- a/src/core/unit_tests/ek_interface_test.cpp +++ b/src/core/unit_tests/ek_interface_test.cpp @@ -85,7 +85,7 @@ static auto make_ek_actor() { ek_lattice = std::make_shared( params.grid_dimensions, ::communicator.node_grid, n_ghost_layers); ek_container = std::make_shared( - params.tau, new_ek_poisson_none(ek_lattice, single_precision)); + params.tau, walberla::new_ek_poisson_none(ek_lattice, single_precision)); ek_reactions = std::make_shared(); ek_instance = std::make_shared(ek_container, ek_reactions); #endif @@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(ek_interface_walberla) { auto constexpr single_precision = true; auto constexpr stoich = 1.; auto constexpr order = 2.; - auto ek_species = new_ek_walberla( + auto ek_species = walberla::new_ek_walberla( espresso::ek_lattice, params.diffusion, params.kT, params.valency, params.ext_efield, params.density, false, false, single_precision); auto ek_reactant = std::make_shared(ek_species, stoich, order); diff --git a/src/script_interface/walberla/EKFFT.hpp b/src/script_interface/walberla/EKFFT.hpp index c02fc1bae6..6ff34dcbb6 100644 --- a/src/script_interface/walberla/EKFFT.hpp +++ b/src/script_interface/walberla/EKFFT.hpp @@ -55,8 +55,8 @@ class EKFFT : public EKPoissonSolver { auto const permittivity = get_value(args, "permittivity") * m_conv_permittivity; - m_instance = new_ek_poisson_fft(m_lattice->lattice(), permittivity, - m_single_precision); + m_instance = ::walberla::new_ek_poisson_fft( + m_lattice->lattice(), permittivity, m_single_precision); add_parameters({ {"permittivity", diff --git a/src/script_interface/walberla/EKNone.hpp b/src/script_interface/walberla/EKNone.hpp index 5aa1eb4e70..3c1c38cdc4 100644 --- a/src/script_interface/walberla/EKNone.hpp +++ 
b/src/script_interface/walberla/EKNone.hpp @@ -45,7 +45,8 @@ class EKNone : public EKPoissonSolver { m_single_precision = get_value_or(args, "single_precision", false); m_lattice = get_value>(args, "lattice"); - m_instance = new_ek_poisson_none(m_lattice->lattice(), m_single_precision); + m_instance = ::walberla::new_ek_poisson_none(m_lattice->lattice(), + m_single_precision); add_parameters({ {"single_precision", AutoParameter::read_only, diff --git a/src/script_interface/walberla/EKReaction.hpp b/src/script_interface/walberla/EKReaction.hpp index 3adc5f8847..75e2d536c1 100644 --- a/src/script_interface/walberla/EKReaction.hpp +++ b/src/script_interface/walberla/EKReaction.hpp @@ -27,9 +27,9 @@ #include "LatticeIndices.hpp" #include "LatticeWalberla.hpp" +#include #include -#include -#include +#include #include #include @@ -80,22 +80,21 @@ class EKReaction : public AutoParameters { return tau / std::pow(Utils::int_pow<3>(agrid), sum_alphas - 1.); } - template - std::shared_ptr make_instance(VariantMap const &args) const { + template + auto make_instance(VariantMap const &args, F &allocator) const { auto lattice = get_value>(args, "lattice"); - auto reactant = get_value>(args, "reactants"); - auto output = - std::vector>(reactant.size()); + auto reactants = get_value>(args, "reactants"); + auto output = ::walberla::EKReactionBase::reactants_type(reactants.size()); auto get_instance = [](Variant const &v) { return get_value>(v)->get_instance(); }; - std::transform(reactant.begin(), reactant.end(), output.begin(), + std::transform(reactants.begin(), reactants.end(), output.begin(), get_instance); auto const coefficient = get_value(args, "coefficient") * get_conversion_coefficient(); - return std::make_shared(lattice->lattice(), output, coefficient); + return allocator(lattice->lattice(), output, coefficient); } std::shared_ptr<::walberla::EKReactionBase> m_ekreaction; @@ -118,7 +117,7 @@ class EKBulkReaction : public EKReaction { void do_construct(VariantMap const 
&args) override { m_conv_coefficient = calculate_bulk_conversion_factor(args); - m_ekreaction = make_instance<::walberla::EKReactionImplBulk>(args); + m_ekreaction = make_instance(args, ::walberla::new_ek_reaction_bulk); } }; @@ -143,10 +142,9 @@ class EKIndexedReaction : public EKReaction { void do_construct(VariantMap const &args) override { auto const agrid = get_agrid(args); m_conv_coefficient = calculate_bulk_conversion_factor(args) / agrid; - m_ekreaction = make_instance<::walberla::EKReactionImplIndexed>(args); m_ekreaction_impl = - std::dynamic_pointer_cast<::walberla::EKReactionImplIndexed>( - get_instance()); + make_instance(args, ::walberla::new_ek_reaction_indexed); + m_ekreaction = m_ekreaction_impl; } [[nodiscard]] Variant do_call_method(std::string const &method, @@ -170,7 +168,7 @@ class EKIndexedReaction : public EKReaction { } private: - std::shared_ptr<::walberla::EKReactionImplIndexed> m_ekreaction_impl; + std::shared_ptr<::walberla::EKReactionBaseIndexed> m_ekreaction_impl; }; } // namespace ScriptInterface::walberla diff --git a/src/script_interface/walberla/EKSpecies.cpp b/src/script_interface/walberla/EKSpecies.cpp index 9f908ad15b..c38cb1c5dc 100644 --- a/src/script_interface/walberla/EKSpecies.cpp +++ b/src/script_interface/walberla/EKSpecies.cpp @@ -24,7 +24,6 @@ #include "EKWalberlaNodeState.hpp" #include "WalberlaCheckpoint.hpp" -#include #include #include @@ -119,7 +118,7 @@ void EKSpecies::do_construct(VariantMap const &args) { auto const ek_ext_efield = ext_efield * m_conv_ext_efield; auto const ek_density = m_density = density * m_conv_density; auto const ek_kT = kT * m_conv_energy; - m_instance = new_ek_walberla( + m_instance = ::walberla::new_ek_walberla( m_lattice->lattice(), ek_diffusion, ek_kT, get_value(args, "valency"), ek_ext_efield, ek_density, get_value(args, "advection"), diff --git a/src/walberla_bridge/CMakeLists.txt b/src/walberla_bridge/CMakeLists.txt index f7912560ff..b5595397f3 100644 --- 
a/src/walberla_bridge/CMakeLists.txt +++ b/src/walberla_bridge/CMakeLists.txt @@ -42,8 +42,8 @@ if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA) PRIVATE ${WALBERLA_LIBS}) target_include_directories(espresso_walberla_cuda PUBLIC include) target_include_directories( - espresso_walberla_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src) + espresso_walberla_cuda PRIVATE ${WALBERLA_INCLUDE_DIRS} + ${walberla_BINARY_DIR}/src) install(TARGETS espresso_walberla_cuda LIBRARY DESTINATION ${ESPRESSO_INSTALL_PYTHON}/espressomd) target_link_libraries(espresso_walberla PUBLIC espresso::walberla_cuda) @@ -52,9 +52,8 @@ endif() target_link_libraries( espresso_walberla PUBLIC MPI::MPI_CXX espresso::utils PRIVATE espresso::cpp_flags espresso::walberla::cpp_flags ${WALBERLA_LIBS}) -target_include_directories( - espresso_walberla PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} - PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src) +target_include_directories(espresso_walberla PRIVATE ${WALBERLA_INCLUDE_DIRS} + ${walberla_BINARY_DIR}/src) add_subdirectory(src) diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp index f0a7a2db61..10337c34df 100644 --- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp +++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp @@ -20,11 +20,14 @@ #pragma once #include - -#include "PoissonSolver/PoissonSolver.hpp" +#include #include +namespace walberla { + std::shared_ptr new_ek_poisson_fft(std::shared_ptr const &lattice, double permittivity, bool single_precision); + +} // namespace walberla diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp index af7d318989..aa9890d2aa 
100644 --- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp +++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp @@ -20,11 +20,14 @@ #pragma once #include - -#include "PoissonSolver/PoissonSolver.hpp" +#include #include +namespace walberla { + std::shared_ptr new_ek_poisson_none(std::shared_ptr const &lattice, bool single_precision); + +} // namespace walberla diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp index fb700df86c..4de3b85435 100644 --- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp +++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp @@ -22,13 +22,29 @@ #include "EKinWalberlaBase.hpp" #include +#include +#include #include #include +namespace walberla { + std::shared_ptr new_ek_walberla(std::shared_ptr const &lattice, double diffusion, double kT, double valency, Utils::Vector3d ext_efield, double density, bool advection, bool friction_coupling, bool single_precision); + +std::shared_ptr +new_ek_reaction_bulk(std::shared_ptr const &lattice, + typename EKReactionBase::reactants_type const &reactants, + double coefficient); + +std::shared_ptr new_ek_reaction_indexed( + std::shared_ptr const &lattice, + typename EKReactionBase::reactants_type const &reactants, + double coefficient); + +} // namespace walberla diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp index 049bd3226d..392c515b1a 100644 --- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp +++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp @@ -29,18 +29,19 @@ namespace walberla { class EKReactionBase { 
-private: - std::vector> m_reactants; - double m_coefficient; +public: + using reactants_type = std::vector>; +private: std::shared_ptr m_lattice; + reactants_type m_reactants; + double m_coefficient; public: EKReactionBase(std::shared_ptr lattice, - std::vector> reactants, - double coefficient) - : m_reactants(std::move(reactants)), m_coefficient(coefficient), - m_lattice(std::move(lattice)) {} + reactants_type reactants, double coefficient) + : m_lattice(std::move(lattice)), m_reactants(std::move(reactants)), + m_coefficient(coefficient) {} virtual ~EKReactionBase() = default; diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp similarity index 51% rename from src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp rename to src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp index 800a84af57..90d17bedb1 100644 --- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp +++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 The ESPResSo project + * Copyright (C) 2024 The ESPResSo project * * This file is part of ESPResSo. * @@ -17,26 +17,24 @@ * along with this program. If not, see . */ -#include "EKReactionImplBulk.hpp" +#pragma once -#include "generated_kernels/ReactionKernelBulk_all.h" +#include "EKReactionBase.hpp" -#include +#include -namespace walberla { +#include -void EKReactionImplBulk::perform_reaction() { - // TODO: if my understanding is correct: - // the kernels need to either run in the ghost layers and do the - // synchronization before or not run and do a synchronization afterwards. - // The better solution is probably the latter one. Not sure why it fails - // atm. 
+namespace walberla { - auto kernel = detail::ReactionKernelBulkSelector::get_kernel( - get_reactants(), get_coefficient()); +class EKReactionBaseIndexed : public EKReactionBase { +public: + using EKReactionBase::EKReactionBase; + ~EKReactionBaseIndexed() override = default; + virtual void set_node_is_boundary(Utils::Vector3i const &node, + bool is_boundary) = 0; + virtual std::optional + get_node_is_boundary(Utils::Vector3i const &node) = 0; +}; - for (auto &block : *get_lattice()->get_blocks()) { - kernel(&block); - } -} } // namespace walberla diff --git a/src/walberla_bridge/src/BoundaryHandling.hpp b/src/walberla_bridge/src/BoundaryHandling.hpp index 86c2053888..b5d59cbe87 100644 --- a/src/walberla_bridge/src/BoundaryHandling.hpp +++ b/src/walberla_bridge/src/BoundaryHandling.hpp @@ -20,7 +20,8 @@ #pragma once #include -#include + +#include "utils/types_conversion.hpp" #include #include @@ -38,12 +39,13 @@ namespace walberla { -/// Flag for domain cells, i.e. all cells -FlagUID const Domain_flag("domain"); -/// Flag for boundary cells -FlagUID const Boundary_flag("boundary"); - template class BoundaryHandling { +private: + /** Flag for domain cells, i.e. all cells. */ + FlagUID const Domain_flag{"domain"}; + /** Flag for boundary cells. */ + FlagUID const Boundary_flag{"boundary"}; + /** Container for the map between cells and values. */ class DynamicValueCallback { public: @@ -172,7 +174,7 @@ template class BoundaryHandling { std::shared_ptr m_boundary; bool m_pending_changes; - /** Register flags and set all cells to @ref Domain_flag. */ + /** Register flags and reset all cells. 
*/ void flag_reset_kernel(IBlock *const block) { auto flag_field = block->template getData(m_flag_field_id); // register flags diff --git a/src/walberla_bridge/src/BoundaryPackInfo.hpp b/src/walberla_bridge/src/BoundaryPackInfo.hpp index 3055b3ebe0..baeeb7c385 100644 --- a/src/walberla_bridge/src/BoundaryPackInfo.hpp +++ b/src/walberla_bridge/src/BoundaryPackInfo.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -38,6 +39,10 @@ template class BoundaryPackInfo : public PackInfo { protected: using PackInfo::bdId_; + /** Flag for domain cells, i.e. all cells. */ + FlagUID const Domain_flag{"domain"}; + /** Flag for boundary cells. */ + FlagUID const Boundary_flag{"boundary"}; public: using PackInfo::PackInfo; diff --git a/src/walberla_bridge/src/LatticeWalberla.cpp b/src/walberla_bridge/src/LatticeWalberla.cpp index 0559fab379..2dc2943a40 100644 --- a/src/walberla_bridge/src/LatticeWalberla.cpp +++ b/src/walberla_bridge/src/LatticeWalberla.cpp @@ -19,7 +19,8 @@ #include #include -#include + +#include "utils/types_conversion.hpp" #include #include diff --git a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp index 9fc4019aa6..14a7ff9924 100644 --- a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp +++ b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp @@ -22,21 +22,21 @@ #include #include #include +#include #include #include #include #include -#include -#include +#include #include "../BoundaryHandling.hpp" +#include "../utils/boundary.hpp" +#include "../utils/types_conversion.hpp" #include "ek_kernels.hpp" #include #include #include -#include -#include #include @@ -109,6 +109,11 @@ class EKinWalberlaImpl : public EKinWalberlaBase { BlockDataID m_flag_field_density_id; BlockDataID m_flag_field_flux_id; + /** Flag for domain cells, i.e. all cells. */ + FlagUID const Domain_flag{"domain"}; + /** Flag for boundary cells. 
*/ + FlagUID const Boundary_flag{"boundary"}; + /** Block forest */ std::shared_ptr m_lattice; diff --git a/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp index 2598c6616e..b67da67b79 100644 --- a/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp +++ b/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp @@ -23,6 +23,8 @@ #include +namespace walberla { + std::shared_ptr new_ek_poisson_fft(std::shared_ptr const &lattice, double permittivity, bool single_precision) { @@ -31,3 +33,5 @@ new_ek_poisson_fft(std::shared_ptr const &lattice, } return std::make_shared>(lattice, permittivity); } + +} // namespace walberla diff --git a/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp index f912cd4911..2636be3ebd 100644 --- a/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp +++ b/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp @@ -23,6 +23,8 @@ #include +namespace walberla { + std::shared_ptr new_ek_poisson_none(std::shared_ptr const &lattice, bool single_precision) { @@ -31,3 +33,5 @@ new_ek_poisson_none(std::shared_ptr const &lattice, } return std::make_shared>(lattice); } + +} // namespace walberla diff --git a/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp index a666e8f9ac..03fc1d0fae 100644 --- a/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp +++ b/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp @@ -18,26 +18,49 @@ */ #include "EKinWalberlaImpl.hpp" +#include "reactions/EKReactionImplBulk.hpp" +#include "reactions/EKReactionImplIndexed.hpp" #include #include +#include +#include #include #include +namespace walberla { + std::shared_ptr new_ek_walberla(std::shared_ptr const &lattice, double diffusion, double kT, double valency, Utils::Vector3d 
ext_efield, double density, bool advection, bool friction_coupling, bool single_precision) { if (single_precision) { - return std::make_shared>( + return std::make_shared>( lattice, diffusion, kT, valency, ext_efield, density, advection, friction_coupling); } - return std::make_shared>( + return std::make_shared>( lattice, diffusion, kT, valency, ext_efield, density, advection, friction_coupling); } + +std::shared_ptr +new_ek_reaction_bulk(std::shared_ptr const &lattice, + typename EKReactionBase::reactants_type const &reactants, + double coefficient) { + return std::make_shared(lattice, reactants, coefficient); +} + +std::shared_ptr new_ek_reaction_indexed( + std::shared_ptr const &lattice, + typename EKReactionBase::reactants_type const &reactants, + double coefficient) { + return std::make_shared(lattice, reactants, + coefficient); +} + +} // namespace walberla diff --git a/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt b/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt index 6559b5c9ff..4f5e805245 100644 --- a/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt +++ b/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt @@ -18,6 +18,3 @@ # add_subdirectory(generated_kernels) - -target_sources(espresso_walberla PRIVATE EKReactionImplBulk.cpp - EKReactionImplIndexed.cpp) diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp index 33f7e21770..09f38d319b 100644 --- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp +++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp @@ -19,28 +19,37 @@ #pragma once +#include "generated_kernels/ReactionKernelBulk_all.h" + #include #include #include -#include -#include +#include namespace walberla { class EKReactionImplBulk : public EKReactionBase { public: - EKReactionImplBulk(const std::shared_ptr &lattice, - const 
std::vector> &reactants, - double coefficient) - : EKReactionBase(lattice, reactants, coefficient) {} ~EKReactionImplBulk() override = default; + using EKReactionBase::EKReactionBase; using EKReactionBase::get_coefficient; using EKReactionBase::get_lattice; using EKReactionBase::get_reactants; - void perform_reaction() override; + void perform_reaction() override { + // TODO: if my understanding is correct: + // the kernels need to either run in the ghost layers and do the + // synchronization before or not run and do a synchronization afterwards. + // The better solution is probably the latter one. Not sure why it fails + // atm. + auto kernel = detail::ReactionKernelBulkSelector::get_kernel( + get_reactants(), get_coefficient()); + for (auto &block : *get_lattice()->get_blocks()) { + kernel(&block); + } + } }; } // namespace walberla diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp deleted file mode 100644 index c3f5643fd9..0000000000 --- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2022-2023 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . 
- */ - -#include "EKReactionImplIndexed.hpp" - -#include "generated_kernels/ReactionKernelIndexed_all.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace walberla { - -/// Flag for domain cells, i.e. all cells -FlagUID const Domain_flag("domain"); -/// Flag for boundary cells -FlagUID const Boundary_flag("boundary"); - -namespace detail { -// FlagField to use -using FlagField = FlagField; - -template -inline auto -get_flag_field_and_flag(IBlock *block, - domain_decomposition::BlockDataID const &flagfield_id) { - auto const flag_field = - block->template uncheckedFastGetData(flagfield_id); - auto const boundary_flag = flag_field->getFlag(Boundary_flag); - return std::make_tuple(flag_field, boundary_flag); -} - -template -void fillFromFlagField(IBlock *block, BlockDataID indexVectorID, - ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, - FlagUID domainFlagUID) { - auto *indexVectors = block->uncheckedFastGetData(indexVectorID); - auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL); - auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER); - auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); - - auto *flagField = block->getData(flagFieldID); - - assert(flagField->flagExists(boundaryFlagUID) and - flagField->flagExists(domainFlagUID)); - - auto boundaryFlag = flagField->getFlag(boundaryFlagUID); - auto domainFlag = flagField->getFlag(domainFlagUID); - - auto inner = flagField->xyzSize(); - inner.expand(cell_idx_t(-1)); - - indexVectorAll.clear(); - indexVectorInner.clear(); - indexVectorOuter.clear(); - - auto flagWithGLayers = flagField->xyzSizeWithGhostLayer(); - for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end(); - ++it) { - - if (!isFlagSet(it, boundaryFlag)) - continue; - if (flagWithGLayers.contains(it.x(), it.y(), it.z()) && - isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) { - - 
auto element = IndexInfo(it.x(), it.y(), it.z()); - - indexVectorAll.push_back(element); - if (inner.contains(it.x(), it.y(), it.z())) - indexVectorInner.push_back(element); - else - indexVectorOuter.push_back(element); - } - } - - indexVectors->syncGPU(); -} - -template -void fillFromFlagField(const std::shared_ptr &blocks, - BlockDataID indexVectorID, ConstBlockDataID flagFieldID, - FlagUID boundaryFlagUID, FlagUID domainFlagUID) { - for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) - fillFromFlagField( - blockIt.get(), indexVectorID, flagFieldID, boundaryFlagUID, - domainFlagUID); -} -} // namespace detail - -EKReactionImplIndexed::EKReactionImplIndexed( - std::shared_ptr lattice, - std::vector> reactants, double coefficient) - : EKReactionBase(lattice, reactants, coefficient), - m_pending_changes(false) { - m_flagfield_id = - static_cast(field::addFlagFieldToStorage( - get_lattice()->get_blocks(), "flag field reaction", - get_lattice()->get_ghost_layers())); - - // take one IndexVector as a dummy-value - using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>:: - ReactionKernelIndexed::IndexVectors; - - auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) { - return new IndexVectors(); - }; - m_indexvector_id = static_cast( - get_lattice() - ->get_blocks() - ->template addStructuredBlockData(createIdxVector, - "IndexField")); - - for (auto &block : *get_lattice()->get_blocks()) { - auto flag_field = - block.template getData(BlockDataID(m_flagfield_id)); - // register flags - flag_field->registerFlag(Domain_flag); - flag_field->registerFlag(Boundary_flag); - // mark all cells as domain cells and fluid cells - auto domain_flag = flag_field->getFlag(Domain_flag); - auto boundary_flag = flag_field->getFlag(Boundary_flag); - for (auto it = flag_field->begin(); it != flag_field->end(); ++it) { - flag_field->addFlag(it.x(), it.y(), it.z(), domain_flag); - flag_field->removeFlag(it.x(), it.y(), it.z(), 
boundary_flag); - } - } -} - -void EKReactionImplIndexed::perform_reaction() { - boundary_update(); - - auto kernel = detail::ReactionKernelIndexedSelector::get_kernel( - get_reactants(), get_coefficient(), BlockDataID(get_indexvector_id())); - - for (auto &block : *get_lattice()->get_blocks()) { - kernel(&block); - } -} - -void EKReactionImplIndexed::set_node_is_boundary(Utils::Vector3i const &node, - bool is_boundary) { - auto bc = get_block_and_cell(*get_lattice(), node, true); - if (!bc) - return; - - auto [flag_field, boundary_flag] = - detail::get_flag_field_and_flag( - bc->block, BlockDataID(get_flagfield_id())); - if (is_boundary) { - flag_field->addFlag(bc->cell, boundary_flag); - } else { - flag_field->removeFlag(bc->cell, boundary_flag); - } - m_pending_changes = true; -} - -std::optional -EKReactionImplIndexed::get_node_is_boundary(Utils::Vector3i const &node) { - auto bc = get_block_and_cell(*get_lattice(), node, true); - if (!bc) - return std::nullopt; - - auto [flag_field, boundary_flag] = - detail::get_flag_field_and_flag( - bc->block, BlockDataID(get_flagfield_id())); - return {flag_field->isFlagSet(bc->cell, boundary_flag)}; -} - -void EKReactionImplIndexed::boundary_update() { - // take one IndexVector/IndexInfo as a dummy-value - using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>:: - ReactionKernelIndexed::IndexVectors; - using IndexInfo = detail::ReactionKernelIndexedSelector::KernelTrait<>:: - ReactionKernelIndexed::IndexInfo; - - if (m_pending_changes) { - detail::fillFromFlagField( - get_lattice()->get_blocks(), BlockDataID(get_indexvector_id()), - BlockDataID(get_flagfield_id()), Boundary_flag, Domain_flag); - m_pending_changes = false; - } -} -} // namespace walberla diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp index 65686644f7..48125caf36 100644 --- 
a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp +++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp @@ -19,50 +19,179 @@ #pragma once +#include "generated_kernels/ReactionKernelIndexed_all.h" + +#include #include #include -#include +#include + +#include +#include +#include +#include +#include +#include #include +#include #include #include #include +#include #include namespace walberla { -class EKReactionImplIndexed : public EKReactionBase { +class EKReactionImplIndexed : public EKReactionBaseIndexed { private: - std::size_t m_flagfield_id; - std::size_t m_indexvector_id; - + BlockDataID m_flagfield_id; + BlockDataID m_indexvector_id; bool m_pending_changes; public: - EKReactionImplIndexed(std::shared_ptr lattice, - std::vector> reactants, - double coefficient); + /** Flag for domain cells, i.e. all cells. */ + FlagUID const Domain_flag{"domain"}; + /** Flag for boundary cells. */ + FlagUID const Boundary_flag{"boundary"}; + + using FlagField = field::FlagField; + using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>:: + ReactionKernelIndexed::IndexVectors; + using IndexInfo = detail::ReactionKernelIndexedSelector::KernelTrait<>:: + ReactionKernelIndexed::IndexInfo; + +private: + auto get_flag_field_and_flag(IBlock *block, BlockDataID const &flagfield_id) { + auto const flag_field = + block->template uncheckedFastGetData(flagfield_id); + auto const boundary_flag = flag_field->getFlag(Boundary_flag); + return std::make_tuple(flag_field, boundary_flag); + } + +public: + EKReactionImplIndexed(std::shared_ptr const &lattice, + reactants_type const &reactants, double coefficient) + : EKReactionBaseIndexed(lattice, reactants, coefficient), + m_pending_changes(false) { + m_flagfield_id = field::addFlagFieldToStorage( + get_lattice()->get_blocks(), "flag field reaction", + get_lattice()->get_ghost_layers()); + + auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) { + return 
new IndexVectors(); + }; + m_indexvector_id = get_lattice() + ->get_blocks() + ->template addStructuredBlockData( + createIdxVector, "IndexField"); + + for (auto &block : *get_lattice()->get_blocks()) { + auto flag_field = block.template getData(m_flagfield_id); + // register flags + flag_field->registerFlag(Domain_flag); + flag_field->registerFlag(Boundary_flag); + // mark all cells as domain cells and fluid cells + auto domain_flag = flag_field->getFlag(Domain_flag); + auto boundary_flag = flag_field->getFlag(Boundary_flag); + for (auto it = flag_field->begin(); it != flag_field->end(); ++it) { + flag_field->addFlag(it.x(), it.y(), it.z(), domain_flag); + flag_field->removeFlag(it.x(), it.y(), it.z(), boundary_flag); + } + } + } ~EKReactionImplIndexed() override = default; - using EKReactionBase::get_coefficient; - using EKReactionBase::get_lattice; - using EKReactionBase::get_reactants; + using EKReactionBaseIndexed::get_coefficient; + using EKReactionBaseIndexed::get_lattice; + using EKReactionBaseIndexed::get_reactants; - void perform_reaction() override; + void perform_reaction() override { + boundary_update(); + auto kernel = detail::ReactionKernelIndexedSelector::get_kernel( + get_reactants(), get_coefficient(), m_indexvector_id); + for (auto &block : *get_lattice()->get_blocks()) { + kernel(&block); + } + } - void set_node_is_boundary(Utils::Vector3i const &node, bool is_boundary); - [[nodiscard]] std::optional - get_node_is_boundary(Utils::Vector3i const &node); + void set_node_is_boundary(Utils::Vector3i const &node, + bool is_boundary) override { + if (auto bc = get_block_and_cell(*get_lattice(), node, true)) { + auto const [flag_field, boundary_flag] = + get_flag_field_and_flag(bc->block, m_flagfield_id); + if (is_boundary) { + flag_field->addFlag(bc->cell, boundary_flag); + } else { + flag_field->removeFlag(bc->cell, boundary_flag); + } + m_pending_changes = true; + } + } - [[nodiscard]] auto get_indexvector_id() const noexcept { - return 
m_indexvector_id; + [[nodiscard]] std::optional + get_node_is_boundary(Utils::Vector3i const &node) override { + if (auto bc = get_block_and_cell(*get_lattice(), node, true)) { + auto const [flag_field, boundary_flag] = + get_flag_field_and_flag(bc->block, m_flagfield_id); + return {flag_field->isFlagSet(bc->cell, boundary_flag)}; + } + return std::nullopt; } - [[nodiscard]] auto get_flagfield_id() const noexcept { - return m_flagfield_id; + + void boundary_update() { + if (m_pending_changes) { + for (auto &block : *get_lattice()->get_blocks()) { + fillFromFlagField(block); + } + m_pending_changes = false; + } } - void boundary_update(); +private: + void fillFromFlagField(IBlock &block) { + auto *indexVectors = + block.uncheckedFastGetData(m_indexvector_id); + auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL); + auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER); + auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); + + auto *flagField = block.getData(m_flagfield_id); + + assert(flagField->flagExists(Boundary_flag) and + flagField->flagExists(Domain_flag)); + + auto boundaryFlag = flagField->getFlag(Boundary_flag); + auto domainFlag = flagField->getFlag(Domain_flag); + + auto inner = flagField->xyzSize(); + inner.expand(cell_idx_t(-1)); + + indexVectorAll.clear(); + indexVectorInner.clear(); + indexVectorOuter.clear(); + + auto flagWithGLayers = flagField->xyzSizeWithGhostLayer(); + for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end(); + ++it) { + if (!isFlagSet(it, boundaryFlag)) + continue; + + if (flagWithGLayers.contains(it.x(), it.y(), it.z()) && + isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) { + auto element = IndexInfo(it.x(), it.y(), it.z()); + indexVectorAll.push_back(element); + if (inner.contains(it.x(), it.y(), it.z())) { + indexVectorInner.push_back(element); + } else { + indexVectorOuter.push_back(element); + } + } + } + + indexVectors->syncGPU(); + } }; } // 
namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index ecf7e57a64..5270c26aa4 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -29,23 +29,20 @@ #include #include #include +#include #include +#include #include +#include #include #include - -#include -#include -#include -#include -#include -#include - #include #include #include "../BoundaryHandling.hpp" #include "../BoundaryPackInfo.hpp" +#include "../utils/boundary.hpp" +#include "../utils/types_conversion.hpp" #include "InterpolateAndShiftAtBoundary.hpp" #include "ResetForce.hpp" #include "lb_kernels.hpp" @@ -55,8 +52,6 @@ #include #include #include -#include -#include #include #include @@ -71,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -106,8 +102,8 @@ class LBWalberlaImpl : public LBWalberlaBase { protected: template struct FieldTrait { - using PdfField = GhostLayerField; - using VectorField = GhostLayerField; + using PdfField = field::GhostLayerField; + using VectorField = field::GhostLayerField; template using PackInfo = field::communication::PackInfo; }; @@ -217,6 +213,9 @@ class LBWalberlaImpl : public LBWalberlaBase { BlockDataID m_velocity_field_id; BlockDataID m_vec_tmp_field_id; + /** Flag for boundary cells. */ + FlagUID const Boundary_flag{"boundary"}; + /** * @brief Full communicator. 
* We use the D3Q27 directions to update cells along the diagonals during diff --git a/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp b/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp index 1d0a154e5b..ce7d19295a 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp @@ -22,11 +22,11 @@ #include "generated_kernels/FieldAccessorsDoublePrecision.h" #include "generated_kernels/FieldAccessorsSinglePrecision.h" -#include +#include "../utils/types_conversion.hpp" #include -#include -#include +#include +#include #include diff --git a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp b/src/walberla_bridge/src/utils/boundary.hpp similarity index 99% rename from src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp rename to src/walberla_bridge/src/utils/boundary.hpp index 8c1558ee72..7d3f3cdb07 100644 --- a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp +++ b/src/walberla_bridge/src/utils/boundary.hpp @@ -19,7 +19,7 @@ #pragma once -#include "walberla_utils.hpp" +#include "types_conversion.hpp" #include diff --git a/src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp b/src/walberla_bridge/src/utils/types_conversion.hpp similarity index 100% rename from src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp rename to src/walberla_bridge/src/utils/types_conversion.hpp diff --git a/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp b/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp index 2666843c37..3045e9974c 100644 --- a/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp +++ b/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp @@ -29,8 +29,7 @@ #include "../src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h" #include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h" #include 
"../src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h" - -#include +#include "../src/utils/types_conversion.hpp" #include From f4590fd59824efb68c48eacdcb0ae43ba56c94bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 21 Feb 2024 20:16:55 +0100 Subject: [PATCH 7/7] Restrict Boost range Boost 1.84 is not supported. Drop the macOS CI job: Homebrew only provides a recipe for boost-mpi version 1.84. --- .github/workflows/push_pull.yml | 2 +- CMakeLists.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/push_pull.yml b/.github/workflows/push_pull.yml index 1a2eb4e39f..94d9bf24d9 100644 --- a/.github/workflows/push_pull.yml +++ b/.github/workflows/push_pull.yml @@ -10,7 +10,7 @@ permissions: jobs: macos: runs-on: macos-12 - if: ${{ github.repository == 'espressomd/espresso' }} + if: false steps: - name: Checkout uses: actions/checkout@main diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c40cb72a7..5c643b7d4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -435,6 +435,9 @@ if(ESPRESSO_BUILD_TESTS) endif() find_package(Boost 1.74.0 REQUIRED ${BOOST_COMPONENTS}) +if(${Boost_VERSION} VERSION_GREATER_EQUAL 1.84.0) + message(FATAL_ERROR "Boost version ${Boost_VERSION} is unsupported.") +endif() # # Paths