From 8bdf0163ce54ba44fc617900c4ebfc9c40cffc30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Thu, 1 Feb 2024 14:31:01 +0100
Subject: [PATCH 1/7] Fix compiler errors

---
 .../walberla_kernels/templates/Boundary.tmpl.h  |  6 +++---
 src/core/electrostatics/p3m.cpp                 |  2 ++
 .../electrostatics/CoulombScafacos.hpp          |  1 +
 .../reactions/EKReactionImplIndexed.cpp         |  2 +-
 .../Dynamic_UBB_double_precision.h              | 17 +++++++++--------
 .../Dynamic_UBB_single_precision.h              | 17 +++++++++--------
 6 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
index 6bda8f86e0..5079dcc5a6 100644
--- a/maintainer/walberla_kernels/templates/Boundary.tmpl.h
+++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
@@ -41,7 +41,7 @@
 #include <core/debug/Debug.h>
 
 #include <functional>
-#include <set>
+#include <memory>
 #include <vector>
 
 {% for header in interface_spec.headers %}
@@ -122,7 +122,7 @@ class {{class_name}}
         {%- endif %}
     };
 
-    {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks,
+    {{class_name}}( const std::shared_ptr<StructuredBlockForest> & blocks,
                    {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}})
         :{{additional_data_handler.initialiser_list}} {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }}
     {
@@ -177,7 +177,7 @@ class {{class_name}}
     }
 
     template<typename FlagField_T>
-    void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
+    void fillFromFlagField( const std::shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID,
                             FlagUID boundaryFlagUID, FlagUID domainFlagUID)
     {
         for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt )
diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp
index 7bb20f4a27..d8ec633abd 100644
--- a/src/core/electrostatics/p3m.cpp
+++ b/src/core/electrostatics/p3m.cpp
@@ -599,9 +599,11 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag,
       }
     }
     energy *= prefactor;
+#ifdef NPT
     if (npt_flag) {
       npt_add_virial_contribution(energy);
     }
+#endif
     if (not energy_flag) {
       energy = 0.;
     }
diff --git a/src/script_interface/electrostatics/CoulombScafacos.hpp b/src/script_interface/electrostatics/CoulombScafacos.hpp
index 558264315b..827942a38b 100644
--- a/src/script_interface/electrostatics/CoulombScafacos.hpp
+++ b/src/script_interface/electrostatics/CoulombScafacos.hpp
@@ -31,6 +31,7 @@
 #include "script_interface/get_value.hpp"
 #include "script_interface/scafacos/scafacos.hpp"
 
+#include <iomanip>
 #include <regex>
 #include <set>
 #include <sstream>
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
index 90c14b9069..a02b084d83 100644
--- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
@@ -105,7 +105,7 @@ void fillFromFlagField(IBlock *block, BlockDataID indexVectorID,
 }
 
 template <typename FlagField, typename IndexVectors, typename IndexInfo>
-void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                        BlockDataID indexVectorID, ConstBlockDataID flagFieldID,
                        FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
   for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
index f2a93a9f94..38e68af361 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
@@ -31,7 +31,8 @@
 #include "field/FlagField.h"
 #include "field/GhostLayerField.h"
 
-#include <set>
+#include <functional>
+#include <memory>
 #include <vector>
 
 #ifdef __GNUC__
@@ -85,10 +86,10 @@ class Dynamic_UBB_double_precision {
   };
 
   Dynamic_UBB_double_precision(
-      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
-      std::function<Vector3<double>(const Cell &,
-                                    const shared_ptr<StructuredBlockForest> &,
-                                    IBlock &)> &velocityCallback)
+      const std::shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
+      std::function<Vector3<double>(
+          const Cell &, const std::shared_ptr<StructuredBlockForest> &,
+          IBlock &)> &velocityCallback)
       : elementInitaliser(velocityCallback), pdfsID(pdfsID_) {
     auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
       return new IndexVectors();
@@ -118,7 +119,7 @@ class Dynamic_UBB_double_precision {
   }
 
   template <typename FlagField_T>
-  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                          ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
                          FlagUID domainFlagUID) {
     for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
@@ -127,7 +128,7 @@ class Dynamic_UBB_double_precision {
   }
 
   template <typename FlagField_T>
-  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                          IBlock *block, ConstBlockDataID flagFieldID,
                          FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
     auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
@@ -558,7 +559,7 @@ class Dynamic_UBB_double_precision {
 
   BlockDataID indexVectorID;
   std::function<Vector3<double>(
-      const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
+      const Cell &, const std::shared_ptr<StructuredBlockForest> &, IBlock &)>
       elementInitaliser;
 
 public:
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
index 847d63b9ff..e4175e74f1 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
@@ -31,7 +31,8 @@
 #include "field/FlagField.h"
 #include "field/GhostLayerField.h"
 
-#include <set>
+#include <functional>
+#include <memory>
 #include <vector>
 
 #ifdef __GNUC__
@@ -85,10 +86,10 @@ class Dynamic_UBB_single_precision {
   };
 
   Dynamic_UBB_single_precision(
-      const shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
-      std::function<Vector3<float>(const Cell &,
-                                   const shared_ptr<StructuredBlockForest> &,
-                                   IBlock &)> &velocityCallback)
+      const std::shared_ptr<StructuredBlockForest> &blocks, BlockDataID pdfsID_,
+      std::function<Vector3<float>(
+          const Cell &, const std::shared_ptr<StructuredBlockForest> &,
+          IBlock &)> &velocityCallback)
       : elementInitaliser(velocityCallback), pdfsID(pdfsID_) {
     auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
       return new IndexVectors();
@@ -118,7 +119,7 @@ class Dynamic_UBB_single_precision {
   }
 
   template <typename FlagField_T>
-  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                          ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
                          FlagUID domainFlagUID) {
     for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
@@ -127,7 +128,7 @@ class Dynamic_UBB_single_precision {
   }
 
   template <typename FlagField_T>
-  void fillFromFlagField(const shared_ptr<StructuredBlockForest> &blocks,
+  void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
                          IBlock *block, ConstBlockDataID flagFieldID,
                          FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
     auto *indexVectors = block->getData<IndexVectors>(indexVectorID);
@@ -558,7 +559,7 @@ class Dynamic_UBB_single_precision {
 
   BlockDataID indexVectorID;
   std::function<Vector3<float>(
-      const Cell &, const shared_ptr<StructuredBlockForest> &, IBlock &)>
+      const Cell &, const std::shared_ptr<StructuredBlockForest> &, IBlock &)>
       elementInitaliser;
 
 public:

From 855474f1a7c3f18f637a8f37123b43dc7e204520 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Mon, 5 Feb 2024 16:01:05 +0100
Subject: [PATCH 2/7] Remove unused code

---
 .../walberla_kernels/generate_ek_kernels.py   |    6 -
 src/core/ek/ek_reactions.cpp                  |   43 -
 src/python/espressomd/lb.py                   |   22 -
 .../DensityPackInfo_double_precision.cpp      | 1484 -----------------
 .../DensityPackInfo_double_precision.h        |   67 -
 .../DensityPackInfo_single_precision.cpp      | 1484 -----------------
 .../DensityPackInfo_single_precision.h        |   67 -
 7 files changed, 3173 deletions(-)
 delete mode 100644 src/core/ek/ek_reactions.cpp
 delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp
 delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h
 delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp
 delete mode 100644 src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h

diff --git a/maintainer/walberla_kernels/generate_ek_kernels.py b/maintainer/walberla_kernels/generate_ek_kernels.py
index fbf9dc747f..e54f19023f 100644
--- a/maintainer/walberla_kernels/generate_ek_kernels.py
+++ b/maintainer/walberla_kernels/generate_ek_kernels.py
@@ -186,12 +186,6 @@ def replace_getData_with_uncheckedFastGetData(filename: str) -> None:
         index_shape=density_field.index_shape,
         target=target)
 
-    pystencils_walberla.generate_pack_info_from_kernel(
-        ctx,
-        f"DensityPackInfo_{precision_suffix}",
-        ek_electrostatic.continuity(),
-        target=target)
-
     # ek reactions
     for i in range(1, max_num_reactants + 1):
         assignments = list(reaction_obj.generate_reaction(num_reactants=i))
diff --git a/src/core/ek/ek_reactions.cpp b/src/core/ek/ek_reactions.cpp
deleted file mode 100644
index c6689e3ed5..0000000000
--- a/src/core/ek/ek_reactions.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2022 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "config/config.hpp"
-
-#ifdef WALBERLA
-
-#include "ek/ek_reactions.hpp"
-
-#include <algorithm>
-
-namespace EK {
-
-EKReactions<walberla::EKReactionBase> ek_reactions;
-
-void perform_reactions() {
-  if (ek_reactions.empty()) {
-    return;
-  }
-
-  std::for_each(ek_reactions.begin(), ek_reactions.end(),
-                [](auto const &reaction) { reaction->perform_reaction(); });
-}
-
-} // namespace EK
-
-#endif // WALBERLA
diff --git a/src/python/espressomd/lb.py b/src/python/espressomd/lb.py
index 24afb8bb37..b805a0307f 100644
--- a/src/python/espressomd/lb.py
+++ b/src/python/espressomd/lb.py
@@ -53,20 +53,6 @@ def __getitem__(self, key):
     def __str__(self):
         return f"{self.__class__.__name__}({self.get_params()})"
 
-    def _activate(self):
-        self._activate_method()
-
-    def _deactivate(self):
-        self._deactivate_method()
-
-    def _activate_method(self):
-        self.call_method("activate")
-        utils.handle_errors("HydrodynamicInteraction activation failed")
-
-    def _deactivate_method(self):
-        self.call_method("deactivate")
-        utils.handle_errors("HydrodynamicInteraction deactivation failed")
-
     def validate_params(self, params):
         pass
 
@@ -342,13 +328,6 @@ class LBFluidNodeWalberla(ScriptInterfaceHelper):
     def required_keys(self):
         return {"parent_sip", "index"}
 
-    def __init__(self, *args, **kwargs):
-        if "sip" not in kwargs:
-            super().__init__(*args, **kwargs)
-            utils.handle_errors("LBFluidNode instantiation failed")
-        else:
-            super().__init__(**kwargs)
-
     def __reduce__(self):
         raise NotImplementedError("Cannot serialize LB fluid node objects")
 
@@ -494,7 +473,6 @@ def __init__(self, *args, **kwargs):
                 slice_range, grid_size)
             node = LBFluidNodeWalberla(index=np.array([0, 0, 0]), **kwargs)
             super().__init__(*args, node_sip=node, **kwargs, **extra_kwargs)
-            utils.handle_errors("LBFluidSliceWalberla instantiation failed")
 
     def __iter__(self):
         lower, upper = self.call_method("get_slice_ranges")
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp
deleted file mode 100644
index 39b73e77a9..0000000000
--- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.cpp
+++ /dev/null
@@ -1,1484 +0,0 @@
-
-// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
-
-#include "DensityPackInfo_double_precision.h"
-#include "core/DataTypes.h"
-#include "core/cell/CellInterval.h"
-#include "stencil/Directions.h"
-
-#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wfloat-equal"
-#pragma GCC diagnostic ignored "-Wshadow"
-#pragma GCC diagnostic ignored "-Wconversion"
-#pragma GCC diagnostic ignored "-Wunused-variable"
-#endif
-
-namespace walberla {
-namespace pystencils {
-
-using walberla::cell::CellInterval;
-using walberla::stencil::Direction;
-
-namespace internal_pack_BSW {
-static FUNC_PREFIX void pack_BSW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BSW
-
-namespace internal_pack_SW {
-static FUNC_PREFIX void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_SW
-
-namespace internal_pack_TSW {
-static FUNC_PREFIX void pack_TSW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TSW
-
-namespace internal_pack_BW {
-static FUNC_PREFIX void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BW
-
-namespace internal_pack_W {
-static FUNC_PREFIX void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
-    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
-      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3] = _data_j_20_35_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4] = _data_j_20_30_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5] = _data_j_20_36_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7] = _data_j_20_34_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_W
-
-namespace internal_pack_TW {
-static FUNC_PREFIX void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TW
-
-namespace internal_pack_BNW {
-static FUNC_PREFIX void pack_BNW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BNW
-
-namespace internal_pack_NW {
-static FUNC_PREFIX void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_NW
-
-namespace internal_pack_TNW {
-static FUNC_PREFIX void pack_TNW(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TNW
-
-namespace internal_pack_BS {
-static FUNC_PREFIX void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_37_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BS
-
-namespace internal_pack_S {
-static FUNC_PREFIX void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
-    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
-      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4] = _data_j_20_31_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5] = _data_j_20_38_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_S
-
-namespace internal_pack_TS {
-static FUNC_PREFIX void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_38_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TS
-
-namespace internal_pack_B {
-static FUNC_PREFIX void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4] = _data_j_20_32_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_B
-
-namespace internal_pack_T {
-static FUNC_PREFIX void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3] = _data_j_20_38_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_T
-
-namespace internal_pack_BN {
-static FUNC_PREFIX void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BN
-
-namespace internal_pack_N {
-static FUNC_PREFIX void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_N
-
-namespace internal_pack_TN {
-static FUNC_PREFIX void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TN
-
-namespace internal_unpack_BSW {
-static FUNC_PREFIX void unpack_BSW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BSW
-
-namespace internal_unpack_SW {
-static FUNC_PREFIX void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_SW
-
-namespace internal_unpack_TSW {
-static FUNC_PREFIX void unpack_TSW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TSW
-
-namespace internal_unpack_BW {
-static FUNC_PREFIX void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BW
-
-namespace internal_unpack_W {
-static FUNC_PREFIX void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    double *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
-    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      double *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
-      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0];
-        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1];
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2];
-        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3];
-        _data_j_20_30_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4];
-        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5];
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6];
-        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_W
-
-namespace internal_unpack_TW {
-static FUNC_PREFIX void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TW
-
-namespace internal_unpack_BNW {
-static FUNC_PREFIX void unpack_BNW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BNW
-
-namespace internal_unpack_NW {
-static FUNC_PREFIX void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_NW
-
-namespace internal_unpack_TNW {
-static FUNC_PREFIX void unpack_TNW(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TNW
-
-namespace internal_unpack_BS {
-static FUNC_PREFIX void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
-        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BS
-
-namespace internal_unpack_S {
-static FUNC_PREFIX void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    double *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
-    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      double *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
-      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0];
-        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1];
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2];
-        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3];
-        _data_j_20_31_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4];
-        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_S
-
-namespace internal_unpack_TS {
-static FUNC_PREFIX void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
-        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TS
-
-namespace internal_unpack_B {
-static FUNC_PREFIX void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    double *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    double *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      double *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      double *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0];
-        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1];
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2];
-        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3];
-        _data_j_20_32_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_B
-
-namespace internal_unpack_T {
-static FUNC_PREFIX void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    double *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    double *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      double *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      double *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0];
-        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2];
-        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_T
-
-namespace internal_unpack_BN {
-static FUNC_PREFIX void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BN
-
-namespace internal_unpack_N {
-static FUNC_PREFIX void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    double *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      double *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_N
-
-namespace internal_unpack_TN {
-static FUNC_PREFIX void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    double *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      double *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TN
-
-void DensityPackInfo_double_precision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
-  double *buffer = reinterpret_cast<double *>(byte_buffer);
-
-  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
-
-  CellInterval ci;
-  j->getSliceBeforeGhostLayer(dir, ci, 1, false);
-
-  switch (dir) {
-  case stencil::BSW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BSW::pack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::SW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_SW::pack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TSW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TSW::pack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BW::pack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::W: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_W::pack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TW::pack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BNW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BNW::pack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::NW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_NW::pack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TNW: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TNW::pack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BS: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BS::pack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::S: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_S::pack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TS: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TS::pack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::B: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_B::pack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::T: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_T::pack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BN: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BN::pack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::N: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_N::pack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TN: {
-    double *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TN::pack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  default:
-    WALBERLA_ASSERT(false);
-  }
-}
-
-void DensityPackInfo_double_precision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
-  double *buffer = reinterpret_cast<double *>(byte_buffer);
-
-  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
-
-  CellInterval ci;
-  j->getGhostRegion(dir, ci, 1, false);
-  auto communciationDirection = stencil::inverseDir[dir];
-
-  switch (communciationDirection) {
-  case stencil::BSW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BSW::unpack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::SW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_SW::unpack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TSW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TSW::unpack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BW::unpack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::W: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_W::unpack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TW::unpack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BNW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BNW::unpack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::NW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_NW::unpack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TNW: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TNW::unpack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BS: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BS::unpack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::S: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_S::unpack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TS: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TS::unpack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::B: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_B::unpack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::T: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_T::unpack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BN: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BN::unpack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::N: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_N::unpack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TN: {
-    double *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    double *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TN::unpack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  default:
-    WALBERLA_ASSERT(false);
-  }
-}
-
-uint_t DensityPackInfo_double_precision::size(stencil::Direction dir, const IBlock *block) const {
-  auto j = block->getData<field::GhostLayerField<double, 13>>(jID);
-
-  CellInterval ci;
-  j->getGhostRegion(dir, ci, 1, false);
-
-  uint_t elementsPerCell = 0;
-
-  switch (dir) {
-  case stencil::BSW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::SW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::TSW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::BW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::W:
-    elementsPerCell = 9;
-    break;
-
-  case stencil::TW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::BNW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::NW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::TNW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::BS:
-    elementsPerCell = 2;
-    break;
-
-  case stencil::S:
-    elementsPerCell = 6;
-    break;
-
-  case stencil::TS:
-    elementsPerCell = 2;
-    break;
-
-  case stencil::B:
-    elementsPerCell = 5;
-    break;
-
-  case stencil::T:
-    elementsPerCell = 4;
-    break;
-
-  case stencil::BN:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::N:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::TN:
-    elementsPerCell = 1;
-    break;
-
-  default:
-    elementsPerCell = 0;
-  }
-  return ci.numCells() * elementsPerCell * sizeof(double);
-}
-
-} // namespace pystencils
-} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h
deleted file mode 100644
index d5cb19678b..0000000000
--- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_double_precision.h
+++ /dev/null
@@ -1,67 +0,0 @@
-
-// kernel generated with pystencils v1.2, lbmpy v1.2,
-// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
-// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
-
-#pragma once
-#include "communication/UniformPackInfo.h"
-#include "core/DataTypes.h"
-#include "core/cell/CellInterval.h"
-#include "domain_decomposition/IBlock.h"
-#include "field/GhostLayerField.h"
-#include "stencil/Directions.h"
-
-#define FUNC_PREFIX
-
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
-
-namespace walberla {
-namespace pystencils {
-
-class DensityPackInfo_double_precision
-    : public ::walberla::communication::UniformPackInfo {
-public:
-  DensityPackInfo_double_precision(BlockDataID jID_) : jID(jID_){};
-  virtual ~DensityPackInfo_double_precision() {}
-
-  bool constantDataExchange() const { return true; }
-  bool threadsafeReceiving() const { return true; }
-
-  void unpackData(IBlock *receiver, stencil::Direction dir,
-                  mpi::RecvBuffer &buffer) {
-    const auto dataSize = size(dir, receiver);
-    unpack(dir, buffer.skip(dataSize), receiver);
-  }
-
-  void communicateLocal(const IBlock *sender, IBlock *receiver,
-                        stencil::Direction dir) {
-    // TODO: optimize by generating kernel for this case
-    mpi::SendBuffer sBuffer;
-    packData(sender, dir, sBuffer);
-    mpi::RecvBuffer rBuffer(sBuffer);
-    unpackData(receiver, stencil::inverseDir[dir], rBuffer);
-  }
-
-  void packDataImpl(const IBlock *sender, stencil::Direction dir,
-                    mpi::SendBuffer &outBuffer) const {
-    const auto dataSize = size(dir, sender);
-    pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender));
-  }
-
-  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const;
-  void unpack(stencil::Direction dir, unsigned char *buffer,
-              IBlock *block) const;
-  uint_t size(stencil::Direction dir, const IBlock *block) const;
-
-private:
-  BlockDataID jID;
-};
-
-} // namespace pystencils
-} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp
deleted file mode 100644
index fab1ca3023..0000000000
--- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.cpp
+++ /dev/null
@@ -1,1484 +0,0 @@
-
-// kernel generated with pystencils v1.2, lbmpy v1.2, lbmpy_walberla/pystencils_walberla from waLBerla commit ref: a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
-
-#include "DensityPackInfo_single_precision.h"
-#include "core/DataTypes.h"
-#include "core/cell/CellInterval.h"
-#include "stencil/Directions.h"
-
-#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wfloat-equal"
-#pragma GCC diagnostic ignored "-Wshadow"
-#pragma GCC diagnostic ignored "-Wconversion"
-#pragma GCC diagnostic ignored "-Wunused-variable"
-#endif
-
-namespace walberla {
-namespace pystencils {
-
-using walberla::cell::CellInterval;
-using walberla::stencil::Direction;
-
-namespace internal_pack_BSW {
-static FUNC_PREFIX void pack_BSW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BSW
-
-namespace internal_pack_SW {
-static FUNC_PREFIX void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_SW
-
-namespace internal_pack_TSW {
-static FUNC_PREFIX void pack_TSW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TSW
-
-namespace internal_pack_BW {
-static FUNC_PREFIX void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BW
-
-namespace internal_pack_W {
-static FUNC_PREFIX void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
-    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
-      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3] = _data_j_20_35_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4] = _data_j_20_30_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5] = _data_j_20_36_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7] = _data_j_20_34_10[_stride_j_0 * ctr_0];
-        _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_W
-
-namespace internal_pack_TW {
-static FUNC_PREFIX void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TW
-
-namespace internal_pack_BNW {
-static FUNC_PREFIX void pack_BNW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BNW
-
-namespace internal_pack_NW {
-static FUNC_PREFIX void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_NW
-
-namespace internal_pack_TNW {
-static FUNC_PREFIX void pack_TNW(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TNW
-
-namespace internal_pack_BS {
-static FUNC_PREFIX void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_37_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BS
-
-namespace internal_pack_S {
-static FUNC_PREFIX void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
-    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
-      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1] = _data_j_20_33_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4] = _data_j_20_31_10[_stride_j_0 * ctr_0];
-        _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5] = _data_j_20_38_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_S
-
-namespace internal_pack_TS {
-static FUNC_PREFIX void pack_TS(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1] = _data_j_20_38_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TS
-
-namespace internal_pack_B {
-static FUNC_PREFIX void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0] = _data_j_20_39_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1] = _data_j_20_35_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3] = _data_j_20_37_10[_stride_j_0 * ctr_0];
-        _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4] = _data_j_20_32_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_B
-
-namespace internal_pack_T {
-static FUNC_PREFIX void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0] = _data_j_20_310_10[_stride_j_0 * ctr_0];
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1] = _data_j_20_36_10[_stride_j_0 * ctr_0];
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-        _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3] = _data_j_20_38_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_T
-
-namespace internal_pack_BN {
-static FUNC_PREFIX void pack_BN(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_BN
-
-namespace internal_pack_N {
-static FUNC_PREFIX void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0] = _data_j_20_311_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1] = _data_j_20_34_10[_stride_j_0 * ctr_0];
-        _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_N
-
-namespace internal_pack_TN {
-static FUNC_PREFIX void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0] = _data_j_20_312_10[_stride_j_0 * ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_pack_TN
-
-namespace internal_unpack_BSW {
-static FUNC_PREFIX void unpack_BSW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BSW
-
-namespace internal_unpack_SW {
-static FUNC_PREFIX void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_SW
-
-namespace internal_unpack_TSW {
-static FUNC_PREFIX void unpack_TSW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TSW
-
-namespace internal_unpack_BW {
-static FUNC_PREFIX void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BW
-
-namespace internal_unpack_W {
-static FUNC_PREFIX void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    float *RESTRICT _data_j_20_30 = _data_j + _stride_j_2 * ctr_2;
-    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      float *RESTRICT _data_j_20_30_10 = _stride_j_1 * ctr_1 + _data_j_20_30;
-      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0];
-        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 1];
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 2];
-        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 3];
-        _data_j_20_30_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 4];
-        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 5];
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 6];
-        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 7];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[9 * _size_j_0 * _size_j_1 * ctr_2 + 9 * _size_j_0 * ctr_1 + 9 * ctr_0 + 8];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_W
-
-namespace internal_unpack_TW {
-static FUNC_PREFIX void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TW
-
-namespace internal_unpack_BNW {
-static FUNC_PREFIX void unpack_BNW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BNW
-
-namespace internal_unpack_NW {
-static FUNC_PREFIX void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_NW
-
-namespace internal_unpack_TNW {
-static FUNC_PREFIX void unpack_TNW(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TNW
-
-namespace internal_unpack_BS {
-static FUNC_PREFIX void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
-        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BS
-
-namespace internal_unpack_S {
-static FUNC_PREFIX void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_33 = _data_j + _stride_j_2 * ctr_2 + 3 * _stride_j_3;
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    float *RESTRICT _data_j_20_31 = _data_j + _stride_j_2 * ctr_2 + _stride_j_3;
-    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_33_10 = _stride_j_1 * ctr_1 + _data_j_20_33;
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      float *RESTRICT _data_j_20_31_10 = _stride_j_1 * ctr_1 + _data_j_20_31;
-      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0];
-        _data_j_20_33_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 1];
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 2];
-        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 3];
-        _data_j_20_31_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 4];
-        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[6 * _size_j_0 * _size_j_1 * ctr_2 + 6 * _size_j_0 * ctr_1 + 6 * ctr_0 + 5];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_S
-
-namespace internal_unpack_TS {
-static FUNC_PREFIX void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0];
-        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[2 * _size_j_0 * _size_j_1 * ctr_2 + 2 * _size_j_0 * ctr_1 + 2 * ctr_0 + 1];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TS
-
-namespace internal_unpack_B {
-static FUNC_PREFIX void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_39 = _data_j + _stride_j_2 * ctr_2 + 9 * _stride_j_3;
-    float *RESTRICT _data_j_20_35 = _data_j + _stride_j_2 * ctr_2 + 5 * _stride_j_3;
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_37 = _data_j + _stride_j_2 * ctr_2 + 7 * _stride_j_3;
-    float *RESTRICT _data_j_20_32 = _data_j + _stride_j_2 * ctr_2 + 2 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_39_10 = _stride_j_1 * ctr_1 + _data_j_20_39;
-      float *RESTRICT _data_j_20_35_10 = _stride_j_1 * ctr_1 + _data_j_20_35;
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_37_10 = _stride_j_1 * ctr_1 + _data_j_20_37;
-      float *RESTRICT _data_j_20_32_10 = _stride_j_1 * ctr_1 + _data_j_20_32;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_39_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0];
-        _data_j_20_35_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 1];
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 2];
-        _data_j_20_37_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 3];
-        _data_j_20_32_10[_stride_j_0 * ctr_0] = _data_buffer[5 * _size_j_0 * _size_j_1 * ctr_2 + 5 * _size_j_0 * ctr_1 + 5 * ctr_0 + 4];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_B
-
-namespace internal_unpack_T {
-static FUNC_PREFIX void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_310 = _data_j + _stride_j_2 * ctr_2 + 10 * _stride_j_3;
-    float *RESTRICT _data_j_20_36 = _data_j + _stride_j_2 * ctr_2 + 6 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    float *RESTRICT _data_j_20_38 = _data_j + _stride_j_2 * ctr_2 + 8 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_310_10 = _stride_j_1 * ctr_1 + _data_j_20_310;
-      float *RESTRICT _data_j_20_36_10 = _stride_j_1 * ctr_1 + _data_j_20_36;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      float *RESTRICT _data_j_20_38_10 = _stride_j_1 * ctr_1 + _data_j_20_38;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_310_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0];
-        _data_j_20_36_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 2];
-        _data_j_20_38_10[_stride_j_0 * ctr_0] = _data_buffer[4 * _size_j_0 * _size_j_1 * ctr_2 + 4 * _size_j_0 * ctr_1 + 4 * ctr_0 + 3];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_T
-
-namespace internal_unpack_BN {
-static FUNC_PREFIX void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_BN
-
-namespace internal_unpack_N {
-static FUNC_PREFIX void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_311 = _data_j + _stride_j_2 * ctr_2 + 11 * _stride_j_3;
-    float *RESTRICT _data_j_20_34 = _data_j + _stride_j_2 * ctr_2 + 4 * _stride_j_3;
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_311_10 = _stride_j_1 * ctr_1 + _data_j_20_311;
-      float *RESTRICT _data_j_20_34_10 = _stride_j_1 * ctr_1 + _data_j_20_34;
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_311_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0];
-        _data_j_20_34_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 1];
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[3 * _size_j_0 * _size_j_1 * ctr_2 + 3 * _size_j_0 * ctr_1 + 3 * ctr_0 + 2];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_N
-
-namespace internal_unpack_TN {
-static FUNC_PREFIX void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_j, int64_t const _size_j_0, int64_t const _size_j_1, int64_t const _size_j_2, int64_t const _stride_j_0, int64_t const _stride_j_1, int64_t const _stride_j_2, int64_t const _stride_j_3) {
-  for (int64_t ctr_2 = 0; ctr_2 < _size_j_2; ctr_2 += 1) {
-    float *RESTRICT _data_j_20_312 = _data_j + _stride_j_2 * ctr_2 + 12 * _stride_j_3;
-    for (int64_t ctr_1 = 0; ctr_1 < _size_j_1; ctr_1 += 1) {
-      float *RESTRICT _data_j_20_312_10 = _stride_j_1 * ctr_1 + _data_j_20_312;
-      for (int64_t ctr_0 = 0; ctr_0 < _size_j_0; ctr_0 += 1) {
-        _data_j_20_312_10[_stride_j_0 * ctr_0] = _data_buffer[_size_j_0 * _size_j_1 * ctr_2 + _size_j_0 * ctr_1 + ctr_0];
-      }
-    }
-  }
-}
-} // namespace internal_unpack_TN
-
-void DensityPackInfo_single_precision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
-  float *buffer = reinterpret_cast<float *>(byte_buffer);
-
-  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
-
-  CellInterval ci;
-  j->getSliceBeforeGhostLayer(dir, ci, 1, false);
-
-  switch (dir) {
-  case stencil::BSW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BSW::pack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::SW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_SW::pack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TSW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TSW::pack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BW::pack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::W: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_W::pack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TW::pack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BNW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BNW::pack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::NW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_NW::pack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TNW: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TNW::pack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BS: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BS::pack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::S: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_S::pack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TS: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TS::pack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::B: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_B::pack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::T: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_T::pack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BN: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_BN::pack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::N: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_N::pack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TN: {
-    float *RESTRICT _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT const _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_pack_TN::pack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  default:
-    WALBERLA_ASSERT(false);
-  }
-}
-
-void DensityPackInfo_single_precision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
-  float *buffer = reinterpret_cast<float *>(byte_buffer);
-
-  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
-
-  CellInterval ci;
-  j->getGhostRegion(dir, ci, 1, false);
-  auto communciationDirection = stencil::inverseDir[dir];
-
-  switch (communciationDirection) {
-  case stencil::BSW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BSW::unpack_BSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::SW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_SW::unpack_SW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TSW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TSW::unpack_TSW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BW::unpack_BW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::W: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_W::unpack_W(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TW::unpack_TW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BNW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BNW::unpack_BNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::NW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_NW::unpack_NW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TNW: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TNW::unpack_TNW(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BS: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BS::unpack_BS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::S: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_S::unpack_S(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TS: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TS::unpack_TS(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::B: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_B::unpack_B(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::T: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_T::unpack_T(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::BN: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_BN::unpack_BN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::N: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_N::unpack_N(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  case stencil::TN: {
-    float *RESTRICT const _data_buffer = buffer;
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(j->nrOfGhostLayers()));
-    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(j->nrOfGhostLayers()));
-    float *RESTRICT _data_j = j->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->xSizeWithGhostLayer(), int64_t(cell_idx_c(ci.xSize()) + 0));
-    const int64_t _size_j_0 = int64_t(cell_idx_c(ci.xSize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->ySizeWithGhostLayer(), int64_t(cell_idx_c(ci.ySize()) + 0));
-    const int64_t _size_j_1 = int64_t(cell_idx_c(ci.ySize()) + 0);
-    WALBERLA_ASSERT_GREATER_EQUAL(j->zSizeWithGhostLayer(), int64_t(cell_idx_c(ci.zSize()) + 0));
-    const int64_t _size_j_2 = int64_t(cell_idx_c(ci.zSize()) + 0);
-    const int64_t _stride_j_0 = int64_t(j->xStride());
-    const int64_t _stride_j_1 = int64_t(j->yStride());
-    const int64_t _stride_j_2 = int64_t(j->zStride());
-    const int64_t _stride_j_3 = int64_t(1 * int64_t(j->fStride()));
-    internal_unpack_TN::unpack_TN(_data_buffer, _data_j, _size_j_0, _size_j_1, _size_j_2, _stride_j_0, _stride_j_1, _stride_j_2, _stride_j_3);
-    break;
-  }
-
-  default:
-    WALBERLA_ASSERT(false);
-  }
-}
-
-uint_t DensityPackInfo_single_precision::size(stencil::Direction dir, const IBlock *block) const {
-  auto j = block->getData<field::GhostLayerField<float, 13>>(jID);
-
-  CellInterval ci;
-  j->getGhostRegion(dir, ci, 1, false);
-
-  uint_t elementsPerCell = 0;
-
-  switch (dir) {
-  case stencil::BSW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::SW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::TSW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::BW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::W:
-    elementsPerCell = 9;
-    break;
-
-  case stencil::TW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::BNW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::NW:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::TNW:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::BS:
-    elementsPerCell = 2;
-    break;
-
-  case stencil::S:
-    elementsPerCell = 6;
-    break;
-
-  case stencil::TS:
-    elementsPerCell = 2;
-    break;
-
-  case stencil::B:
-    elementsPerCell = 5;
-    break;
-
-  case stencil::T:
-    elementsPerCell = 4;
-    break;
-
-  case stencil::BN:
-    elementsPerCell = 1;
-    break;
-
-  case stencil::N:
-    elementsPerCell = 3;
-    break;
-
-  case stencil::TN:
-    elementsPerCell = 1;
-    break;
-
-  default:
-    elementsPerCell = 0;
-  }
-  return ci.numCells() * elementsPerCell * sizeof(float);
-}
-
-} // namespace pystencils
-} // namespace walberla
\ No newline at end of file
diff --git a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h b/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h
deleted file mode 100644
index 08ea0c0988..0000000000
--- a/src/walberla_bridge/src/electrokinetics/generated_kernels/DensityPackInfo_single_precision.h
+++ /dev/null
@@ -1,67 +0,0 @@
-
-// kernel generated with pystencils v1.2, lbmpy v1.2,
-// lbmpy_walberla/pystencils_walberla from waLBerla commit ref:
-// a839fac6ef7d0c58e7710e4d50490e9dd7146b4a
-
-#pragma once
-#include "communication/UniformPackInfo.h"
-#include "core/DataTypes.h"
-#include "core/cell/CellInterval.h"
-#include "domain_decomposition/IBlock.h"
-#include "field/GhostLayerField.h"
-#include "stencil/Directions.h"
-
-#define FUNC_PREFIX
-
-#ifdef __GNUC__
-#define RESTRICT __restrict__
-#elif _MSC_VER
-#define RESTRICT __restrict
-#else
-#define RESTRICT
-#endif
-
-namespace walberla {
-namespace pystencils {
-
-class DensityPackInfo_single_precision
-    : public ::walberla::communication::UniformPackInfo {
-public:
-  DensityPackInfo_single_precision(BlockDataID jID_) : jID(jID_){};
-  virtual ~DensityPackInfo_single_precision() {}
-
-  bool constantDataExchange() const { return true; }
-  bool threadsafeReceiving() const { return true; }
-
-  void unpackData(IBlock *receiver, stencil::Direction dir,
-                  mpi::RecvBuffer &buffer) {
-    const auto dataSize = size(dir, receiver);
-    unpack(dir, buffer.skip(dataSize), receiver);
-  }
-
-  void communicateLocal(const IBlock *sender, IBlock *receiver,
-                        stencil::Direction dir) {
-    // TODO: optimize by generating kernel for this case
-    mpi::SendBuffer sBuffer;
-    packData(sender, dir, sBuffer);
-    mpi::RecvBuffer rBuffer(sBuffer);
-    unpackData(receiver, stencil::inverseDir[dir], rBuffer);
-  }
-
-  void packDataImpl(const IBlock *sender, stencil::Direction dir,
-                    mpi::SendBuffer &outBuffer) const {
-    const auto dataSize = size(dir, sender);
-    pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender));
-  }
-
-  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const;
-  void unpack(stencil::Direction dir, unsigned char *buffer,
-              IBlock *block) const;
-  uint_t size(stencil::Direction dir, const IBlock *block) const;
-
-private:
-  BlockDataID jID;
-};
-
-} // namespace pystencils
-} // namespace walberla
\ No newline at end of file

From 52aaef9e047b612a95b2d179d6192b0661b6632c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Wed, 7 Feb 2024 16:40:07 +0100
Subject: [PATCH 3/7] Fix tests and benchmarks

---
 maintainer/benchmarks/lb.py      | 7 ++++---
 testsuite/python/ek_interface.py | 1 +
 testsuite/python/lb_stats.py     | 4 ++--
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py
index 979a7811c3..7131e62ff4 100644
--- a/maintainer/benchmarks/lb.py
+++ b/maintainer/benchmarks/lb.py
@@ -53,11 +53,11 @@
 
 # process and check arguments
 n_iterations = 30
-assert args.volume_fraction > 0, "volume_fraction must be a positive number"
+assert args.volume_fraction > 0, "--volume_fraction must be a positive number"
 assert args.volume_fraction < np.pi / (3 * np.sqrt(2)), \
-    "volume_fraction exceeds the physical limit of sphere packing (~0.74)"
+    "--volume_fraction exceeds the physical limit of sphere packing (~0.74)"
 assert "box_l" not in args or args.particles_per_core == 0, \
-    "Argument box_l requires particles_per_core=0"
+    "Argument --box_l requires --particles_per_core=0"
 
 required_features = ["LENNARD_JONES", "WALBERLA"]
 if args.gpu:
@@ -85,6 +85,7 @@
 if n_part == 0:
     box_l = args.box_l
     agrid = 1.
+    lb_grid = args.box_l
     measurement_steps = 80
 else:
     # volume of N spheres with radius r: N * (4/3*pi*r^3)
diff --git a/testsuite/python/ek_interface.py b/testsuite/python/ek_interface.py
index e439edea90..136ea769f2 100644
--- a/testsuite/python/ek_interface.py
+++ b/testsuite/python/ek_interface.py
@@ -170,6 +170,7 @@ def test_ek_fft_solver(self):
         ek_solver = espressomd.electrokinetics.EKFFT(
             lattice=self.lattice, permittivity=0.01,
             single_precision=self.ek_params["single_precision"])
+        self.assertEqual(ek_solver.lattice, self.lattice)
         self.assertEqual(
             ek_solver.single_precision,
             self.ek_params["single_precision"])
diff --git a/testsuite/python/lb_stats.py b/testsuite/python/lb_stats.py
index 1f5044a0ec..11c2c51b88 100644
--- a/testsuite/python/lb_stats.py
+++ b/testsuite/python/lb_stats.py
@@ -58,7 +58,7 @@ def test_mass_momentum_thermostat(self):
             type=self.n_col_part // 2 * [0, 1], pos=np.random.random(
                 (self.n_col_part, 3)) * self.system.box_l[0])
         if espressomd.has_features("MASS"):
-            particles.mass = 0.1 + np.random.random(
+            particles.mass = 0.5 + np.random.random(
                 len(self.system.part))
 
         self.system.thermostat.turn_off()
@@ -134,7 +134,7 @@ def test_mass_momentum_thermostat(self):
         #   scale=np.std(all_temp_particle,ddof=1))[1] - self.params["temp"]
         # temp_prec_fluid = scipy.stats.norm.interval(0.95, loc=self.params["temp"],
         #   scale=np.std(all_temp_fluid,ddof=1))[1] -self.params["temp"]
-        temp_prec_particle = 0.05 * self.params["temp"]
+        temp_prec_particle = 0.08 * self.params["temp"]
         temp_prec_fluid = 0.05 * self.params["temp"]
 
         self.assertAlmostEqual(

From 639a556485cfb851afd38e1ba99ce7274c053152 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Mon, 12 Feb 2024 17:14:39 +0100
Subject: [PATCH 4/7] Refactor waLBerla bridge

---
 .../templates/Boundary.tmpl.h                 |  5 ++--
 src/script_interface/walberla/LBFluidNode.cpp |  3 +-
 .../walberla_bridge/utils/boundary_utils.hpp  | 25 ++++++++--------
 src/walberla_bridge/src/BoundaryHandling.hpp  | 25 +++++++++-------
 .../src/electrokinetics/EKinWalberlaImpl.hpp  | 21 ++++++++------
 .../reactions/EKReactionImplIndexed.cpp       |  6 ++--
 .../src/lattice_boltzmann/LBWalberlaImpl.hpp  | 29 +++++++++----------
 .../Dynamic_UBB_double_precision.h            |  6 ++--
 .../Dynamic_UBB_single_precision.h            |  6 ++--
 .../tests/LBWalberlaImpl_unit_tests.cpp       |  2 +-
 10 files changed, 67 insertions(+), 61 deletions(-)

diff --git a/maintainer/walberla_kernels/templates/Boundary.tmpl.h b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
index 5079dcc5a6..bdeaf57c06 100644
--- a/maintainer/walberla_kernels/templates/Boundary.tmpl.h
+++ b/maintainer/walberla_kernels/templates/Boundary.tmpl.h
@@ -40,6 +40,7 @@
 #include <field/FlagField.h>
 #include <core/debug/Debug.h>
 
+#include <cassert>
 #include <functional>
 #include <memory>
 #include <vector>
@@ -197,8 +198,8 @@ class {{class_name}}
         auto * flagField = block->getData< FlagField_T > ( flagFieldID );
         {{additional_data_handler.additional_field_data|indent(4)}}
 
-        if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) ))
-            return;
+        assert(flagField->flagExists(boundaryFlagUID and
+               flagField->flagExists(domainFlagUID));
 
         auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
         auto domainFlag = flagField->getFlag(domainFlagUID);
diff --git a/src/script_interface/walberla/LBFluidNode.cpp b/src/script_interface/walberla/LBFluidNode.cpp
index 4cab29bb23..af9ff8b678 100644
--- a/src/script_interface/walberla/LBFluidNode.cpp
+++ b/src/script_interface/walberla/LBFluidNode.cpp
@@ -47,13 +47,12 @@ Variant LBFluidNode::do_call_method(std::string const &name,
   if (name == "set_velocity_at_boundary") {
     if (is_none(params.at("value"))) {
       m_lb_fluid->remove_node_from_boundary(m_index);
-      m_lb_fluid->ghost_communication();
     } else {
       auto const u =
           get_value<Utils::Vector3d>(params, "value") * m_conv_velocity;
       m_lb_fluid->set_node_velocity_at_boundary(m_index, u);
-      m_lb_fluid->ghost_communication();
     }
+    m_lb_fluid->ghost_communication();
     return {};
   }
   if (name == "get_velocity_at_boundary") {
diff --git a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp b/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
index f91a6c833a..8c1558ee72 100644
--- a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
+++ b/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
@@ -21,6 +21,8 @@
 
 #include "walberla_utils.hpp"
 
+#include <walberla_bridge/LatticeWalberla.hpp>
+
 #include <utils/Vector.hpp>
 
 #include <cassert>
@@ -81,30 +83,29 @@ void set_boundary_from_grid(BoundaryModel &boundary,
                             std::vector<int> const &raster_flat,
                             std::vector<DataType> const &data_flat) {
 
+  auto const &conv = es2walberla<DataType, typename BoundaryModel::value_type>;
   auto const grid_size = lattice.get_grid_dimensions();
   auto const offset = lattice.get_local_grid_range().first;
   auto const gl = static_cast<int>(lattice.get_ghost_layers());
   assert(raster_flat.size() == Utils::product(grid_size));
   auto const n_y = grid_size[1];
   auto const n_z = grid_size[2];
-  auto const off_i = offset[0];
-  auto const off_j = offset[1];
-  auto const off_k = offset[2];
 
-  auto const &blocks = lattice.get_blocks();
-  for (auto block = blocks->begin(); block != blocks->end(); ++block) {
-    auto const [size_i, size_j, size_k] = boundary.block_dims(*block);
+  for (auto const &block : *lattice.get_blocks()) {
+    auto const [size_i, size_j, size_k] = boundary.block_dims(block);
     // Get field data which knows about the indices
-    // In the loop, x,y,z are in block-local coordinates
-    for (int i = off_i - gl; i < size_i + off_i + gl; ++i) {
-      for (int j = off_j - gl; j < size_j + off_j + gl; ++j) {
-        for (int k = off_k - gl; k < size_k + off_k + gl; ++k) {
-          auto const node = Utils::Vector3i{{i, j, k}};
+    // In the loop, i,j,k are in block-local coordinates
+    for (int i = -gl; i < size_i + gl; ++i) {
+      for (int j = -gl; j < size_j + gl; ++j) {
+        for (int k = -gl; k < size_k + gl; ++k) {
+          auto const node = offset + Utils::Vector3i{{i, j, k}};
           auto const idx = (node + grid_size) % grid_size;
           auto const index = idx[0] * n_y * n_z + idx[1] * n_z + idx[2];
           if (raster_flat[index]) {
+            auto const &value = data_flat[index];
             auto const bc = get_block_and_cell(lattice, node, true);
-            boundary.set_node_value_at_boundary(node, data_flat[index], *bc);
+            assert(bc.has_value());
+            boundary.set_node_value_at_boundary(node, conv(value), *bc);
           }
         }
       }
diff --git a/src/walberla_bridge/src/BoundaryHandling.hpp b/src/walberla_bridge/src/BoundaryHandling.hpp
index 4d004de9f8..86c2053888 100644
--- a/src/walberla_bridge/src/BoundaryHandling.hpp
+++ b/src/walberla_bridge/src/BoundaryHandling.hpp
@@ -23,7 +23,10 @@
 #include <walberla_bridge/utils/walberla_utils.hpp>
 
 #include <blockforest/StructuredBlockForest.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
 #include <field/FlagField.h>
+#include <field/FlagUID.h>
 
 #include <utils/Vector.hpp>
 
@@ -57,10 +60,9 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
       return get_value(global);
     }
 
-    template <typename U>
-    void set_node_boundary_value(Utils::Vector3i const &node, U const &val) {
+    void set_node_boundary_value(Utils::Vector3i const &node, T const &val) {
       auto const global = Cell(node[0], node[1], node[2]);
-      (*m_value_boundary)[global] = es2walberla<U, T>(val);
+      (*m_value_boundary)[global] = val;
     }
 
     void unset_node_boundary_value(Utils::Vector3i const &node) {
@@ -72,7 +74,7 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
     [[nodiscard]] auto
     get_node_boundary_value(Utils::Vector3i const &node) const {
       auto const global = Cell(node[0], node[1], node[2]);
-      return walberla2es(get_value(global));
+      return get_value(global);
     }
 
     bool node_is_boundary(Utils::Vector3i const &node) const {
@@ -105,9 +107,8 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
 
   BoundaryHandling(std::shared_ptr<StructuredBlockForest> blocks,
                    BlockDataID value_field_id, BlockDataID flag_field_id)
-      : m_blocks(std::move(blocks)), m_value_field_id(value_field_id),
-        m_flag_field_id(flag_field_id), m_callback(DynamicValueCallback()),
-        m_pending_changes(false) {
+      : m_blocks(std::move(blocks)), m_flag_field_id(flag_field_id),
+        m_callback(DynamicValueCallback()), m_pending_changes(false) {
     // reinitialize the flag field
     for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) {
       flag_reset_kernel(&*b);
@@ -115,7 +116,7 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
     // instantiate the boundary sweep
     std::function callback = m_callback;
     m_boundary =
-        std::make_shared<BoundaryClass>(m_blocks, m_value_field_id, callback);
+        std::make_shared<BoundaryClass>(m_blocks, value_field_id, callback);
   }
 
   void operator()(IBlock *block) { (*m_boundary)(block); }
@@ -129,8 +130,7 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
     return m_callback.get_node_boundary_value(node);
   }
 
-  template <typename U>
-  void set_node_value_at_boundary(Utils::Vector3i const &node, U const &v,
+  void set_node_value_at_boundary(Utils::Vector3i const &node, T const &v,
                                   BlockAndCell const &bc) {
     auto [flag_field, boundary_flag] = get_flag_field_and_flag(bc.block);
     m_callback.set_node_boundary_value(node, v);
@@ -138,6 +138,10 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
     m_pending_changes = true;
   }
 
+  void unpack_node(Utils::Vector3i const &node, T const &v) {
+    m_callback.set_node_boundary_value(node, v);
+  }
+
   void remove_node_from_boundary(Utils::Vector3i const &node,
                                  BlockAndCell const &bc) {
     auto [flag_field, boundary_flag] = get_flag_field_and_flag(bc.block);
@@ -163,7 +167,6 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
 
 private:
   std::shared_ptr<StructuredBlockForest> m_blocks;
-  BlockDataID m_value_field_id;
   BlockDataID m_flag_field_id;
   DynamicValueCallback m_callback;
   std::shared_ptr<BoundaryClass> m_boundary;
diff --git a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
index b60bfa935a..9fc4019aa6 100644
--- a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
@@ -195,7 +195,7 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
             m_density_field_id));
 
     // Synchronize ghost layers
-    (*m_full_communication)();
+    ghost_communication();
   }
 
   // Global parameters
@@ -453,7 +453,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
     if (!bc)
       return false;
 
-    m_boundary_flux->set_node_value_at_boundary(node, flux, *bc);
+    m_boundary_flux->set_node_value_at_boundary(
+        node, to_vector3<FloatType>(flux), *bc);
 
     return true;
   }
@@ -465,7 +466,7 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
     if (!bc or !m_boundary_flux->node_is_boundary(node))
       return std::nullopt;
 
-    return {m_boundary_flux->get_node_value_at_boundary(node)};
+    return {to_vector3d(m_boundary_flux->get_node_value_at_boundary(node))};
   }
 
   bool remove_node_from_flux_boundary(Utils::Vector3i const &node) override {
@@ -517,7 +518,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
             auto const bc = get_block_and_cell(lattice, node, false);
             auto const &opt = *it;
             if (opt) {
-              m_boundary_density->set_node_value_at_boundary(node, *opt, *bc);
+              m_boundary_density->set_node_value_at_boundary(
+                  node, FloatType_c(*opt), *bc);
             } else {
               m_boundary_density->remove_node_from_boundary(node, *bc);
             }
@@ -545,8 +547,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
           for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
             auto const node = local_offset + Utils::Vector3i{{x, y, z}};
             if (m_boundary_density->node_is_boundary(node)) {
-              out.emplace_back(
-                  m_boundary_density->get_node_value_at_boundary(node));
+              out.emplace_back(double_c(
+                  m_boundary_density->get_node_value_at_boundary(node)));
             } else {
               out.emplace_back(std::nullopt);
             }
@@ -575,7 +577,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
             auto const bc = get_block_and_cell(lattice, node, false);
             auto const &opt = *it;
             if (opt) {
-              m_boundary_flux->set_node_value_at_boundary(node, *opt, *bc);
+              m_boundary_flux->set_node_value_at_boundary(
+                  node, to_vector3<FloatType>(*opt), *bc);
             } else {
               m_boundary_flux->remove_node_from_boundary(node, *bc);
             }
@@ -603,8 +606,8 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
           for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
             auto const node = local_offset + Utils::Vector3i{{x, y, z}};
             if (m_boundary_flux->node_is_boundary(node)) {
-              out.emplace_back(
-                  m_boundary_flux->get_node_value_at_boundary(node));
+              out.emplace_back(to_vector3d(
+                  m_boundary_flux->get_node_value_at_boundary(node)));
             } else {
               out.emplace_back(std::nullopt);
             }
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
index a02b084d83..c3f5643fd9 100644
--- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
@@ -30,6 +30,7 @@
 #include <domain_decomposition/IBlock.h>
 #include <field/AddToStorage.h>
 
+#include <cassert>
 #include <cstddef>
 #include <memory>
 #include <optional>
@@ -68,9 +69,8 @@ void fillFromFlagField(IBlock *block, BlockDataID indexVectorID,
 
   auto *flagField = block->getData<FlagField>(flagFieldID);
 
-  if (!(flagField->flagExists(boundaryFlagUID) &&
-        flagField->flagExists(domainFlagUID)))
-    return;
+  assert(flagField->flagExists(boundaryFlagUID) and
+         flagField->flagExists(domainFlagUID));
 
   auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
   auto domainFlag = flagField->getFlag(domainFlagUID);
diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
index f4a6e2560e..0e144abb47 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
@@ -973,18 +973,17 @@ class LBWalberlaImpl : public LBWalberlaBase {
     if (!bc or !m_boundary->node_is_boundary(node))
       return std::nullopt;
 
-    return {m_boundary->get_node_value_at_boundary(node)};
+    return {to_vector3d(m_boundary->get_node_value_at_boundary(node))};
   }
 
   bool set_node_velocity_at_boundary(Utils::Vector3i const &node,
                                      Utils::Vector3d const &velocity) override {
     auto bc = get_block_and_cell(get_lattice(), node, true);
-    if (!bc)
-      return false;
-
-    m_boundary->set_node_value_at_boundary(node, velocity, *bc);
-
-    return true;
+    if (bc) {
+      m_boundary->set_node_value_at_boundary(
+          node, to_vector3<FloatType>(velocity), *bc);
+    }
+    return bc.has_value();
   }
 
   std::vector<std::optional<Utils::Vector3d>> get_slice_velocity_at_boundary(
@@ -1003,7 +1002,8 @@ class LBWalberlaImpl : public LBWalberlaBase {
           for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) {
             auto const node = local_offset + Utils::Vector3i{{x, y, z}};
             if (m_boundary->node_is_boundary(node)) {
-              out.emplace_back(m_boundary->get_node_value_at_boundary(node));
+              out.emplace_back(
+                  to_vector3d(m_boundary->get_node_value_at_boundary(node)));
             } else {
               out.emplace_back(std::nullopt);
             }
@@ -1032,7 +1032,8 @@ class LBWalberlaImpl : public LBWalberlaBase {
             auto const bc = get_block_and_cell(lattice, node, false);
             auto const &opt = *it;
             if (opt) {
-              m_boundary->set_node_value_at_boundary(node, *opt, *bc);
+              m_boundary->set_node_value_at_boundary(
+                  node, to_vector3<FloatType>(*opt), *bc);
             } else {
               m_boundary->remove_node_from_boundary(node, *bc);
             }
@@ -1054,12 +1055,10 @@ class LBWalberlaImpl : public LBWalberlaBase {
 
   bool remove_node_from_boundary(Utils::Vector3i const &node) override {
     auto bc = get_block_and_cell(get_lattice(), node, true);
-    if (!bc)
-      return false;
-
-    m_boundary->remove_node_from_boundary(node, *bc);
-
-    return true;
+    if (bc) {
+      m_boundary->remove_node_from_boundary(node, *bc);
+    }
+    return bc.has_value();
   }
 
   std::optional<bool>
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
index 38e68af361..7c9a644ac8 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h
@@ -31,6 +31,7 @@
 #include "field/FlagField.h"
 #include "field/GhostLayerField.h"
 
+#include <cassert>
 #include <functional>
 #include <memory>
 #include <vector>
@@ -138,9 +139,8 @@ class Dynamic_UBB_double_precision {
 
     auto *flagField = block->getData<FlagField_T>(flagFieldID);
 
-    if (!(flagField->flagExists(boundaryFlagUID) &&
-          flagField->flagExists(domainFlagUID)))
-      return;
+    assert(flagField->flagExists(boundaryFlagUID) and
+           flagField->flagExists(domainFlagUID));
 
     auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
     auto domainFlag = flagField->getFlag(domainFlagUID);
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
index e4175e74f1..ab7f27e111 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h
@@ -31,6 +31,7 @@
 #include "field/FlagField.h"
 #include "field/GhostLayerField.h"
 
+#include <cassert>
 #include <functional>
 #include <memory>
 #include <vector>
@@ -138,9 +139,8 @@ class Dynamic_UBB_single_precision {
 
     auto *flagField = block->getData<FlagField_T>(flagFieldID);
 
-    if (!(flagField->flagExists(boundaryFlagUID) &&
-          flagField->flagExists(domainFlagUID)))
-      return;
+    assert(flagField->flagExists(boundaryFlagUID) and
+           flagField->flagExists(domainFlagUID));
 
     auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
     auto domainFlag = flagField->getFlag(domainFlagUID);
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
index 3f8f9a5a7e..516d9ab242 100644
--- a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
@@ -435,7 +435,7 @@ BOOST_DATA_TEST_CASE(forces_interpolation, bdata::make(all_lbs()),
   // todo: check a less symmetrical situation, where the force is applied not
   // in the middle between the nodes
 
-  for (Vector3i n : all_nodes_incl_ghosts(lb->get_lattice())) {
+  for (auto const &n : all_nodes_incl_ghosts(lb->get_lattice())) {
     if (lb->get_lattice().node_in_local_halo(n)) {
       auto const pos = 1. * n; // Mid point between nodes
       auto const f = Vector3d{{1., 2., -3.5}};

From d0c9154261f79964487d110a1e84e60e9d23c37b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Wed, 14 Feb 2024 19:32:29 +0100
Subject: [PATCH 5/7] Rewrite waLBerla MPI communication

Split LB ghost communicator from LB boundaries communicator.
Always communicate bounce-back velocities from the ghost layer.
This fixes the regression introduced by
3fd170980bed30c430a9b0264e9504632b4b7326.
---
 samples/lb_circular_couette.py                |   2 +-
 src/script_interface/walberla/LBFluid.cpp     |   3 +-
 src/script_interface/walberla/LBFluidNode.cpp |   1 +
 .../walberla/LBFluidSlice.cpp                 |   6 +-
 src/walberla_bridge/src/BoundaryPackInfo.hpp  | 147 ++++++++++++++++++
 .../src/lattice_boltzmann/LBWalberlaImpl.hpp  |  63 +++++---
 .../tests/LBWalberlaImpl_unit_tests.cpp       |   1 +
 testsuite/python/CMakeLists.txt               |   1 +
 testsuite/python/lb_boundary_ghost_layer.py   | 103 ++++++++++++
 testsuite/python/lb_circular_couette.py       |   4 +-
 .../samples/test_lb_circular_couette.py       |   7 +-
 11 files changed, 306 insertions(+), 32 deletions(-)
 create mode 100644 src/walberla_bridge/src/BoundaryPackInfo.hpp
 create mode 100644 testsuite/python/lb_boundary_ghost_layer.py

diff --git a/samples/lb_circular_couette.py b/samples/lb_circular_couette.py
index 0d915237ec..81a621b2ce 100644
--- a/samples/lb_circular_couette.py
+++ b/samples/lb_circular_couette.py
@@ -57,7 +57,7 @@
 cyl_center = agrid * (grid_size // 2 + 0.5) * [1, 1, 0]
 cylinder_in = espressomd.shapes.Cylinder(
     center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2],
-    radius=8.1 * agrid, direction=1)
+    radius=8.6 * agrid, direction=1)
 cylinder_out = espressomd.shapes.Cylinder(
     center=cyl_center, axis=[0, 0, 1], length=3 * system.box_l[2],
     radius=14.5 * agrid, direction=-1)
diff --git a/src/script_interface/walberla/LBFluid.cpp b/src/script_interface/walberla/LBFluid.cpp
index 44c5bbcd69..b66f954ef4 100644
--- a/src/script_interface/walberla/LBFluid.cpp
+++ b/src/script_interface/walberla/LBFluid.cpp
@@ -108,7 +108,6 @@ Variant LBFluid::do_call_method(std::string const &name,
   }
   if (name == "clear_boundaries") {
     m_instance->clear_boundaries();
-    m_instance->ghost_communication();
     ::System::get_system().on_lb_boundary_conditions_change();
     return {};
   }
@@ -269,8 +268,8 @@ void LBFluid::load_checkpoint(std::string const &filename, int mode) {
   };
 
   auto const on_success = [&lb_obj]() {
-    lb_obj.reallocate_ubb_field();
     lb_obj.ghost_communication();
+    lb_obj.reallocate_ubb_field();
   };
 
   load_checkpoint_common(*context(), "LB", filename, mode, read_metadata,
diff --git a/src/script_interface/walberla/LBFluidNode.cpp b/src/script_interface/walberla/LBFluidNode.cpp
index af9ff8b678..f30e26a395 100644
--- a/src/script_interface/walberla/LBFluidNode.cpp
+++ b/src/script_interface/walberla/LBFluidNode.cpp
@@ -53,6 +53,7 @@ Variant LBFluidNode::do_call_method(std::string const &name,
       m_lb_fluid->set_node_velocity_at_boundary(m_index, u);
     }
     m_lb_fluid->ghost_communication();
+    m_lb_fluid->reallocate_ubb_field();
     return {};
   }
   if (name == "get_velocity_at_boundary") {
diff --git a/src/script_interface/walberla/LBFluidSlice.cpp b/src/script_interface/walberla/LBFluidSlice.cpp
index 1aee5f49fd..cb6bc905b1 100644
--- a/src/script_interface/walberla/LBFluidSlice.cpp
+++ b/src/script_interface/walberla/LBFluidSlice.cpp
@@ -99,8 +99,10 @@ Variant LBFluidSlice::do_call_method(std::string const &name,
                 1. / m_conv_velocity);
   }
   if (name == "set_velocity_at_boundary") {
-    return call(&LatticeModel::set_slice_velocity_at_boundary, {1},
-                m_conv_velocity);
+    auto const retval = call(&LatticeModel::set_slice_velocity_at_boundary, {1},
+                             m_conv_velocity);
+    m_lb_fluid->reallocate_ubb_field();
+    return retval;
   }
   if (name == "get_pressure_tensor") {
     return call(&LatticeModel::get_slice_pressure_tensor, {3, 3},
diff --git a/src/walberla_bridge/src/BoundaryPackInfo.hpp b/src/walberla_bridge/src/BoundaryPackInfo.hpp
new file mode 100644
index 0000000000..3055b3ebe0
--- /dev/null
+++ b/src/walberla_bridge/src/BoundaryPackInfo.hpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2024 The ESPResSo project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <core/debug/Debug.h>
+#include <core/mpi/RecvBuffer.h>
+#include <core/mpi/SendBuffer.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/communication/PackInfo.h>
+#include <stencil/Directions.h>
+
+#include <memory>
+#include <tuple>
+#include <utility>
+
+namespace walberla {
+namespace field {
+namespace communication {
+
+template <typename GhostLayerField_T, typename Boundary_T>
+class BoundaryPackInfo : public PackInfo<GhostLayerField_T> {
+protected:
+  using PackInfo<GhostLayerField_T>::bdId_;
+
+public:
+  using PackInfo<GhostLayerField_T>::PackInfo;
+  using PackInfo<GhostLayerField_T>::numberOfGhostLayersToCommunicate;
+
+  ~BoundaryPackInfo() override = default;
+
+  void setup_boundary_handle(std::shared_ptr<LatticeWalberla> lattice,
+                             std::shared_ptr<Boundary_T> boundary) {
+    m_lattice = std::move(lattice);
+    m_boundary = std::move(boundary);
+  }
+
+  bool constantDataExchange() const override { return false; }
+  bool threadsafeReceiving() const override { return true; }
+
+  void communicateLocal(IBlock const *sender, IBlock *receiver,
+                        stencil::Direction dir) override {
+    mpi::SendBuffer sBuffer;
+    packDataImpl(sender, dir, sBuffer);
+    mpi::RecvBuffer rBuffer(sBuffer);
+    unpackData(receiver, stencil::inverseDir[dir], rBuffer);
+  }
+
+  void unpackData(IBlock *receiver, stencil::Direction dir,
+                  mpi::RecvBuffer &buffer) override {
+
+    auto *flag_field = receiver->getData<GhostLayerField_T>(bdId_);
+    WALBERLA_ASSERT_NOT_NULLPTR(flag_field);
+    WALBERLA_ASSERT_NOT_NULLPTR(m_boundary);
+    WALBERLA_ASSERT_NOT_NULLPTR(m_lattice);
+
+    auto const boundary_flag = flag_field->getFlag(Boundary_flag);
+    auto const gl = numberOfGhostLayersToCommunicate(flag_field);
+    auto const begin = [gl, dir](auto const *flag_field) {
+      return flag_field->beginGhostLayerOnly(gl, dir);
+    };
+
+#ifndef NDEBUG
+    uint_t xSize, ySize, zSize, bSize;
+    buffer >> xSize >> ySize >> zSize >> bSize;
+    uint_t buf_size{0u};
+    for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
+      if (isFlagSet(it, boundary_flag)) {
+        ++buf_size;
+      }
+    }
+    WALBERLA_ASSERT_EQUAL(xSize, flag_field->xSize());
+    WALBERLA_ASSERT_EQUAL(ySize, flag_field->ySize());
+    WALBERLA_ASSERT_EQUAL(zSize, flag_field->zSize());
+    WALBERLA_ASSERT_EQUAL(bSize, buf_size);
+#endif
+
+    auto const offset = std::get<0>(m_lattice->get_local_grid_range());
+    typename Boundary_T::value_type value;
+    for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
+      if (isFlagSet(it, boundary_flag)) {
+        auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}};
+        buffer >> value;
+        m_boundary->unpack_node(node, value);
+      }
+    }
+  }
+
+protected:
+  void packDataImpl(IBlock const *sender, stencil::Direction dir,
+                    mpi::SendBuffer &buffer) const override {
+
+    auto const *flag_field = sender->getData<GhostLayerField_T>(bdId_);
+    WALBERLA_ASSERT_NOT_NULLPTR(flag_field);
+    WALBERLA_ASSERT_NOT_NULLPTR(m_boundary);
+    WALBERLA_ASSERT_NOT_NULLPTR(m_lattice);
+
+    auto const boundary_flag = flag_field->getFlag(Boundary_flag);
+    auto const gl = numberOfGhostLayersToCommunicate(flag_field);
+    auto const begin = [gl, dir](auto const *flag_field) {
+      return flag_field->beginSliceBeforeGhostLayer(dir, gl);
+    };
+
+#ifndef NDEBUG
+    uint_t buf_size{0u};
+    for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
+      if (isFlagSet(it, boundary_flag)) {
+        ++buf_size;
+      }
+    }
+    buffer << flag_field->xSize() << flag_field->ySize() << flag_field->zSize()
+           << buf_size;
+#endif
+
+    auto const offset = std::get<0>(m_lattice->get_local_grid_range());
+    for (auto it = begin(flag_field); it != flag_field->end(); ++it) {
+      if (isFlagSet(it, boundary_flag)) {
+        auto const node = offset + Utils::Vector3i{{it.x(), it.y(), it.z()}};
+        buffer << m_boundary->get_node_value_at_boundary(node);
+      }
+    }
+  }
+
+private:
+  std::shared_ptr<LatticeWalberla> m_lattice;
+  std::shared_ptr<Boundary_T> m_boundary;
+};
+
+} // namespace communication
+} // namespace field
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
index 0e144abb47..ecf7e57a64 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
@@ -45,6 +45,7 @@
 #include <stencil/D3Q27.h>
 
 #include "../BoundaryHandling.hpp"
+#include "../BoundaryPackInfo.hpp"
 #include "InterpolateAndShiftAtBoundary.hpp"
 #include "ResetForce.hpp"
 #include "lb_kernels.hpp"
@@ -235,8 +236,9 @@ class LBWalberlaImpl : public LBWalberlaBase {
       typename FieldTrait<FloatType, Architecture>::template PackInfo<Field>;
 
   // communicators
-  std::shared_ptr<FullCommunicator> m_full_communication;
-  std::shared_ptr<PDFStreamingCommunicator> m_pdf_streaming_communication;
+  std::shared_ptr<FullCommunicator> m_boundary_communicator;
+  std::shared_ptr<FullCommunicator> m_pdf_full_communicator;
+  std::shared_ptr<PDFStreamingCommunicator> m_pdf_streaming_communicator;
 
   // ResetForce sweep + external force handling
   std::shared_ptr<ResetForce<PdfField, VectorField>> m_reset_force;
@@ -350,27 +352,33 @@ class LBWalberlaImpl : public LBWalberlaBase {
     reset_boundary_handling();
 
     // Set up the communication and register fields
-    m_pdf_streaming_communication =
+    m_pdf_streaming_communicator =
         std::make_shared<PDFStreamingCommunicator>(blocks);
-    m_pdf_streaming_communication->addPackInfo(
+    m_pdf_streaming_communicator->addPackInfo(
         std::make_shared<PackInfo<PdfField>>(m_pdf_field_id, n_ghost_layers));
-    m_pdf_streaming_communication->addPackInfo(
+    m_pdf_streaming_communicator->addPackInfo(
         std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id,
                                                 n_ghost_layers));
-    m_pdf_streaming_communication->addPackInfo(
-        std::make_shared<field::communication::PackInfo<FlagField>>(
-            m_flag_field_id, n_ghost_layers));
 
-    m_full_communication = std::make_shared<FullCommunicator>(blocks);
-    m_full_communication->addPackInfo(
+    m_pdf_full_communicator = std::make_shared<FullCommunicator>(blocks);
+    m_pdf_full_communicator->addPackInfo(
         std::make_shared<PackInfo<PdfField>>(m_pdf_field_id, n_ghost_layers));
-    m_full_communication->addPackInfo(std::make_shared<PackInfo<VectorField>>(
-        m_last_applied_force_field_id, n_ghost_layers));
-    m_full_communication->addPackInfo(std::make_shared<PackInfo<VectorField>>(
-        m_velocity_field_id, n_ghost_layers));
-    m_full_communication->addPackInfo(
+    m_pdf_full_communicator->addPackInfo(
+        std::make_shared<PackInfo<VectorField>>(m_last_applied_force_field_id,
+                                                n_ghost_layers));
+    m_pdf_full_communicator->addPackInfo(
+        std::make_shared<PackInfo<VectorField>>(m_velocity_field_id,
+                                                n_ghost_layers));
+
+    m_boundary_communicator = std::make_shared<FullCommunicator>(blocks);
+    m_boundary_communicator->addPackInfo(
         std::make_shared<field::communication::PackInfo<FlagField>>(
             m_flag_field_id, n_ghost_layers));
+    auto boundary_packinfo = std::make_shared<
+        field::communication::BoundaryPackInfo<FlagField, BoundaryModel>>(
+        m_flag_field_id, n_ghost_layers);
+    boundary_packinfo->setup_boundary_handle(m_lattice, m_boundary);
+    m_boundary_communicator->addPackInfo(boundary_packinfo);
 
     // Instantiate the sweep responsible for force double buffering and
     // external forces
@@ -439,13 +447,13 @@ class LBWalberlaImpl : public LBWalberlaBase {
     integrate_reset_force(blocks);
     // LB collide
     integrate_collide(blocks);
-    m_pdf_streaming_communication->communicate();
+    m_pdf_streaming_communicator->communicate();
     // Handle boundaries
     integrate_boundaries(blocks);
     // LB stream
     integrate_stream(blocks);
     // Refresh ghost layers
-    m_full_communication->communicate();
+    ghost_communication_pdfs();
   }
 
   void integrate_pull_scheme() {
@@ -458,7 +466,7 @@ class LBWalberlaImpl : public LBWalberlaBase {
     // LB collide
     integrate_collide(blocks);
     // Refresh ghost layers
-    ghost_communication();
+    ghost_communication_pdfs();
   }
 
 protected:
@@ -474,7 +482,6 @@ class LBWalberlaImpl : public LBWalberlaBase {
 
 public:
   void integrate() override {
-    reallocate_ubb_field();
     if (has_lees_edwards_bc()) {
       integrate_pull_scheme();
     } else {
@@ -485,7 +492,16 @@ class LBWalberlaImpl : public LBWalberlaBase {
   }
 
   void ghost_communication() override {
-    m_full_communication->communicate();
+    ghost_communication_boundary();
+    ghost_communication_pdfs();
+  }
+
+  void ghost_communication_boundary() {
+    m_boundary_communicator->communicate();
+  }
+
+  void ghost_communication_pdfs() {
+    m_pdf_full_communicator->communicate();
     if (has_lees_edwards_bc()) {
       auto const &blocks = get_lattice().get_blocks();
       apply_lees_edwards_pdf_interpolation(blocks);
@@ -1097,7 +1113,10 @@ class LBWalberlaImpl : public LBWalberlaBase {
 
   void reallocate_ubb_field() override { m_boundary->boundary_update(); }
 
-  void clear_boundaries() override { reset_boundary_handling(); }
+  void clear_boundaries() override {
+    reset_boundary_handling();
+    ghost_communication();
+  }
 
   void
   update_boundary_from_shape(std::vector<int> const &raster_flat,
@@ -1105,6 +1124,8 @@ class LBWalberlaImpl : public LBWalberlaBase {
     auto const grid_size = get_lattice().get_grid_dimensions();
     auto const data = fill_3D_vector_array(data_flat, grid_size);
     set_boundary_from_grid(*m_boundary, get_lattice(), raster_flat, data);
+    ghost_communication();
+    reallocate_ubb_field();
   }
 
   // Pressure tensor
diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
index 516d9ab242..94e90de862 100644
--- a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
+++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp
@@ -202,6 +202,7 @@ BOOST_DATA_TEST_CASE(update_boundary_from_shape, bdata::make(all_lbs()),
     std::vector<double> vel_flat(vel_3d.data(),
                                  vel_3d.data() + vel_3d.num_elements());
     lb->update_boundary_from_shape(raster_flat, vel_flat);
+    lb->ghost_communication();
   }
 
   for (auto const &node : nodes) {
diff --git a/testsuite/python/CMakeLists.txt b/testsuite/python/CMakeLists.txt
index ee457e27fc..0389f71ad7 100644
--- a/testsuite/python/CMakeLists.txt
+++ b/testsuite/python/CMakeLists.txt
@@ -334,6 +334,7 @@ python_test(FILE thole.py MAX_NUM_PROC 4)
 python_test(FILE lb_slice.py MAX_NUM_PROC 2)
 python_test(FILE lb_boundary_velocity.py MAX_NUM_PROC 1)
 # python_test(FILE lb_boundary_volume_force.py MAX_NUM_PROC 2) # TODO
+python_test(FILE lb_boundary_ghost_layer.py MAX_NUM_PROC 2)
 python_test(FILE lb_circular_couette.py MAX_NUM_PROC 2 GPU_SLOTS 1)
 python_test(FILE lb_poiseuille.py MAX_NUM_PROC 4 GPU_SLOTS 1)
 python_test(FILE lb_poiseuille_cylinder.py MAX_NUM_PROC 2 GPU_SLOTS 1)
diff --git a/testsuite/python/lb_boundary_ghost_layer.py b/testsuite/python/lb_boundary_ghost_layer.py
new file mode 100644
index 0000000000..0c226d4e03
--- /dev/null
+++ b/testsuite/python/lb_boundary_ghost_layer.py
@@ -0,0 +1,103 @@
+#
+# Copyright (C) 2024 The ESPResSo project
+#
+# This file is part of ESPResSo.
+#
+# ESPResSo is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# ESPResSo is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import unittest as ut
+import unittest_decorators as utx
+import numpy as np
+import scipy.optimize
+
+import espressomd.lb
+import espressomd.shapes
+
+AGRID = 0.5
+KINEMATIC_VISC = 2.7
+DENS = 1.7
+TIME_STEP = 0.07
+LB_PARAMS = {"agrid": AGRID, "tau": TIME_STEP, "density": DENS,
+             "kinematic_viscosity": KINEMATIC_VISC}
+
+
+class TestCommon:
+
+    system = espressomd.System(box_l=[16.0, 1.0, 1.0])
+    system.time_step = TIME_STEP
+    system.cell_system.skin = 0.4 * AGRID
+    n_nodes = system.cell_system.get_state()["n_nodes"]
+
+    def setUp(self):
+        self.lbf = self.lb_class(**LB_PARAMS, **self.lb_params)
+        self.system.lb = self.lbf
+        self.ubb = espressomd.lb.VelocityBounceBack([0., 0., 1e-5])
+
+    def tearDown(self):
+        self.system.lb = None
+
+    def get_profile(self):
+        xdata = np.arange(1, self.lbf.shape[0])
+        ydata = []
+        for x in xdata:
+            ydata.append(np.mean(self.lbf[x, :, :].velocity[:, :, 2]))
+        return xdata, np.array(ydata)
+
+    def check_profile(self):
+        def quadratic(x, a, b, c):
+            return a * x**2 + b * x + c
+
+        self.system.integrator.run(40)
+        xdata, ydata = self.get_profile()
+        popt_ref = (4e-8, -1e-6, 1e-5)
+        popt, _ = scipy.optimize.curve_fit(
+            quadratic, xdata, ydata, p0=popt_ref)
+        rtol = 0.3 if self.lbf.single_precision else 0.1
+        np.testing.assert_allclose(popt, popt_ref, rtol=0.5, atol=0.)
+        np.testing.assert_allclose(ydata, quadratic(xdata, *popt),
+                                   rtol=rtol, atol=0.)
+
+    def test_node_setter(self):
+        for i in (0, 1):
+            for j in (0, 1):
+                self.lbf[0, i, j].boundary = self.ubb
+        self.check_profile()
+
+    def test_slice_setter(self):
+        self.lbf[0, :, :].boundary = self.ubb
+        self.check_profile()
+
+    def test_shape_setter(self):
+        shape = espressomd.shapes.Wall(normal=[1, 0, 0], dist=AGRID)
+        self.lbf.add_boundary_from_shape(shape, velocity=self.ubb.velocity)
+        self.check_profile()
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks")
+class LBPoiseuilleWalberlaSinglePrecision(TestCommon, ut.TestCase):
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": True}
+
+
+@utx.skipIfMissingFeatures(["WALBERLA"])
+@ut.skipIf(TestCommon.n_nodes != 2, "only runs for 2 MPI ranks")
+class LBPoiseuilleWalberlaDoublePrecision(TestCommon, ut.TestCase):
+    lb_class = espressomd.lb.LBFluidWalberla
+    lb_params = {"single_precision": False}
+
+
+if __name__ == "__main__":
+    ut.main()
diff --git a/testsuite/python/lb_circular_couette.py b/testsuite/python/lb_circular_couette.py
index 71f5836671..e27e52d6cf 100644
--- a/testsuite/python/lb_circular_couette.py
+++ b/testsuite/python/lb_circular_couette.py
@@ -143,8 +143,8 @@ def test_taylor_couette_flow(self):
         a_ref, b_ref = taylor_couette(slip_vel, 0.0, cyl1.radius, cyl2.radius)
         v_phi_ref = a_ref * r + b_ref / r
         v_phi_drift = np.mean(v_phi) - np.mean(v_phi_ref)
-        np.testing.assert_allclose(v_phi_drift, 0., atol=1.2e-4)
-        np.testing.assert_allclose(v_phi - v_phi_drift, v_phi_ref, atol=1e-4)
+        np.testing.assert_allclose(v_phi_drift, 0., atol=4e-4)
+        np.testing.assert_allclose(v_phi - v_phi_drift, v_phi_ref, atol=4e-4)
 
 
 @utx.skipIfMissingFeatures(["WALBERLA"])
diff --git a/testsuite/scripts/samples/test_lb_circular_couette.py b/testsuite/scripts/samples/test_lb_circular_couette.py
index c91ca03242..5111ee9032 100644
--- a/testsuite/scripts/samples/test_lb_circular_couette.py
+++ b/testsuite/scripts/samples/test_lb_circular_couette.py
@@ -52,19 +52,18 @@ def test_taylor_couette_flow(self):
         np.testing.assert_allclose(v_phi[:7], 0., atol=1e-7)
 
         # check azimuthal velocity in the linear regime
-        self.assertGreater(v_phi[7], v_phi[6])
         self.assertGreater(v_phi[8], v_phi[7])
         self.assertGreater(v_phi[9], v_phi[8])
 
         # check azimuthal velocity in the Couette regime
-        xdata = sample.profile_r[9:]
-        ydata = v_phi[9:]
+        xdata = sample.profile_r[9:-1]
+        ydata = v_phi[9:-1]
         a_ref, b_ref = taylor_couette(
             sample.velocity_magnitude, 0.0, sample.cylinder_in.radius,
             sample.cylinder_out.radius, sample.agrid)
         (a_sim, b_sim), _ = scipy.optimize.curve_fit(
             lambda x, a, b: a * x + b / x, xdata, ydata)
-        np.testing.assert_allclose([a_sim, b_sim], [a_ref, b_ref], atol=1e-3)
+        np.testing.assert_allclose([a_sim, b_sim], [a_ref, b_ref], rtol=0.05)
 
 
 if __name__ == "__main__":

From 220aa61c155c42bbcf37eb88df5501ea4304a5ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Wed, 21 Feb 2024 20:05:17 +0100
Subject: [PATCH 6/7] Make waLBerla dependency private

---
 src/core/lb/particle_coupling.cpp             |   1 +
 src/core/unit_tests/ek_interface_test.cpp     |   4 +-
 src/script_interface/walberla/EKFFT.hpp       |   4 +-
 src/script_interface/walberla/EKNone.hpp      |   3 +-
 src/script_interface/walberla/EKReaction.hpp  |  26 +--
 src/script_interface/walberla/EKSpecies.cpp   |   3 +-
 src/walberla_bridge/CMakeLists.txt            |   9 +-
 .../electrokinetics/ek_poisson_fft_init.hpp   |   7 +-
 .../electrokinetics/ek_poisson_none_init.hpp  |   7 +-
 .../electrokinetics/ek_walberla_init.hpp      |  16 ++
 .../reactions/EKReactionBase.hpp              |  15 +-
 .../reactions/EKReactionBaseIndexed.hpp}      |  32 ++-
 src/walberla_bridge/src/BoundaryHandling.hpp  |  16 +-
 src/walberla_bridge/src/BoundaryPackInfo.hpp  |   5 +
 src/walberla_bridge/src/LatticeWalberla.cpp   |   3 +-
 .../src/electrokinetics/EKinWalberlaImpl.hpp  |  13 +-
 .../electrokinetics/ek_poisson_fft_init.cpp   |   4 +
 .../electrokinetics/ek_poisson_none_init.cpp  |   4 +
 .../src/electrokinetics/ek_walberla_init.cpp  |  27 ++-
 .../electrokinetics/reactions/CMakeLists.txt  |   3 -
 .../reactions/EKReactionImplBulk.hpp          |  23 +-
 .../reactions/EKReactionImplIndexed.cpp       | 211 ------------------
 .../reactions/EKReactionImplIndexed.hpp       | 169 ++++++++++++--
 .../src/lattice_boltzmann/LBWalberlaImpl.hpp  |  23 +-
 .../src/lattice_boltzmann/ResetForce.hpp      |   6 +-
 .../utils/boundary.hpp}                       |   2 +-
 .../utils/types_conversion.hpp}               |   0
 .../tests/lb_kernels_unit_tests.cpp           |   3 +-
 28 files changed, 312 insertions(+), 327 deletions(-)
 rename src/walberla_bridge/{src/electrokinetics/reactions/EKReactionImplBulk.cpp => include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp} (51%)
 delete mode 100644 src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
 rename src/walberla_bridge/{include/walberla_bridge/utils/boundary_utils.hpp => src/utils/boundary.hpp} (99%)
 rename src/walberla_bridge/{include/walberla_bridge/utils/walberla_utils.hpp => src/utils/types_conversion.hpp} (100%)

diff --git a/src/core/lb/particle_coupling.cpp b/src/core/lb/particle_coupling.cpp
index cf10874730..48b1afa9f1 100644
--- a/src/core/lb/particle_coupling.cpp
+++ b/src/core/lb/particle_coupling.cpp
@@ -86,6 +86,7 @@ Utils::Vector3d lb_drag_force(LB::Solver const &lb, double lb_gamma,
 /**
  * @brief Check if a position is within the local box + halo.
  *
+ * @param local_box Local geometry
  * @param pos Position to check
  * @param halo Halo
  *
diff --git a/src/core/unit_tests/ek_interface_test.cpp b/src/core/unit_tests/ek_interface_test.cpp
index b2f5a9659f..bff055e845 100644
--- a/src/core/unit_tests/ek_interface_test.cpp
+++ b/src/core/unit_tests/ek_interface_test.cpp
@@ -85,7 +85,7 @@ static auto make_ek_actor() {
   ek_lattice = std::make_shared<LatticeWalberla>(
       params.grid_dimensions, ::communicator.node_grid, n_ghost_layers);
   ek_container = std::make_shared<EK::EKWalberla::ek_container_type>(
-      params.tau, new_ek_poisson_none(ek_lattice, single_precision));
+      params.tau, walberla::new_ek_poisson_none(ek_lattice, single_precision));
   ek_reactions = std::make_shared<EK::EKWalberla::ek_reactions_type>();
   ek_instance = std::make_shared<EK::EKWalberla>(ek_container, ek_reactions);
 #endif
@@ -146,7 +146,7 @@ BOOST_AUTO_TEST_CASE(ek_interface_walberla) {
     auto constexpr single_precision = true;
     auto constexpr stoich = 1.;
     auto constexpr order = 2.;
-    auto ek_species = new_ek_walberla(
+    auto ek_species = walberla::new_ek_walberla(
         espresso::ek_lattice, params.diffusion, params.kT, params.valency,
         params.ext_efield, params.density, false, false, single_precision);
     auto ek_reactant = std::make_shared<EKReactant>(ek_species, stoich, order);
diff --git a/src/script_interface/walberla/EKFFT.hpp b/src/script_interface/walberla/EKFFT.hpp
index c02fc1bae6..6ff34dcbb6 100644
--- a/src/script_interface/walberla/EKFFT.hpp
+++ b/src/script_interface/walberla/EKFFT.hpp
@@ -55,8 +55,8 @@ class EKFFT : public EKPoissonSolver {
     auto const permittivity =
         get_value<double>(args, "permittivity") * m_conv_permittivity;
 
-    m_instance = new_ek_poisson_fft(m_lattice->lattice(), permittivity,
-                                    m_single_precision);
+    m_instance = ::walberla::new_ek_poisson_fft(
+        m_lattice->lattice(), permittivity, m_single_precision);
 
     add_parameters({
         {"permittivity",
diff --git a/src/script_interface/walberla/EKNone.hpp b/src/script_interface/walberla/EKNone.hpp
index 5aa1eb4e70..3c1c38cdc4 100644
--- a/src/script_interface/walberla/EKNone.hpp
+++ b/src/script_interface/walberla/EKNone.hpp
@@ -45,7 +45,8 @@ class EKNone : public EKPoissonSolver {
     m_single_precision = get_value_or<bool>(args, "single_precision", false);
     m_lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
 
-    m_instance = new_ek_poisson_none(m_lattice->lattice(), m_single_precision);
+    m_instance = ::walberla::new_ek_poisson_none(m_lattice->lattice(),
+                                                 m_single_precision);
 
     add_parameters({
         {"single_precision", AutoParameter::read_only,
diff --git a/src/script_interface/walberla/EKReaction.hpp b/src/script_interface/walberla/EKReaction.hpp
index 3adc5f8847..75e2d536c1 100644
--- a/src/script_interface/walberla/EKReaction.hpp
+++ b/src/script_interface/walberla/EKReaction.hpp
@@ -27,9 +27,9 @@
 #include "LatticeIndices.hpp"
 #include "LatticeWalberla.hpp"
 
+#include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
 #include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
-#include <walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp>
-#include <walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp>
 
 #include <script_interface/ScriptInterface.hpp>
 #include <script_interface/auto_parameters/AutoParameters.hpp>
@@ -80,22 +80,21 @@ class EKReaction : public AutoParameters<EKReaction, LatticeIndices> {
     return tau / std::pow(Utils::int_pow<3>(agrid), sum_alphas - 1.);
   }
 
-  template <typename T>
-  std::shared_ptr<T> make_instance(VariantMap const &args) const {
+  template <typename F>
+  auto make_instance(VariantMap const &args, F &allocator) const {
     auto lattice = get_value<std::shared_ptr<LatticeWalberla>>(args, "lattice");
-    auto reactant = get_value<std::vector<Variant>>(args, "reactants");
-    auto output =
-        std::vector<std::shared_ptr<::walberla::EKReactant>>(reactant.size());
+    auto reactants = get_value<std::vector<Variant>>(args, "reactants");
+    auto output = ::walberla::EKReactionBase::reactants_type(reactants.size());
     auto get_instance = [](Variant const &v) {
       return get_value<std::shared_ptr<EKReactant>>(v)->get_instance();
     };
-    std::transform(reactant.begin(), reactant.end(), output.begin(),
+    std::transform(reactants.begin(), reactants.end(), output.begin(),
                    get_instance);
 
     auto const coefficient =
         get_value<double>(args, "coefficient") * get_conversion_coefficient();
 
-    return std::make_shared<T>(lattice->lattice(), output, coefficient);
+    return allocator(lattice->lattice(), output, coefficient);
   }
 
   std::shared_ptr<::walberla::EKReactionBase> m_ekreaction;
@@ -118,7 +117,7 @@ class EKBulkReaction : public EKReaction {
 
   void do_construct(VariantMap const &args) override {
     m_conv_coefficient = calculate_bulk_conversion_factor(args);
-    m_ekreaction = make_instance<::walberla::EKReactionImplBulk>(args);
+    m_ekreaction = make_instance(args, ::walberla::new_ek_reaction_bulk);
   }
 };
 
@@ -143,10 +142,9 @@ class EKIndexedReaction : public EKReaction {
   void do_construct(VariantMap const &args) override {
     auto const agrid = get_agrid(args);
     m_conv_coefficient = calculate_bulk_conversion_factor(args) / agrid;
-    m_ekreaction = make_instance<::walberla::EKReactionImplIndexed>(args);
     m_ekreaction_impl =
-        std::dynamic_pointer_cast<::walberla::EKReactionImplIndexed>(
-            get_instance());
+        make_instance(args, ::walberla::new_ek_reaction_indexed);
+    m_ekreaction = m_ekreaction_impl;
   }
 
   [[nodiscard]] Variant do_call_method(std::string const &method,
@@ -170,7 +168,7 @@ class EKIndexedReaction : public EKReaction {
   }
 
 private:
-  std::shared_ptr<::walberla::EKReactionImplIndexed> m_ekreaction_impl;
+  std::shared_ptr<::walberla::EKReactionBaseIndexed> m_ekreaction_impl;
 };
 
 } // namespace ScriptInterface::walberla
diff --git a/src/script_interface/walberla/EKSpecies.cpp b/src/script_interface/walberla/EKSpecies.cpp
index 9f908ad15b..c38cb1c5dc 100644
--- a/src/script_interface/walberla/EKSpecies.cpp
+++ b/src/script_interface/walberla/EKSpecies.cpp
@@ -24,7 +24,6 @@
 #include "EKWalberlaNodeState.hpp"
 #include "WalberlaCheckpoint.hpp"
 
-#include <walberla_bridge/LatticeWalberla.hpp>
 #include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
 
 #include <boost/mpi.hpp>
@@ -119,7 +118,7 @@ void EKSpecies::do_construct(VariantMap const &args) {
     auto const ek_ext_efield = ext_efield * m_conv_ext_efield;
     auto const ek_density = m_density = density * m_conv_density;
     auto const ek_kT = kT * m_conv_energy;
-    m_instance = new_ek_walberla(
+    m_instance = ::walberla::new_ek_walberla(
         m_lattice->lattice(), ek_diffusion, ek_kT,
         get_value<double>(args, "valency"), ek_ext_efield, ek_density,
         get_value<bool>(args, "advection"),
diff --git a/src/walberla_bridge/CMakeLists.txt b/src/walberla_bridge/CMakeLists.txt
index f7912560ff..b5595397f3 100644
--- a/src/walberla_bridge/CMakeLists.txt
+++ b/src/walberla_bridge/CMakeLists.txt
@@ -42,8 +42,8 @@ if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
                         PRIVATE ${WALBERLA_LIBS})
   target_include_directories(espresso_walberla_cuda PUBLIC include)
   target_include_directories(
-    espresso_walberla_cuda PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
-    PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src)
+    espresso_walberla_cuda PRIVATE ${WALBERLA_INCLUDE_DIRS}
+                                   ${walberla_BINARY_DIR}/src)
   install(TARGETS espresso_walberla_cuda
           LIBRARY DESTINATION ${ESPRESSO_INSTALL_PYTHON}/espressomd)
   target_link_libraries(espresso_walberla PUBLIC espresso::walberla_cuda)
@@ -52,9 +52,8 @@ endif()
 target_link_libraries(
   espresso_walberla PUBLIC MPI::MPI_CXX espresso::utils
   PRIVATE espresso::cpp_flags espresso::walberla::cpp_flags ${WALBERLA_LIBS})
-target_include_directories(
-  espresso_walberla PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
-  PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src)
+target_include_directories(espresso_walberla PRIVATE ${WALBERLA_INCLUDE_DIRS}
+                                                     ${walberla_BINARY_DIR}/src)
 
 add_subdirectory(src)
 
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp
index f0a7a2db61..10337c34df 100644
--- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_fft_init.hpp
@@ -20,11 +20,14 @@
 #pragma once
 
 #include <walberla_bridge/LatticeWalberla.hpp>
-
-#include "PoissonSolver/PoissonSolver.hpp"
+#include <walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp>
 
 #include <memory>
 
+namespace walberla {
+
 std::shared_ptr<walberla::PoissonSolver>
 new_ek_poisson_fft(std::shared_ptr<LatticeWalberla> const &lattice,
                    double permittivity, bool single_precision);
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp
index af7d318989..aa9890d2aa 100644
--- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_poisson_none_init.hpp
@@ -20,11 +20,14 @@
 #pragma once
 
 #include <walberla_bridge/LatticeWalberla.hpp>
-
-#include "PoissonSolver/PoissonSolver.hpp"
+#include <walberla_bridge/electrokinetics/PoissonSolver/PoissonSolver.hpp>
 
 #include <memory>
 
+namespace walberla {
+
 std::shared_ptr<walberla::PoissonSolver>
 new_ek_poisson_none(std::shared_ptr<LatticeWalberla> const &lattice,
                     bool single_precision);
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp
index fb700df86c..4de3b85435 100644
--- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/ek_walberla_init.hpp
@@ -22,13 +22,29 @@
 #include "EKinWalberlaBase.hpp"
 
 #include <walberla_bridge/LatticeWalberla.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp>
 
 #include <utils/Vector.hpp>
 
 #include <memory>
 
+namespace walberla {
+
 std::shared_ptr<EKinWalberlaBase>
 new_ek_walberla(std::shared_ptr<LatticeWalberla> const &lattice,
                 double diffusion, double kT, double valency,
                 Utils::Vector3d ext_efield, double density, bool advection,
                 bool friction_coupling, bool single_precision);
+
+std::shared_ptr<EKReactionBase>
+new_ek_reaction_bulk(std::shared_ptr<LatticeWalberla> const &lattice,
+                     typename EKReactionBase::reactants_type const &reactants,
+                     double coefficient);
+
+std::shared_ptr<EKReactionBaseIndexed> new_ek_reaction_indexed(
+    std::shared_ptr<LatticeWalberla> const &lattice,
+    typename EKReactionBase::reactants_type const &reactants,
+    double coefficient);
+
+} // namespace walberla
diff --git a/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp
index 049bd3226d..392c515b1a 100644
--- a/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp
@@ -29,18 +29,19 @@
 namespace walberla {
 
 class EKReactionBase {
-private:
-  std::vector<std::shared_ptr<EKReactant>> m_reactants;
-  double m_coefficient;
+public:
+  using reactants_type = std::vector<std::shared_ptr<EKReactant>>;
 
+private:
   std::shared_ptr<LatticeWalberla> m_lattice;
+  reactants_type m_reactants;
+  double m_coefficient;
 
 public:
   EKReactionBase(std::shared_ptr<LatticeWalberla> lattice,
-                 std::vector<std::shared_ptr<EKReactant>> reactants,
-                 double coefficient)
-      : m_reactants(std::move(reactants)), m_coefficient(coefficient),
-        m_lattice(std::move(lattice)) {}
+                 reactants_type reactants, double coefficient)
+      : m_lattice(std::move(lattice)), m_reactants(std::move(reactants)),
+        m_coefficient(coefficient) {}
 
   virtual ~EKReactionBase() = default;
 
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp
similarity index 51%
rename from src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp
rename to src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp
index 800a84af57..90d17bedb1 100644
--- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.cpp
+++ b/src/walberla_bridge/include/walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022-2023 The ESPResSo project
+ * Copyright (C) 2024 The ESPResSo project
  *
  * This file is part of ESPResSo.
  *
@@ -17,26 +17,24 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "EKReactionImplBulk.hpp"
+#pragma once
 
-#include "generated_kernels/ReactionKernelBulk_all.h"
+#include "EKReactionBase.hpp"
 
-#include <blockforest/StructuredBlockForest.h>
+#include <utils/Vector.hpp>
 
-namespace walberla {
+#include <optional>
 
-void EKReactionImplBulk::perform_reaction() {
-  // TODO: if my understanding is correct:
-  //  the kernels need to either run in the ghost layers and do the
-  //  synchronization before or not run and do a synchronization afterwards.
-  //  The better solution is probably the latter one. Not sure why it fails
-  //  atm.
+namespace walberla {
 
-  auto kernel = detail::ReactionKernelBulkSelector::get_kernel(
-      get_reactants(), get_coefficient());
+class EKReactionBaseIndexed : public EKReactionBase {
+public:
+  using EKReactionBase::EKReactionBase;
+  ~EKReactionBaseIndexed() override = default;
+  virtual void set_node_is_boundary(Utils::Vector3i const &node,
+                                    bool is_boundary) = 0;
+  virtual std::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node) = 0;
+};
 
-  for (auto &block : *get_lattice()->get_blocks()) {
-    kernel(&block);
-  }
-}
 } // namespace walberla
diff --git a/src/walberla_bridge/src/BoundaryHandling.hpp b/src/walberla_bridge/src/BoundaryHandling.hpp
index 86c2053888..b5d59cbe87 100644
--- a/src/walberla_bridge/src/BoundaryHandling.hpp
+++ b/src/walberla_bridge/src/BoundaryHandling.hpp
@@ -20,7 +20,8 @@
 #pragma once
 
 #include <walberla_bridge/BlockAndCell.hpp>
-#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include "utils/types_conversion.hpp"
 
 #include <blockforest/StructuredBlockForest.h>
 #include <domain_decomposition/BlockDataID.h>
@@ -38,12 +39,13 @@
 
 namespace walberla {
 
-/// Flag for domain cells, i.e. all cells
-FlagUID const Domain_flag("domain");
-/// Flag for boundary cells
-FlagUID const Boundary_flag("boundary");
-
 template <typename T, typename BoundaryClass> class BoundaryHandling {
+private:
+  /** Flag for domain cells, i.e. all cells. */
+  FlagUID const Domain_flag{"domain"};
+  /** Flag for boundary cells. */
+  FlagUID const Boundary_flag{"boundary"};
+
   /** Container for the map between cells and values. */
   class DynamicValueCallback {
   public:
@@ -172,7 +174,7 @@ template <typename T, typename BoundaryClass> class BoundaryHandling {
   std::shared_ptr<BoundaryClass> m_boundary;
   bool m_pending_changes;
 
-  /** Register flags and set all cells to @ref Domain_flag. */
+  /** Register flags and reset all cells. */
   void flag_reset_kernel(IBlock *const block) {
     auto flag_field = block->template getData<FlagField>(m_flag_field_id);
     // register flags
diff --git a/src/walberla_bridge/src/BoundaryPackInfo.hpp b/src/walberla_bridge/src/BoundaryPackInfo.hpp
index 3055b3ebe0..baeeb7c385 100644
--- a/src/walberla_bridge/src/BoundaryPackInfo.hpp
+++ b/src/walberla_bridge/src/BoundaryPackInfo.hpp
@@ -23,6 +23,7 @@
 #include <core/mpi/RecvBuffer.h>
 #include <core/mpi/SendBuffer.h>
 #include <domain_decomposition/IBlock.h>
+#include <field/FlagUID.h>
 #include <field/communication/PackInfo.h>
 #include <stencil/Directions.h>
 
@@ -38,6 +39,10 @@ template <typename GhostLayerField_T, typename Boundary_T>
 class BoundaryPackInfo : public PackInfo<GhostLayerField_T> {
 protected:
   using PackInfo<GhostLayerField_T>::bdId_;
+  /** Flag for domain cells, i.e. all cells. */
+  FlagUID const Domain_flag{"domain"};
+  /** Flag for boundary cells. */
+  FlagUID const Boundary_flag{"boundary"};
 
 public:
   using PackInfo<GhostLayerField_T>::PackInfo;
diff --git a/src/walberla_bridge/src/LatticeWalberla.cpp b/src/walberla_bridge/src/LatticeWalberla.cpp
index 0559fab379..2dc2943a40 100644
--- a/src/walberla_bridge/src/LatticeWalberla.cpp
+++ b/src/walberla_bridge/src/LatticeWalberla.cpp
@@ -19,7 +19,8 @@
 
 #include <walberla_bridge/BlockAndCell.hpp>
 #include <walberla_bridge/LatticeWalberla.hpp>
-#include <walberla_bridge/utils/walberla_utils.hpp>
+
+#include "utils/types_conversion.hpp"
 
 #include <blockforest/Initialization.h>
 #include <blockforest/StructuredBlockForest.h>
diff --git a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
index 9fc4019aa6..14a7ff9924 100644
--- a/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/electrokinetics/EKinWalberlaImpl.hpp
@@ -22,21 +22,21 @@
 #include <blockforest/communication/UniformBufferedScheme.h>
 #include <field/AddToStorage.h>
 #include <field/FlagField.h>
+#include <field/FlagUID.h>
 #include <field/GhostLayerField.h>
 #include <field/communication/PackInfo.h>
 #include <field/vtk/FlagFieldCellFilter.h>
 #include <field/vtk/VTKWriter.h>
-#include <lbm/lattice_model/D3Q27.h>
-#include <timeloop/SweepTimeloop.h>
+#include <stencil/D3Q27.h>
 
 #include "../BoundaryHandling.hpp"
+#include "../utils/boundary.hpp"
+#include "../utils/types_conversion.hpp"
 #include "ek_kernels.hpp"
 
 #include <walberla_bridge/BlockAndCell.hpp>
 #include <walberla_bridge/LatticeWalberla.hpp>
 #include <walberla_bridge/electrokinetics/EKinWalberlaBase.hpp>
-#include <walberla_bridge/utils/boundary_utils.hpp>
-#include <walberla_bridge/utils/walberla_utils.hpp>
 
 #include <utils/Vector.hpp>
 
@@ -109,6 +109,11 @@ class EKinWalberlaImpl : public EKinWalberlaBase {
   BlockDataID m_flag_field_density_id;
   BlockDataID m_flag_field_flux_id;
 
+  /** Flag for domain cells, i.e. all cells. */
+  FlagUID const Domain_flag{"domain"};
+  /** Flag for boundary cells. */
+  FlagUID const Boundary_flag{"boundary"};
+
   /** Block forest */
   std::shared_ptr<LatticeWalberla> m_lattice;
 
diff --git a/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp
index 2598c6616e..b67da67b79 100644
--- a/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp
+++ b/src/walberla_bridge/src/electrokinetics/ek_poisson_fft_init.cpp
@@ -23,6 +23,8 @@
 
 #include <memory>
 
+namespace walberla {
+
 std::shared_ptr<walberla::PoissonSolver>
 new_ek_poisson_fft(std::shared_ptr<LatticeWalberla> const &lattice,
                    double permittivity, bool single_precision) {
@@ -31,3 +33,5 @@ new_ek_poisson_fft(std::shared_ptr<LatticeWalberla> const &lattice,
   }
   return std::make_shared<walberla::FFT<double>>(lattice, permittivity);
 }
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp
index f912cd4911..2636be3ebd 100644
--- a/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp
+++ b/src/walberla_bridge/src/electrokinetics/ek_poisson_none_init.cpp
@@ -23,6 +23,8 @@
 
 #include <memory>
 
+namespace walberla {
+
 std::shared_ptr<walberla::PoissonSolver>
 new_ek_poisson_none(std::shared_ptr<LatticeWalberla> const &lattice,
                     bool single_precision) {
@@ -31,3 +33,5 @@ new_ek_poisson_none(std::shared_ptr<LatticeWalberla> const &lattice,
   }
   return std::make_shared<walberla::None<double>>(lattice);
 }
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp b/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp
index a666e8f9ac..03fc1d0fae 100644
--- a/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp
+++ b/src/walberla_bridge/src/electrokinetics/ek_walberla_init.cpp
@@ -18,26 +18,49 @@
  */
 
 #include "EKinWalberlaImpl.hpp"
+#include "reactions/EKReactionImplBulk.hpp"
+#include "reactions/EKReactionImplIndexed.hpp"
 
 #include <walberla_bridge/LatticeWalberla.hpp>
 #include <walberla_bridge/electrokinetics/ek_walberla_init.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp>
 
 #include <utils/Vector.hpp>
 
 #include <memory>
 
+namespace walberla {
+
 std::shared_ptr<EKinWalberlaBase>
 new_ek_walberla(std::shared_ptr<LatticeWalberla> const &lattice,
                 double diffusion, double kT, double valency,
                 Utils::Vector3d ext_efield, double density, bool advection,
                 bool friction_coupling, bool single_precision) {
   if (single_precision) {
-    return std::make_shared<walberla::EKinWalberlaImpl<13, float>>(
+    return std::make_shared<EKinWalberlaImpl<13, float>>(
         lattice, diffusion, kT, valency, ext_efield, density, advection,
         friction_coupling);
   }
 
-  return std::make_shared<walberla::EKinWalberlaImpl<13, double>>(
+  return std::make_shared<EKinWalberlaImpl<13, double>>(
       lattice, diffusion, kT, valency, ext_efield, density, advection,
       friction_coupling);
 }
+
+std::shared_ptr<EKReactionBase>
+new_ek_reaction_bulk(std::shared_ptr<LatticeWalberla> const &lattice,
+                     typename EKReactionBase::reactants_type const &reactants,
+                     double coefficient) {
+  return std::make_shared<EKReactionImplBulk>(lattice, reactants, coefficient);
+}
+
+std::shared_ptr<EKReactionBaseIndexed> new_ek_reaction_indexed(
+    std::shared_ptr<LatticeWalberla> const &lattice,
+    typename EKReactionBase::reactants_type const &reactants,
+    double coefficient) {
+  return std::make_shared<EKReactionImplIndexed>(lattice, reactants,
+                                                 coefficient);
+}
+
+} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt b/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt
index 6559b5c9ff..4f5e805245 100644
--- a/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt
+++ b/src/walberla_bridge/src/electrokinetics/reactions/CMakeLists.txt
@@ -18,6 +18,3 @@
 #
 
 add_subdirectory(generated_kernels)
-
-target_sources(espresso_walberla PRIVATE EKReactionImplBulk.cpp
-                                         EKReactionImplIndexed.cpp)
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp
index 33f7e21770..09f38d319b 100644
--- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplBulk.hpp
@@ -19,28 +19,37 @@
 
 #pragma once
 
+#include "generated_kernels/ReactionKernelBulk_all.h"
+
 #include <walberla_bridge/LatticeWalberla.hpp>
 #include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
 #include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
 
-#include <memory>
-#include <vector>
+#include <blockforest/StructuredBlockForest.h>
 
 namespace walberla {
 
 class EKReactionImplBulk : public EKReactionBase {
 public:
-  EKReactionImplBulk(const std::shared_ptr<LatticeWalberla> &lattice,
-                     const std::vector<std::shared_ptr<EKReactant>> &reactants,
-                     double coefficient)
-      : EKReactionBase(lattice, reactants, coefficient) {}
   ~EKReactionImplBulk() override = default;
 
+  using EKReactionBase::EKReactionBase;
   using EKReactionBase::get_coefficient;
   using EKReactionBase::get_lattice;
   using EKReactionBase::get_reactants;
 
-  void perform_reaction() override;
+  void perform_reaction() override {
+    // TODO: if my understanding is correct:
+    // the kernels need to either run in the ghost layers and do the
+    // synchronization before or not run and do a synchronization afterwards.
+    // The better solution is probably the latter one. Not sure why it fails
+    // atm.
+    auto kernel = detail::ReactionKernelBulkSelector::get_kernel(
+        get_reactants(), get_coefficient());
+    for (auto &block : *get_lattice()->get_blocks()) {
+      kernel(&block);
+    }
+  }
 };
 
 } // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
deleted file mode 100644
index c3f5643fd9..0000000000
--- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (C) 2022-2023 The ESPResSo project
- *
- * This file is part of ESPResSo.
- *
- * ESPResSo is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * ESPResSo is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "EKReactionImplIndexed.hpp"
-
-#include "generated_kernels/ReactionKernelIndexed_all.h"
-
-#include <walberla_bridge/BlockAndCell.hpp>
-#include <walberla_bridge/LatticeWalberla.hpp>
-#include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
-#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
-
-#include <domain_decomposition/BlockDataID.h>
-#include <domain_decomposition/IBlock.h>
-#include <field/AddToStorage.h>
-
-#include <cassert>
-#include <cstddef>
-#include <memory>
-#include <optional>
-#include <tuple>
-#include <vector>
-
-namespace walberla {
-
-/// Flag for domain cells, i.e. all cells
-FlagUID const Domain_flag("domain");
-/// Flag for boundary cells
-FlagUID const Boundary_flag("boundary");
-
-namespace detail {
-// FlagField to use
-using FlagField = FlagField<uint8_t>;
-
-template <typename FlagField>
-inline auto
-get_flag_field_and_flag(IBlock *block,
-                        domain_decomposition::BlockDataID const &flagfield_id) {
-  auto const flag_field =
-      block->template uncheckedFastGetData<FlagField>(flagfield_id);
-  auto const boundary_flag = flag_field->getFlag(Boundary_flag);
-  return std::make_tuple(flag_field, boundary_flag);
-}
-
-template <typename FlagField, typename IndexVectors, typename IndexInfo>
-void fillFromFlagField(IBlock *block, BlockDataID indexVectorID,
-                       ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID,
-                       FlagUID domainFlagUID) {
-  auto *indexVectors = block->uncheckedFastGetData<IndexVectors>(indexVectorID);
-  auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
-  auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
-  auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
-
-  auto *flagField = block->getData<FlagField>(flagFieldID);
-
-  assert(flagField->flagExists(boundaryFlagUID) and
-         flagField->flagExists(domainFlagUID));
-
-  auto boundaryFlag = flagField->getFlag(boundaryFlagUID);
-  auto domainFlag = flagField->getFlag(domainFlagUID);
-
-  auto inner = flagField->xyzSize();
-  inner.expand(cell_idx_t(-1));
-
-  indexVectorAll.clear();
-  indexVectorInner.clear();
-  indexVectorOuter.clear();
-
-  auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
-  for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
-       ++it) {
-
-    if (!isFlagSet(it, boundaryFlag))
-      continue;
-    if (flagWithGLayers.contains(it.x(), it.y(), it.z()) &&
-        isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
-
-      auto element = IndexInfo(it.x(), it.y(), it.z());
-
-      indexVectorAll.push_back(element);
-      if (inner.contains(it.x(), it.y(), it.z()))
-        indexVectorInner.push_back(element);
-      else
-        indexVectorOuter.push_back(element);
-    }
-  }
-
-  indexVectors->syncGPU();
-}
-
-template <typename FlagField, typename IndexVectors, typename IndexInfo>
-void fillFromFlagField(const std::shared_ptr<StructuredBlockForest> &blocks,
-                       BlockDataID indexVectorID, ConstBlockDataID flagFieldID,
-                       FlagUID boundaryFlagUID, FlagUID domainFlagUID) {
-  for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
-    fillFromFlagField<FlagField, IndexVectors, IndexInfo>(
-        blockIt.get(), indexVectorID, flagFieldID, boundaryFlagUID,
-        domainFlagUID);
-}
-} // namespace detail
-
-EKReactionImplIndexed::EKReactionImplIndexed(
-    std::shared_ptr<LatticeWalberla> lattice,
-    std::vector<std::shared_ptr<EKReactant>> reactants, double coefficient)
-    : EKReactionBase(lattice, reactants, coefficient),
-      m_pending_changes(false) {
-  m_flagfield_id =
-      static_cast<std::size_t>(field::addFlagFieldToStorage<detail::FlagField>(
-          get_lattice()->get_blocks(), "flag field reaction",
-          get_lattice()->get_ghost_layers()));
-
-  // take one IndexVector as a dummy-value
-  using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>::
-      ReactionKernelIndexed::IndexVectors;
-
-  auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
-    return new IndexVectors();
-  };
-  m_indexvector_id = static_cast<std::size_t>(
-      get_lattice()
-          ->get_blocks()
-          ->template addStructuredBlockData<IndexVectors>(createIdxVector,
-                                                          "IndexField"));
-
-  for (auto &block : *get_lattice()->get_blocks()) {
-    auto flag_field =
-        block.template getData<detail::FlagField>(BlockDataID(m_flagfield_id));
-    // register flags
-    flag_field->registerFlag(Domain_flag);
-    flag_field->registerFlag(Boundary_flag);
-    // mark all cells as domain cells and fluid cells
-    auto domain_flag = flag_field->getFlag(Domain_flag);
-    auto boundary_flag = flag_field->getFlag(Boundary_flag);
-    for (auto it = flag_field->begin(); it != flag_field->end(); ++it) {
-      flag_field->addFlag(it.x(), it.y(), it.z(), domain_flag);
-      flag_field->removeFlag(it.x(), it.y(), it.z(), boundary_flag);
-    }
-  }
-}
-
-void EKReactionImplIndexed::perform_reaction() {
-  boundary_update();
-
-  auto kernel = detail::ReactionKernelIndexedSelector::get_kernel(
-      get_reactants(), get_coefficient(), BlockDataID(get_indexvector_id()));
-
-  for (auto &block : *get_lattice()->get_blocks()) {
-    kernel(&block);
-  }
-}
-
-void EKReactionImplIndexed::set_node_is_boundary(Utils::Vector3i const &node,
-                                                 bool is_boundary) {
-  auto bc = get_block_and_cell(*get_lattice(), node, true);
-  if (!bc)
-    return;
-
-  auto [flag_field, boundary_flag] =
-      detail::get_flag_field_and_flag<detail::FlagField>(
-          bc->block, BlockDataID(get_flagfield_id()));
-  if (is_boundary) {
-    flag_field->addFlag(bc->cell, boundary_flag);
-  } else {
-    flag_field->removeFlag(bc->cell, boundary_flag);
-  }
-  m_pending_changes = true;
-}
-
-std::optional<bool>
-EKReactionImplIndexed::get_node_is_boundary(Utils::Vector3i const &node) {
-  auto bc = get_block_and_cell(*get_lattice(), node, true);
-  if (!bc)
-    return std::nullopt;
-
-  auto [flag_field, boundary_flag] =
-      detail::get_flag_field_and_flag<detail::FlagField>(
-          bc->block, BlockDataID(get_flagfield_id()));
-  return {flag_field->isFlagSet(bc->cell, boundary_flag)};
-}
-
-void EKReactionImplIndexed::boundary_update() {
-  // take one IndexVector/IndexInfo as a dummy-value
-  using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>::
-      ReactionKernelIndexed::IndexVectors;
-  using IndexInfo = detail::ReactionKernelIndexedSelector::KernelTrait<>::
-      ReactionKernelIndexed::IndexInfo;
-
-  if (m_pending_changes) {
-    detail::fillFromFlagField<detail::FlagField, IndexVectors, IndexInfo>(
-        get_lattice()->get_blocks(), BlockDataID(get_indexvector_id()),
-        BlockDataID(get_flagfield_id()), Boundary_flag, Domain_flag);
-    m_pending_changes = false;
-  }
-}
-} // namespace walberla
diff --git a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp
index 65686644f7..48125caf36 100644
--- a/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp
+++ b/src/walberla_bridge/src/electrokinetics/reactions/EKReactionImplIndexed.hpp
@@ -19,50 +19,179 @@
 
 #pragma once
 
+#include "generated_kernels/ReactionKernelIndexed_all.h"
+
+#include <walberla_bridge/BlockAndCell.hpp>
 #include <walberla_bridge/LatticeWalberla.hpp>
 #include <walberla_bridge/electrokinetics/reactions/EKReactant.hpp>
-#include <walberla_bridge/electrokinetics/reactions/EKReactionBase.hpp>
+#include <walberla_bridge/electrokinetics/reactions/EKReactionBaseIndexed.hpp>
+
+#include <blockforest/StructuredBlockForest.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
+#include <field/AddToStorage.h>
+#include <field/FlagField.h>
+#include <field/FlagUID.h>
 
 #include <utils/Vector.hpp>
 
+#include <cassert>
 #include <cstddef>
 #include <memory>
 #include <optional>
+#include <tuple>
 #include <vector>
 
 namespace walberla {
 
-class EKReactionImplIndexed : public EKReactionBase {
+class EKReactionImplIndexed : public EKReactionBaseIndexed {
 private:
-  std::size_t m_flagfield_id;
-  std::size_t m_indexvector_id;
-
+  BlockDataID m_flagfield_id;
+  BlockDataID m_indexvector_id;
   bool m_pending_changes;
 
 public:
-  EKReactionImplIndexed(std::shared_ptr<LatticeWalberla> lattice,
-                        std::vector<std::shared_ptr<EKReactant>> reactants,
-                        double coefficient);
+  /** Flag for domain cells, i.e. all cells. */
+  FlagUID const Domain_flag{"domain"};
+  /** Flag for boundary cells. */
+  FlagUID const Boundary_flag{"boundary"};
+
+  using FlagField = field::FlagField<uint8_t>;
+  using IndexVectors = detail::ReactionKernelIndexedSelector::KernelTrait<>::
+      ReactionKernelIndexed::IndexVectors;
+  using IndexInfo = detail::ReactionKernelIndexedSelector::KernelTrait<>::
+      ReactionKernelIndexed::IndexInfo;
+
+private:
+  auto get_flag_field_and_flag(IBlock *block, BlockDataID const &flagfield_id) {
+    auto const flag_field =
+        block->template uncheckedFastGetData<FlagField>(flagfield_id);
+    auto const boundary_flag = flag_field->getFlag(Boundary_flag);
+    return std::make_tuple(flag_field, boundary_flag);
+  }
+
+public:
+  EKReactionImplIndexed(std::shared_ptr<LatticeWalberla> const &lattice,
+                        reactants_type const &reactants, double coefficient)
+      : EKReactionBaseIndexed(lattice, reactants, coefficient),
+        m_pending_changes(false) {
+    m_flagfield_id = field::addFlagFieldToStorage<FlagField>(
+        get_lattice()->get_blocks(), "flag field reaction",
+        get_lattice()->get_ghost_layers());
+
+    auto createIdxVector = [](IBlock *const, StructuredBlockStorage *const) {
+      return new IndexVectors();
+    };
+    m_indexvector_id = get_lattice()
+                           ->get_blocks()
+                           ->template addStructuredBlockData<IndexVectors>(
+                               createIdxVector, "IndexField");
+
+    for (auto &block : *get_lattice()->get_blocks()) {
+      auto flag_field = block.template getData<FlagField>(m_flagfield_id);
+      // register flags
+      flag_field->registerFlag(Domain_flag);
+      flag_field->registerFlag(Boundary_flag);
+      // mark all cells as domain cells and fluid cells
+      auto domain_flag = flag_field->getFlag(Domain_flag);
+      auto boundary_flag = flag_field->getFlag(Boundary_flag);
+      for (auto it = flag_field->begin(); it != flag_field->end(); ++it) {
+        flag_field->addFlag(it.x(), it.y(), it.z(), domain_flag);
+        flag_field->removeFlag(it.x(), it.y(), it.z(), boundary_flag);
+      }
+    }
+  }
   ~EKReactionImplIndexed() override = default;
 
-  using EKReactionBase::get_coefficient;
-  using EKReactionBase::get_lattice;
-  using EKReactionBase::get_reactants;
+  using EKReactionBaseIndexed::get_coefficient;
+  using EKReactionBaseIndexed::get_lattice;
+  using EKReactionBaseIndexed::get_reactants;
 
-  void perform_reaction() override;
+  void perform_reaction() override {
+    boundary_update();
+    auto kernel = detail::ReactionKernelIndexedSelector::get_kernel(
+        get_reactants(), get_coefficient(), m_indexvector_id);
+    for (auto &block : *get_lattice()->get_blocks()) {
+      kernel(&block);
+    }
+  }
 
-  void set_node_is_boundary(Utils::Vector3i const &node, bool is_boundary);
-  [[nodiscard]] std::optional<bool>
-  get_node_is_boundary(Utils::Vector3i const &node);
+  void set_node_is_boundary(Utils::Vector3i const &node,
+                            bool is_boundary) override {
+    if (auto bc = get_block_and_cell(*get_lattice(), node, true)) {
+      auto const [flag_field, boundary_flag] =
+          get_flag_field_and_flag(bc->block, m_flagfield_id);
+      if (is_boundary) {
+        flag_field->addFlag(bc->cell, boundary_flag);
+      } else {
+        flag_field->removeFlag(bc->cell, boundary_flag);
+      }
+      m_pending_changes = true;
+    }
+  }
 
-  [[nodiscard]] auto get_indexvector_id() const noexcept {
-    return m_indexvector_id;
+  [[nodiscard]] std::optional<bool>
+  get_node_is_boundary(Utils::Vector3i const &node) override {
+    if (auto bc = get_block_and_cell(*get_lattice(), node, true)) {
+      auto const [flag_field, boundary_flag] =
+          get_flag_field_and_flag(bc->block, m_flagfield_id);
+      return {flag_field->isFlagSet(bc->cell, boundary_flag)};
+    }
+    return std::nullopt;
   }
-  [[nodiscard]] auto get_flagfield_id() const noexcept {
-    return m_flagfield_id;
+
+  void boundary_update() {
+    if (m_pending_changes) {
+      for (auto &block : *get_lattice()->get_blocks()) {
+        fillFromFlagField(block);
+      }
+      m_pending_changes = false;
+    }
   }
 
-  void boundary_update();
+private:
+  void fillFromFlagField(IBlock &block) {
+    auto *indexVectors =
+        block.uncheckedFastGetData<IndexVectors>(m_indexvector_id);
+    auto &indexVectorAll = indexVectors->indexVector(IndexVectors::ALL);
+    auto &indexVectorInner = indexVectors->indexVector(IndexVectors::INNER);
+    auto &indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER);
+
+    auto *flagField = block.getData<FlagField>(m_flagfield_id);
+
+    assert(flagField->flagExists(Boundary_flag) and
+           flagField->flagExists(Domain_flag));
+
+    auto boundaryFlag = flagField->getFlag(Boundary_flag);
+    auto domainFlag = flagField->getFlag(Domain_flag);
+
+    auto inner = flagField->xyzSize();
+    inner.expand(cell_idx_t(-1));
+
+    indexVectorAll.clear();
+    indexVectorInner.clear();
+    indexVectorOuter.clear();
+
+    auto flagWithGLayers = flagField->xyzSizeWithGhostLayer();
+    for (auto it = flagField->beginWithGhostLayerXYZ(); it != flagField->end();
+         ++it) {
+      if (!isFlagSet(it, boundaryFlag))
+        continue;
+
+      if (flagWithGLayers.contains(it.x(), it.y(), it.z()) &&
+          isFlagSet(it.neighbor(0, 0, 0, 0), domainFlag)) {
+        auto element = IndexInfo(it.x(), it.y(), it.z());
+        indexVectorAll.push_back(element);
+        if (inner.contains(it.x(), it.y(), it.z())) {
+          indexVectorInner.push_back(element);
+        } else {
+          indexVectorOuter.push_back(element);
+        }
+      }
+    }
+
+    indexVectors->syncGPU();
+  }
 };
 
 } // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
index ecf7e57a64..5270c26aa4 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
@@ -29,23 +29,20 @@
 #include <blockforest/Initialization.h>
 #include <blockforest/StructuredBlockForest.h>
 #include <blockforest/communication/UniformBufferedScheme.h>
+#include <domain_decomposition/BlockDataID.h>
 #include <domain_decomposition/IBlock.h>
+#include <field/AddToStorage.h>
 #include <field/GhostLayerField.h>
+#include <field/communication/PackInfo.h>
 #include <field/vtk/FlagFieldCellFilter.h>
 #include <field/vtk/VTKWriter.h>
-
-#include <field/AddToStorage.h>
-#include <field/FlagField.h>
-#include <field/communication/PackInfo.h>
-#include <lbm/communication/PdfFieldPackInfo.h>
-#include <lbm/field/AddToStorage.h>
-#include <lbm/field/PdfField.h>
-
 #include <stencil/D3Q19.h>
 #include <stencil/D3Q27.h>
 
 #include "../BoundaryHandling.hpp"
 #include "../BoundaryPackInfo.hpp"
+#include "../utils/boundary.hpp"
+#include "../utils/types_conversion.hpp"
 #include "InterpolateAndShiftAtBoundary.hpp"
 #include "ResetForce.hpp"
 #include "lb_kernels.hpp"
@@ -55,8 +52,6 @@
 #include <walberla_bridge/LatticeWalberla.hpp>
 #include <walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp>
 #include <walberla_bridge/lattice_boltzmann/LeesEdwardsPack.hpp>
-#include <walberla_bridge/utils/boundary_utils.hpp>
-#include <walberla_bridge/utils/walberla_utils.hpp>
 
 #include <utils/Vector.hpp>
 #include <utils/interpolation/bspline_3d.hpp>
@@ -71,6 +66,7 @@
 #include <optional>
 #include <stdexcept>
 #include <string>
+#include <tuple>
 #include <type_traits>
 #include <utility>
 #include <variant>
@@ -106,8 +102,8 @@ class LBWalberlaImpl : public LBWalberlaBase {
 
 protected:
   template <typename FT, lbmpy::Arch AT = lbmpy::Arch::CPU> struct FieldTrait {
-    using PdfField = GhostLayerField<FT, Stencil::Size>;
-    using VectorField = GhostLayerField<FT, uint_t{3u}>;
+    using PdfField = field::GhostLayerField<FT, Stencil::Size>;
+    using VectorField = field::GhostLayerField<FT, uint_t{3u}>;
     template <class Field>
     using PackInfo = field::communication::PackInfo<Field>;
   };
@@ -217,6 +213,9 @@ class LBWalberlaImpl : public LBWalberlaBase {
   BlockDataID m_velocity_field_id;
   BlockDataID m_vec_tmp_field_id;
 
+  /** Flag for boundary cells. */
+  FlagUID const Boundary_flag{"boundary"};
+
   /**
    * @brief Full communicator.
    * We use the D3Q27 directions to update cells along the diagonals during
diff --git a/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp b/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp
index 1d0a154e5b..ce7d19295a 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/ResetForce.hpp
@@ -22,11 +22,11 @@
 #include "generated_kernels/FieldAccessorsDoublePrecision.h"
 #include "generated_kernels/FieldAccessorsSinglePrecision.h"
 
-#include <walberla_bridge/utils/walberla_utils.hpp>
+#include "../utils/types_conversion.hpp"
 
 #include <core/math/Vector3.h>
-#include <domain_decomposition/SharedSweep.h>
-#include <lbm/sweeps/CellwiseSweep.h>
+#include <domain_decomposition/BlockDataID.h>
+#include <domain_decomposition/IBlock.h>
 
 #include <utils/Vector.hpp>
 
diff --git a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp b/src/walberla_bridge/src/utils/boundary.hpp
similarity index 99%
rename from src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
rename to src/walberla_bridge/src/utils/boundary.hpp
index 8c1558ee72..7d3f3cdb07 100644
--- a/src/walberla_bridge/include/walberla_bridge/utils/boundary_utils.hpp
+++ b/src/walberla_bridge/src/utils/boundary.hpp
@@ -19,7 +19,7 @@
 
 #pragma once
 
-#include "walberla_utils.hpp"
+#include "types_conversion.hpp"
 
 #include <walberla_bridge/LatticeWalberla.hpp>
 
diff --git a/src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp b/src/walberla_bridge/src/utils/types_conversion.hpp
similarity index 100%
rename from src/walberla_bridge/include/walberla_bridge/utils/walberla_utils.hpp
rename to src/walberla_bridge/src/utils/types_conversion.hpp
diff --git a/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp b/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp
index 2666843c37..3045e9974c 100644
--- a/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp
+++ b/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp
@@ -29,8 +29,7 @@
 #include "../src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precision.h"
 #include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h"
 #include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h"
-
-#include <walberla_bridge/utils/walberla_utils.hpp>
+#include "../src/utils/types_conversion.hpp"
 
 #include <utils/Vector.hpp>
 

From f4590fd59824efb68c48eacdcb0ae43ba56c94bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Wed, 21 Feb 2024 20:16:55 +0100
Subject: [PATCH 7/7] Restrict Boost range

Boost 1.84 is not supported. Drop the macOS CI job: Homebrew only
provides a recipe for boost-mpi version 1.84.
---
 .github/workflows/push_pull.yml | 2 +-
 CMakeLists.txt                  | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/push_pull.yml b/.github/workflows/push_pull.yml
index 1a2eb4e39f..94d9bf24d9 100644
--- a/.github/workflows/push_pull.yml
+++ b/.github/workflows/push_pull.yml
@@ -10,7 +10,7 @@ permissions:
 jobs:
   macos:
     runs-on: macos-12
-    if: ${{ github.repository == 'espressomd/espresso' }}
+    if: false
     steps:
       - name: Checkout
         uses: actions/checkout@main
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c40cb72a7..5c643b7d4b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -435,6 +435,9 @@ if(ESPRESSO_BUILD_TESTS)
 endif()
 
 find_package(Boost 1.74.0 REQUIRED ${BOOST_COMPONENTS})
+if(${Boost_VERSION} VERSION_GREATER_EQUAL 1.84.0)
+  message(FATAL_ERROR "Boost version ${Boost_VERSION} is unsupported.")
+endif()
 
 #
 # Paths