From 3c0ccf2f631e1b188a74c59b3908a700cf5f4b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Tue, 17 Sep 2024 18:36:36 +0200 Subject: [PATCH 1/4] General LB maintenance Use the AVX streaming kernels. Adjust pre-conditions of LB tests. Fix CMake bug in the benchmarks. Tweak benchmark summary information. --- maintainer/benchmarks/CMakeLists.txt | 3 ++- maintainer/benchmarks/benchmarks.py | 2 +- maintainer/benchmarks/lb.py | 2 +- .../walberla_kernels/generate_lb_kernels.py | 15 ++++++++------- .../walberla_kernels/pystencils_espresso.py | 3 +-- src/script_interface/walberla/LBFluid.cpp | 1 + .../generated_kernels/CMakeLists.txt | 3 ++- .../src/lattice_boltzmann/lb_kernels.hpp | 12 ++++++++---- testsuite/python/lb.py | 2 +- testsuite/python/lb_planar_couette.py | 13 ++++++++++++- testsuite/python/regular_decomposition.py | 2 +- 11 files changed, 38 insertions(+), 20 deletions(-) diff --git a/maintainer/benchmarks/CMakeLists.txt b/maintainer/benchmarks/CMakeLists.txt index 887cae6ab8..ea6c0c5f48 100644 --- a/maintainer/benchmarks/CMakeLists.txt +++ b/maintainer/benchmarks/CMakeLists.txt @@ -150,4 +150,5 @@ add_custom_target( COMMAND ${CMAKE_CTEST_COMMAND} --timeout ${ESPRESSO_TEST_TIMEOUT} ${ESPRESSO_CTEST_ARGS} --output-on-failure) -add_dependencies(benchmark benchmark_python benchmarks_data) +add_dependencies(benchmark_python pypresso benchmarks_data) +add_dependencies(benchmark benchmark_python) diff --git a/maintainer/benchmarks/benchmarks.py b/maintainer/benchmarks/benchmarks.py index 3d39d2fd1d..b9343300b2 100644 --- a/maintainer/benchmarks/benchmarks.py +++ b/maintainer/benchmarks/benchmarks.py @@ -84,7 +84,7 @@ def get_timings(system, n_steps, n_iterations, verbose=True): energy = system.analysis.energy()["total"] verlet = system.cell_system.get_state()["verlet_reuse"] print( - f"step {i}, time: {1000 * t:.1f} ms, verlet: {verlet:.2f}, energy: {energy:.2e}") + f"step {i}, time: {1000 * t:.2f} ms, verlet: {verlet:.2f}, energy: 
{energy:.2e}") return np.array(timings) diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py index 7131e62ff4..ea42b42005 100644 --- a/maintainer/benchmarks/lb.py +++ b/maintainer/benchmarks/lb.py @@ -149,7 +149,7 @@ # average time avg, ci = benchmarks.get_average_time(timings) -print(f"average: {1000 * avg:.1f} +/- {1000 * ci:.1f} ms (95% C.I.)") +print(f"average: {1000 * avg:.2f} +/- {1000 * ci:.2f} ms (95% C.I.)") # write report benchmarks.write_report(args.output, n_proc, timings, measurement_steps) diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py index 9afd75925c..cfe346507a 100644 --- a/maintainer/walberla_kernels/generate_lb_kernels.py +++ b/maintainer/walberla_kernels/generate_lb_kernels.py @@ -98,6 +98,8 @@ def paramlist(parameters, keys): stencil = lbmpy.stencils.LBStencil(lbmpy.enums.Stencil.D3Q19) fields = pystencils_espresso.generate_fields(config, stencil) force_field = fields["force"] + lbm_opt = lbmpy.LBMOptimisation(symbolic_field=fields["pdfs"]) + streaming_pattern = "push" # LB Method definition method = lbmpy.creationfunctions.create_mrt_orthogonal( @@ -133,12 +135,11 @@ def paramlist(parameters, keys): force_model=lbmpy.ForceModel.GUO, force=force_field.center_vector, kernel_type="collide_only") - lbm_opt = lbmpy.LBMOptimisation(symbolic_field=fields["pdfs"]) - le_collision_rule_unthermalized = lbmpy.create_lb_update_rule( + le_update_rule_unthermalized = lbmpy.create_lb_update_rule( lbm_config=le_config, lbm_optimisation=lbm_opt) le_collision_rule_unthermalized = lees_edwards.add_lees_edwards_to_collision( - config, le_collision_rule_unthermalized, + config, le_update_rule_unthermalized, fields["pdfs"], stencil, 1) # shear_dir_normal y for params, target_suffix in paramlist(parameters, ("GPU", "CPU", "AVX")): pystencils_espresso.generate_collision_sweep( @@ -153,8 +154,8 @@ def paramlist(parameters, keys): ps.TypedSymbol(f"block_offset_{i}", np.uint32) for i 
in range(3)) - # generate thermalized LB - collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule( + # generate thermalized LB collision rule + lb_collision_rule_thermalized = lbmpy.creationfunctions.create_lb_collision_rule( method, zero_centered=False, fluctuating={ @@ -170,7 +171,7 @@ def paramlist(parameters, keys): pystencils_espresso.generate_collision_sweep( ctx, method, - collision_rule_thermalized, + lb_collision_rule_thermalized, stem, params, block_offset=block_offsets, @@ -202,7 +203,7 @@ def paramlist(parameters, keys): lbmpy_walberla.generate_boundary( ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic, method, additional_data_handler=ubb_data_handler, - streaming_pattern="push", target=target) + streaming_pattern=streaming_pattern, target=target) with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f: content = f.read() diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py index a6e3051793..0b6a9d70bb 100644 --- a/maintainer/walberla_kernels/pystencils_espresso.py +++ b/maintainer/walberla_kernels/pystencils_espresso.py @@ -164,9 +164,8 @@ def __init__(self, dim, time_step=ps.typing.TypedSymbol( data_type_np = {'double': 'float64', 'float': 'float32'} -def generate_fields(config, stencil): +def generate_fields(config, stencil, field_layout='fzyx'): dtype = data_type_np[config.data_type.default_factory().c_name] - field_layout = 'fzyx' q = len(stencil) dim = len(stencil[0]) diff --git a/src/script_interface/walberla/LBFluid.cpp b/src/script_interface/walberla/LBFluid.cpp index ed4d08674c..5b3bf4cabc 100644 --- a/src/script_interface/walberla/LBFluid.cpp +++ b/src/script_interface/walberla/LBFluid.cpp @@ -191,6 +191,7 @@ void LBFluid::do_construct(VariantMap const &params) { ::LB::LBWalberla::update_collision_model(*m_instance, *m_lb_params, lb_kT, static_cast(seed)); m_instance->set_external_force(lb_ext_f); + 
m_instance->ghost_communication(); for (auto &vtk : m_vtk_writers) { vtk->attach_to_lattice(m_instance, get_latice_to_md_units_conversion()); } diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt index 27c3d39749..c2bf4267a8 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt @@ -29,7 +29,8 @@ if(ESPRESSO_BUILD_WITH_WALBERLA_AVX) PRIVATE CollideSweepSinglePrecisionLeesEdwardsAVX.cpp CollideSweepDoublePrecisionLeesEdwardsAVX.cpp CollideSweepSinglePrecisionThermalizedAVX.cpp - CollideSweepDoublePrecisionThermalizedAVX.cpp) + CollideSweepDoublePrecisionThermalizedAVX.cpp + StreamSweepSinglePrecisionAVX.cpp StreamSweepDoublePrecisionAVX.cpp) else() target_sources( espresso_walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp index 7e065049f8..6d5d4fc79a 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp @@ -27,19 +27,21 @@ #include "generated_kernels/FieldAccessorsSinglePrecision.h" #include "generated_kernels/InitialPDFsSetterDoublePrecision.h" #include "generated_kernels/InitialPDFsSetterSinglePrecision.h" -#include "generated_kernels/StreamSweepDoublePrecision.h" -#include "generated_kernels/StreamSweepSinglePrecision.h" #ifdef __AVX2__ #include "generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h" #include "generated_kernels/CollideSweepDoublePrecisionThermalizedAVX.h" #include "generated_kernels/CollideSweepSinglePrecisionLeesEdwardsAVX.h" #include "generated_kernels/CollideSweepSinglePrecisionThermalizedAVX.h" +#include "generated_kernels/StreamSweepDoublePrecisionAVX.h" +#include "generated_kernels/StreamSweepSinglePrecisionAVX.h" #else #include 
"generated_kernels/CollideSweepDoublePrecisionLeesEdwards.h" #include "generated_kernels/CollideSweepDoublePrecisionThermalized.h" #include "generated_kernels/CollideSweepSinglePrecisionLeesEdwards.h" #include "generated_kernels/CollideSweepSinglePrecisionThermalized.h" +#include "generated_kernels/StreamSweepDoublePrecision.h" +#include "generated_kernels/StreamSweepSinglePrecision.h" #endif namespace walberla { @@ -53,13 +55,14 @@ template struct KernelTrait { pystencils::CollideSweepDoublePrecisionThermalizedAVX; using CollisionModelLeesEdwards = pystencils::CollideSweepDoublePrecisionLeesEdwardsAVX; + using StreamSweep = pystencils::StreamSweepDoublePrecisionAVX; #else using CollisionModelThermalized = pystencils::CollideSweepDoublePrecisionThermalized; using CollisionModelLeesEdwards = pystencils::CollideSweepDoublePrecisionLeesEdwards; -#endif using StreamSweep = pystencils::StreamSweepDoublePrecision; +#endif using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecision; }; @@ -69,13 +72,14 @@ template <> struct KernelTrait { pystencils::CollideSweepSinglePrecisionThermalizedAVX; using CollisionModelLeesEdwards = pystencils::CollideSweepSinglePrecisionLeesEdwardsAVX; + using StreamSweep = pystencils::StreamSweepSinglePrecisionAVX; #else using CollisionModelThermalized = pystencils::CollideSweepSinglePrecisionThermalized; using CollisionModelLeesEdwards = pystencils::CollideSweepSinglePrecisionLeesEdwards; -#endif using StreamSweep = pystencils::StreamSweepSinglePrecision; +#endif using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecision; }; diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py index 2905d104c4..4e585d5f08 100644 --- a/testsuite/python/lb.py +++ b/testsuite/python/lb.py @@ -674,7 +674,7 @@ def test_tracers_coupling_rounding(self): self.system.thermostat.set_lb(LB_fluid=lbf, seed=3, gamma=self.gamma) rtol = self.rtol if lbf.single_precision: - rtol *= 100. + rtol *= 200. 
mode_tracer = espressomd.propagation.Propagation.TRANS_LB_TRACER self.system.time = 0. p = self.system.part.add(pos=[-1E-30] * 3, propagation=mode_tracer) diff --git a/testsuite/python/lb_planar_couette.py b/testsuite/python/lb_planar_couette.py index bb72e47b73..7295128b86 100644 --- a/testsuite/python/lb_planar_couette.py +++ b/testsuite/python/lb_planar_couette.py @@ -64,6 +64,7 @@ class LBCouetteFlowCommon: system.time_step = LB_PARAMS['tau'] system.cell_system.skin = 0.1 system.cell_system.set_n_square() + n_nodes = np.prod(system.cell_system.node_grid) def setUp(self): self.system.time = 0. @@ -74,11 +75,16 @@ def tearDown(self): def check_profile(self, u_getter, **kwargs): system = self.system - system.box_l = [64, 1, 64] + # carefully select the domain decomposition + assert self.n_nodes == 1 or kwargs["shear_plane_normal"] == "y" + system.box_l = [16, 16, 16] if "x" not in kwargs.values(): + system.cell_system.node_grid = [1, self.n_nodes, 1] system.box_l = [1, 64, 64] elif "z" not in kwargs.values(): + system.cell_system.node_grid = [self.n_nodes, 1, 1] system.box_l = [64, 64, 1] + assert system.box_l[0] != 16. 
h = np.max(system.box_l) shear_velocity = 0.05 k_max = 100 @@ -108,12 +114,15 @@ def test_profile_xy(self): self.check_profile(lambda lbf: lbf[5, :, 0].velocity[:, 0], shear_direction="x", shear_plane_normal="y") + @ut.skipIf(n_nodes > 1, "Skipping test: only runs for n_nodes == 1") def test_profile_zy(self): self.check_profile(lambda lbf: lbf[0, :, 5].velocity[:, 0], shear_direction="z", shear_plane_normal="y") @utx.skipIfMissingFeatures(["WALBERLA"]) +@ut.skipIf(LBCouetteFlowCommon.n_nodes > 2, + "Skipping test: only runs for n_nodes <= 2") class LBCouetteFlowWalberla(LBCouetteFlowCommon, ut.TestCase): """Test for the Walberla implementation of the LB in double-precision.""" @@ -123,6 +132,8 @@ class LBCouetteFlowWalberla(LBCouetteFlowCommon, ut.TestCase): @utx.skipIfMissingFeatures(["WALBERLA"]) +@ut.skipIf(LBCouetteFlowCommon.n_nodes > 2, + "Skipping test: only runs for n_nodes <= 2") class LBCouetteFlowWalberlaSinglePrecision(LBCouetteFlowCommon, ut.TestCase): """Test for the Walberla implementation of the LB in single-precision.""" diff --git a/testsuite/python/regular_decomposition.py b/testsuite/python/regular_decomposition.py index d94720f11e..895926126b 100644 --- a/testsuite/python/regular_decomposition.py +++ b/testsuite/python/regular_decomposition.py @@ -91,7 +91,7 @@ def test_resort(self): self.check_resort() @ut.skipIf(system.cell_system.get_state()["n_nodes"] != 4, - "Skipping test: only runs for n_nodes >= 4") + "Skipping test: only runs for n_nodes == 4") def test_resort_alternating(self): # check particle resorting when the left and right cells are different self.system.cell_system.node_grid = [4, 1, 1] From 5e39e84dc4629061da90402beda4f506cf7aee72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 18 Sep 2024 17:06:08 +0200 Subject: [PATCH 2/4] Split LB communicators --- src/core/integrate.cpp | 7 + src/core/lb/LBNone.hpp | 3 + src/core/lb/LBWalberla.cpp | 10 ++ src/core/lb/LBWalberla.hpp | 3 + src/core/lb/Solver.cpp | 
15 ++ src/core/lb/Solver.hpp | 15 ++ src/core/lb/particle_coupling.cpp | 1 + ...BFluxDensityProfileAtParticlePositions.cpp | 6 +- .../CylindricalLBVelocityProfile.cpp | 3 +- ...alLBVelocityProfileAtParticlePositions.cpp | 5 +- src/core/observables/LBVelocityProfile.cpp | 3 +- .../unit_tests/lb_particle_coupling_test.cpp | 3 + .../lattice_boltzmann/LBWalberlaBase.hpp | 15 +- .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 137 ++++++++++++++++-- .../tests/LBWalberlaImpl_flow_tests.cpp | 7 + .../tests/LBWalberlaImpl_unit_tests.cpp | 13 ++ 16 files changed, 228 insertions(+), 18 deletions(-) diff --git a/src/core/integrate.cpp b/src/core/integrate.cpp index 34e2abe9f2..9ed1d628a4 100644 --- a/src/core/integrate.cpp +++ b/src/core/integrate.cpp @@ -629,6 +629,7 @@ int System::System::integrate(int n_steps, int reuse_forces) { propagation.lb_skipped_md_steps = 0; propagation.ek_skipped_md_steps = 0; lb.propagate(); + lb.ghost_communication_vel(); ek.propagate(); } } else if (lb_active) { @@ -654,6 +655,9 @@ int System::System::integrate(int n_steps, int reuse_forces) { #ifdef VIRTUAL_SITES_INERTIALESS_TRACERS if (thermostat->lb and (propagation.used_propagations & PropagationMode::TRANS_LB_TRACER)) { + if (lb_active) { + lb.ghost_communication_vel(); + } lb_tracers_propagate(*cell_structure, lb, time_step); } #endif @@ -678,6 +682,9 @@ int System::System::integrate(int n_steps, int reuse_forces) { } } // for-loop over integration steps + if (lb_active) { + lb.ghost_communication(); + } lees_edwards->update_box_params(*box_geo, sim_time); #ifdef CALIPER CALI_CXX_MARK_LOOP_END(integration_loop); diff --git a/src/core/lb/LBNone.hpp b/src/core/lb/LBNone.hpp index 9ee33c07e8..3e6483c036 100644 --- a/src/core/lb/LBNone.hpp +++ b/src/core/lb/LBNone.hpp @@ -29,6 +29,9 @@ namespace LB { struct LBNone { void propagate() { throw NoLBActive{}; } + void ghost_communication() { throw NoLBActive{}; } + void ghost_communication_pdf() { throw NoLBActive{}; } + void 
ghost_communication_vel() { throw NoLBActive{}; } double get_agrid() const { throw NoLBActive{}; } double get_tau() const { throw NoLBActive{}; } double get_kT() const { throw NoLBActive{}; } diff --git a/src/core/lb/LBWalberla.cpp b/src/core/lb/LBWalberla.cpp index f31634f448..9944d05408 100644 --- a/src/core/lb/LBWalberla.cpp +++ b/src/core/lb/LBWalberla.cpp @@ -52,6 +52,16 @@ Utils::VectorXd<9> LBWalberla::get_pressure_tensor() const { void LBWalberla::propagate() { lb_fluid->integrate(); } +void LBWalberla::ghost_communication() { lb_fluid->ghost_communication(); } + +void LBWalberla::ghost_communication_pdf() { + lb_fluid->ghost_communication_pdf(); +} + +void LBWalberla::ghost_communication_vel() { + lb_fluid->ghost_communication_vel(); +} + void LBWalberla::lebc_sanity_checks(unsigned int shear_direction, unsigned int shear_plane_normal) const { lb_fluid->check_lebc(shear_direction, shear_plane_normal); diff --git a/src/core/lb/LBWalberla.hpp b/src/core/lb/LBWalberla.hpp index a5b6f77426..4c0cdae69a 100644 --- a/src/core/lb/LBWalberla.hpp +++ b/src/core/lb/LBWalberla.hpp @@ -72,6 +72,9 @@ struct LBWalberla { std::vector get_velocities_at_pos(std::vector const &pos); void propagate(); + void ghost_communication(); + void ghost_communication_pdf(); + void ghost_communication_vel(); void veto_time_step(double time_step) const; void veto_kT(double kT) const; void sanity_checks(System::System const &system) const; diff --git a/src/core/lb/Solver.cpp b/src/core/lb/Solver.cpp index 69e733b4ed..758f36c4d7 100644 --- a/src/core/lb/Solver.cpp +++ b/src/core/lb/Solver.cpp @@ -73,6 +73,21 @@ void Solver::propagate() { std::visit([](auto &ptr) { ptr->propagate(); }, *impl->solver); } +void Solver::ghost_communication() { + check_solver(impl); + std::visit([](auto &ptr) { ptr->ghost_communication(); }, *impl->solver); +} + +void Solver::ghost_communication_pdf() { + check_solver(impl); + std::visit([](auto &ptr) { ptr->ghost_communication_pdf(); }, *impl->solver); +} + 
+void Solver::ghost_communication_vel() { + check_solver(impl); + std::visit([](auto &ptr) { ptr->ghost_communication_vel(); }, *impl->solver); +} + void Solver::sanity_checks() const { if (impl->solver) { auto const &system = get_system(); diff --git a/src/core/lb/Solver.hpp b/src/core/lb/Solver.hpp index 47d9d5e019..9915a949e0 100644 --- a/src/core/lb/Solver.hpp +++ b/src/core/lb/Solver.hpp @@ -68,6 +68,21 @@ struct Solver : public System::Leaf { */ void propagate(); + /** + * @brief Perform a full ghost communication. + */ + void ghost_communication(); + + /** + * @brief Perform a ghost communication of the PDF field. + */ + void ghost_communication_pdf(); + + /** + * @brief Perform a ghost communication of the velocity field. + */ + void ghost_communication_vel(); + /** * @brief Perform a full initialization of the lattice-Boltzmann system. * All derived parameters and the fluid are reset to their default values. diff --git a/src/core/lb/particle_coupling.cpp b/src/core/lb/particle_coupling.cpp index f5e49f505a..648384f1f6 100644 --- a/src/core/lb/particle_coupling.cpp +++ b/src/core/lb/particle_coupling.cpp @@ -360,6 +360,7 @@ void System::System::lb_couple_particles() { auto const ghost_particles = cell_structure->ghost_particles(); LB::ParticleCoupling coupling{*thermostat->lb, lb, *box_geo, *local_geo}; LB::CouplingBookkeeping bookkeeping{*cell_structure}; + lb.ghost_communication_vel(); std::vector particles{}; for (auto const *particle_range : {&real_particles, &ghost_particles}) { for (auto &p : *particle_range) { diff --git a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp index b5d0736dfb..32556a0a2e 100644 --- a/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp +++ b/src/core/observables/CylindricalLBFluxDensityProfileAtParticlePositions.cpp @@ -45,10 +45,12 @@ CylindricalLBFluxDensityProfileAtParticlePositions::evaluate( 
local_folded_positions.reserve(local_particles.size()); local_flux_densities.reserve(local_particles.size()); - auto const &system = System::get_system(); + auto &system = System::get_system(); auto const &box_geo = *system.box_geo; - auto const &lb = system.lb; + auto &lb = system.lb; auto const vel_conv = lb.get_lattice_speed(); + lb.ghost_communication_pdf(); + lb.ghost_communication_vel(); for (auto const &p : local_particles) { auto const pos = box_geo.folded_position(traits.position(p)); diff --git a/src/core/observables/CylindricalLBVelocityProfile.cpp b/src/core/observables/CylindricalLBVelocityProfile.cpp index e260632197..4266b1df1c 100644 --- a/src/core/observables/CylindricalLBVelocityProfile.cpp +++ b/src/core/observables/CylindricalLBVelocityProfile.cpp @@ -36,8 +36,9 @@ std::vector CylindricalLBVelocityProfile::operator()( decltype(sampling_positions) local_positions{}; std::vector local_velocities{}; - auto const &lb = System::get_system().lb; + auto &lb = System::get_system().lb; auto const vel_conv = lb.get_lattice_speed(); + lb.ghost_communication_vel(); for (auto const &pos : sampling_positions) { if (auto const vel = lb.get_interpolated_velocity(pos)) { diff --git a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp index 4d30e53d88..1f8b7d3fdc 100644 --- a/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp +++ b/src/core/observables/CylindricalLBVelocityProfileAtParticlePositions.cpp @@ -41,10 +41,11 @@ std::vector CylindricalLBVelocityProfileAtParticlePositions::evaluate( local_folded_positions.reserve(local_particles.size()); local_velocities.reserve(local_particles.size()); - auto const &system = System::get_system(); + auto &system = System::get_system(); auto const &box_geo = *system.box_geo; - auto const &lb = system.lb; + auto &lb = system.lb; auto const vel_conv = lb.get_lattice_speed(); + lb.ghost_communication_vel(); 
for (auto const &p : local_particles) { auto const pos = box_geo.folded_position(traits.position(p)); diff --git a/src/core/observables/LBVelocityProfile.cpp b/src/core/observables/LBVelocityProfile.cpp index 13db451211..a194dbde7a 100644 --- a/src/core/observables/LBVelocityProfile.cpp +++ b/src/core/observables/LBVelocityProfile.cpp @@ -36,8 +36,9 @@ LBVelocityProfile::operator()(boost::mpi::communicator const &comm) const { decltype(sampling_positions) local_positions{}; std::vector local_velocities{}; - auto const &lb = System::get_system().lb; + auto &lb = System::get_system().lb; auto const vel_conv = lb.get_lattice_speed(); + lb.ghost_communication_vel(); for (auto const &pos : sampling_positions) { if (auto const vel = lb.get_interpolated_velocity(pos)) { diff --git a/src/core/unit_tests/lb_particle_coupling_test.cpp b/src/core/unit_tests/lb_particle_coupling_test.cpp index b42bea56eb..97e0f4c2e8 100644 --- a/src/core/unit_tests/lb_particle_coupling_test.cpp +++ b/src/core/unit_tests/lb_particle_coupling_test.cpp @@ -609,6 +609,9 @@ BOOST_AUTO_TEST_CASE(lb_exceptions) { BOOST_CHECK_THROW(lb.lebc_sanity_checks(0u, 1u), NoLBActive); BOOST_CHECK_THROW(lb.propagate(), NoLBActive); BOOST_CHECK_THROW(lb.update_collision_model(), NoLBActive); + BOOST_CHECK_THROW(lb.ghost_communication(), NoLBActive); + BOOST_CHECK_THROW(lb.ghost_communication_pdf(), NoLBActive); + BOOST_CHECK_THROW(lb.ghost_communication_vel(), NoLBActive); BOOST_CHECK_THROW(lb.on_cell_structure_change(), NoLBActive); BOOST_CHECK_THROW(lb.on_boxl_change(), NoLBActive); BOOST_CHECK_THROW(lb.on_node_grid_change(), NoLBActive); diff --git a/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp index bff71a129f..e2d15671bf 100644 --- a/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp +++ 
b/src/walberla_bridge/include/walberla_bridge/lattice_boltzmann/LBWalberlaBase.hpp @@ -42,12 +42,23 @@ class LBWalberlaBase : public LatticeModel { public: ~LBWalberlaBase() override = default; - /** @brief Integrate LB for one time step. */ + /** + * @brief Integrate LB for one time step. + * The ghost layers may be out-of-date after integration. + * Call @ref ghost_communication() to refresh them before + * calling any getter function that reads from the halo region. + */ virtual void integrate() = 0; - /** @brief Perform ghost communication of PDF and applied forces. */ + /** @brief Perform a full ghost communication. */ virtual void ghost_communication() = 0; + /** @brief Perform a ghost communication of the PDF field. */ + virtual void ghost_communication_pdf() = 0; + + /** @brief Perform a ghost communication of the velocity field. */ + virtual void ghost_communication_vel() = 0; + /** @brief Number of discretized velocities in the PDF. */ virtual std::size_t stencil_size() const noexcept = 0; diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 6105b996c6..91266fa05a 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -66,6 +66,7 @@ #include #include +#include #include #include #include @@ -152,6 +153,17 @@ class LBWalberlaImpl : public LBWalberlaBase { using GPUField = gpu::GPUField; #endif + struct GhostComm { + /** @brief Ghost communication operations. 
*/ + enum GhostCommFlags : unsigned { + PDF, ///< PDFs communication + VEL, ///< velocities communication + LAF, ///< last applied forces communication + UBB, ///< boundaries communication + SIZE + }; + }; + public: template FloatType FloatType_c(T t) const { return numeric_cast(t); @@ -298,8 +310,12 @@ class LBWalberlaImpl : public LBWalberlaBase { // communicators std::shared_ptr m_boundary_communicator; - std::shared_ptr m_pdf_full_communicator; + std::shared_ptr m_full_communicator; + std::shared_ptr m_pdf_communicator; + std::shared_ptr m_vel_communicator; + std::shared_ptr m_laf_communicator; std::shared_ptr m_pdf_streaming_communicator; + std::bitset m_pending_ghost_comm; // ResetForce sweep + external force handling std::shared_ptr> m_reset_force; @@ -439,14 +455,24 @@ class LBWalberlaImpl : public LBWalberlaBase { m_pdf_streaming_communicator->addPackInfo( std::make_shared>(m_last_applied_force_field_id)); - m_pdf_full_communicator = std::make_shared(blocks); - m_pdf_full_communicator->addPackInfo( + m_full_communicator = std::make_shared(blocks); + m_full_communicator->addPackInfo( std::make_shared>(m_pdf_field_id)); - m_pdf_full_communicator->addPackInfo( + m_full_communicator->addPackInfo( std::make_shared>(m_last_applied_force_field_id)); - m_pdf_full_communicator->addPackInfo( + m_full_communicator->addPackInfo( std::make_shared>(m_velocity_field_id)); + m_pdf_communicator = std::make_shared(blocks); + m_vel_communicator = std::make_shared(blocks); + m_laf_communicator = std::make_shared(blocks); + m_pdf_communicator->addPackInfo( + std::make_shared>(m_pdf_field_id)); + m_vel_communicator->addPackInfo( + std::make_shared>(m_velocity_field_id)); + m_laf_communicator->addPackInfo( + std::make_shared>(m_last_applied_force_field_id)); + m_boundary_communicator = std::make_shared(blocks); m_boundary_communicator->addPackInfo( @@ -458,6 +484,8 @@ class LBWalberlaImpl : public LBWalberlaBase { boundary_packinfo->setup_boundary_handle(m_lattice, m_boundary); 
m_boundary_communicator->addPackInfo(boundary_packinfo); + m_pending_ghost_comm.set(); + // Instantiate the sweep responsible for force double buffering and // external forces m_reset_force = std::make_shared>( @@ -530,8 +558,12 @@ class LBWalberlaImpl : public LBWalberlaBase { integrate_boundaries(blocks); // LB stream integrate_stream(blocks); + // Mark pending ghost layer updates + m_pending_ghost_comm.set(GhostComm::PDF); + m_pending_ghost_comm.set(GhostComm::VEL); + m_pending_ghost_comm.set(GhostComm::LAF); // Refresh ghost layers - ghost_communication_pdfs(); + ghost_communication_push_scheme(); } void integrate_pull_scheme() { @@ -542,7 +574,12 @@ class LBWalberlaImpl : public LBWalberlaBase { integrate_stream(blocks); // LB collide integrate_collide(blocks); + // Reset force fields integrate_reset_force(blocks); + // Mark pending ghost layer updates + m_pending_ghost_comm.set(GhostComm::PDF); + m_pending_ghost_comm.set(GhostComm::VEL); + m_pending_ghost_comm.set(GhostComm::LAF); // Refresh ghost layers ghost_communication_pdfs(); } @@ -570,21 +607,75 @@ class LBWalberlaImpl : public LBWalberlaBase { } void ghost_communication() override { - ghost_communication_boundary(); - ghost_communication_pdfs(); + if (m_pending_ghost_comm.any()) { + ghost_communication_boundary(); + ghost_communication_pdfs(); + } + } + + void ghost_communication_pdf() override { + if (m_pending_ghost_comm.test(GhostComm::PDF)) { + m_pdf_communicator->communicate(); + if (has_lees_edwards_bc()) { + auto const &blocks = get_lattice().get_blocks(); + apply_lees_edwards_pdf_interpolation(blocks); + } + m_pending_ghost_comm.reset(GhostComm::PDF); + } + } + + void ghost_communication_vel() override { + if (m_pending_ghost_comm.test(GhostComm::VEL)) { + m_vel_communicator->communicate(); + if (has_lees_edwards_bc()) { + auto const &blocks = get_lattice().get_blocks(); + apply_lees_edwards_vel_interpolation_and_shift(blocks); + } + m_pending_ghost_comm.reset(GhostComm::VEL); + } + } + + void 
ghost_communication_laf() { + if (m_pending_ghost_comm.test(GhostComm::LAF)) { + m_laf_communicator->communicate(); + if (has_lees_edwards_bc()) { + auto const &blocks = get_lattice().get_blocks(); + apply_lees_edwards_last_applied_force_interpolation(blocks); + } + m_pending_ghost_comm.reset(GhostComm::LAF); + } } void ghost_communication_boundary() { - m_boundary_communicator->communicate(); + if (m_pending_ghost_comm.test(GhostComm::UBB)) { + m_boundary_communicator->communicate(); + m_pending_ghost_comm.reset(GhostComm::UBB); + } } void ghost_communication_pdfs() { - m_pdf_full_communicator->communicate(); + m_full_communicator->communicate(); + if (has_lees_edwards_bc()) { + auto const &blocks = get_lattice().get_blocks(); + apply_lees_edwards_pdf_interpolation(blocks); + apply_lees_edwards_vel_interpolation_and_shift(blocks); + apply_lees_edwards_last_applied_force_interpolation(blocks); + } + m_pending_ghost_comm.reset(GhostComm::PDF); + m_pending_ghost_comm.reset(GhostComm::VEL); + m_pending_ghost_comm.reset(GhostComm::LAF); + } + + void ghost_communication_push_scheme() { if (has_lees_edwards_bc()) { + m_full_communicator->communicate(); auto const &blocks = get_lattice().get_blocks(); apply_lees_edwards_pdf_interpolation(blocks); apply_lees_edwards_vel_interpolation_and_shift(blocks); apply_lees_edwards_last_applied_force_interpolation(blocks); + m_pending_ghost_comm.reset(GhostComm::PDF); + m_pending_ghost_comm.reset(GhostComm::VEL); + m_pending_ghost_comm.reset(GhostComm::LAF); } } @@ -672,6 +763,8 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_node_velocity(Utils::Vector3i const &node, bool consider_ghosts = false) const override { + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::VEL))); + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::UBB))); auto const is_boundary = get_node_is_boundary(node, consider_ghosts); if (is_boundary) // is info available locally if (*is_boundary) // is the node 
a boundary @@ -688,6 +781,8 @@ class LBWalberlaImpl : public LBWalberlaBase { bool set_node_velocity(Utils::Vector3i const &node, Utils::Vector3d const &v) override { + m_pending_ghost_comm.set(GhostComm::PDF); + m_pending_ghost_comm.set(GhostComm::VEL); auto bc = get_block_and_cell(get_lattice(), node, false); if (!bc) return false; @@ -750,6 +845,8 @@ class LBWalberlaImpl : public LBWalberlaBase { void set_slice_velocity(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, std::vector const &velocity) override { + m_pending_ghost_comm.set(GhostComm::PDF); + m_pending_ghost_comm.set(GhostComm::VEL); if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); auto &block = *(lattice.get_blocks()->begin()); @@ -859,6 +956,8 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_velocity_at_pos(Utils::Vector3d const &pos, bool consider_points_in_halo = false) const override { + assert(not m_pending_ghost_comm.test(GhostComm::VEL)); + assert(not m_pending_ghost_comm.test(GhostComm::UBB)); if (!consider_points_in_halo and !m_lattice->pos_in_local_domain(pos)) return std::nullopt; if (consider_points_in_halo and !m_lattice->pos_in_local_halo(pos)) @@ -882,6 +981,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_density_at_pos(Utils::Vector3d const &pos, bool consider_points_in_halo = false) const override { + assert(not m_pending_ghost_comm.test(GhostComm::PDF)); if (!consider_points_in_halo and !m_lattice->pos_in_local_domain(pos)) return std::nullopt; if (consider_points_in_halo and !m_lattice->pos_in_local_halo(pos)) @@ -938,6 +1038,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_node_last_applied_force(Utils::Vector3i const &node, bool consider_ghosts = false) const override { + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::LAF))); auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc) return 
std::nullopt; @@ -950,6 +1051,8 @@ class LBWalberlaImpl : public LBWalberlaBase { bool set_node_last_applied_force(Utils::Vector3i const &node, Utils::Vector3d const &force) override { + m_pending_ghost_comm.set(GhostComm::VEL); + m_pending_ghost_comm.set(GhostComm::LAF); auto bc = get_block_and_cell(get_lattice(), node, false); if (!bc) return false; @@ -990,6 +1093,8 @@ class LBWalberlaImpl : public LBWalberlaBase { void set_slice_last_applied_force(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, std::vector const &force) override { + m_pending_ghost_comm.set(GhostComm::VEL); + m_pending_ghost_comm.set(GhostComm::LAF); if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); auto &block = *(lattice.get_blocks()->begin()); @@ -1008,6 +1113,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional> get_node_population(Utils::Vector3i const &node, bool consider_ghosts = false) const override { + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::PDF))); auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc) return std::nullopt; @@ -1024,6 +1130,8 @@ class LBWalberlaImpl : public LBWalberlaBase { bool set_node_population(Utils::Vector3i const &node, std::vector const &population) override { + m_pending_ghost_comm.set(GhostComm::PDF); + m_pending_ghost_comm.set(GhostComm::VEL); auto bc = get_block_and_cell(get_lattice(), node, false); if (!bc) return false; @@ -1086,6 +1194,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_node_density(Utils::Vector3i const &node, bool consider_ghosts = false) const override { + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::PDF))); auto bc = get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc) return std::nullopt; @@ -1097,6 +1206,7 @@ class LBWalberlaImpl : public LBWalberlaBase { } bool set_node_density(Utils::Vector3i const &node, double density) override { + 
m_pending_ghost_comm.set(GhostComm::PDF); auto bc = get_block_and_cell(get_lattice(), node, false); if (!bc) return false; @@ -1131,6 +1241,7 @@ class LBWalberlaImpl : public LBWalberlaBase { void set_slice_density(Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, std::vector const &density) override { + m_pending_ghost_comm.set(GhostComm::PDF); if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); auto &block = *(lattice.get_blocks()->begin()); @@ -1145,6 +1256,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_node_velocity_at_boundary(Utils::Vector3i const &node, bool consider_ghosts = false) const override { + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::UBB))); auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc or !m_boundary->node_is_boundary(node)) return std::nullopt; @@ -1154,6 +1266,7 @@ class LBWalberlaImpl : public LBWalberlaBase { bool set_node_velocity_at_boundary(Utils::Vector3i const &node, Utils::Vector3d const &velocity) override { + m_pending_ghost_comm.set(GhostComm::UBB); auto bc = get_block_and_cell(get_lattice(), node, true); if (bc) { m_boundary->set_node_value_at_boundary( @@ -1194,6 +1307,7 @@ class LBWalberlaImpl : public LBWalberlaBase { void set_slice_velocity_at_boundary( Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, std::vector> const &velocity) override { + m_pending_ghost_comm.set(GhostComm::UBB); if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); auto const local_offset = std::get<0>(lattice.get_local_grid_range()); @@ -1240,6 +1354,7 @@ class LBWalberlaImpl : public LBWalberlaBase { std::optional get_node_is_boundary(Utils::Vector3i const &node, bool consider_ghosts = false) const override { + assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::UBB))); auto const bc = 
get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc) return std::nullopt; @@ -1275,6 +1390,7 @@ class LBWalberlaImpl : public LBWalberlaBase { void clear_boundaries() override { reset_boundary_handling(); + m_pending_ghost_comm.set(GhostComm::UBB); ghost_communication(); } @@ -1284,6 +1400,7 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const grid_size = get_lattice().get_grid_dimensions(); auto const data = fill_3D_vector_array(data_flat, grid_size); set_boundary_from_grid(*m_boundary, get_lattice(), raster_flat, data); + m_pending_ghost_comm.set(GhostComm::UBB); ghost_communication(); reallocate_ubb_field(); } diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp index ce0adc8410..36526ee3ce 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_flow_tests.cpp @@ -62,6 +62,7 @@ BOOST_DATA_TEST_CASE(integrate_with_point_force_thermalized, // Check that momentum stays zero after initial integration lb->integrate(); lb->integrate(); + lb->ghost_communication(); auto mom_local = lb->get_momentum(); auto mom = boost::mpi::all_reduce(world, mom_local, std::plus()); BOOST_CHECK_SMALL(mom.norm(), 1E-10); @@ -74,7 +75,9 @@ BOOST_DATA_TEST_CASE(integrate_with_point_force_thermalized, lb->set_external_force(f1); auto const force_node = Vector3i{{1, 1, 1}}; lb->add_force_at_pos(force_node + Vector3d::broadcast(.5), f2); + lb->ghost_communication(); lb->integrate(); + lb->ghost_communication(); for (auto const &n : all_nodes_incl_ghosts(lb->get_lattice())) { if (lb->get_lattice().node_in_local_halo(n)) { auto const laf = *(lb->get_node_last_applied_force(n, true)); @@ -101,6 +104,7 @@ BOOST_DATA_TEST_CASE(integrate_with_point_force_thermalized, // No f/2 correction, since no force was applied in last time step mom_exp = 1.0 * f1 * Utils::product(params.grid_dimensions) + 1.0 * f2; lb->integrate(); + 
lb->ghost_communication(); mom_local = lb->get_momentum(); mom = boost::mpi::all_reduce(world, mom_local, std::plus()); BOOST_CHECK_SMALL((mom - mom_exp).norm(), 1E-10); @@ -114,6 +118,7 @@ BOOST_DATA_TEST_CASE(integrate_with_point_force_unthermalized, // Check that momentum stays zero after initial integration lb->integrate(); + lb->ghost_communication(); BOOST_CHECK_SMALL(lb->get_momentum().norm(), 1E-10); // Check that momentum changes as expected when applying forces @@ -123,6 +128,7 @@ BOOST_DATA_TEST_CASE(integrate_with_point_force_unthermalized, lb->set_external_force(f1); lb->add_force_at_pos(Utils::Vector3d{2, 2, 2}, f2); lb->integrate(); + lb->ghost_communication(); auto mom_local = lb->get_momentum(); auto mom = boost::mpi::all_reduce(world, mom_local, std::plus()); @@ -137,6 +143,7 @@ BOOST_DATA_TEST_CASE(integrate_with_point_force_unthermalized, // check that momentum doesn't drift when no force is applied again lb->set_external_force(Vector3d{}); lb->integrate(); + lb->ghost_communication(); // The expected moment is just that applied during a single time step // No f/2 correction, since no force was applied in last time step mom_exp = 1.0 * f1 * Utils::product(params.grid_dimensions) + 1.0 * f2; diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp index 5df3715cd9..c3352fcbed 100644 --- a/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp +++ b/src/walberla_bridge/tests/LBWalberlaImpl_unit_tests.cpp @@ -135,6 +135,7 @@ BOOST_DATA_TEST_CASE(per_node_boundary, bdata::make(all_lbs()), lb_generator) { } { BOOST_CHECK(lb->set_node_velocity_at_boundary(node, vel)); + lb->ghost_communication(); auto const res = lb->get_node_is_boundary(node, true); // Did we get a value? BOOST_REQUIRE(res); @@ -159,6 +160,7 @@ BOOST_DATA_TEST_CASE(per_node_boundary, bdata::make(all_lbs()), lb_generator) { } else { // Not in the local halo. 
BOOST_CHECK(!lb->set_node_velocity_at_boundary(node, vel)); + lb->ghost_communication(); BOOST_CHECK(!lb->get_node_velocity_at_boundary(node)); BOOST_CHECK(!lb->remove_node_from_boundary(node)); BOOST_CHECK(!lb->get_node_is_boundary(node)); @@ -420,6 +422,7 @@ BOOST_DATA_TEST_CASE(total_momentum, bdata::make(all_lbs()), lb_generator) { if (lb->get_lattice().node_in_local_domain(n2)) { lb->set_node_velocity(n2, v2); } + lb->ghost_communication(); boost::mpi::communicator world; auto const mom_local = lb->get_momentum(); @@ -441,6 +444,7 @@ BOOST_DATA_TEST_CASE(forces_interpolation, bdata::make(all_lbs()), auto const pos = 1. * n; // Mid point between nodes auto const f = Vector3d{{1., 2., -3.5}}; lb->add_force_at_pos(pos, f); + lb->ghost_communication(); // Check neighboring nodes for force to be applied for (int x : {0, 1}) for (int y : {0, 1}) @@ -453,6 +457,8 @@ BOOST_DATA_TEST_CASE(forces_interpolation, bdata::make(all_lbs()), } // Apply counter force to clear force field lb->add_force_at_pos(pos, -f); + } else { + lb->ghost_communication(); } } } @@ -474,10 +480,14 @@ BOOST_DATA_TEST_CASE(forces_book_keeping, bdata::make(all_lbs()), // Add force to node position if (lb->get_lattice().node_in_local_domain(n)) { lb->add_force_at_pos(n + Vector3d::broadcast(.5), f); + lb->ghost_communication(); BOOST_CHECK_SMALL((*(lb->get_node_force_to_be_applied(n)) - f).norm(), 1E-10); + } else { + lb->ghost_communication(); } lb->integrate(); + lb->ghost_communication(); // Check nodes incl some of the ghosts for (auto cn : {n, n + params.grid_dimensions, n - params.grid_dimensions, n + Vector3i{{params.grid_dimensions[0], 0, 0}}}) { @@ -489,6 +499,7 @@ BOOST_DATA_TEST_CASE(forces_book_keeping, bdata::make(all_lbs()), } } lb->integrate(); + lb->ghost_communication(); for (auto cn : {n, n + params.grid_dimensions, n - params.grid_dimensions, n + Vector3i{{params.grid_dimensions[0], 0, 0}}}) { if (lb->get_lattice().node_in_local_halo(cn)) { @@ -518,6 +529,7 @@ 
BOOST_DATA_TEST_CASE(force_in_corner, bdata::make(all_lbs()), lb_generator) { } } } + lb->ghost_communication(); // check forces to be applied // Each corner node should have 1/8 of the force @@ -535,6 +547,7 @@ BOOST_DATA_TEST_CASE(force_in_corner, bdata::make(all_lbs()), lb_generator) { BOOST_CHECK_EQUAL(count, 8); lb->integrate(); + lb->ghost_communication(); // check applied forces from last integration step count_local = 0; From 8ea71112e88969f14234e2ed12444c43f0fa3360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Thu, 19 Sep 2024 18:27:58 +0200 Subject: [PATCH 3/4] Generate PackInfo --- .../walberla_kernels/generate_lb_kernels.py | 24 + .../walberla_kernels/pystencils_espresso.py | 54 + .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 70 +- .../generated_kernels/CMakeLists.txt | 2 + .../PackInfoPdfDoublePrecision.cpp | 1358 +++++++++++++++++ .../PackInfoPdfDoublePrecision.h | 84 + .../PackInfoPdfSinglePrecision.cpp | 1358 +++++++++++++++++ .../PackInfoPdfSinglePrecision.h | 84 + .../PackInfoVecDoublePrecision.cpp | 212 +++ .../PackInfoVecDoublePrecision.h | 84 + .../PackInfoVecSinglePrecision.cpp | 212 +++ .../PackInfoVecSinglePrecision.h | 84 + .../src/lattice_boltzmann/lb_kernels.hpp | 8 + 13 files changed, 3622 insertions(+), 12 deletions(-) create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h create mode 100644 
src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py index cfe346507a..21300af894 100644 --- a/maintainer/walberla_kernels/generate_lb_kernels.py +++ b/maintainer/walberla_kernels/generate_lb_kernels.py @@ -193,6 +193,30 @@ def paramlist(parameters, keys): ctx, config, method, templates ) + # generate PackInfo + assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments( + fields, streaming_pattern="pull") + spec = pystencils_espresso.generate_pack_info_vector_field_specifications( + config, stencil, force_field.layout) + for params, target_suffix in paramlist(parameters, ["CPU"]): + pystencils_walberla.generate_pack_info_from_kernel( + ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments, + kind="pull", **params) + pystencils_walberla.generate_pack_info( + ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params) + if target_suffix == "CUDA": + continue + token = "\n //TODO: optimize by generating kernel for this case\n" + for field_suffix in ["Pdf", "Vec"]: + class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}" # nopep8 + with open(f"{class_name}.h", "r+") as f: + content = f.read() + assert token in content + content = content.replace(token, "\n") + f.seek(0) + f.truncate() + f.write(content) + # boundary conditions ubb_dynamic = lbmpy_espresso.UBB( lambda *args: None, dim=3, data_type=config.data_type.default_factory()) diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py index 0b6a9d70bb..3cf6edfbf3 100644 --- a/maintainer/walberla_kernels/pystencils_espresso.py +++ b/maintainer/walberla_kernels/pystencils_espresso.py @@ -207,6 +207,60 @@ def generate_fields(config, stencil, 
field_layout='fzyx'): return fields +def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern): + """ + Visualize the stencil directions with:: + + import lbmpy + import matplotlib.pyplot as plt + stencil = lbmpy.LBStencil(lbmpy.Stencil.D3Q19) + stencil.plot(data=[i for i in range(19)]) + plt.show() + + """ + stencil = lbmpy.enums.Stencil.D3Q19 + lbm_config = lbmpy.LBMConfig(stencil=stencil, + method=lbmpy.Method.CUMULANT, + compressible=True, + zero_centered=False, + weighted=True, + streaming_pattern=streaming_pattern, + relaxation_rate=sp.Symbol("omega_shear"), + ) + lbm_opt = lbmpy.LBMOptimisation( + symbolic_field=fields["pdfs" if streaming_pattern == + "pull" else "pdfs_tmp"], + symbolic_temporary_field=fields["pdfs" if streaming_pattern == + "push" else "pdfs_tmp"], + field_layout=fields['pdfs'].layout) + lbm_update_rule = lbmpy.create_lb_update_rule( + lbm_config=lbm_config, + lbm_optimisation=lbm_opt) + return lbm_update_rule.all_assignments + + +def generate_pack_info_vector_field_specifications(config, stencil, layout): + import collections + import itertools + field = ps.Field.create_generic( + "field", + 3, + data_type_np[config.data_type.default_factory().c_name], + index_dimensions=1, + layout=layout, + index_shape=(3,) + ) + q = len(stencil) + coord = itertools.product(*[(-1, 0, 1)] * 3) + if q == 19: + dirs = tuple((i, j, k) for i, j, k in coord if i**2 + j**2 + k**2 != 3) + else: + dirs = tuple((i, j, k) for i, j, k in coord) + spec = collections.defaultdict(set) + spec[dirs] = {field[0, 0, 0](i) for i in range(3)} + return spec + + def generate_config(ctx, params): return pystencils_walberla.utility.config_from_context(ctx, **params) diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 91266fa05a..6f1fedae10 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ 
b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -119,6 +119,11 @@ class LBWalberlaImpl : public LBWalberlaBase { using VectorField = field::GhostLayerField; template using PackInfo = field::communication::PackInfo; + template + using PackInfoStreaming = + std::conditional_t, + typename detail::KernelTrait::PackInfoPdf, + typename detail::KernelTrait::PackInfoVec>; template using RegularCommScheme = blockforest::communication::UniformBufferedScheme; @@ -133,6 +138,8 @@ class LBWalberlaImpl : public LBWalberlaBase { using VectorField = gpu::GPUField; template using PackInfo = gpu::communication::MemcpyPackInfo; + template + using PackInfoStreaming = gpu::communication::MemcpyPackInfo; template using RegularCommScheme = gpu::communication::UniformGPUScheme; template @@ -284,6 +291,7 @@ class LBWalberlaImpl : public LBWalberlaBase { /** Flag for boundary cells. */ FlagUID const Boundary_flag{"boundary"}; + bool m_has_boundaries{false}; /** * @brief Full communicator. @@ -307,6 +315,10 @@ class LBWalberlaImpl : public LBWalberlaBase { template using PackInfo = typename FieldTrait::template PackInfo; + template + using PackInfoStreaming = + typename FieldTrait::template PackInfoStreaming; // communicators std::shared_ptr m_boundary_communicator; @@ -414,6 +426,24 @@ class LBWalberlaImpl : public LBWalberlaBase { #endif } + void setup_streaming_communicator() { + auto const setup = [this]() { + auto const &blocks = m_lattice->get_blocks(); + m_pdf_streaming_communicator = + std::make_shared(blocks); + m_pdf_streaming_communicator->addPackInfo( + std::make_shared(m_pdf_field_id)); + m_pdf_streaming_communicator->addPackInfo( + std::make_shared>( + m_last_applied_force_field_id)); + }; + if (m_has_boundaries or (m_collision_model and has_lees_edwards_bc())) { + setup.template operator()>(); + } else { + setup.template operator()>(); + } + } + public: LBWalberlaImpl(std::shared_ptr lattice, double viscosity, double density) @@ -448,12 +478,7 @@ class 
LBWalberlaImpl : public LBWalberlaBase { reset_boundary_handling(); // Set up the communication and register fields - m_pdf_streaming_communicator = - std::make_shared(blocks); - m_pdf_streaming_communicator->addPackInfo( - std::make_shared>(m_pdf_field_id)); - m_pdf_streaming_communicator->addPackInfo( - std::make_shared>(m_last_applied_force_field_id)); + setup_streaming_communicator(); m_full_communicator = std::make_shared(blocks); m_full_communicator->addPackInfo( @@ -555,7 +580,9 @@ class LBWalberlaImpl : public LBWalberlaBase { integrate_collide(blocks); m_pdf_streaming_communicator->communicate(); // Handle boundaries - integrate_boundaries(blocks); + if (m_has_boundaries) { + integrate_boundaries(blocks); + } // LB stream integrate_stream(blocks); // Mark pending ghost layer updates @@ -569,7 +596,9 @@ class LBWalberlaImpl : public LBWalberlaBase { void integrate_pull_scheme() { auto const &blocks = get_lattice().get_blocks(); // Handle boundaries - integrate_boundaries(blocks); + if (m_has_boundaries) { + integrate_boundaries(blocks); + } // LB stream integrate_stream(blocks); // LB collide @@ -690,6 +719,7 @@ class LBWalberlaImpl : public LBWalberlaBase { omega_odd, omega, seed, uint32_t{0u}); m_collision_model = std::make_shared(std::move(obj)); m_run_collide_sweep = CollideSweepVisitor(blocks); + setup_streaming_communicator(); } void set_collision_model( @@ -734,6 +764,7 @@ class LBWalberlaImpl : public LBWalberlaBase { blocks, m_last_applied_force_field_id, m_vec_tmp_field_id, n_ghost_layers, shear_direction, shear_plane_normal, m_lees_edwards_callbacks->get_pos_offset); + setup_streaming_communicator(); } void check_lebc(unsigned int shear_direction, @@ -765,10 +796,12 @@ class LBWalberlaImpl : public LBWalberlaBase { bool consider_ghosts = false) const override { assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::VEL))); assert(not(consider_ghosts and m_pending_ghost_comm.test(GhostComm::UBB))); - auto const is_boundary = 
get_node_is_boundary(node, consider_ghosts); - if (is_boundary) // is info available locally - if (*is_boundary) // is the node a boundary + if (m_has_boundaries) { + auto const is_boundary = get_node_is_boundary(node, consider_ghosts); + if (is_boundary and *is_boundary) { return get_node_velocity_at_boundary(node, consider_ghosts); + } + } auto const bc = get_block_and_cell(get_lattice(), node, consider_ghosts); if (!bc) return std::nullopt; @@ -1266,6 +1299,7 @@ class LBWalberlaImpl : public LBWalberlaBase { bool set_node_velocity_at_boundary(Utils::Vector3i const &node, Utils::Vector3d const &velocity) override { + on_boundary_add(); m_pending_ghost_comm.set(GhostComm::UBB); auto bc = get_block_and_cell(get_lattice(), node, true); if (bc) { @@ -1307,6 +1341,7 @@ class LBWalberlaImpl : public LBWalberlaBase { void set_slice_velocity_at_boundary( Utils::Vector3i const &lower_corner, Utils::Vector3i const &upper_corner, std::vector> const &velocity) override { + on_boundary_add(); m_pending_ghost_comm.set(GhostComm::UBB); if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); @@ -1388,19 +1423,30 @@ class LBWalberlaImpl : public LBWalberlaBase { void reallocate_ubb_field() override { m_boundary->boundary_update(); } + void on_boundary_add() { + if (not m_has_boundaries) { + m_has_boundaries = true; + setup_streaming_communicator(); + } + m_has_boundaries = true; + } + void clear_boundaries() override { reset_boundary_handling(); m_pending_ghost_comm.set(GhostComm::UBB); ghost_communication(); + m_has_boundaries = false; + setup_streaming_communicator(); } void update_boundary_from_shape(std::vector const &raster_flat, std::vector const &data_flat) override { + on_boundary_add(); + m_pending_ghost_comm.set(GhostComm::UBB); auto const grid_size = get_lattice().get_grid_dimensions(); auto const data = fill_3D_vector_array(data_flat, grid_size); set_boundary_from_grid(*m_boundary, get_lattice(), raster_flat, data); - 
m_pending_ghost_comm.set(GhostComm::UBB); ghost_communication(); reallocate_ubb_field(); } diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt index c2bf4267a8..434d968d52 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt @@ -20,6 +20,8 @@ target_sources( espresso_walberla PRIVATE StreamSweepSinglePrecision.cpp StreamSweepDoublePrecision.cpp + PackInfoPdfSinglePrecision.cpp PackInfoPdfDoublePrecision.cpp + PackInfoVecSinglePrecision.cpp PackInfoVecDoublePrecision.cpp InitialPDFsSetterSinglePrecision.cpp InitialPDFsSetterDoublePrecision.cpp Dynamic_UBB_single_precision.cpp Dynamic_UBB_double_precision.cpp) diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp new file mode 100644 index 0000000000..abee661f39 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp @@ -0,0 +1,1358 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). 
If not, see . +// +//! \\file PackInfoPdfDoublePrecision.cpp +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoPdfDoublePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW { +static FUNC_PREFIX void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SW + +namespace internal_pack_BW { +static FUNC_PREFIX void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t 
const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BW + +namespace internal_pack_W { +static FUNC_PREFIX void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + 
_stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_W + +namespace internal_pack_TW { +static FUNC_PREFIX void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TW + +namespace internal_pack_NW { +static FUNC_PREFIX void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NW + +namespace internal_pack_BS { +static FUNC_PREFIX void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < 
_size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BS + +namespace internal_pack_S { +static FUNC_PREFIX void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_S 
+
+namespace internal_pack_TS {
+// Copies the _data_pdfs entry at fourth-dimension index 12 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 12 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B {
+// Gathers five _data_pdfs entries per cell (fourth-dimension indices
+// 15, 16, 17, 18, 6, in that order) into five consecutive buffer slots.
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[slot] = _data_pdfs[site + 15 * _stride_pdfs_3];
+        _data_buffer[slot + 1] = _data_pdfs[site + 16 * _stride_pdfs_3];
+        _data_buffer[slot + 2] = _data_pdfs[site + 17 * _stride_pdfs_3];
+        _data_buffer[slot + 3] = _data_pdfs[site + 18 * _stride_pdfs_3];
+        _data_buffer[slot + 4] = _data_pdfs[site + 6 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T {
+// Gathers five _data_pdfs entries per cell (fourth-dimension indices
+// 11, 12, 13, 14, 5, in that order) into five consecutive buffer slots.
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[slot] = _data_pdfs[site + 11 * _stride_pdfs_3];
+        _data_buffer[slot + 1] = _data_pdfs[site + 12 * _stride_pdfs_3];
+        _data_buffer[slot + 2] = _data_pdfs[site + 13 * _stride_pdfs_3];
+        _data_buffer[slot + 3] = _data_pdfs[site + 14 * _stride_pdfs_3];
+        _data_buffer[slot + 4] = _data_pdfs[site + 5 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN {
+// Copies the _data_pdfs entry at fourth-dimension index 15 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 15 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N {
+// Gathers five _data_pdfs entries per cell (fourth-dimension indices
+// 1, 11, 15, 7, 8, in that order) into five consecutive buffer slots.
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[slot] = _data_pdfs[site + _stride_pdfs_3];
+        _data_buffer[slot + 1] = _data_pdfs[site + 11 * _stride_pdfs_3];
+        _data_buffer[slot + 2] = _data_pdfs[site + 15 * _stride_pdfs_3];
+        _data_buffer[slot + 3] = _data_pdfs[site + 7 * _stride_pdfs_3];
+        _data_buffer[slot + 4] = _data_pdfs[site + 8 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN {
+// Copies the _data_pdfs entry at fourth-dimension index 11 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 11 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TN
+
+namespace internal_pack_SE {
+// Copies the _data_pdfs entry at fourth-dimension index 10 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_SE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 10 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_SE
+
+namespace internal_pack_BE {
+// Copies the _data_pdfs entry at fourth-dimension index 18 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_BE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 18 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_BE
+
+namespace internal_pack_E {
+// Gathers five _data_pdfs entries per cell (fourth-dimension indices
+// 10, 14, 18, 4, 8, in that order) into five consecutive buffer slots.
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void pack_E(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[slot] = _data_pdfs[site + 10 * _stride_pdfs_3];
+        _data_buffer[slot + 1] = _data_pdfs[site + 14 * _stride_pdfs_3];
+        _data_buffer[slot + 2] = _data_pdfs[site + 18 * _stride_pdfs_3];
+        _data_buffer[slot + 3] = _data_pdfs[site + 4 * _stride_pdfs_3];
+        _data_buffer[slot + 4] = _data_pdfs[site + 8 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_E
+
+namespace internal_pack_TE {
+// Copies the _data_pdfs entry at fourth-dimension index 14 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_TE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 14 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_TE
+
+namespace internal_pack_NE {
+// Copies the _data_pdfs entry at fourth-dimension index 8 of every cell
+// into a contiguous buffer: one double per cell, x index fastest.
+static FUNC_PREFIX void pack_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_buffer[cell] = _data_pdfs[site + 8 * _stride_pdfs_3];
+      }
+    }
+  }
+}
+} // namespace internal_pack_NE
+
+namespace internal_unpack_SW {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 9 of every cell.
+static FUNC_PREFIX void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 9 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_BW {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 17 of every cell.
+static FUNC_PREFIX void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 17 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W {
+// Scatters five consecutive buffer slots per cell into the _data_pdfs
+// entries at fourth-dimension indices 13, 17, 3, 7, 9 (in that order).
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 13 * _stride_pdfs_3] = _data_buffer[slot];
+        _data_pdfs[site + 17 * _stride_pdfs_3] = _data_buffer[slot + 1];
+        _data_pdfs[site + 3 * _stride_pdfs_3] = _data_buffer[slot + 2];
+        _data_pdfs[site + 7 * _stride_pdfs_3] = _data_buffer[slot + 3];
+        _data_pdfs[site + 9 * _stride_pdfs_3] = _data_buffer[slot + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 13 of every cell.
+static FUNC_PREFIX void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 13 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_NW {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 7 of every cell.
+static FUNC_PREFIX void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 7 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_BS {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 16 of every cell.
+static FUNC_PREFIX void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 16 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S {
+// Scatters five consecutive buffer slots per cell into the _data_pdfs
+// entries at fourth-dimension indices 10, 12, 16, 2, 9 (in that order).
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 10 * _stride_pdfs_3] = _data_buffer[slot];
+        _data_pdfs[site + 12 * _stride_pdfs_3] = _data_buffer[slot + 1];
+        _data_pdfs[site + 16 * _stride_pdfs_3] = _data_buffer[slot + 2];
+        _data_pdfs[site + 2 * _stride_pdfs_3] = _data_buffer[slot + 3];
+        _data_pdfs[site + 9 * _stride_pdfs_3] = _data_buffer[slot + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 12 of every cell.
+static FUNC_PREFIX void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 12 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B {
+// Scatters five consecutive buffer slots per cell into the _data_pdfs
+// entries at fourth-dimension indices 15, 16, 17, 18, 6 (in that order).
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 15 * _stride_pdfs_3] = _data_buffer[slot];
+        _data_pdfs[site + 16 * _stride_pdfs_3] = _data_buffer[slot + 1];
+        _data_pdfs[site + 17 * _stride_pdfs_3] = _data_buffer[slot + 2];
+        _data_pdfs[site + 18 * _stride_pdfs_3] = _data_buffer[slot + 3];
+        _data_pdfs[site + 6 * _stride_pdfs_3] = _data_buffer[slot + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T {
+// Scatters five consecutive buffer slots per cell into the _data_pdfs
+// entries at fourth-dimension indices 11, 12, 13, 14, 5 (in that order).
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 11 * _stride_pdfs_3] = _data_buffer[slot];
+        _data_pdfs[site + 12 * _stride_pdfs_3] = _data_buffer[slot + 1];
+        _data_pdfs[site + 13 * _stride_pdfs_3] = _data_buffer[slot + 2];
+        _data_pdfs[site + 14 * _stride_pdfs_3] = _data_buffer[slot + 3];
+        _data_pdfs[site + 5 * _stride_pdfs_3] = _data_buffer[slot + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 15 of every cell.
+static FUNC_PREFIX void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 15 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N {
+// Scatters five consecutive buffer slots per cell into the _data_pdfs
+// entries at fourth-dimension indices 1, 11, 15, 7, 8 (in that order).
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + _stride_pdfs_3] = _data_buffer[slot];
+        _data_pdfs[site + 11 * _stride_pdfs_3] = _data_buffer[slot + 1];
+        _data_pdfs[site + 15 * _stride_pdfs_3] = _data_buffer[slot + 2];
+        _data_pdfs[site + 7 * _stride_pdfs_3] = _data_buffer[slot + 3];
+        _data_pdfs[site + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 11 of every cell.
+static FUNC_PREFIX void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 11 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TN
+
+namespace internal_unpack_SE {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 10 of every cell.
+static FUNC_PREFIX void unpack_SE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 10 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_SE
+
+namespace internal_unpack_BE {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 18 of every cell.
+static FUNC_PREFIX void unpack_BE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 18 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_BE
+
+namespace internal_unpack_E {
+// Scatters five consecutive buffer slots per cell into the _data_pdfs
+// entries at fourth-dimension indices 10, 14, 18, 4, 8 (in that order).
+// Cells are enumerated with the x index running fastest.
+static FUNC_PREFIX void unpack_E(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x);
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 10 * _stride_pdfs_3] = _data_buffer[slot];
+        _data_pdfs[site + 14 * _stride_pdfs_3] = _data_buffer[slot + 1];
+        _data_pdfs[site + 18 * _stride_pdfs_3] = _data_buffer[slot + 2];
+        _data_pdfs[site + 4 * _stride_pdfs_3] = _data_buffer[slot + 3];
+        _data_pdfs[site + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_E
+
+namespace internal_unpack_TE {
+// Writes a contiguous buffer (one double per cell, x index fastest) into
+// the _data_pdfs entry at fourth-dimension index 14 of every cell.
+static FUNC_PREFIX void unpack_TE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs,
+    int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2,
+    int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2,
+    int64_t const _stride_pdfs_3) {
+  for (int64_t z = 0; z < _size_pdfs_2; ++z) {
+    for (int64_t y = 0; y < _size_pdfs_1; ++y) {
+      for (int64_t x = 0; x < _size_pdfs_0; ++x) {
+        int64_t const cell = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x;
+        int64_t const site = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2;
+        _data_pdfs[site + 14 * _stride_pdfs_3] = _data_buffer[cell];
+      }
+    }
+  }
+}
+} // namespace internal_unpack_TE
+
+namespace internal_unpack_NE
{ +static FUNC_PREFIX void unpack_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NE + +void PackInfoPdfDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = 
int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_SW::pack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BW::pack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + 
WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_W::pack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + internal_pack_TW::pack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NW::pack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = 
int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BS::pack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_S::pack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, 
_stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TS::pack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t 
_size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_B::pack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_T::pack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + double *RESTRICT _data_buffer = buffer; + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BN::pack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), 
int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_N::pack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TN::pack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), 
-int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_SE::pack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = 
int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BE::pack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_E::pack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const 
_data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TE::pack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = 
int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NE::pack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoPdfDoublePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_SW::unpack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + double 
*RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BW::unpack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + 
WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_W::unpack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TW::unpack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), 
-int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_NW::unpack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 
= int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BS::unpack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_S::unpack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TS::unpack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t 
_stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_B::unpack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_T::unpack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), 
ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BN::unpack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t 
_stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_N::unpack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TN::unpack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + 
const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_SE::unpack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_BE::unpack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, 
_size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_E::unpack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), 
int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_TE::unpack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_unpack_NE::unpack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + 
WALBERLA_ASSERT(false); + } +} + +uint_t PackInfoPdfDoublePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + elementsPerCell = 1; + break; + + case stencil::BW: + elementsPerCell = 1; + break; + + case stencil::W: + elementsPerCell = 5; + break; + + case stencil::TW: + elementsPerCell = 1; + break; + + case stencil::NW: + elementsPerCell = 1; + break; + + case stencil::BS: + elementsPerCell = 1; + break; + + case stencil::S: + elementsPerCell = 5; + break; + + case stencil::TS: + elementsPerCell = 1; + break; + + case stencil::B: + elementsPerCell = 5; + break; + + case stencil::T: + elementsPerCell = 5; + break; + + case stencil::BN: + elementsPerCell = 1; + break; + + case stencil::N: + elementsPerCell = 5; + break; + + case stencil::TN: + elementsPerCell = 1; + break; + + case stencil::SE: + elementsPerCell = 1; + break; + + case stencil::BE: + elementsPerCell = 1; + break; + + case stencil::E: + elementsPerCell = 5; + break; + + case stencil::TE: + elementsPerCell = 1; + break; + + case stencil::NE: + elementsPerCell = 1; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(double); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h new file mode 100644 index 0000000000..d2c205023c --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. 
waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoPdfDoublePrecision.h +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoPdfDoublePrecision + : public ::walberla::communication::UniformPackInfo { +public: + PackInfoPdfDoublePrecision(BlockDataID pdfsID_) : pdfsID(pdfsID_){}; + virtual ~PackInfoPdfDoublePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + 
stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID pdfsID; +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp new file mode 100644 index 0000000000..5beb6eb918 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp @@ -0,0 +1,1358 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoPdfSinglePrecision.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoPdfSinglePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW { +static FUNC_PREFIX void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SW + +namespace internal_pack_BW { +static FUNC_PREFIX void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 
= 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BW + +namespace internal_pack_W { +static FUNC_PREFIX void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace 
internal_pack_W + +namespace internal_pack_TW { +static FUNC_PREFIX void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TW + +namespace internal_pack_NW { +static FUNC_PREFIX void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NW + +namespace internal_pack_BS { +static FUNC_PREFIX void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + 
for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BS + +namespace internal_pack_S { +static FUNC_PREFIX void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_S + +namespace internal_pack_TS { +static FUNC_PREFIX void pack_TS(float *RESTRICT 
_data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TS + +namespace internal_pack_B { +static FUNC_PREFIX void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 
* ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_B + +namespace internal_pack_T { +static FUNC_PREFIX void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_T + +namespace internal_pack_BN { +static FUNC_PREFIX void pack_BN(float *RESTRICT _data_buffer, float 
*RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BN + +namespace internal_pack_N { +static FUNC_PREFIX void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * 
_stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_N + +namespace internal_pack_TN { +static FUNC_PREFIX void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TN + +namespace internal_pack_SE { +static FUNC_PREFIX void pack_SE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_SE + +namespace internal_pack_BE { +static FUNC_PREFIX void pack_BE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const 
_size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_BE + +namespace internal_pack_E { +static FUNC_PREFIX void pack_E(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 
* ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_E + +namespace internal_pack_TE { +static FUNC_PREFIX void pack_TE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_TE + +namespace internal_pack_NE { +static FUNC_PREFIX void pack_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } + } + } +} +} // namespace internal_pack_NE + +namespace internal_unpack_SW { +static FUNC_PREFIX void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, 
int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_SW + +namespace internal_unpack_BW { +static FUNC_PREFIX void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BW + +namespace internal_unpack_W { +static FUNC_PREFIX void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + 
_stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_W + +namespace internal_unpack_TW { +static FUNC_PREFIX void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TW + +namespace internal_unpack_NW { +static FUNC_PREFIX void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; 
ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NW + +namespace internal_unpack_BS { +static FUNC_PREFIX void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BS + +namespace internal_unpack_S { +static FUNC_PREFIX void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = 
_data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_S + +namespace internal_unpack_TS { +static FUNC_PREFIX void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TS + +namespace internal_unpack_B { +static FUNC_PREFIX void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) 
{ + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_B + +namespace internal_unpack_T { +static FUNC_PREFIX void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = 
_data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_T + +namespace internal_unpack_BN { +static FUNC_PREFIX void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BN + +namespace internal_unpack_N { +static FUNC_PREFIX void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) 
{ + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_N + +namespace internal_unpack_TN { +static FUNC_PREFIX void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TN + +namespace internal_unpack_SE { +static FUNC_PREFIX void unpack_SE(float *RESTRICT 
const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_SE + +namespace internal_unpack_BE { +static FUNC_PREFIX void unpack_BE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_BE + +namespace internal_unpack_E { +static FUNC_PREFIX void unpack_E(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + 
_data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } + } + } +} +} // namespace internal_unpack_E + +namespace internal_unpack_TE { +static FUNC_PREFIX void unpack_TE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_TE + +namespace internal_unpack_NE { +static FUNC_PREFIX void unpack_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t 
const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_pdfs_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_pdfs_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_pdfs_0; ctr_0 += 1) { + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } + } + } +} +} // namespace internal_unpack_NE + +void PackInfoPdfSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + internal_pack_SW::pack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BW::pack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = 
int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_W::pack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TW::pack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, 
_stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NW::pack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t 
_size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BS::pack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_S::pack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + float *RESTRICT _data_buffer = buffer; + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TS::pack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), 
int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_B::pack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_T::pack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), 
-int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BN::pack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = 
int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_N::pack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TN::pack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs 
= pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_SE::pack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const 
int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_BE::pack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_E::pack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + 
const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_TE::pack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + internal_pack_NE::pack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, 
_size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  default:
+    WALBERLA_ASSERT(false);
+  }
+}
+
+/**
+ * Hand a receive buffer to the unpack kernel that matches a direction.
+ *
+ * The cell interval is obtained from getGhostRegion(dir, ci, 1, false) on the
+ * PDF field, and the kernel is selected by stencil::inverseDir[dir].
+ *
+ * @param dir          direction whose ghost region is addressed
+ * @param byte_buffer  raw buffer, reinterpreted as an array of float
+ * @param block        block holding the field registered under pdfsID
+ *
+ * NOTE(review): the template arguments of reinterpret_cast and getData were
+ * missing from this copy of the patch; they are restored here as `float *`
+ * and `field::GhostLayerField<float, 19>` (19 presumably matching the D3Q19
+ * stencil) - confirm against the pristine generated file.
+ * NOTE(review): this file is generator output (see the "kernel generated
+ * with pystencils" banner); the per-case preamble that the generator emits
+ * was identical in all 18 cases and is evaluated once here.
+ */
+void PackInfoPdfSinglePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const {
+  float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+
+  CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false);
+  auto communicationDirection = stencil::inverseDir[dir];
+
+  // Kernel arguments shared by every direction: the interval must lie inside
+  // the field including its ghost layers, then extents and strides are read.
+  float *RESTRICT const _data_buffer = buffer;
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+  WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+  float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+  const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+  const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+  WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+  const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+  const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+  const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+  const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+  const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+
+  switch (communicationDirection) {
+  case stencil::SW:
+    internal_unpack_SW::unpack_SW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::BW:
+    internal_unpack_BW::unpack_BW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::W:
+    internal_unpack_W::unpack_W(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::TW:
+    internal_unpack_TW::unpack_TW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::NW:
+    internal_unpack_NW::unpack_NW(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::BS:
+    internal_unpack_BS::unpack_BS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::S:
+    internal_unpack_S::unpack_S(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::TS:
+    internal_unpack_TS::unpack_TS(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::B:
+    internal_unpack_B::unpack_B(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::T:
+    internal_unpack_T::unpack_T(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::BN:
+    internal_unpack_BN::unpack_BN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::N:
+    internal_unpack_N::unpack_N(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::TN:
+    internal_unpack_TN::unpack_TN(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::SE:
+    internal_unpack_SE::unpack_SE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::BE:
+    internal_unpack_BE::unpack_BE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::E:
+    internal_unpack_E::unpack_E(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::TE:
+    internal_unpack_TE::unpack_TE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  case stencil::NE:
+    internal_unpack_NE::unpack_NE(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+
+  default:
+    // no unpack kernel exists for any other direction
+    WALBERLA_ASSERT(false);
+  }
+}
+
+uint_t PackInfoPdfSinglePrecision::size(stencil::Direction dir, const IBlock *block) const {
+  auto pdfs = block->getData<field::GhostLayerField<float, 19>>(pdfsID);
+
+
CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false);
+
+  // Number of float values carried per cell of the interval for this direction.
+  uint_t elementsPerCell = 0;
+
+  switch (dir) {
+  // single-letter directions: five values per cell
+  case stencil::W:
+  case stencil::S:
+  case stencil::B:
+  case stencil::T:
+  case stencil::N:
+  case stencil::E:
+    elementsPerCell = 5;
+    break;
+
+  // two-letter directions: one value per cell
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::TS:
+  case stencil::BN:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::TE:
+  case stencil::NE:
+    elementsPerCell = 1;
+    break;
+
+  // every other direction contributes nothing
+  default:
+    elementsPerCell = 0;
+  }
+  // result is a byte count: cells * values per cell * sizeof(float)
+  return ci.numCells() * elementsPerCell * sizeof(float);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h
new file mode 100644
index 0000000000..9dd84b0c62
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h
@@ -0,0 +1,84 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla.
waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfSinglePrecision.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+#include "communication/UniformPackInfo.h"
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+#include "stencil/Directions.h"
+
+#define FUNC_PREFIX
+
+#ifdef __GNUC__
+#define RESTRICT __restrict__
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+class PackInfoPdfSinglePrecision // pack info for the float field registered under pdfsID
+    : public ::walberla::communication::UniformPackInfo {
+public:
+  PackInfoPdfSinglePrecision(BlockDataID pdfsID_) : pdfsID(pdfsID_) {}
+  virtual ~PackInfoPdfSinglePrecision() {}
+
+  bool constantDataExchange() const { return true; }
+  bool threadsafeReceiving() const { return true; }
+
+  void unpackData(IBlock *receiver, stencil::Direction dir,
+                  mpi::RecvBuffer &buffer) {
+    const auto dataSize = size(dir, receiver); // byte count for this direction
+    unpack(dir, buffer.skip(dataSize), receiver);
+  }
+
+  void communicateLocal(const IBlock *sender, IBlock *receiver,
+                        stencil::Direction dir) {
+    mpi::SendBuffer sBuffer; // local exchange goes through a temporary buffer pair
+    packData(sender, dir, sBuffer);
+    mpi::RecvBuffer rBuffer(sBuffer);
+    unpackData(receiver, stencil::inverseDir[dir], rBuffer);
+  }
+
+  void packDataImpl(const IBlock *sender, stencil::Direction dir,
+                    mpi::SendBuffer &outBuffer) const {
+    const auto dataSize = size(dir, sender); // byte count for this direction
+    pack(dir, outBuffer.forward(dataSize), const_cast<IBlock *>(sender)); // pack() takes a non-const IBlock *
+  }
+
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const;
+  void unpack(stencil::Direction dir, unsigned char *buffer,
+              IBlock *block) const;
+  uint_t size(stencil::Direction dir, const IBlock *block) const; // returns bytes, not elements
+
+private:
+  BlockDataID pdfsID;
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
new file mode 100644
index 0000000000..5e94631ed0
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
@@ -0,0 +1,212 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecDoublePrecision.cpp
+//!
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoVecDoublePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 { +static FUNC_PREFIX void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = 
_data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; + } + } + } +} +} // namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 + +namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 { +static FUNC_PREFIX void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; + } + } + } +} +} // namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 + +void PackInfoVecDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case 
stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + double *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_05a1eb9a7382e5e7047cdb22e28b6556::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoVecDoublePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + double *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case 
stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + double *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_1ccccad4ca561e07a0934cadb07d0fc1::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +uint_t PackInfoVecDoublePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + case 
stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: + elementsPerCell = 3; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(double); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h new file mode 100644 index 0000000000..4cd1dc0869 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecDoublePrecision.h +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoVecDoublePrecision + : public ::walberla::communication::UniformPackInfo { +public: + PackInfoVecDoublePrecision(BlockDataID fieldID_) : fieldID(fieldID_){}; + virtual ~PackInfoVecDoublePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID fieldID; +}; 
+ +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp new file mode 100644 index 0000000000..a9dea10421 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp @@ -0,0 +1,212 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecSinglePrecision.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "PackInfoVecSinglePrecision.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "stencil/Directions.h" + +#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 { +static FUNC_PREFIX void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = 
_data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; + } + } + } +} +} // namespace internal_05a1eb9a7382e5e7047cdb22e28b6556 + +namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 { +static FUNC_PREFIX void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + for (int64_t ctr_2 = 0; ctr_2 < _size_field_2; ctr_2 += 1) { + for (int64_t ctr_1 = 0; ctr_1 < _size_field_1; ctr_1 += 1) { + for (int64_t ctr_0 = 0; ctr_0 < _size_field_0; ctr_0 += 1) { + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; + } + } + } +} +} // namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 + +void PackInfoVecSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case 
stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + float *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_05a1eb9a7382e5e7047cdb22e28b6556::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +void PackInfoVecSinglePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + float *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + 
case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + float *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + internal_1ccccad4ca561e07a0934cadb07d0fc1::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + WALBERLA_ASSERT(false); + } +} + +uint_t PackInfoVecSinglePrecision::size(stencil::Direction dir, const IBlock *block) const { + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + case stencil::BW: + case 
stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: + elementsPerCell = 3; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(float); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h new file mode 100644 index 0000000000..1d0e7936f9 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h @@ -0,0 +1,84 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecSinglePrecision.h +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once +#include "communication/UniformPackInfo.h" +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + +#define FUNC_PREFIX + +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#elif _MSC_VER +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +class PackInfoVecSinglePrecision + : public ::walberla::communication::UniformPackInfo { +public: + PackInfoVecSinglePrecision(BlockDataID fieldID_) : fieldID(fieldID_){}; + virtual ~PackInfoVecSinglePrecision() {} + + bool constantDataExchange() const { return true; } + bool threadsafeReceiving() const { return true; } + + void unpackData(IBlock *receiver, stencil::Direction dir, + mpi::RecvBuffer &buffer) { + const auto dataSize = size(dir, receiver); + unpack(dir, buffer.skip(dataSize), receiver); + } + + void communicateLocal(const IBlock *sender, IBlock *receiver, + stencil::Direction dir) { + mpi::SendBuffer sBuffer; + packData(sender, dir, sBuffer); + mpi::RecvBuffer rBuffer(sBuffer); + unpackData(receiver, stencil::inverseDir[dir], rBuffer); + } + + void packDataImpl(const IBlock *sender, stencil::Direction dir, + mpi::SendBuffer &outBuffer) const { + const auto dataSize = size(dir, sender); + pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + } + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; + void unpack(stencil::Direction dir, unsigned char *buffer, + IBlock *block) const; + uint_t size(stencil::Direction dir, const IBlock *block) const; + +private: + BlockDataID fieldID; +}; 
+ +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp index 6d5d4fc79a..c6df7eb3ce 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.hpp @@ -27,6 +27,10 @@ #include "generated_kernels/FieldAccessorsSinglePrecision.h" #include "generated_kernels/InitialPDFsSetterDoublePrecision.h" #include "generated_kernels/InitialPDFsSetterSinglePrecision.h" +#include "generated_kernels/PackInfoPdfDoublePrecision.h" +#include "generated_kernels/PackInfoPdfSinglePrecision.h" +#include "generated_kernels/PackInfoVecDoublePrecision.h" +#include "generated_kernels/PackInfoVecSinglePrecision.h" #ifdef __AVX2__ #include "generated_kernels/CollideSweepDoublePrecisionLeesEdwardsAVX.h" @@ -64,6 +68,8 @@ template struct KernelTrait { using StreamSweep = pystencils::StreamSweepDoublePrecision; #endif using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecision; + using PackInfoPdf = pystencils::PackInfoPdfDoublePrecision; + using PackInfoVec = pystencils::PackInfoVecDoublePrecision; }; template <> struct KernelTrait { @@ -81,6 +87,8 @@ template <> struct KernelTrait { using StreamSweep = pystencils::StreamSweepSinglePrecision; #endif using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecision; + using PackInfoPdf = pystencils::PackInfoPdfSinglePrecision; + using PackInfoVec = pystencils::PackInfoVecSinglePrecision; }; template From 960c2adefb0a2da5dd30316734860affb3a94d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Fri, 20 Sep 2024 21:18:36 +0200 Subject: [PATCH 4/4] PackInfo MPI buffer alignment bugfix --- .../generated_kernels/PackInfoPdfDoublePrecision.cpp | 4 ++++ .../generated_kernels/PackInfoPdfDoublePrecision.h | 5 +++-- .../generated_kernels/PackInfoPdfSinglePrecision.cpp | 4 ++++ .../generated_kernels/PackInfoPdfSinglePrecision.h | 
5 +++-- .../generated_kernels/PackInfoVecDoublePrecision.cpp | 4 ++++ .../generated_kernels/PackInfoVecDoublePrecision.h | 5 +++-- .../generated_kernels/PackInfoVecSinglePrecision.cpp | 4 ++++ .../generated_kernels/PackInfoVecSinglePrecision.h | 5 +++-- 8 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp index abee661f39..1ab45417dc 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp @@ -24,6 +24,8 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" +#include + #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" @@ -519,6 +521,7 @@ static FUNC_PREFIX void unpack_NE(double *RESTRICT const _data_buffer, double *R } // namespace internal_unpack_NE void PackInfoPdfDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(double) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(double)) * sizeof(double)); double *buffer = reinterpret_cast(byte_buffer); auto pdfs = block->getData>(pdfsID); @@ -893,6 +896,7 @@ void PackInfoPdfDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, } void PackInfoPdfDoublePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(double) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(double)) * sizeof(double)); double *buffer = reinterpret_cast(byte_buffer); auto pdfs = block->getData>(pdfsID); diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h 
b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h index d2c205023c..6c2b00a8e3 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.h @@ -54,7 +54,7 @@ class PackInfoPdfDoublePrecision void unpackData(IBlock *receiver, stencil::Direction dir, mpi::RecvBuffer &buffer) { const auto dataSize = size(dir, receiver); - unpack(dir, buffer.skip(dataSize), receiver); + unpack(dir, buffer.skip(dataSize + sizeof(double)), receiver); } void communicateLocal(const IBlock *sender, IBlock *receiver, @@ -68,7 +68,8 @@ class PackInfoPdfDoublePrecision void packDataImpl(const IBlock *sender, stencil::Direction dir, mpi::SendBuffer &outBuffer) const { const auto dataSize = size(dir, sender); - pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + pack(dir, outBuffer.forward(dataSize + sizeof(double)), + const_cast(sender)); } void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp index 5beb6eb918..e55017ab21 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp @@ -24,6 +24,8 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" +#include + #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" @@ -519,6 +521,7 @@ static FUNC_PREFIX void unpack_NE(float *RESTRICT const _data_buffer, float *RES } // namespace internal_unpack_NE void PackInfoPdfSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock 
*block) const { + byte_buffer += sizeof(float) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(float)) * sizeof(float)); float *buffer = reinterpret_cast(byte_buffer); auto pdfs = block->getData>(pdfsID); @@ -893,6 +896,7 @@ void PackInfoPdfSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, } void PackInfoPdfSinglePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(float) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(float)) * sizeof(float)); float *buffer = reinterpret_cast(byte_buffer); auto pdfs = block->getData>(pdfsID); diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h index 9dd84b0c62..40cb206890 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.h @@ -54,7 +54,7 @@ class PackInfoPdfSinglePrecision void unpackData(IBlock *receiver, stencil::Direction dir, mpi::RecvBuffer &buffer) { const auto dataSize = size(dir, receiver); - unpack(dir, buffer.skip(dataSize), receiver); + unpack(dir, buffer.skip(dataSize + sizeof(float)), receiver); } void communicateLocal(const IBlock *sender, IBlock *receiver, @@ -68,7 +68,8 @@ class PackInfoPdfSinglePrecision void packDataImpl(const IBlock *sender, stencil::Direction dir, mpi::SendBuffer &outBuffer) const { const auto dataSize = size(dir, sender); - pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + pack(dir, outBuffer.forward(dataSize + sizeof(float)), + const_cast(sender)); } void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp 
b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp index 5e94631ed0..6cbf3cb98d 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp @@ -24,6 +24,8 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" +#include + #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" @@ -67,6 +69,7 @@ static FUNC_PREFIX void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE } // namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 void PackInfoVecDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(double) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(double)) * sizeof(double)); double *buffer = reinterpret_cast(byte_buffer); auto field = block->getData>(fieldID); @@ -119,6 +122,7 @@ void PackInfoVecDoublePrecision::pack(Direction dir, unsigned char *byte_buffer, } void PackInfoVecDoublePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(double) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(double)) * sizeof(double)); double *buffer = reinterpret_cast(byte_buffer); auto field = block->getData>(fieldID); diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h index 4cd1dc0869..0445249235 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.h @@ -54,7 +54,7 @@ class PackInfoVecDoublePrecision void unpackData(IBlock *receiver, 
stencil::Direction dir, mpi::RecvBuffer &buffer) { const auto dataSize = size(dir, receiver); - unpack(dir, buffer.skip(dataSize), receiver); + unpack(dir, buffer.skip(dataSize + sizeof(double)), receiver); } void communicateLocal(const IBlock *sender, IBlock *receiver, @@ -68,7 +68,8 @@ class PackInfoVecDoublePrecision void packDataImpl(const IBlock *sender, stencil::Direction dir, mpi::SendBuffer &outBuffer) const { const auto dataSize = size(dir, sender); - pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + pack(dir, outBuffer.forward(dataSize + sizeof(double)), + const_cast(sender)); } void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const; diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp index a9dea10421..3ddeee01b6 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp @@ -24,6 +24,8 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" +#include + #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" @@ -67,6 +69,7 @@ static FUNC_PREFIX void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE } // namespace internal_1ccccad4ca561e07a0934cadb07d0fc1 void PackInfoVecSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(float) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(float)) * sizeof(float)); float *buffer = reinterpret_cast(byte_buffer); auto field = block->getData>(fieldID); @@ -119,6 +122,7 @@ void PackInfoVecSinglePrecision::pack(Direction dir, unsigned char *byte_buffer, } void 
PackInfoVecSinglePrecision::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block) const { + byte_buffer += sizeof(float) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof(float)) * sizeof(float)); float *buffer = reinterpret_cast(byte_buffer); auto field = block->getData>(fieldID); diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h index 1d0e7936f9..04a360cbed 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.h @@ -54,7 +54,7 @@ class PackInfoVecSinglePrecision void unpackData(IBlock *receiver, stencil::Direction dir, mpi::RecvBuffer &buffer) { const auto dataSize = size(dir, receiver); - unpack(dir, buffer.skip(dataSize), receiver); + unpack(dir, buffer.skip(dataSize + sizeof(float)), receiver); } void communicateLocal(const IBlock *sender, IBlock *receiver, @@ -68,7 +68,8 @@ class PackInfoVecSinglePrecision void packDataImpl(const IBlock *sender, stencil::Direction dir, mpi::SendBuffer &outBuffer) const { const auto dataSize = size(dir, sender); - pack(dir, outBuffer.forward(dataSize), const_cast(sender)); + pack(dir, outBuffer.forward(dataSize + sizeof(float)), + const_cast(sender)); } void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block) const;