diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py
index ea42b42005..04453c5ea7 100644
--- a/maintainer/benchmarks/lb.py
+++ b/maintainer/benchmarks/lb.py
@@ -31,7 +31,7 @@
parser.add_argument("--particles_per_core", metavar="N", action="store",
type=int, default=125, required=False,
help="Number of particles per core")
-parser.add_argument("--box_l", action="store",
+parser.add_argument("--box_l", action="store", nargs="+",
type=int, default=argparse.SUPPRESS, required=False,
help="Box length (cubic box)")
parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store",
@@ -45,6 +45,8 @@
help="Using single-precision floating point accuracy")
parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
default=False, required=False, help="Use GPU implementation")
+parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction,
+ default=False, required=False, help="Use multi-GPU implementation")
parser.add_argument("--output", metavar="FILEPATH", action="store",
type=str, required=False, default="benchmarks.csv",
help="Output file (default: benchmarks.csv)")
@@ -83,9 +85,9 @@
n_proc = system.cell_system.get_state()["n_nodes"]
n_part = n_proc * args.particles_per_core
if n_part == 0:
- box_l = args.box_l
+ box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
agrid = 1.
- lb_grid = args.box_l
+ lb_grid = box_l
measurement_steps = 80
else:
# volume of N spheres with radius r: N * (4/3*pi*r^3)
@@ -96,13 +98,16 @@
agrid = box_l / lb_grid
measurement_steps = max(50, int(120**3 / lb_grid**3))
measurement_steps = 40
+ lb_grid = 3 * [lb_grid]
+ box_l = 3 * [box_l]
-print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]")
+print(f"box length: {box_l}")
+print(f"LB shape: {lb_grid}")
print(f"LB agrid: {agrid:.3f}")
# System
#############################################################
-system.box_l = 3 * (box_l,)
+system.box_l = box_l
# Integration parameters
#############################################################
@@ -135,8 +140,10 @@
# LB fluid setup
#############################################################
lb_class = espressomd.lb.LBFluidWalberla
-if args.gpu:
+if args.gpu or args.multi_gpu:
lb_class = espressomd.lb.LBFluidWalberlaGPU
+if args.multi_gpu:
+ system.cuda_init_handle.call_method("set_device_id_per_rank")
lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1.,
density=1., single_precision=args.single_precision)
system.lb = lbf
diff --git a/maintainer/walberla_kernels/Readme.md b/maintainer/walberla_kernels/Readme.md
index ef7f2fb0c5..39f1a7e41a 100644
--- a/maintainer/walberla_kernels/Readme.md
+++ b/maintainer/walberla_kernels/Readme.md
@@ -49,7 +49,6 @@ generate_lb_kernels --single-precision
generate_lb_kernels --gpu
generate_lb_kernels --gpu --single-precision
format_lb_kernels
-git diff src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_*CUDA*.cu # verify pragmas
# EK kernels
cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/
diff --git a/maintainer/walberla_kernels/custom_additional_extensions.py b/maintainer/walberla_kernels/custom_additional_extensions.py
index 19d8e3f22a..3115f7ffe7 100644
--- a/maintainer/walberla_kernels/custom_additional_extensions.py
+++ b/maintainer/walberla_kernels/custom_additional_extensions.py
@@ -349,3 +349,48 @@ def generate_kernel_selector(
"templates/ReactionKernelSelector.tmpl.h").render(**context)
generation_context.write_file(f"{class_name}_all.h", header)
+
+
+def generate_device_preprocessor(kernel, defines=()):
+    """
+    Render the compiler-specific preprocessor sections for a device kernel.
+
+    The template ``templates/preprocessor.tmpl.cuh``, located next to this
+    module, is rendered with the diagnostics registered for ``kernel`` and
+    the requested macro names. The rendered text is then split on the
+    ``/* section */`` marker lines and the part preceding the first marker
+    is dropped.
+
+    Parameters
+    ----------
+    kernel : :obj:`str`
+        Key of the diagnostics table, either ``"packinfo"`` or
+        ``"ubb_boundary"``; any other key raises a ``KeyError``.
+    defines : iterable of :obj:`str`
+        Macro names to define, looked up per compiler in the macro table
+        (``"RESTRICT"`` and ``"FUNC_PREFIX"`` are available).
+
+    Returns
+    -------
+    :obj:`list` of :obj:`str`
+        The rendered text chunks that follow each section marker.
+    """
+    nvcc_unused = "diag_suppress 177 // unused variable"
+    clang_ubb = ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"]  # nopep8
+    # diagnostics to silence, per kernel family and per compiler
+    suppressed_warnings = {
+        "packinfo": {
+            "nvcc": [nvcc_unused],
+            "clang_host": ["-Wunused-variable"],
+            "clang_dev": ["-Wunused-variable"],
+            "gcc": ["-Wunused-variable"],
+        },
+        "ubb_boundary": {
+            "nvcc": [nvcc_unused],
+            "clang_host": list(clang_ubb),
+            "clang_dev": list(clang_ubb),
+            "gcc": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion"],
+        },
+    }
+    kernel_pragmas = suppressed_warnings[kernel]
+
+    def qualifiers(restrict, func_prefix):
+        return {"RESTRICT": restrict, "FUNC_PREFIX": func_prefix}
+
+    # macro expansions, per compiler
+    macro_values = {
+        "nvcc": qualifiers("__restrict__", "__global__"),
+        "msvc": qualifiers("__restrict", ""),
+        "clang_host": qualifiers("__restrict__", ""),
+        "clang_dev": qualifiers("__restrict__", "__global__"),
+        "gcc": qualifiers("__restrict__", ""),
+        "other": qualifiers("", ""),
+    }
+
+    # StrictUndefined makes the rendering fail on any name missing from
+    # the context instead of silently emitting an empty string
+    environment = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent),
+        undefined=jinja2.StrictUndefined
+    )
+    template = environment.get_template("templates/preprocessor.tmpl.cuh")
+    rendered = template.render(
+        pragmas=kernel_pragmas,
+        defines_table=macro_values,
+        defines=defines,
+    )
+
+    # the text before the first marker is discarded
+    _preamble, *sections = rendered.split("\n/* section */\n")
+    return sections
diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py
index 21300af894..bef9badc54 100644
--- a/maintainer/walberla_kernels/generate_lb_kernels.py
+++ b/maintainer/walberla_kernels/generate_lb_kernels.py
@@ -1,5 +1,5 @@
#
-# Copyright (C) 2020-2023 The ESPResSo project
+# Copyright (C) 2020-2024 The ESPResSo project
#
# This file is part of ESPResSo.
#
@@ -17,6 +17,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
+import re
import argparse
import packaging.specifiers
@@ -40,6 +41,7 @@
import relaxation_rates
import walberla_lbm_generation
import code_generation_context
+import custom_additional_extensions
parser = argparse.ArgumentParser(description="Generate the waLBerla kernels.")
parser.add_argument("--single-precision", action="store_true", required=False,
@@ -65,6 +67,24 @@ def paramlist(parameters, keys):
yield parameters[key]
+def get_ext_header(target_suffix):
+    """
+    Return the header file extension for ``target_suffix``.
+
+    Every target, including ``"CUDA"``, currently maps to ``"h"``.
+    """
+    header_extensions = {"CUDA": "h"}
+    return header_extensions.get(target_suffix, "h")
+
+
+def get_ext_source(target_suffix):
+    """
+    Return the source file extension for ``target_suffix``.
+
+    ``"CUDA"`` maps to ``"cu"``; every other target maps to ``"cpp"``.
+    """
+    source_extensions = {"CUDA": "cu"}
+    try:
+        return source_extensions[target_suffix]
+    except KeyError:
+        return "cpp"
+
+
+def patch_file(class_name, extension, target_suffix, patch):
+    """
+    Apply ``patch`` to the file ``{class_name}.{extension}`` in place.
+
+    The file is looked up relative to the current working directory.
+    ``patch`` is called as ``patch(content, target_suffix)`` and must return
+    the new file content. The file is only rewritten when the returned
+    content differs from what was read.
+    """
+    filepath = f"{class_name}.{extension}"
+    with open(filepath, "r+") as stream:
+        original = stream.read()
+        patched = patch(original, target_suffix)
+        if patched == original:
+            return
+        # rewind and empty the file before writing the new content
+        stream.seek(0)
+        stream.truncate()
+        stream.write(patched)
+
+
with code_generation_context.CodeGeneration() as ctx:
ctx.double_accuracy = not args.single_precision
if target == ps.Target.GPU:
@@ -196,26 +216,57 @@ def paramlist(parameters, keys):
# generate PackInfo
assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments(
fields, streaming_pattern="pull")
- spec = pystencils_espresso.generate_pack_info_vector_field_specifications(
+ spec = pystencils_espresso.generate_pack_info_field_specifications(
config, stencil, force_field.layout)
- for params, target_suffix in paramlist(parameters, ["CPU"]):
+
+ def patch_packinfo_header(content, target_suffix):
+     """
+     Patch the text of a generated PackInfo header.
+
+     For the ``""`` and ``"AVX"`` targets, the TODO comment is removed and
+     the sizes passed to ``outBuffer.forward()`` and ``buffer.skip()`` are
+     enlarged by one ``sizeof(float)`` or ``sizeof(double)``, depending on
+     whether ``"SinglePrecision"`` occurs in the content. For the
+     ``"CUDA"`` target, the ``FUNC_PREFIX`` macro definition and the
+     ``#ifdef __GNUC__`` block are removed. Other targets are returned
+     unchanged. Expected tokens that are missing trigger an assertion.
+     """
+     if target_suffix in ["CUDA"]:
+         macro = "#define FUNC_PREFIX __global__"
+         assert macro in content
+         content = content.replace(macro, "")
+         return re.sub(r"#ifdef __GNUC__[\s\S]+?#endif\n\n", "", content)
+     if target_suffix not in ["", "AVX"]:
+         return content
+     todo = "\n //TODO: optimize by generating kernel for this case\n"
+     assert todo in content
+     content = content.replace(todo, "\n")
+     float_type = "float" if "SinglePrecision" in content else "double"
+     buffer_calls = (
+         " pack(dir, outBuffer.forward(dataSize)",
+         " unpack(dir, buffer.skip(dataSize)",
+     )
+     for call in buffer_calls:
+         assert call in content
+         # drop the closing parenthesis, append the extra size, close again
+         content = content.replace(
+             call, f"{call[:-1]} + sizeof({float_type}))")
+     return content
+
+ def patch_packinfo_kernel(content, target_suffix):
+     """
+     Patch the text of a generated PackInfo source file.
+
+     For the ``""`` and ``"AVX"`` targets, a statement is inserted in front
+     of the ``reinterpret_cast`` of ``byte_buffer`` that advances the
+     pointer to the next multiple of the element size. For the ``"CUDA"``
+     target, the generated device preprocessor section is inserted after
+     the ``FUNC_PREFIX`` macro definition and extra waLBerla headers are
+     included before the PackInfo header. Other targets are returned
+     unchanged. Expected tokens that are missing trigger an assertion.
+     """
+     if target_suffix in ["", "AVX"]:
+         # fix MPI buffer
+         # raw string: "\*" and "\(" are regex escapes, not string escapes
+         m = re.search(r"(float|double) *\* *buffer = reinterpret_cast<(?:float|double) *\*>\(byte_buffer\);\n", content)  # nopep8
+         assert m is not None
+         cast, ft = m.group(0), m.group(1)
+         # the pointer value must be cast to an integer type for the
+         # division; the offset added is sizeof(ft) minus the remainder
+         content = content.replace(cast, f"byte_buffer += sizeof({ft}) - (reinterpret_cast<std::size_t>(byte_buffer) - (reinterpret_cast<std::size_t>(byte_buffer) / sizeof({ft})) * sizeof({ft}));\n {cast}")  # nopep8
+     if target_suffix in ["CUDA"]:
+         token = "#define FUNC_PREFIX __global__"
+         assert token in content
+         # only the first section is needed here; the second one is unused
+         push, _ = custom_additional_extensions.generate_device_preprocessor(
+             "packinfo", defines=("RESTRICT",))
+         content = content.replace(token, f"{token}\n{push}")
+         token = '#include "PackInfo'
+         assert token in content
+         content = content.replace(token, f'#include "core/DataTypes.h"\n#include "core/cell/CellInterval.h"\n#include "domain_decomposition/IBlock.h"\n#include "stencil/Directions.h"\n\n{token}')  # nopep8
+     return content
+
+ for params, target_suffix in paramlist(parameters, ["CPU", "GPU"]):
pystencils_walberla.generate_pack_info_from_kernel(
ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments,
kind="pull", **params)
pystencils_walberla.generate_pack_info(
ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params)
- if target_suffix == "CUDA":
- continue
- token = "\n //TODO: optimize by generating kernel for this case\n"
- for field_suffix in ["Pdf", "Vec"]:
- class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}" # nopep8
- with open(f"{class_name}.h", "r+") as f:
- content = f.read()
- assert token in content
- content = content.replace(token, "\n")
- f.seek(0)
- f.truncate()
- f.write(content)
+ for suffix in ["Pdf", "Vec"]:
+ class_name = f"PackInfo{suffix}{precision_prefix}{target_suffix}"
+ patch_file(class_name, get_ext_header(target_suffix),
+ target_suffix, patch_packinfo_header)
+ patch_file(class_name, get_ext_source(target_suffix),
+ target_suffix, patch_packinfo_kernel)
# boundary conditions
ubb_dynamic = lbmpy_espresso.UBB(
@@ -223,17 +274,28 @@ def paramlist(parameters, keys):
ubb_data_handler = lbmpy_espresso.BounceBackSlipVelocityUBB(
method.stencil, ubb_dynamic)
- for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
+ # pylint: disable=unused-argument
+ def patch_boundary_header(content, target_suffix):
+     """
+     Replace every ``real_t`` in the generated boundary header by the C
+     name of the configured default data type. ``target_suffix`` is unused
+     and only present to match the signature expected by ``patch_file()``.
+     """
+     c_type_name = config.data_type.default_factory().c_name
+     return content.replace("real_t", c_type_name)  # nopep8
+
+ def patch_boundary_kernel(content, target_suffix):
+     """
+     Patch the text of a generated boundary source file.
+
+     For the ``"CUDA"`` target, every ``#ifdef __GNUC__`` block is removed,
+     then the first remaining ``#ifdef __CUDACC__`` block is replaced by
+     the first generated preprocessor section and the next one by the
+     second section. Other targets are returned unchanged. An assertion
+     fails if either section is absent from the result.
+     """
+     if target_suffix in ["CUDA"]:
+         push, pop = custom_additional_extensions.generate_device_preprocessor(
+             "ubb_boundary", defines=("RESTRICT",))
+         content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif(?=\n\n|\n//)", "", content)  # nopep8
+         cuda_guard = r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)"
+         for section in (push, pop):
+             # a callable replacement inserts the text verbatim, whereas a
+             # string replacement would be parsed for backslash escapes;
+             # "count" is passed by keyword since the positional form is
+             # deprecated in recent Python versions
+             content = re.sub(cuda_guard, lambda _m, text=section: text,
+                              content, count=1)
+         assert push in content
+         assert pop in content
+     return content
+
+ for _, target_suffix in paramlist(parameters, ("CPU", "GPU")):
+ class_name = f"Dynamic_UBB_{precision_suffix}{target_suffix}"
lbmpy_walberla.generate_boundary(
- ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic,
- method, additional_data_handler=ubb_data_handler,
+ ctx, class_name, ubb_dynamic, method,
+ additional_data_handler=ubb_data_handler,
streaming_pattern=streaming_pattern, target=target)
-
- with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f:
- content = f.read()
- f.seek(0)
- f.truncate(0)
- # patch for floating point accuracy
- content = content.replace("real_t",
- config.data_type.default_factory().c_name)
- f.write(content)
+ patch_file(class_name, get_ext_header(target_suffix),
+ target_suffix, patch_boundary_header)
+ patch_file(class_name, get_ext_source(target_suffix),
+ target_suffix, patch_boundary_kernel)
diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py
index 3cf6edfbf3..1fd3321aeb 100644
--- a/maintainer/walberla_kernels/pystencils_espresso.py
+++ b/maintainer/walberla_kernels/pystencils_espresso.py
@@ -239,7 +239,8 @@ def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern):
return lbm_update_rule.all_assignments
-def generate_pack_info_vector_field_specifications(config, stencil, layout):
+def generate_pack_info_field_specifications(
+ config, stencil, layout, vec_len=3):
import collections
import itertools
field = ps.Field.create_generic(
@@ -248,7 +249,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout):
data_type_np[config.data_type.default_factory().c_name],
index_dimensions=1,
layout=layout,
- index_shape=(3,)
+ index_shape=(vec_len,)
)
q = len(stencil)
coord = itertools.product(*[(-1, 0, 1)] * 3)
@@ -257,7 +258,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout):
else:
dirs = tuple((i, j, k) for i, j, k in coord)
spec = collections.defaultdict(set)
- spec[dirs] = {field[0, 0, 0](i) for i in range(3)}
+ spec[dirs] = {field[0, 0, 0](i) for i in range(vec_len)}
return spec
diff --git a/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh b/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh
new file mode 100644
index 0000000000..16cefedbae
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2024 The ESPResSo project
+ * Copyright (C) 2024 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* section */
+
+#if defined(__NVCC__)
+{% for name in defines -%}
+#define {{name}} {{defines_table["nvcc"][name]}}
+{% endfor -%}
+{% if pragmas["nvcc"] -%}
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+{% for pragma in pragmas["nvcc"] -%}
+#pragma nv_{{pragma}}
+{% endfor -%}
+#else
+#pragma push
+{% for pragma in pragmas["nvcc"] -%}
+#pragma {{pragma}}
+{% endfor -%}
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+{% endif -%}
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+{% for name in defines -%}
+#define {{name}} {{defines_table["clang_dev"][name]}}
+{% endfor -%}
+{% if pragmas["clang_dev"] -%}
+#pragma clang diagnostic push
+{% for pragma in pragmas["clang_dev"] -%}
+#pragma clang diagnostic ignored "{{pragma}}"
+{% endfor -%}
+{% endif -%}
+#else
+// clang compiling CUDA code in host mode
+{% for name in defines -%}
+#define {{name}} {{defines_table["clang_host"][name]}}
+{% endfor -%}
+{% if pragmas["clang_host"] -%}
+#pragma clang diagnostic push
+{% for pragma in pragmas["clang_host"] -%}
+#pragma clang diagnostic ignored "{{pragma}}"
+{% endfor -%}
+{% endif -%}
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+{% for name in defines -%}
+#define {{name}} {{defines_table["gcc"][name]}}
+{% endfor -%}
+{% if pragmas["gcc"] -%}
+#pragma GCC diagnostic push
+{% for pragma in pragmas["gcc"] -%}
+#pragma GCC diagnostic ignored "{{pragma}}"
+{% endfor -%}
+{% endif -%}
+#elif defined(_MSC_VER)
+{% for name in defines -%}
+#define {{name}} {{defines_table["msvc"][name]}}
+{% endfor -%}
+#else
+{% for name in defines -%}
+#define {{name}} {{defines_table["other"][name]}}
+{% endfor -%}
+#endif
+
+/* section */
+
+#if defined(__NVCC__)
+{% if pragmas["nvcc"] -%}
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic pop
+#else
+#pragma pop
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+{% endif -%}
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+{% if pragmas["clang_dev"] -%}
+#pragma clang diagnostic pop
+{% endif -%}
+#else
+{% if pragmas["clang_host"] -%}
+// clang compiling CUDA code in host mode
+#pragma clang diagnostic pop
+{% endif -%}
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+{% if pragmas["gcc"] -%}
+#pragma GCC diagnostic pop
+{% endif -%}
+#endif
diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
index 6f1fedae10..8986fed7b4 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
@@ -119,11 +119,10 @@ class LBWalberlaImpl : public LBWalberlaBase {
using VectorField = field::GhostLayerField;
template
using PackInfo = field::communication::PackInfo;
- template
- using PackInfoStreaming =
- std::conditional_t,
- typename detail::KernelTrait::PackInfoPdf,
- typename detail::KernelTrait::PackInfoVec>;
+ using PackInfoStreamingPdf =
+ typename detail::KernelTrait::PackInfoPdf;
+ using PackInfoStreamingVec =
+ typename detail::KernelTrait::PackInfoVec;
template
using RegularCommScheme =
blockforest::communication::UniformBufferedScheme;
@@ -134,14 +133,30 @@ class LBWalberlaImpl : public LBWalberlaBase {
#if defined(__CUDACC__)
template struct FieldTrait {
+ private:
+ static auto constexpr AT = lbmpy::Arch::GPU;
+ template
+ using MemcpyPackInfo = gpu::communication::MemcpyPackInfo;
+
+ public:
+ template
+ class UniformGPUScheme
+ : public gpu::communication::UniformGPUScheme {
+ public:
+ explicit UniformGPUScheme(auto const &bf)
+ : gpu::communication::UniformGPUScheme(
+ bf, /* sendDirectlyFromGPU */ false,
+ /* useLocalCommunication */ false) {}
+ };
using PdfField = gpu::GPUField;
using VectorField = gpu::GPUField;
- template
- using PackInfo = gpu::communication::MemcpyPackInfo;
- template
- using PackInfoStreaming = gpu::communication::MemcpyPackInfo;
+ template using PackInfo = MemcpyPackInfo;
+ using PackInfoStreamingPdf =
+ typename detail::KernelTrait::PackInfoPdf;
+ using PackInfoStreamingVec =
+ typename detail::KernelTrait::PackInfoVec;
template
- using RegularCommScheme = gpu::communication::UniformGPUScheme;
+ using RegularCommScheme = UniformGPUScheme;
template
using BoundaryCommScheme =
blockforest::communication::UniformBufferedScheme;
@@ -315,10 +330,6 @@ class LBWalberlaImpl : public LBWalberlaBase {
template
using PackInfo =
typename FieldTrait::template PackInfo;
- template
- using PackInfoStreaming =
- typename FieldTrait::template PackInfoStreaming;
// communicators
std::shared_ptr m_boundary_communicator;
@@ -427,20 +438,22 @@ class LBWalberlaImpl : public LBWalberlaBase {
}
void setup_streaming_communicator() {
- auto const setup = [this]() {
+ auto const setup = [this]() {
auto const &blocks = m_lattice->get_blocks();
m_pdf_streaming_communicator =
std::make_shared(blocks);
m_pdf_streaming_communicator->addPackInfo(
- std::make_shared(m_pdf_field_id));
+ std::make_shared(m_pdf_field_id));
m_pdf_streaming_communicator->addPackInfo(
- std::make_shared>(
- m_last_applied_force_field_id));
+ std::make_shared(m_last_applied_force_field_id));
};
+ using FieldTrait = FieldTrait;
+ using PackInfoPdf = typename FieldTrait::PackInfoStreamingPdf;
+ using PackInfoVec = typename FieldTrait::PackInfoStreamingVec;
if (m_has_boundaries or (m_collision_model and has_lees_edwards_bc())) {
- setup.template operator()>();
+ setup.template operator(), PackInfoVec>();
} else {
- setup.template operator()>();
+ setup.template operator()();
}
}
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
index 434d968d52..eadeb04c2f 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
@@ -54,6 +54,10 @@ if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
StreamSweepSinglePrecisionCUDA.cu
InitialPDFsSetterDoublePrecisionCUDA.cu
InitialPDFsSetterSinglePrecisionCUDA.cu
+ PackInfoPdfSinglePrecisionCUDA.cu
+ PackInfoPdfDoublePrecisionCUDA.cu
+ PackInfoVecSinglePrecisionCUDA.cu
+ PackInfoVecDoublePrecisionCUDA.cu
Dynamic_UBB_double_precisionCUDA.cu
Dynamic_UBB_single_precisionCUDA.cu)
endif()
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu
index 8d35b5d929..0fd77e065c 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu
@@ -39,7 +39,7 @@ namespace lbm {
#else
#pragma push
#pragma diag_suppress 177 // unused variable
-#endif
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
@@ -58,8 +58,8 @@ namespace lbm {
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wconversion"
#pragma clang diagnostic ignored "-Wsign-compare"
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
#elif defined(__GNUC__) or defined(__GNUG__)
#define RESTRICT __restrict__
#pragma GCC diagnostic push
@@ -128,7 +128,13 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_double_precisioncuda_
// NOLINTEND(readability-non-const-parameter*)
-#if defined(__clang__)
+#if defined(__NVCC__)
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic pop
+#else
+#pragma pop
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
// clang compiling CUDA code in device mode
@@ -136,16 +142,10 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_double_precisioncuda_
#else
// clang compiling CUDA code in host mode
#pragma clang diagnostic pop
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic pop
-#elif defined(__CUDACC__)
-#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
-#pragma nv_diagnostic pop
-#else
-#pragma pop
-#endif
#endif
void Dynamic_UBB_double_precisionCUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu
index a046e6f9a4..4ed013d81e 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu
@@ -39,7 +39,7 @@ namespace lbm {
#else
#pragma push
#pragma diag_suppress 177 // unused variable
-#endif
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
@@ -58,8 +58,8 @@ namespace lbm {
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wconversion"
#pragma clang diagnostic ignored "-Wsign-compare"
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
#elif defined(__GNUC__) or defined(__GNUG__)
#define RESTRICT __restrict__
#pragma GCC diagnostic push
@@ -128,7 +128,13 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_
// NOLINTEND(readability-non-const-parameter*)
-#if defined(__clang__)
+#if defined(__NVCC__)
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic pop
+#else
+#pragma pop
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
#if defined(__CUDA__)
#if defined(__CUDA_ARCH__)
// clang compiling CUDA code in device mode
@@ -136,16 +142,10 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_
#else
// clang compiling CUDA code in host mode
#pragma clang diagnostic pop
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
#elif defined(__GNUC__) or defined(__GNUG__)
#pragma GCC diagnostic pop
-#elif defined(__CUDACC__)
-#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
-#pragma nv_diagnostic pop
-#else
-#pragma pop
-#endif
#endif
void Dynamic_UBB_single_precisionCUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp
index 1ab45417dc..6503551664 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp
@@ -24,8 +24,6 @@
#include "core/cell/CellInterval.h"
#include "stencil/Directions.h"
-#include
-
#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu
new file mode 100644
index 0000000000..5636dad6a3
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu
@@ -0,0 +1,1423 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfDoublePrecisionCUDA.cpp
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoPdfDoublePrecisionCUDA.h"
+
+#define FUNC_PREFIX __global__
+
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW {
+// Copy one value per (ctr_0, ctr_1, ctr_2) index from `_data_pdfs` into
+// `_data_buffer`: the entry at offset 9 along the `_stride_pdfs_3` axis.
+// The buffer is written densely, with ctr_0 varying fastest.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+ // threads whose index falls outside the `_size_pdfs_*` extents do nothing
+ if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) {
+ // 3D index of this thread, derived from its block and thread indices
+ const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+ const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+ const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+ _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
+ }
+}
+} // namespace internal_pack_SW
+
+namespace internal_pack_BW {
+// Copy one value per (ctr_0, ctr_1, ctr_2) index from `_data_pdfs` into
+// `_data_buffer`: the entry at offset 17 along the `_stride_pdfs_3` axis.
+// The buffer is written densely, with ctr_0 varying fastest.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+ // threads whose index falls outside the `_size_pdfs_*` extents do nothing
+ if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) {
+ // 3D index of this thread, derived from its block and thread indices
+ const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+ const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+ const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+ _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3];
+ }
+}
+} // namespace internal_pack_BW
+
+namespace internal_pack_W {
+// Copy five values per (ctr_0, ctr_1, ctr_2) index from `_data_pdfs` into
+// `_data_buffer`: the entries at offsets 13, 17, 3, 7 and 9 along the
+// `_stride_pdfs_3` axis, stored in that order in five consecutive buffer
+// slots. The groups of five are laid out densely, with ctr_0 varying fastest.
+static FUNC_PREFIX __launch_bounds__(256) void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+ // threads whose index falls outside the `_size_pdfs_*` extents do nothing
+ if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) {
+ // 3D index of this thread, derived from its block and thread indices
+ const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+ const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+ const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+ _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3];
+ _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3];
+ _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3];
+ _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3];
+ _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3];
+ }
+}
+} // namespace internal_pack_W
+
+namespace internal_pack_TW {
+// Copy one value per (ctr_0, ctr_1, ctr_2) index from `_data_pdfs` into
+// `_data_buffer`: the entry at offset 13 along the `_stride_pdfs_3` axis.
+// The buffer is written densely, with ctr_0 varying fastest.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+ // threads whose index falls outside the `_size_pdfs_*` extents do nothing
+ if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) {
+ // 3D index of this thread, derived from its block and thread indices
+ const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+ const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+ const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+ _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3];
+ }
+}
+} // namespace internal_pack_TW
+
+namespace internal_pack_NW {
+// Pack kernel: each in-range thread copies the entry at offset 7 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 7 * _stride_pdfs_3];
+}
+} // namespace internal_pack_NW
+
+namespace internal_pack_BS {
+// Pack kernel: each in-range thread copies the entry at offset 16 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 16 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BS
+
+namespace internal_pack_S {
+// Pack kernel: each in-range thread copies five entries of its cell (offsets 10, 12, 16, 2, 9 along the fourth pdfs axis, in that order) into five consecutive buffer entries.
+static FUNC_PREFIX __launch_bounds__(256) void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_buffer[base] = _data_pdfs[cell + 10 * _stride_pdfs_3];
+  _data_buffer[base + 1] = _data_pdfs[cell + 12 * _stride_pdfs_3];
+  _data_buffer[base + 2] = _data_pdfs[cell + 16 * _stride_pdfs_3];
+  _data_buffer[base + 3] = _data_pdfs[cell + 2 * _stride_pdfs_3];
+  _data_buffer[base + 4] = _data_pdfs[cell + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_S
+
+namespace internal_pack_TS {
+// Pack kernel: each in-range thread copies the entry at offset 12 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 12 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B {
+// Pack kernel: each in-range thread copies five entries of its cell (offsets 15, 16, 17, 18, 6 along the fourth pdfs axis, in that order) into five consecutive buffer entries.
+static FUNC_PREFIX __launch_bounds__(256) void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_buffer[base] = _data_pdfs[cell + 15 * _stride_pdfs_3];
+  _data_buffer[base + 1] = _data_pdfs[cell + 16 * _stride_pdfs_3];
+  _data_buffer[base + 2] = _data_pdfs[cell + 17 * _stride_pdfs_3];
+  _data_buffer[base + 3] = _data_pdfs[cell + 18 * _stride_pdfs_3];
+  _data_buffer[base + 4] = _data_pdfs[cell + 6 * _stride_pdfs_3];
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T {
+// Pack kernel: each in-range thread copies five entries of its cell (offsets 11, 12, 13, 14, 5 along the fourth pdfs axis, in that order) into five consecutive buffer entries.
+static FUNC_PREFIX __launch_bounds__(256) void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_buffer[base] = _data_pdfs[cell + 11 * _stride_pdfs_3];
+  _data_buffer[base + 1] = _data_pdfs[cell + 12 * _stride_pdfs_3];
+  _data_buffer[base + 2] = _data_pdfs[cell + 13 * _stride_pdfs_3];
+  _data_buffer[base + 3] = _data_pdfs[cell + 14 * _stride_pdfs_3];
+  _data_buffer[base + 4] = _data_pdfs[cell + 5 * _stride_pdfs_3];
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN {
+// Pack kernel: each in-range thread copies the entry at offset 15 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 15 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N {
+// Pack kernel: each in-range thread copies five entries of its cell (offsets 1, 11, 15, 7, 8 along the fourth pdfs axis, in that order) into five consecutive buffer entries.
+static FUNC_PREFIX __launch_bounds__(256) void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_buffer[base] = _data_pdfs[cell + _stride_pdfs_3];
+  _data_buffer[base + 1] = _data_pdfs[cell + 11 * _stride_pdfs_3];
+  _data_buffer[base + 2] = _data_pdfs[cell + 15 * _stride_pdfs_3];
+  _data_buffer[base + 3] = _data_pdfs[cell + 7 * _stride_pdfs_3];
+  _data_buffer[base + 4] = _data_pdfs[cell + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN {
+// Pack kernel: each in-range thread copies the entry at offset 11 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 11 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TN
+
+namespace internal_pack_SE {
+// Pack kernel: each in-range thread copies the entry at offset 10 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_SE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 10 * _stride_pdfs_3];
+}
+} // namespace internal_pack_SE
+
+namespace internal_pack_BE {
+// Pack kernel: each in-range thread copies the entry at offset 18 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_BE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 18 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BE
+
+namespace internal_pack_E {
+// Pack kernel: each in-range thread copies five entries of its cell (offsets 10, 14, 18, 4, 8 along the fourth pdfs axis, in that order) into five consecutive buffer entries.
+static FUNC_PREFIX __launch_bounds__(256) void pack_E(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_buffer[base] = _data_pdfs[cell + 10 * _stride_pdfs_3];
+  _data_buffer[base + 1] = _data_pdfs[cell + 14 * _stride_pdfs_3];
+  _data_buffer[base + 2] = _data_pdfs[cell + 18 * _stride_pdfs_3];
+  _data_buffer[base + 3] = _data_pdfs[cell + 4 * _stride_pdfs_3];
+  _data_buffer[base + 4] = _data_pdfs[cell + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_E
+
+namespace internal_pack_TE {
+// Pack kernel: each in-range thread copies the entry at offset 14 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_TE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 14 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TE
+
+namespace internal_pack_NE {
+// Pack kernel: each in-range thread copies the entry at offset 8 along the fourth pdfs axis of its cell into the buffer (one double per cell).
+static FUNC_PREFIX __launch_bounds__(256) void pack_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_buffer[slot] = _data_pdfs[cell + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_NE
+
+namespace internal_unpack_SW {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 9 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 9 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_BW {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 17 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W {
+// Unpack kernel: each in-range thread reads five consecutive buffer entries and stores them at offsets 13, 17, 3, 7, 9 (in that order) along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[base];
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[base + 1];
+  _data_pdfs[cell + 3 * _stride_pdfs_3] = _data_buffer[base + 2];
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[base + 3];
+  _data_pdfs[cell + 9 * _stride_pdfs_3] = _data_buffer[base + 4];
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 13 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_NW {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 7 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_BS {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 16 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S {
+// Unpack kernel: each in-range thread reads five consecutive buffer entries and stores them at offsets 10, 12, 16, 2, 9 (in that order) along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_pdfs[cell + 10 * _stride_pdfs_3] = _data_buffer[base];
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[base + 1];
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[base + 2];
+  _data_pdfs[cell + 2 * _stride_pdfs_3] = _data_buffer[base + 3];
+  _data_pdfs[cell + 9 * _stride_pdfs_3] = _data_buffer[base + 4];
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 12 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B {
+// Unpack kernel: each in-range thread reads five consecutive buffer entries and stores them at offsets 15, 16, 17, 18, 6 (in that order) along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[base];
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[base + 1];
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[base + 2];
+  _data_pdfs[cell + 18 * _stride_pdfs_3] = _data_buffer[base + 3];
+  _data_pdfs[cell + 6 * _stride_pdfs_3] = _data_buffer[base + 4];
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T {
+// Unpack kernel: each in-range thread reads five consecutive buffer entries and stores them at offsets 11, 12, 13, 14, 5 (in that order) along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t base = 5 * (_size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x); // five consecutive buffer entries per cell
+  _data_pdfs[cell + 11 * _stride_pdfs_3] = _data_buffer[base];
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[base + 1];
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[base + 2];
+  _data_pdfs[cell + 14 * _stride_pdfs_3] = _data_buffer[base + 3];
+  _data_pdfs[cell + 5 * _stride_pdfs_3] = _data_buffer[base + 4];
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN {
+// Unpack kernel: each in-range thread reads its buffer entry (one double per cell) and stores it at offset 15 along the fourth pdfs axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // this thread maps to no cell of the iteration space
+  const int64_t cell = _stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z; // element offset of cell (x, y, z) in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * z + _size_pdfs_0 * y + x; // one buffer entry per cell, x varies fastest
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N {
+// One thread per cell: scatters the cell's five consecutive buffer values into components 1, 11, 15, 7 and 8 of _data_pdfs (in buffer order).
+static FUNC_PREFIX __launch_bounds__(256) void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t cell = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2; // strided offset of this cell, before the component offset
+  const int64_t slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x); // first of this cell's five buffer entries
+  _data_pdfs[cell + _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 11 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN {
+// One thread per cell: copies the cell's single buffer value (buffer is dense, x-fastest) into component 11 of _data_pdfs.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t src = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x; // linear position of this cell in the buffer
+  const int64_t dst = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2 + 11 * _stride_pdfs_3; // strided position in the field
+  _data_pdfs[dst] = _data_buffer[src];
+}
+} // namespace internal_unpack_TN
+
+namespace internal_unpack_SE {
+// One thread per cell: copies the cell's single buffer value (buffer is dense, x-fastest) into component 10 of _data_pdfs.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t src = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x; // linear position of this cell in the buffer
+  const int64_t dst = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2 + 10 * _stride_pdfs_3; // strided position in the field
+  _data_pdfs[dst] = _data_buffer[src];
+}
+} // namespace internal_unpack_SE
+
+namespace internal_unpack_BE {
+// One thread per cell: copies the cell's single buffer value (buffer is dense, x-fastest) into component 18 of _data_pdfs.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t src = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x; // linear position of this cell in the buffer
+  const int64_t dst = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2 + 18 * _stride_pdfs_3; // strided position in the field
+  _data_pdfs[dst] = _data_buffer[src];
+}
+} // namespace internal_unpack_BE
+
+namespace internal_unpack_E {
+// One thread per cell: scatters the cell's five consecutive buffer values into components 10, 14, 18, 4 and 8 of _data_pdfs (in buffer order).
+static FUNC_PREFIX __launch_bounds__(256) void unpack_E(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t cell = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2; // strided offset of this cell, before the component offset
+  const int64_t slot = 5 * ((z * _size_pdfs_1 + y) * _size_pdfs_0 + x); // first of this cell's five buffer entries
+  _data_pdfs[cell + 10 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 14 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 18 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 4 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_E
+
+namespace internal_unpack_TE {
+// One thread per cell: copies the cell's single buffer value (buffer is dense, x-fastest) into component 14 of _data_pdfs.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t src = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x; // linear position of this cell in the buffer
+  const int64_t dst = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2 + 14 * _stride_pdfs_3; // strided position in the field
+  _data_pdfs[dst] = _data_buffer[src];
+}
+} // namespace internal_unpack_TE
+
+namespace internal_unpack_NE {
+// One thread per cell: copies the cell's single buffer value (buffer is dense, x-fastest) into component 8 of _data_pdfs.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x, y = blockDim.y * blockIdx.y + threadIdx.y, z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread index falls outside the extent: nothing to write
+  const int64_t src = (z * _size_pdfs_1 + y) * _size_pdfs_0 + x; // linear position of this cell in the buffer
+  const int64_t dst = x * _stride_pdfs_0 + y * _stride_pdfs_1 + z * _stride_pdfs_2 + 8 * _stride_pdfs_3; // strided position in the field
+  _data_pdfs[dst] = _data_buffer[src];
+}
+} // namespace internal_unpack_NE
+
+void PackInfoPdfDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+ double *buffer = reinterpret_cast(byte_buffer);
+
+ auto pdfs = block->getData>(pdfsID);
+
+ CellInterval ci;
+ pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+ switch (dir) {
+ case stencil::SW: {
+ double *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BW: {
+ double *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::W: {
+ double *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TW: { // Direction TW: derive sizes, strides and launch geometry from cell interval ci, then launch pack_TW. Presumably copies PDF values into buffer; the kernel body is not visible here.
+ double *RESTRICT _data_buffer = buffer; // Raw buffer pointer, passed to the kernel as its first argument.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // Lower x bound of the interval must be >= -(ghost layer count).
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for y.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for z.
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // Pointer to the field element at (xMin, yMin, zMin) with last index 0.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // Field x extent incl. ghost layers must cover the interval's x size.
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // Interval size along x; the generated "+ 0" offset changes nothing.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // Field y extent incl. ghost layers must cover the interval's y size.
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // Interval size along y.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // Field z extent incl. ghost layers must cover the interval's z size.
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // Interval size along z.
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // Field stride along x, as reported by the field.
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // Field stride along y.
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // Field stride along z.
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // Stride returned by fStride(), scaled by a literal 1.
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // Block extents (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by x, so an interval with x size 0 would divide by zero - confirm callers never pass one.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // Grid extents: per axis, size_i divided by the matching block extent, rounded up (quotient plus 1 when the remainder is non-zero).
+ internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // Launch pack_TW on stream with the geometry above; third launch parameter 0 means no dynamic shared memory.
+ break;
+ }
+
+ case stencil::NW: { // Direction NW: derive sizes, strides and launch geometry from cell interval ci, then launch pack_NW. Presumably copies PDF values into buffer; the kernel body is not visible here.
+ double *RESTRICT _data_buffer = buffer; // Raw buffer pointer, passed to the kernel as its first argument.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // Lower x bound of the interval must be >= -(ghost layer count).
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for y.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for z.
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // Pointer to the field element at (xMin, yMin, zMin) with last index 0.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // Field x extent incl. ghost layers must cover the interval's x size.
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // Interval size along x; the generated "+ 0" offset changes nothing.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // Field y extent incl. ghost layers must cover the interval's y size.
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // Interval size along y.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // Field z extent incl. ghost layers must cover the interval's z size.
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // Interval size along z.
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // Field stride along x, as reported by the field.
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // Field stride along y.
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // Field stride along z.
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // Stride returned by fStride(), scaled by a literal 1.
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // Block extents (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by x, so an interval with x size 0 would divide by zero - confirm callers never pass one.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // Grid extents: per axis, size_i divided by the matching block extent, rounded up (quotient plus 1 when the remainder is non-zero).
+ internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // Launch pack_NW on stream with the geometry above; third launch parameter 0 means no dynamic shared memory.
+ break;
+ }
+
+ case stencil::BS: { // Direction BS: derive sizes, strides and launch geometry from cell interval ci, then launch pack_BS. Presumably copies PDF values into buffer; the kernel body is not visible here.
+ double *RESTRICT _data_buffer = buffer; // Raw buffer pointer, passed to the kernel as its first argument.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // Lower x bound of the interval must be >= -(ghost layer count).
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for y.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for z.
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // Pointer to the field element at (xMin, yMin, zMin) with last index 0.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // Field x extent incl. ghost layers must cover the interval's x size.
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // Interval size along x; the generated "+ 0" offset changes nothing.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // Field y extent incl. ghost layers must cover the interval's y size.
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // Interval size along y.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // Field z extent incl. ghost layers must cover the interval's z size.
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // Interval size along z.
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // Field stride along x, as reported by the field.
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // Field stride along y.
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // Field stride along z.
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // Stride returned by fStride(), scaled by a literal 1.
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // Block extents (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by x, so an interval with x size 0 would divide by zero - confirm callers never pass one.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // Grid extents: per axis, size_i divided by the matching block extent, rounded up (quotient plus 1 when the remainder is non-zero).
+ internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // Launch pack_BS on stream with the geometry above; third launch parameter 0 means no dynamic shared memory.
+ break;
+ }
+
+ case stencil::S: { // Direction S: derive sizes, strides and launch geometry from cell interval ci, then launch pack_S. Presumably copies PDF values into buffer; the kernel body is not visible here.
+ double *RESTRICT _data_buffer = buffer; // Raw buffer pointer, passed to the kernel as its first argument.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // Lower x bound of the interval must be >= -(ghost layer count).
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for y.
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // Same lower-bound check for z.
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // Pointer to the field element at (xMin, yMin, zMin) with last index 0.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // Field x extent incl. ghost layers must cover the interval's x size.
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // Interval size along x; the generated "+ 0" offset changes nothing.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // Field y extent incl. ghost layers must cover the interval's y size.
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // Interval size along y.
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // Field z extent incl. ghost layers must cover the interval's z size.
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // Interval size along z.
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // Field stride along x, as reported by the field.
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // Field stride along y.
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // Field stride along z.
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // Stride returned by fStride(), scaled by a literal 1.
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // Block extents (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by x, so an interval with x size 0 would divide by zero - confirm callers never pass one.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // Grid extents: per axis, size_i divided by the matching block extent, rounded up (quotient plus 1 when the remainder is non-zero).
+ internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // Launch pack_S on stream with the geometry above; third launch parameter 0 means no dynamic shared memory.
+ break;
+ }
+
+ case stencil::TS: {
+   // Launch internal_pack_TS::pack_TS on `stream` over the cell interval `ci`.
+   // NOTE(review): presumably this copies the PDF populations leaving in
+   // direction TS into `buffer` -- confirm against the kernel definition.
+   double *RESTRICT _data_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Extents of the cell interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   // Field strides forwarded to the kernel (x, y, z, f).
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Thread-block extents: x is the x size capped at 128; y is the y size
+   // capped at 2 * (128 / x) and then at 1024; z is the z size capped at
+   // 256 / (x * y before the 1024 cap) and then at 64.
+   const int64_t _bx = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _by_limit = 2 * (int64_t(128) / _bx);
+   const int64_t _by_fit = (_size_pdfs_1 < _by_limit) ? _size_pdfs_1 : _by_limit;
+   const int64_t _by = (1024 < _by_fit) ? 1024 : _by_fit;
+   const int64_t _bz_limit = int64_t(256) / (_bx * _by_fit);
+   const int64_t _bz_fit = (_size_pdfs_2 < _bz_limit) ? _size_pdfs_2 : _bz_limit;
+   const int64_t _bz = (64 < _bz_fit) ? 64 : _bz_fit;
+   // Grid extents: size divided by the block extent, rounded up.
+   const int64_t _gx = (_size_pdfs_0 % _bx == 0) ? _size_pdfs_0 / _bx : _size_pdfs_0 / _bx + 1;
+   const int64_t _gy = (_size_pdfs_1 % _by == 0) ? _size_pdfs_1 / _by : _size_pdfs_1 / _by + 1;
+   const int64_t _gz = (_size_pdfs_2 % _bz == 0) ? _size_pdfs_2 / _bz : _size_pdfs_2 / _bz + 1;
+   dim3 _block(uint32_c(_bx), uint32_c(_by), uint32_c(_bz));
+   dim3 _grid(uint32_c(_gx), uint32_c(_gy), uint32_c(_gz));
+   internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::B: {
+   // Launch internal_pack_B::pack_B on `stream` over the cell interval `ci`.
+   // NOTE(review): presumably this copies the PDF populations leaving in
+   // direction B into `buffer` -- confirm against the kernel definition.
+   double *RESTRICT _data_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Extents of the cell interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   // Field strides forwarded to the kernel (x, y, z, f).
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Thread-block extents: x is the x size capped at 128; y is the y size
+   // capped at 2 * (128 / x) and then at 1024; z is the z size capped at
+   // 256 / (x * y before the 1024 cap) and then at 64.
+   const int64_t _bx = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _by_limit = 2 * (int64_t(128) / _bx);
+   const int64_t _by_fit = (_size_pdfs_1 < _by_limit) ? _size_pdfs_1 : _by_limit;
+   const int64_t _by = (1024 < _by_fit) ? 1024 : _by_fit;
+   const int64_t _bz_limit = int64_t(256) / (_bx * _by_fit);
+   const int64_t _bz_fit = (_size_pdfs_2 < _bz_limit) ? _size_pdfs_2 : _bz_limit;
+   const int64_t _bz = (64 < _bz_fit) ? 64 : _bz_fit;
+   // Grid extents: size divided by the block extent, rounded up.
+   const int64_t _gx = (_size_pdfs_0 % _bx == 0) ? _size_pdfs_0 / _bx : _size_pdfs_0 / _bx + 1;
+   const int64_t _gy = (_size_pdfs_1 % _by == 0) ? _size_pdfs_1 / _by : _size_pdfs_1 / _by + 1;
+   const int64_t _gz = (_size_pdfs_2 % _bz == 0) ? _size_pdfs_2 / _bz : _size_pdfs_2 / _bz + 1;
+   dim3 _block(uint32_c(_bx), uint32_c(_by), uint32_c(_bz));
+   dim3 _grid(uint32_c(_gx), uint32_c(_gy), uint32_c(_gz));
+   internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::T: {
+   // Launch internal_pack_T::pack_T on `stream` over the cell interval `ci`.
+   // NOTE(review): presumably this copies the PDF populations leaving in
+   // direction T into `buffer` -- confirm against the kernel definition.
+   double *RESTRICT _data_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Extents of the cell interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   // Field strides forwarded to the kernel (x, y, z, f).
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Thread-block extents: x is the x size capped at 128; y is the y size
+   // capped at 2 * (128 / x) and then at 1024; z is the z size capped at
+   // 256 / (x * y before the 1024 cap) and then at 64.
+   const int64_t _bx = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _by_limit = 2 * (int64_t(128) / _bx);
+   const int64_t _by_fit = (_size_pdfs_1 < _by_limit) ? _size_pdfs_1 : _by_limit;
+   const int64_t _by = (1024 < _by_fit) ? 1024 : _by_fit;
+   const int64_t _bz_limit = int64_t(256) / (_bx * _by_fit);
+   const int64_t _bz_fit = (_size_pdfs_2 < _bz_limit) ? _size_pdfs_2 : _bz_limit;
+   const int64_t _bz = (64 < _bz_fit) ? 64 : _bz_fit;
+   // Grid extents: size divided by the block extent, rounded up.
+   const int64_t _gx = (_size_pdfs_0 % _bx == 0) ? _size_pdfs_0 / _bx : _size_pdfs_0 / _bx + 1;
+   const int64_t _gy = (_size_pdfs_1 % _by == 0) ? _size_pdfs_1 / _by : _size_pdfs_1 / _by + 1;
+   const int64_t _gz = (_size_pdfs_2 % _bz == 0) ? _size_pdfs_2 / _bz : _size_pdfs_2 / _bz + 1;
+   dim3 _block(uint32_c(_bx), uint32_c(_by), uint32_c(_bz));
+   dim3 _grid(uint32_c(_gx), uint32_c(_gy), uint32_c(_gz));
+   internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::BN: {
+   // Launch internal_pack_BN::pack_BN on `stream` over the cell interval `ci`.
+   // NOTE(review): presumably this copies the PDF populations leaving in
+   // direction BN into `buffer` -- confirm against the kernel definition.
+   double *RESTRICT _data_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Extents of the cell interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   // Field strides forwarded to the kernel (x, y, z, f).
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Thread-block extents: x is the x size capped at 128; y is the y size
+   // capped at 2 * (128 / x) and then at 1024; z is the z size capped at
+   // 256 / (x * y before the 1024 cap) and then at 64.
+   const int64_t _bx = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _by_limit = 2 * (int64_t(128) / _bx);
+   const int64_t _by_fit = (_size_pdfs_1 < _by_limit) ? _size_pdfs_1 : _by_limit;
+   const int64_t _by = (1024 < _by_fit) ? 1024 : _by_fit;
+   const int64_t _bz_limit = int64_t(256) / (_bx * _by_fit);
+   const int64_t _bz_fit = (_size_pdfs_2 < _bz_limit) ? _size_pdfs_2 : _bz_limit;
+   const int64_t _bz = (64 < _bz_fit) ? 64 : _bz_fit;
+   // Grid extents: size divided by the block extent, rounded up.
+   const int64_t _gx = (_size_pdfs_0 % _bx == 0) ? _size_pdfs_0 / _bx : _size_pdfs_0 / _bx + 1;
+   const int64_t _gy = (_size_pdfs_1 % _by == 0) ? _size_pdfs_1 / _by : _size_pdfs_1 / _by + 1;
+   const int64_t _gz = (_size_pdfs_2 % _bz == 0) ? _size_pdfs_2 / _bz : _size_pdfs_2 / _bz + 1;
+   dim3 _block(uint32_c(_bx), uint32_c(_by), uint32_c(_bz));
+   dim3 _grid(uint32_c(_gx), uint32_c(_gy), uint32_c(_gz));
+   internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::N: {
+   // Direction stencil::N: launch internal_pack_N::pack_N over the cell
+   // interval `ci` of `pdfs`, with `buffer` as the kernel's first argument.
+   // (The kernel body is not visible here; presumably it copies the PDFs
+   // for this direction into the buffer.)
+   double *RESTRICT pack_dst = buffer;
+   // The interval's lower corner must not lie below the ghost-layer region.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner, f index 0.
+   double *RESTRICT const pdf_src = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted not to exceed the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t extent_x = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t extent_y = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t extent_z = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides along x, y, z and f, as reported by the field.
+   const int64_t stride_x = int64_t(pdfs->xStride());
+   const int64_t stride_y = int64_t(pdfs->yStride());
+   const int64_t stride_z = int64_t(pdfs->zStride());
+   const int64_t stride_f = int64_t(1 * int64_t(pdfs->fStride()));
+   // Threads per block. x: the extent, capped at 128.
+   // y: the extent, limited to 2 * (128 / threads_x), then capped at 1024.
+   // z: the extent, limited to 256 / (threads_x * y_fit), then capped at 64.
+   // Note that the z budget uses y_fit, i.e. the y value before the 1024 cap.
+   // All of this assumes every extent is at least 1; a zero extent divides by zero.
+   const int64_t threads_x = (128 < extent_x) ? 128 : extent_x;
+   const int64_t y_budget = 2 * ((int64_t)(128) / (int64_t)(threads_x));
+   const int64_t y_fit = (extent_y < y_budget) ? extent_y : y_budget;
+   const int64_t threads_y = (1024 < y_fit) ? 1024 : y_fit;
+   const int64_t z_budget = (int64_t)(256) / (int64_t)(threads_x * y_fit);
+   const int64_t z_fit = (extent_z < z_budget) ? extent_z : z_budget;
+   const int64_t threads_z = (64 < z_fit) ? 64 : z_fit;
+   // Blocks per grid: extent / threads, plus one more block when there is a remainder.
+   const int64_t blocks_x = (int64_t)(extent_x) / (int64_t)(threads_x) + ((extent_x % threads_x == 0) ? 0 : 1);
+   const int64_t blocks_y = (int64_t)(extent_y) / (int64_t)(threads_y) + ((extent_y % threads_y == 0) ? 0 : 1);
+   const int64_t blocks_z = (int64_t)(extent_z) / (int64_t)(threads_z) + ((extent_z % threads_z == 0) ? 0 : 1);
+   dim3 threads(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 blocks(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   // No dynamic shared memory (0); the launch is issued on `stream`.
+   internal_pack_N::pack_N<<<blocks, threads, 0, stream>>>(
+       pack_dst, pdf_src, extent_x, extent_y, extent_z,
+       stride_x, stride_y, stride_z, stride_f);
+   break;
+ }
+
+ case stencil::TN: {
+   // Direction stencil::TN: launch internal_pack_TN::pack_TN over the cell
+   // interval `ci` of `pdfs`, with `buffer` as the kernel's first argument.
+   // (The kernel body is not visible here; presumably it copies the PDFs
+   // for this direction into the buffer.)
+   double *RESTRICT pack_dst = buffer;
+   // The interval's lower corner must not lie below the ghost-layer region.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner, f index 0.
+   double *RESTRICT const pdf_src = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted not to exceed the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t extent_x = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t extent_y = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t extent_z = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides along x, y, z and f, as reported by the field.
+   const int64_t stride_x = int64_t(pdfs->xStride());
+   const int64_t stride_y = int64_t(pdfs->yStride());
+   const int64_t stride_z = int64_t(pdfs->zStride());
+   const int64_t stride_f = int64_t(1 * int64_t(pdfs->fStride()));
+   // Threads per block. x: the extent, capped at 128.
+   // y: the extent, limited to 2 * (128 / threads_x), then capped at 1024.
+   // z: the extent, limited to 256 / (threads_x * y_fit), then capped at 64.
+   // Note that the z budget uses y_fit, i.e. the y value before the 1024 cap.
+   // All of this assumes every extent is at least 1; a zero extent divides by zero.
+   const int64_t threads_x = (128 < extent_x) ? 128 : extent_x;
+   const int64_t y_budget = 2 * ((int64_t)(128) / (int64_t)(threads_x));
+   const int64_t y_fit = (extent_y < y_budget) ? extent_y : y_budget;
+   const int64_t threads_y = (1024 < y_fit) ? 1024 : y_fit;
+   const int64_t z_budget = (int64_t)(256) / (int64_t)(threads_x * y_fit);
+   const int64_t z_fit = (extent_z < z_budget) ? extent_z : z_budget;
+   const int64_t threads_z = (64 < z_fit) ? 64 : z_fit;
+   // Blocks per grid: extent / threads, plus one more block when there is a remainder.
+   const int64_t blocks_x = (int64_t)(extent_x) / (int64_t)(threads_x) + ((extent_x % threads_x == 0) ? 0 : 1);
+   const int64_t blocks_y = (int64_t)(extent_y) / (int64_t)(threads_y) + ((extent_y % threads_y == 0) ? 0 : 1);
+   const int64_t blocks_z = (int64_t)(extent_z) / (int64_t)(threads_z) + ((extent_z % threads_z == 0) ? 0 : 1);
+   dim3 threads(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 blocks(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   // No dynamic shared memory (0); the launch is issued on `stream`.
+   internal_pack_TN::pack_TN<<<blocks, threads, 0, stream>>>(
+       pack_dst, pdf_src, extent_x, extent_y, extent_z,
+       stride_x, stride_y, stride_z, stride_f);
+   break;
+ }
+
+ case stencil::SE: {
+   // Direction stencil::SE: launch internal_pack_SE::pack_SE over the cell
+   // interval `ci` of `pdfs`, with `buffer` as the kernel's first argument.
+   // (The kernel body is not visible here; presumably it copies the PDFs
+   // for this direction into the buffer.)
+   double *RESTRICT pack_dst = buffer;
+   // The interval's lower corner must not lie below the ghost-layer region.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner, f index 0.
+   double *RESTRICT const pdf_src = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted not to exceed the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t extent_x = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t extent_y = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t extent_z = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides along x, y, z and f, as reported by the field.
+   const int64_t stride_x = int64_t(pdfs->xStride());
+   const int64_t stride_y = int64_t(pdfs->yStride());
+   const int64_t stride_z = int64_t(pdfs->zStride());
+   const int64_t stride_f = int64_t(1 * int64_t(pdfs->fStride()));
+   // Threads per block. x: the extent, capped at 128.
+   // y: the extent, limited to 2 * (128 / threads_x), then capped at 1024.
+   // z: the extent, limited to 256 / (threads_x * y_fit), then capped at 64.
+   // Note that the z budget uses y_fit, i.e. the y value before the 1024 cap.
+   // All of this assumes every extent is at least 1; a zero extent divides by zero.
+   const int64_t threads_x = (128 < extent_x) ? 128 : extent_x;
+   const int64_t y_budget = 2 * ((int64_t)(128) / (int64_t)(threads_x));
+   const int64_t y_fit = (extent_y < y_budget) ? extent_y : y_budget;
+   const int64_t threads_y = (1024 < y_fit) ? 1024 : y_fit;
+   const int64_t z_budget = (int64_t)(256) / (int64_t)(threads_x * y_fit);
+   const int64_t z_fit = (extent_z < z_budget) ? extent_z : z_budget;
+   const int64_t threads_z = (64 < z_fit) ? 64 : z_fit;
+   // Blocks per grid: extent / threads, plus one more block when there is a remainder.
+   const int64_t blocks_x = (int64_t)(extent_x) / (int64_t)(threads_x) + ((extent_x % threads_x == 0) ? 0 : 1);
+   const int64_t blocks_y = (int64_t)(extent_y) / (int64_t)(threads_y) + ((extent_y % threads_y == 0) ? 0 : 1);
+   const int64_t blocks_z = (int64_t)(extent_z) / (int64_t)(threads_z) + ((extent_z % threads_z == 0) ? 0 : 1);
+   dim3 threads(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 blocks(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   // No dynamic shared memory (0); the launch is issued on `stream`.
+   internal_pack_SE::pack_SE<<<blocks, threads, 0, stream>>>(
+       pack_dst, pdf_src, extent_x, extent_y, extent_z,
+       stride_x, stride_y, stride_z, stride_f);
+   break;
+ }
+
+ case stencil::BE: {
+   // Direction stencil::BE: launch internal_pack_BE::pack_BE over the cell
+   // interval `ci` of `pdfs`, with `buffer` as the kernel's first argument.
+   // (The kernel body is not visible here; presumably it copies the PDFs
+   // for this direction into the buffer.)
+   double *RESTRICT pack_dst = buffer;
+   // The interval's lower corner must not lie below the ghost-layer region.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner, f index 0.
+   double *RESTRICT const pdf_src = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted not to exceed the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t extent_x = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t extent_y = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t extent_z = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides along x, y, z and f, as reported by the field.
+   const int64_t stride_x = int64_t(pdfs->xStride());
+   const int64_t stride_y = int64_t(pdfs->yStride());
+   const int64_t stride_z = int64_t(pdfs->zStride());
+   const int64_t stride_f = int64_t(1 * int64_t(pdfs->fStride()));
+   // Threads per block. x: the extent, capped at 128.
+   // y: the extent, limited to 2 * (128 / threads_x), then capped at 1024.
+   // z: the extent, limited to 256 / (threads_x * y_fit), then capped at 64.
+   // Note that the z budget uses y_fit, i.e. the y value before the 1024 cap.
+   // All of this assumes every extent is at least 1; a zero extent divides by zero.
+   const int64_t threads_x = (128 < extent_x) ? 128 : extent_x;
+   const int64_t y_budget = 2 * ((int64_t)(128) / (int64_t)(threads_x));
+   const int64_t y_fit = (extent_y < y_budget) ? extent_y : y_budget;
+   const int64_t threads_y = (1024 < y_fit) ? 1024 : y_fit;
+   const int64_t z_budget = (int64_t)(256) / (int64_t)(threads_x * y_fit);
+   const int64_t z_fit = (extent_z < z_budget) ? extent_z : z_budget;
+   const int64_t threads_z = (64 < z_fit) ? 64 : z_fit;
+   // Blocks per grid: extent / threads, plus one more block when there is a remainder.
+   const int64_t blocks_x = (int64_t)(extent_x) / (int64_t)(threads_x) + ((extent_x % threads_x == 0) ? 0 : 1);
+   const int64_t blocks_y = (int64_t)(extent_y) / (int64_t)(threads_y) + ((extent_y % threads_y == 0) ? 0 : 1);
+   const int64_t blocks_z = (int64_t)(extent_z) / (int64_t)(threads_z) + ((extent_z % threads_z == 0) ? 0 : 1);
+   dim3 threads(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 blocks(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   // No dynamic shared memory (0); the launch is issued on `stream`.
+   internal_pack_BE::pack_BE<<<blocks, threads, 0, stream>>>(
+       pack_dst, pdf_src, extent_x, extent_y, extent_z,
+       stride_x, stride_y, stride_z, stride_f);
+   break;
+ }
+
+ case stencil::E: {
+ double *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TE: {
+ double *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::NE: {
+ double *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+void PackInfoPdfDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+ double *buffer = reinterpret_cast(byte_buffer);
+
+ auto pdfs = block->getData>(pdfsID);
+
+ CellInterval ci;
+ pdfs->getGhostRegion(dir, ci, 1, false);
+ auto communciationDirection = stencil::inverseDir[dir];
+
+ switch (communciationDirection) {
+ case stencil::SW: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BW: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::W: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TW: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::NW: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BS: {
+   // Direction BS: launch internal_unpack_BS::unpack_BS on `stream`, passing it
+   // `buffer` plus the pdfs data pointer, extents and strides for the interval `ci`.
+   double *RESTRICT const src_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer to the entry at the interval's minimum corner, f index 0.
+   double *RESTRICT dst_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+
+   // Extents of the interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t cells_x = int64_c(ci.xSize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t cells_y = int64_c(ci.ySize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t cells_z = int64_c(ci.zSize());
+
+   // Field strides along x, y, z and f, forwarded unchanged to the kernel.
+   const int64_t step_x = int64_t(pdfs->xStride());
+   const int64_t step_y = int64_t(pdfs->yStride());
+   const int64_t step_z = int64_t(pdfs->zStride());
+   const int64_t step_f = int64_t(pdfs->fStride());
+
+   // Block shape:
+   //   x = min(128, cells_x)
+   //   y = min(cells_y, 2 * (128 / x)), then capped at 1024
+   //   z = min(cells_z, 256 / (x * uncapped y)), then capped at 64
+   // NOTE(review): an extent of zero would divide by zero below; presumably
+   // callers never pass an empty interval -- confirm.
+   const int64_t threads_x = (cells_x > 128) ? 128 : cells_x;
+   const int64_t y_budget = 2 * (int64_t(128) / threads_x);
+   const int64_t y_fit = (cells_y < y_budget) ? cells_y : y_budget;
+   const int64_t threads_y = (y_fit > 1024) ? 1024 : y_fit;
+   const int64_t z_budget = int64_t(256) / (threads_x * y_fit);
+   const int64_t z_fit = (cells_z < z_budget) ? cells_z : z_budget;
+   const int64_t threads_z = (z_fit > 64) ? 64 : z_fit;
+
+   // Grid shape: extent divided by block extent, rounded up, per axis.
+   const int64_t blocks_x = (cells_x % threads_x == 0) ? cells_x / threads_x : cells_x / threads_x + 1;
+   const int64_t blocks_y = (cells_y % threads_y == 0) ? cells_y / threads_y : cells_y / threads_y + 1;
+   const int64_t blocks_z = (cells_z % threads_z == 0) ? cells_z / threads_z : cells_z / threads_z + 1;
+
+   dim3 launch_block(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 launch_grid(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   internal_unpack_BS::unpack_BS<<<launch_grid, launch_block, 0, stream>>>(src_buffer, dst_pdfs, cells_x, cells_y, cells_z, step_x, step_y, step_z, step_f);
+   break;
+ }
+
+ case stencil::S: {
+   // Direction S: launch internal_unpack_S::unpack_S on `stream`, passing it
+   // `buffer` plus the pdfs data pointer, extents and strides for the interval `ci`.
+   double *RESTRICT const src_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer to the entry at the interval's minimum corner, f index 0.
+   double *RESTRICT dst_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+
+   // Extents of the interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t cells_x = int64_c(ci.xSize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t cells_y = int64_c(ci.ySize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t cells_z = int64_c(ci.zSize());
+
+   // Field strides along x, y, z and f, forwarded unchanged to the kernel.
+   const int64_t step_x = int64_t(pdfs->xStride());
+   const int64_t step_y = int64_t(pdfs->yStride());
+   const int64_t step_z = int64_t(pdfs->zStride());
+   const int64_t step_f = int64_t(pdfs->fStride());
+
+   // Block shape:
+   //   x = min(128, cells_x)
+   //   y = min(cells_y, 2 * (128 / x)), then capped at 1024
+   //   z = min(cells_z, 256 / (x * uncapped y)), then capped at 64
+   // NOTE(review): an extent of zero would divide by zero below; presumably
+   // callers never pass an empty interval -- confirm.
+   const int64_t threads_x = (cells_x > 128) ? 128 : cells_x;
+   const int64_t y_budget = 2 * (int64_t(128) / threads_x);
+   const int64_t y_fit = (cells_y < y_budget) ? cells_y : y_budget;
+   const int64_t threads_y = (y_fit > 1024) ? 1024 : y_fit;
+   const int64_t z_budget = int64_t(256) / (threads_x * y_fit);
+   const int64_t z_fit = (cells_z < z_budget) ? cells_z : z_budget;
+   const int64_t threads_z = (z_fit > 64) ? 64 : z_fit;
+
+   // Grid shape: extent divided by block extent, rounded up, per axis.
+   const int64_t blocks_x = (cells_x % threads_x == 0) ? cells_x / threads_x : cells_x / threads_x + 1;
+   const int64_t blocks_y = (cells_y % threads_y == 0) ? cells_y / threads_y : cells_y / threads_y + 1;
+   const int64_t blocks_z = (cells_z % threads_z == 0) ? cells_z / threads_z : cells_z / threads_z + 1;
+
+   dim3 launch_block(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 launch_grid(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   internal_unpack_S::unpack_S<<<launch_grid, launch_block, 0, stream>>>(src_buffer, dst_pdfs, cells_x, cells_y, cells_z, step_x, step_y, step_z, step_f);
+   break;
+ }
+
+ case stencil::TS: {
+   // Direction TS: launch internal_unpack_TS::unpack_TS on `stream`, passing it
+   // `buffer` plus the pdfs data pointer, extents and strides for the interval `ci`.
+   double *RESTRICT const src_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer to the entry at the interval's minimum corner, f index 0.
+   double *RESTRICT dst_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+
+   // Extents of the interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t cells_x = int64_c(ci.xSize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t cells_y = int64_c(ci.ySize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t cells_z = int64_c(ci.zSize());
+
+   // Field strides along x, y, z and f, forwarded unchanged to the kernel.
+   const int64_t step_x = int64_t(pdfs->xStride());
+   const int64_t step_y = int64_t(pdfs->yStride());
+   const int64_t step_z = int64_t(pdfs->zStride());
+   const int64_t step_f = int64_t(pdfs->fStride());
+
+   // Block shape:
+   //   x = min(128, cells_x)
+   //   y = min(cells_y, 2 * (128 / x)), then capped at 1024
+   //   z = min(cells_z, 256 / (x * uncapped y)), then capped at 64
+   // NOTE(review): an extent of zero would divide by zero below; presumably
+   // callers never pass an empty interval -- confirm.
+   const int64_t threads_x = (cells_x > 128) ? 128 : cells_x;
+   const int64_t y_budget = 2 * (int64_t(128) / threads_x);
+   const int64_t y_fit = (cells_y < y_budget) ? cells_y : y_budget;
+   const int64_t threads_y = (y_fit > 1024) ? 1024 : y_fit;
+   const int64_t z_budget = int64_t(256) / (threads_x * y_fit);
+   const int64_t z_fit = (cells_z < z_budget) ? cells_z : z_budget;
+   const int64_t threads_z = (z_fit > 64) ? 64 : z_fit;
+
+   // Grid shape: extent divided by block extent, rounded up, per axis.
+   const int64_t blocks_x = (cells_x % threads_x == 0) ? cells_x / threads_x : cells_x / threads_x + 1;
+   const int64_t blocks_y = (cells_y % threads_y == 0) ? cells_y / threads_y : cells_y / threads_y + 1;
+   const int64_t blocks_z = (cells_z % threads_z == 0) ? cells_z / threads_z : cells_z / threads_z + 1;
+
+   dim3 launch_block(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 launch_grid(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   internal_unpack_TS::unpack_TS<<<launch_grid, launch_block, 0, stream>>>(src_buffer, dst_pdfs, cells_x, cells_y, cells_z, step_x, step_y, step_z, step_f);
+   break;
+ }
+
+ case stencil::B: {
+   // Direction B: launch internal_unpack_B::unpack_B on `stream`, passing it
+   // `buffer` plus the pdfs data pointer, extents and strides for the interval `ci`.
+   double *RESTRICT const src_buffer = buffer;
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer to the entry at the interval's minimum corner, f index 0.
+   double *RESTRICT dst_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+
+   // Extents of the interval along x, y and z.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t cells_x = int64_c(ci.xSize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t cells_y = int64_c(ci.ySize());
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t cells_z = int64_c(ci.zSize());
+
+   // Field strides along x, y, z and f, forwarded unchanged to the kernel.
+   const int64_t step_x = int64_t(pdfs->xStride());
+   const int64_t step_y = int64_t(pdfs->yStride());
+   const int64_t step_z = int64_t(pdfs->zStride());
+   const int64_t step_f = int64_t(pdfs->fStride());
+
+   // Block shape:
+   //   x = min(128, cells_x)
+   //   y = min(cells_y, 2 * (128 / x)), then capped at 1024
+   //   z = min(cells_z, 256 / (x * uncapped y)), then capped at 64
+   // NOTE(review): an extent of zero would divide by zero below; presumably
+   // callers never pass an empty interval -- confirm.
+   const int64_t threads_x = (cells_x > 128) ? 128 : cells_x;
+   const int64_t y_budget = 2 * (int64_t(128) / threads_x);
+   const int64_t y_fit = (cells_y < y_budget) ? cells_y : y_budget;
+   const int64_t threads_y = (y_fit > 1024) ? 1024 : y_fit;
+   const int64_t z_budget = int64_t(256) / (threads_x * y_fit);
+   const int64_t z_fit = (cells_z < z_budget) ? cells_z : z_budget;
+   const int64_t threads_z = (z_fit > 64) ? 64 : z_fit;
+
+   // Grid shape: extent divided by block extent, rounded up, per axis.
+   const int64_t blocks_x = (cells_x % threads_x == 0) ? cells_x / threads_x : cells_x / threads_x + 1;
+   const int64_t blocks_y = (cells_y % threads_y == 0) ? cells_y / threads_y : cells_y / threads_y + 1;
+   const int64_t blocks_z = (cells_z % threads_z == 0) ? cells_z / threads_z : cells_z / threads_z + 1;
+
+   dim3 launch_block(uint32_c(threads_x), uint32_c(threads_y), uint32_c(threads_z));
+   dim3 launch_grid(uint32_c(blocks_x), uint32_c(blocks_y), uint32_c(blocks_z));
+   internal_unpack_B::unpack_B<<<launch_grid, launch_block, 0, stream>>>(src_buffer, dst_pdfs, cells_x, cells_y, cells_z, step_x, step_y, step_z, step_f);
+   break;
+ }
+
+ case stencil::T: { // Direction T: gather pointer, extent and stride arguments from ci and pdfs, then launch internal_unpack_T::unpack_T.
+ double *RESTRICT const _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's minimum corner is not below -nrOfGhostLayers() on x, y, z
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field's x size including ghost layers (y and z checked below)
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride is forwarded as the fourth stride
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size0), yc = min(size1, 2 * (128 / bx)), by = min(1024, yc), bz = min(64, min(size2, 256 / (bx * yc))). NOTE(review): assumes every extent is >= 1; a zero extent would divide by zero here -- TODO confirm callers never pass an empty interval.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), spelled as (s % b == 0 ? s / b : s / b + 1) with the same block sizes as _block
+ internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on `stream`; the third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::BN: { // Direction BN: gather pointer, extent and stride arguments from ci and pdfs, then launch internal_unpack_BN::unpack_BN.
+ double *RESTRICT const _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's minimum corner is not below -nrOfGhostLayers() on x, y, z
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field's x size including ghost layers (y and z checked below)
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride is forwarded as the fourth stride
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size0), yc = min(size1, 2 * (128 / bx)), by = min(1024, yc), bz = min(64, min(size2, 256 / (bx * yc))). NOTE(review): assumes every extent is >= 1; a zero extent would divide by zero here -- TODO confirm callers never pass an empty interval.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), spelled as (s % b == 0 ? s / b : s / b + 1) with the same block sizes as _block
+ internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on `stream`; the third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::N: { // Direction N: gather pointer, extent and stride arguments from ci and pdfs, then launch internal_unpack_N::unpack_N.
+ double *RESTRICT const _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's minimum corner is not below -nrOfGhostLayers() on x, y, z
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field's x size including ghost layers (y and z checked below)
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride is forwarded as the fourth stride
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size0), yc = min(size1, 2 * (128 / bx)), by = min(1024, yc), bz = min(64, min(size2, 256 / (bx * yc))). NOTE(review): assumes every extent is >= 1; a zero extent would divide by zero here -- TODO confirm callers never pass an empty interval.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), spelled as (s % b == 0 ? s / b : s / b + 1) with the same block sizes as _block
+ internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on `stream`; the third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::TN: { // Direction TN: gather pointer, extent and stride arguments from ci and pdfs, then launch internal_unpack_TN::unpack_TN.
+ double *RESTRICT const _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's minimum corner is not below -nrOfGhostLayers() on x, y, z
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field's x size including ghost layers (y and z checked below)
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride is forwarded as the fourth stride
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size0), yc = min(size1, 2 * (128 / bx)), by = min(1024, yc), bz = min(64, min(size2, 256 / (bx * yc))). NOTE(review): assumes every extent is >= 1; a zero extent would divide by zero here -- TODO confirm callers never pass an empty interval.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), spelled as (s % b == 0 ? s / b : s / b + 1) with the same block sizes as _block
+ internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on `stream`; the third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::SE: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BE: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::E: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TE: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::NE: {
+ double *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+// Buffer size in bytes for direction `dir`: ghost-region cells times values per cell times sizeof(double).
+uint_t PackInfoPdfDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+  auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);
+  CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false); // NOTE(review): 1/false presumably mean thickness 1, no full slice - confirm
+
+  uint_t elementsPerCell = 0; // remains 0 for any direction not listed in the switch
+
+  switch (dir) {
+  case stencil::SW:
+    elementsPerCell = 1; // two-letter directions carry one value per cell
+    break;
+
+  case stencil::BW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::W:
+    elementsPerCell = 5; // single-letter directions carry five values per cell
+    break;
+
+  case stencil::TW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BS:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::S:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TS:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::B:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::T:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::BN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::N:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::SE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::E:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NE:
+    elementsPerCell = 1;
+    break;
+
+  default:
+    elementsPerCell = 0;
+  }
+  return ci.numCells() * elementsPerCell * sizeof(double);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h
new file mode 100644
index 0000000000..256f03be49
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfDoublePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+// GPU pack info for the PDF field of a block; the field is looked up through the stored BlockDataID.
+class PackInfoPdfDoublePrecisionCUDA : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+  PackInfoPdfDoublePrecisionCUDA(BlockDataID pdfsID_) : pdfsID(pdfsID_) {}
+  virtual ~PackInfoPdfDoublePrecisionCUDA() {}
+
+  // Defined out of line. NOTE(review): presumably gathers the PDFs leaving `block` along `dir` into `buffer` on `stream` - confirm.
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block, gpuStream_t stream) override;
+  // Not supported: always aborts, so callers have to disable local communication.
+  void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */, IBlock * /* receiver */, gpuStream_t /* stream */) override {
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn off local "
+                   "communication in the Communication class")
+  }
+  // Launches the direction-specific unpack kernel on `stream`; directions without a kernel are ignored.
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block, gpuStream_t stream) override;
+  // Buffer size in bytes for `dir`: ghost-region cells times values per cell times sizeof(double).
+  uint_t size(stencil::Direction dir, IBlock *block) override;
+
+private:
+  BlockDataID pdfsID; // handle used to fetch the PDF field from a block
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp
index e55017ab21..b1ac86db8e 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp
@@ -24,8 +24,6 @@
#include "core/cell/CellInterval.h"
#include "stencil/Directions.h"
-#include
-
#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu
new file mode 100644
index 0000000000..51b2b40cd9
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu
@@ -0,0 +1,1423 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfSinglePrecisionCUDA.cu
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoPdfSinglePrecisionCUDA.h"
+
+#define FUNC_PREFIX __global__ // the generated functions below are declared as CUDA kernels
+// Pick the restrict qualifier per compiler and silence unused-variable diagnostics. NOTE(review): each push presumably has a matching pop later in the file - confirm.
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else // NVCC without __NVCC_DIAG_PRAGMA_SUPPORT__: use the unprefixed pragma spelling
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
+#if defined(__CUDA__) // NOTE(review): clang without __CUDA__ leaves RESTRICT undefined in this chain
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode (same settings as device mode)
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__) // `or` is the alternative token for ||
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else // unknown compiler: RESTRICT expands to nothing
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW {
+// Packs direction SW: copies PDF component 9 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_SW
+
+namespace internal_pack_BW {
+// Packs direction BW: copies PDF component 17 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 17 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BW
+
+namespace internal_pack_W {
+// Packs direction W: copies PDF components 13, 17, 3, 7 and 9 of each cell into five consecutive buffer slots; one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x)] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 13 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 1] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 17 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 2] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 3 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 3] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 7 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 4] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_W
+
+namespace internal_pack_TW {
+// Packs direction TW: copies PDF component 13 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 13 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TW
+
+namespace internal_pack_NW {
+// Packs direction NW: copies PDF component 7 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 7 * _stride_pdfs_3];
+}
+} // namespace internal_pack_NW
+
+namespace internal_pack_BS {
+// Packs direction BS: copies PDF component 16 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 16 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BS
+
+namespace internal_pack_S {
+// Packs direction S: copies PDF components 10, 12, 16, 2 and 9 of each cell into five consecutive buffer slots; one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x)] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 10 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 1] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 12 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 2] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 16 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 3] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 2 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 4] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_S
+
+namespace internal_pack_TS {
+// Packs direction TS: copies PDF component 12 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 12 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B {
+// Packs direction B: copies PDF components 15, 16, 17, 18 and 6 of each cell into five consecutive buffer slots; one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x)] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 15 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 1] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 16 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 2] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 17 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 3] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 18 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 4] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 6 * _stride_pdfs_3];
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T {
+// Packs direction T: copies PDF components 11, 12, 13, 14 and 5 of each cell into five consecutive buffer slots; one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x)] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 11 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 1] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 12 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 2] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 13 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 3] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 14 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 4] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 5 * _stride_pdfs_3];
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN {
+// Packs direction BN: copies PDF component 15 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 15 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N {
+// Packs direction N: copies PDF components 1, 11, 15, 7 and 8 of each cell into five consecutive buffer slots; one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x)] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 1] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 11 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 2] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 15 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 3] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 7 * _stride_pdfs_3];
+  _data_buffer[5 * (_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x) + 4] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN {
+// Packs direction TN: copies PDF component 11 of each cell into the buffer (one value per cell, x index fastest); one thread per cell.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t cell_x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t cell_y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t cell_z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (cell_x >= _size_pdfs_0 || cell_y >= _size_pdfs_1 || cell_z >= _size_pdfs_2) return; // thread falls outside the cell region
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * cell_z + cell_y) + cell_x] = _data_pdfs[_stride_pdfs_0 * cell_x + _stride_pdfs_1 * cell_y + _stride_pdfs_2 * cell_z + 11 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TN
+
+namespace internal_pack_SE {
+// Each thread inside the size bounds copies the _data_pdfs entry at 10 * _stride_pdfs_3 of its cell into that cell's buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 10 * _stride_pdfs_3];
+}
+} // namespace internal_pack_SE
+
+namespace internal_pack_BE {
+// Each thread inside the size bounds copies the _data_pdfs entry at 18 * _stride_pdfs_3 of its cell into that cell's buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 18 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BE
+
+namespace internal_pack_E {
+// Each thread inside the size bounds copies five _data_pdfs entries of its cell (offsets 10, 14, 18, 4, 8 times _stride_pdfs_3) into five consecutive buffer slots.
+static FUNC_PREFIX __launch_bounds__(256) void pack_E(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 10 * _stride_pdfs_3];
+  _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 14 * _stride_pdfs_3];
+  _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 18 * _stride_pdfs_3];
+  _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 4 * _stride_pdfs_3];
+  _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_E
+
+namespace internal_pack_TE {
+// Each thread inside the size bounds copies the _data_pdfs entry at 14 * _stride_pdfs_3 of its cell into that cell's buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 14 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TE
+
+namespace internal_pack_NE {
+// Each thread inside the size bounds copies the _data_pdfs entry at 8 * _stride_pdfs_3 of its cell into that cell's buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz] = _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_NE
+
+namespace internal_unpack_SW {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 9 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 9 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_BW {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 17 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 17 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W {
+// Each thread inside the size bounds scatters five consecutive buffer values into the _data_pdfs entries of its cell at offsets 13, 17, 3, 7, 9 times _stride_pdfs_3.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4];
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 13 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 13 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_NW {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 7 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 7 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_BS {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 16 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 16 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S {
+// Each thread inside the size bounds scatters five consecutive buffer values into the _data_pdfs entries of its cell at offsets 10, 12, 16, 2, 9 times _stride_pdfs_3.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4];
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 12 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 12 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B {
+// Each thread inside the size bounds scatters five consecutive buffer values into the _data_pdfs entries of its cell at offsets 15, 16, 17, 18, 6 times _stride_pdfs_3.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 6 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4];
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T {
+// Each thread inside the size bounds scatters five consecutive buffer values into the _data_pdfs entries of its cell at offsets 11, 12, 13, 14, 5 times _stride_pdfs_3.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 5 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4];
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 15 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 15 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N {
+// Each thread inside the size bounds scatters five consecutive buffer values into the _data_pdfs entries of its cell at offsets 1, 11, 15, 7, 8 times _stride_pdfs_3.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4];
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 11 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 11 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_TN
+
+namespace internal_unpack_SE {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 10 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 10 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_SE
+
+namespace internal_unpack_BE {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 18 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 18 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_BE
+
+namespace internal_unpack_E {
+// Each thread inside the size bounds scatters five consecutive buffer values into the _data_pdfs entries of its cell at offsets 10, 14, 18, 4, 8 times _stride_pdfs_3.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_E(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 1];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 2];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 3];
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * ix + 5 * _size_pdfs_0 * iy + 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 4];
+}
+} // namespace internal_unpack_E
+
+namespace internal_unpack_TE {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 14 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 14 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_TE
+
+namespace internal_unpack_NE {
+// Each thread inside the size bounds writes its cell's buffer value into the _data_pdfs entry at 8 * _stride_pdfs_3 of that cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  int64_t const ix = threadIdx.x + blockIdx.x * blockDim.x;
+  int64_t const iy = threadIdx.y + blockIdx.y * blockDim.y;
+  int64_t const iz = threadIdx.z + blockIdx.z * blockDim.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return;
+  _data_pdfs[ix * _stride_pdfs_0 + iy * _stride_pdfs_1 + iz * _stride_pdfs_2 + 8 * _stride_pdfs_3] = _data_buffer[ix + _size_pdfs_0 * iy + _size_pdfs_0 * _size_pdfs_1 * iz];
+}
+} // namespace internal_unpack_NE
+
+void PackInfoPdfSinglePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+ float *buffer = reinterpret_cast(byte_buffer);
+
+ auto pdfs = block->getData>(pdfsID);
+
+ CellInterval ci;
+ pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+ switch (dir) {
+ case stencil::SW: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BW: {
+   // BW direction: launch pack_BW on `stream` for the cells in `ci`, passing
+   // `buffer` plus the data pointer, extents and strides of `pdfs`.
+   float *RESTRICT _data_buffer = buffer;
+   // The lower corner of the interval may not lie below -nrOfGhostLayers().
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted to fit the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Threads per block: x is capped at 128, y at 2 * (128 / x) and 1024,
+   // z at 256 / (x * y) and 64; no dimension exceeds the matching extent.
+   const int64_t _block_x = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _limit_y = 2 * (int64_t(128) / _block_x);
+   const int64_t _fit_y = (_size_pdfs_1 < _limit_y) ? _size_pdfs_1 : _limit_y;
+   const int64_t _block_y = (1024 < _fit_y) ? 1024 : _fit_y;
+   // The z limit is derived from _fit_y, i.e. the y size before the 1024 cap.
+   const int64_t _limit_z = int64_t(256) / (_block_x * _fit_y);
+   const int64_t _fit_z = (_size_pdfs_2 < _limit_z) ? _size_pdfs_2 : _limit_z;
+   const int64_t _block_z = (64 < _fit_z) ? 64 : _fit_z;
+   // Blocks per grid: extent / block size, plus one when there is a remainder.
+   const int64_t _grid_x = (_size_pdfs_0 % _block_x == 0) ? _size_pdfs_0 / _block_x : _size_pdfs_0 / _block_x + 1;
+   const int64_t _grid_y = (_size_pdfs_1 % _block_y == 0) ? _size_pdfs_1 / _block_y : _size_pdfs_1 / _block_y + 1;
+   const int64_t _grid_z = (_size_pdfs_2 % _block_z == 0) ? _size_pdfs_2 / _block_z : _size_pdfs_2 / _block_z + 1;
+   dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+   dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+   internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::W: {
+   // W direction: launch pack_W on `stream` for the cells in `ci`, passing
+   // `buffer` plus the data pointer, extents and strides of `pdfs`.
+   float *RESTRICT _data_buffer = buffer;
+   // The lower corner of the interval may not lie below -nrOfGhostLayers().
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted to fit the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Threads per block: x is capped at 128, y at 2 * (128 / x) and 1024,
+   // z at 256 / (x * y) and 64; no dimension exceeds the matching extent.
+   const int64_t _block_x = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _limit_y = 2 * (int64_t(128) / _block_x);
+   const int64_t _fit_y = (_size_pdfs_1 < _limit_y) ? _size_pdfs_1 : _limit_y;
+   const int64_t _block_y = (1024 < _fit_y) ? 1024 : _fit_y;
+   // The z limit is derived from _fit_y, i.e. the y size before the 1024 cap.
+   const int64_t _limit_z = int64_t(256) / (_block_x * _fit_y);
+   const int64_t _fit_z = (_size_pdfs_2 < _limit_z) ? _size_pdfs_2 : _limit_z;
+   const int64_t _block_z = (64 < _fit_z) ? 64 : _fit_z;
+   // Blocks per grid: extent / block size, plus one when there is a remainder.
+   const int64_t _grid_x = (_size_pdfs_0 % _block_x == 0) ? _size_pdfs_0 / _block_x : _size_pdfs_0 / _block_x + 1;
+   const int64_t _grid_y = (_size_pdfs_1 % _block_y == 0) ? _size_pdfs_1 / _block_y : _size_pdfs_1 / _block_y + 1;
+   const int64_t _grid_z = (_size_pdfs_2 % _block_z == 0) ? _size_pdfs_2 / _block_z : _size_pdfs_2 / _block_z + 1;
+   dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+   dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+   internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::TW: {
+   // TW direction: launch pack_TW on `stream` for the cells in `ci`, passing
+   // `buffer` plus the data pointer, extents and strides of `pdfs`.
+   float *RESTRICT _data_buffer = buffer;
+   // The lower corner of the interval may not lie below -nrOfGhostLayers().
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each is asserted to fit the field size with ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()));
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()));
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(pdfs->fStride());
+   // Threads per block: x is capped at 128, y at 2 * (128 / x) and 1024,
+   // z at 256 / (x * y) and 64; no dimension exceeds the matching extent.
+   const int64_t _block_x = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _limit_y = 2 * (int64_t(128) / _block_x);
+   const int64_t _fit_y = (_size_pdfs_1 < _limit_y) ? _size_pdfs_1 : _limit_y;
+   const int64_t _block_y = (1024 < _fit_y) ? 1024 : _fit_y;
+   // The z limit is derived from _fit_y, i.e. the y size before the 1024 cap.
+   const int64_t _limit_z = int64_t(256) / (_block_x * _fit_y);
+   const int64_t _fit_z = (_size_pdfs_2 < _limit_z) ? _size_pdfs_2 : _limit_z;
+   const int64_t _block_z = (64 < _fit_z) ? 64 : _fit_z;
+   // Blocks per grid: extent / block size, plus one when there is a remainder.
+   const int64_t _grid_x = (_size_pdfs_0 % _block_x == 0) ? _size_pdfs_0 / _block_x : _size_pdfs_0 / _block_x + 1;
+   const int64_t _grid_y = (_size_pdfs_1 % _block_y == 0) ? _size_pdfs_1 / _block_y : _size_pdfs_1 / _block_y + 1;
+   const int64_t _grid_z = (_size_pdfs_2 % _block_z == 0) ? _size_pdfs_2 / _block_z : _size_pdfs_2 / _block_z + 1;
+   dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+   dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+   internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::NW: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BS: { // direction BS: collect pointers, extents and strides from buffer/pdfs/ci, then launch pack_BS (kernel body is outside this chunk)
+ float *RESTRICT _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(ghost layer count of pdfs)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer obtained for the minimum corner of ci with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x size of pdfs incl. ghost layers must be >= ci.xSize()
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // y size of pdfs incl. ghost layers must be >= ci.ySize()
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // z size of pdfs incl. ghost layers must be >= ci.zSize()
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z; NOTE(review): an extent of 0 on any axis would cause integer division/modulo by zero in the _block/_grid expressions below - presumably callers never pass an empty interval, confirm
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // strides of pdfs, forwarded unchanged to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() multiplied by 1
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, min(size_1, 2 * (128 / x))); z = min(64, min(size_2, 256 / (x * min(size_1, 2 * (128 / x)))))
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i divided by the matching _block dimension, rounded up
+ internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on stream; third launch argument 0 is the dynamic shared-memory size in CUDA's <<<>>> syntax
+ break;
+ }
+
+ case stencil::S: { // direction S: collect pointers, extents and strides from buffer/pdfs/ci, then launch pack_S (kernel body is outside this chunk)
+ float *RESTRICT _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(ghost layer count of pdfs)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer obtained for the minimum corner of ci with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x size of pdfs incl. ghost layers must be >= ci.xSize()
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // y size of pdfs incl. ghost layers must be >= ci.ySize()
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // z size of pdfs incl. ghost layers must be >= ci.zSize()
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z; NOTE(review): an extent of 0 on any axis would cause integer division/modulo by zero in the _block/_grid expressions below - presumably callers never pass an empty interval, confirm
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // strides of pdfs, forwarded unchanged to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() multiplied by 1
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, min(size_1, 2 * (128 / x))); z = min(64, min(size_2, 256 / (x * min(size_1, 2 * (128 / x)))))
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i divided by the matching _block dimension, rounded up
+ internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on stream; third launch argument 0 is the dynamic shared-memory size in CUDA's <<<>>> syntax
+ break;
+ }
+
+ case stencil::TS: { // direction TS: collect pointers, extents and strides from buffer/pdfs/ci, then launch pack_TS (kernel body is outside this chunk)
+ float *RESTRICT _data_buffer = buffer; // forwarded as the kernel's first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(ghost layer count of pdfs)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer obtained for the minimum corner of ci with last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x size of pdfs incl. ghost layers must be >= ci.xSize()
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // extent of ci along x
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // y size of pdfs incl. ghost layers must be >= ci.ySize()
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // extent of ci along y
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // z size of pdfs incl. ghost layers must be >= ci.zSize()
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // extent of ci along z; NOTE(review): an extent of 0 on any axis would cause integer division/modulo by zero in the _block/_grid expressions below - presumably callers never pass an empty interval, confirm
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // strides of pdfs, forwarded unchanged to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() multiplied by 1
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, min(size_1, 2 * (128 / x))); z = min(64, min(size_2, 256 / (x * min(size_1, 2 * (128 / x)))))
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i divided by the matching _block dimension, rounded up
+ internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on stream; third launch argument 0 is the dynamic shared-memory size in CUDA's <<<>>> syntax
+ break;
+ }
+
+ case stencil::B: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::T: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::BN: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::N: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TN: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::SE: { // Direction SE: launch internal_pack_SE::pack_SE over cell interval ci of pdfs (presumably copies PDF values from the field into buffer; kernel body not visible here - confirm)
+ float *RESTRICT _data_buffer = buffer; // buffer pointer handed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(number of ghost layers)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the field element at ci's minimum corner, last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // field x size incl. ghost layers must be >= ci's x extent
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // field y size incl. ghost layers must be >= ci's y extent
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // field z size incl. ghost layers must be >= ci's z extent
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride of the field, passed to the kernel
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(128, size_0); y = min(1024, y') with y' = min(size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * y')). NOTE(review): a zero-extent ci would divide by zero here - presumably callers never pass an empty interval; verify
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per axis, size / block dim rounded up (quotient, plus 1 when size % block dim != 0), using the same block-dim expressions as _block
+ internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // kernel launch on `stream` with launch arguments (_grid, _block, 0)
+ break;
+ }
+
+ case stencil::BE: { // Direction BE: launch internal_pack_BE::pack_BE over cell interval ci of pdfs (presumably copies PDF values from the field into buffer; kernel body not visible here - confirm)
+ float *RESTRICT _data_buffer = buffer; // buffer pointer handed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(number of ghost layers)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the field element at ci's minimum corner, last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // field x size incl. ghost layers must be >= ci's x extent
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // field y size incl. ghost layers must be >= ci's y extent
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // field z size incl. ghost layers must be >= ci's z extent
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride of the field, passed to the kernel
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(128, size_0); y = min(1024, y') with y' = min(size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * y')). NOTE(review): a zero-extent ci would divide by zero here - presumably callers never pass an empty interval; verify
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per axis, size / block dim rounded up (quotient, plus 1 when size % block dim != 0), using the same block-dim expressions as _block
+ internal_pack_BE::pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // kernel launch on `stream` with launch arguments (_grid, _block, 0)
+ break;
+ }
+
+ case stencil::E: { // Direction E: launch internal_pack_E::pack_E over cell interval ci of pdfs (presumably copies PDF values from the field into buffer; kernel body not visible here - confirm)
+ float *RESTRICT _data_buffer = buffer; // buffer pointer handed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(number of ghost layers)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower bound for z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the field element at ci's minimum corner, last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // field x size incl. ghost layers must be >= ci's x extent
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // field y size incl. ghost layers must be >= ci's y extent
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // field z size incl. ghost layers must be >= ci's z extent
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, passed to the kernel
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride of the field, passed to the kernel
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims (integer division): x = min(128, size_0); y = min(1024, y') with y' = min(size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * y')). NOTE(review): a zero-extent ci would divide by zero here - presumably callers never pass an empty interval; verify
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per axis, size / block dim rounded up (quotient, plus 1 when size % block dim != 0), using the same block-dim expressions as _block
+ internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // kernel launch on `stream` with launch arguments (_grid, _block, 0)
+ break;
+ }
+
+ case stencil::TE: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::NE: { // NE branch: set up and launch the pack_NE kernel for the cell interval `ci`
+ float *RESTRICT _data_buffer = buffer; // alias of the float buffer handed to the kernel; presumably written by pack_NE (kernel body not shown) - confirm
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(number of ghost layers)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same check along y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same check along z
+ float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the field entry at the minimum corner of ci, fourth index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // field x size incl. ghost layers must be >= the x extent of ci
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (size_0)
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same check along y
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci (size_1)
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same check along z
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci (size_2)
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride as reported by the field
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride as reported by the field
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride as reported by the field
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // stride of the fourth (f) index
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division throughout): x = min(128, size_0); y = min(1024, t) with t = min(size_1, 2 * (128 / x)); z = min(64, min(size_2, 256 / (x * t))). NOTE(review): a zero x or y extent makes a divisor 0 here - presumably ci is never empty; confirm
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: ceil(size_i / block_i) for i = x, y, z, spelled as a remainder test plus integer division, using the same per-axis block sizes as _block
+ internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream`; third launch parameter 0 = no dynamic shared memory
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+void PackInfoPdfSinglePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+ float *buffer = reinterpret_cast(byte_buffer);
+
+ auto pdfs = block->getData>(pdfsID);
+
+ CellInterval ci;
+ pdfs->getGhostRegion(dir, ci, 1, false);
+ auto communciationDirection = stencil::inverseDir[dir];
+
+ switch (communciationDirection) {
+ case stencil::SW: { // taken when stencil::inverseDir[dir] is SW: set up and launch the unpack_SW kernel for the ghost region `ci`
+ float *RESTRICT const _data_buffer = buffer; // alias of the float view of byte_buffer; presumably read by unpack_SW (kernel body not shown) - confirm
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(number of ghost layers)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same check along y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same check along z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the field entry at the minimum corner of ci, fourth index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // field x size incl. ghost layers must be >= the x extent of ci
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (size_0)
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same check along y
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci (size_1)
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same check along z
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci (size_2)
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride as reported by the field
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride as reported by the field
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride as reported by the field
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // stride of the fourth (f) index
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division throughout): x = min(128, size_0); y = min(1024, t) with t = min(size_1, 2 * (128 / x)); z = min(64, min(size_2, 256 / (x * t))). NOTE(review): a zero x or y extent makes a divisor 0 here - presumably the ghost region is never empty; confirm
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: ceil(size_i / block_i) for i = x, y, z, spelled as a remainder test plus integer division, using the same per-axis block sizes as _block
+ internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream`; third launch parameter 0 = no dynamic shared memory
+ break;
+ }
+
+ case stencil::BW: { // taken when stencil::inverseDir[dir] is BW: set up and launch the unpack_BW kernel for the ghost region `ci`
+ float *RESTRICT const _data_buffer = buffer; // alias of the float view of byte_buffer; presumably read by unpack_BW (kernel body not shown) - confirm
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -(number of ghost layers)
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same check along y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same check along z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to the field entry at the minimum corner of ci, fourth index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // field x size incl. ghost layers must be >= the x extent of ci
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (size_0)
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same check along y
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci (size_1)
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same check along z
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci (size_2)
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride as reported by the field
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride as reported by the field
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride as reported by the field
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // stride of the fourth (f) index
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division throughout): x = min(128, size_0); y = min(1024, t) with t = min(size_1, 2 * (128 / x)); z = min(64, min(size_2, 256 / (x * t))). NOTE(review): a zero x or y extent makes a divisor 0 here - presumably the ghost region is never empty; confirm
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: ceil(size_i / block_i) for i = x, y, z, spelled as a remainder test plus integer division, using the same per-axis block sizes as _block
+ internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream`; third launch parameter 0 = no dynamic shared memory
+ break;
+ }
+
+ case stencil::W: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TW: {
+   // Direction TW: launch internal_unpack_TW::unpack_TW on `stream` over the
+   // cell interval `ci` of the field `pdfs`, with `buffer` as first argument.
+   // (The kernel body is not visible here; presumably it copies buffer
+   // entries into the field -- confirm against the kernel definition.)
+   float *RESTRICT const _data_buffer = buffer;
+   // The interval must not start below the field's ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner
+   // (last argument 0).
+   float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each must fit into the field including ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides as reported by the field object.
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   // Thread-block extents: x = min(128, size_x),
+   // y = min(1024, min(size_y, 2 * (128 / x))),
+   // z = min(64, min(size_z, 256 / (x * y_before_cap))).
+   const int64_t _block_x = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _limit_y = 2 * ((int64_t)(128) / (int64_t)(_block_x));
+   const int64_t _fit_y = (_size_pdfs_1 < _limit_y) ? _size_pdfs_1 : _limit_y;
+   const int64_t _block_y = (1024 < _fit_y) ? 1024 : _fit_y;
+   // The z limit uses the y extent *before* the 1024 cap, exactly as the
+   // generated expression does.
+   const int64_t _limit_z = (int64_t)(256) / (int64_t)(_block_x * _fit_y);
+   const int64_t _fit_z = (_size_pdfs_2 < _limit_z) ? _size_pdfs_2 : _limit_z;
+   const int64_t _block_z = (64 < _fit_z) ? 64 : _fit_z;
+   // Grid extents: interval size divided by block extent, rounded up.
+   const int64_t _grid_x = (_size_pdfs_0 % _block_x == 0)
+                               ? _size_pdfs_0 / _block_x
+                               : _size_pdfs_0 / _block_x + 1;
+   const int64_t _grid_y = (_size_pdfs_1 % _block_y == 0)
+                               ? _size_pdfs_1 / _block_y
+                               : _size_pdfs_1 / _block_y + 1;
+   const int64_t _grid_z = (_size_pdfs_2 % _block_z == 0)
+                               ? _size_pdfs_2 / _block_z
+                               : _size_pdfs_2 / _block_z + 1;
+   dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+   dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+   internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::NW: {
+   // Direction NW: launch internal_unpack_NW::unpack_NW on `stream` over the
+   // cell interval `ci` of the field `pdfs`, with `buffer` as first argument.
+   // (The kernel body is not visible here; presumably it copies buffer
+   // entries into the field -- confirm against the kernel definition.)
+   float *RESTRICT const _data_buffer = buffer;
+   // The interval must not start below the field's ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner
+   // (last argument 0).
+   float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each must fit into the field including ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides as reported by the field object.
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   // Thread-block extents: x = min(128, size_x),
+   // y = min(1024, min(size_y, 2 * (128 / x))),
+   // z = min(64, min(size_z, 256 / (x * y_before_cap))).
+   const int64_t _block_x = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _limit_y = 2 * ((int64_t)(128) / (int64_t)(_block_x));
+   const int64_t _fit_y = (_size_pdfs_1 < _limit_y) ? _size_pdfs_1 : _limit_y;
+   const int64_t _block_y = (1024 < _fit_y) ? 1024 : _fit_y;
+   // The z limit uses the y extent *before* the 1024 cap, exactly as the
+   // generated expression does.
+   const int64_t _limit_z = (int64_t)(256) / (int64_t)(_block_x * _fit_y);
+   const int64_t _fit_z = (_size_pdfs_2 < _limit_z) ? _size_pdfs_2 : _limit_z;
+   const int64_t _block_z = (64 < _fit_z) ? 64 : _fit_z;
+   // Grid extents: interval size divided by block extent, rounded up.
+   const int64_t _grid_x = (_size_pdfs_0 % _block_x == 0)
+                               ? _size_pdfs_0 / _block_x
+                               : _size_pdfs_0 / _block_x + 1;
+   const int64_t _grid_y = (_size_pdfs_1 % _block_y == 0)
+                               ? _size_pdfs_1 / _block_y
+                               : _size_pdfs_1 / _block_y + 1;
+   const int64_t _grid_z = (_size_pdfs_2 % _block_z == 0)
+                               ? _size_pdfs_2 / _block_z
+                               : _size_pdfs_2 / _block_z + 1;
+   dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+   dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+   internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::BS: {
+   // Direction BS: launch internal_unpack_BS::unpack_BS on `stream` over the
+   // cell interval `ci` of the field `pdfs`, with `buffer` as first argument.
+   // (The kernel body is not visible here; presumably it copies buffer
+   // entries into the field -- confirm against the kernel definition.)
+   float *RESTRICT const _data_buffer = buffer;
+   // The interval must not start below the field's ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+   WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+   // Pointer returned by dataAt() for the interval's minimum corner
+   // (last argument 0).
+   float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+   // Interval extents; each must fit into the field including ghost layers.
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+   const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+   const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+   WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+   const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+   // Field strides as reported by the field object.
+   const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+   const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+   const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+   const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+   // Thread-block extents: x = min(128, size_x),
+   // y = min(1024, min(size_y, 2 * (128 / x))),
+   // z = min(64, min(size_z, 256 / (x * y_before_cap))).
+   const int64_t _block_x = (128 < _size_pdfs_0) ? 128 : _size_pdfs_0;
+   const int64_t _limit_y = 2 * ((int64_t)(128) / (int64_t)(_block_x));
+   const int64_t _fit_y = (_size_pdfs_1 < _limit_y) ? _size_pdfs_1 : _limit_y;
+   const int64_t _block_y = (1024 < _fit_y) ? 1024 : _fit_y;
+   // The z limit uses the y extent *before* the 1024 cap, exactly as the
+   // generated expression does.
+   const int64_t _limit_z = (int64_t)(256) / (int64_t)(_block_x * _fit_y);
+   const int64_t _fit_z = (_size_pdfs_2 < _limit_z) ? _size_pdfs_2 : _limit_z;
+   const int64_t _block_z = (64 < _fit_z) ? 64 : _fit_z;
+   // Grid extents: interval size divided by block extent, rounded up.
+   const int64_t _grid_x = (_size_pdfs_0 % _block_x == 0)
+                               ? _size_pdfs_0 / _block_x
+                               : _size_pdfs_0 / _block_x + 1;
+   const int64_t _grid_y = (_size_pdfs_1 % _block_y == 0)
+                               ? _size_pdfs_1 / _block_y
+                               : _size_pdfs_1 / _block_y + 1;
+   const int64_t _grid_z = (_size_pdfs_2 % _block_z == 0)
+                               ? _size_pdfs_2 / _block_z
+                               : _size_pdfs_2 / _block_z + 1;
+   dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+   dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+   internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+   break;
+ }
+
+ case stencil::S: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TS: { // direction TS: collect pointers, extents and strides, then launch the unpack_TS kernel
+ float *RESTRICT const _data_buffer = buffer; // float buffer handed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci may not start below -(ghost layers) in x
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check in y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check in z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the pdfs entry at ci's minimum corner, last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // y extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // z extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride reported by the field, forwarded to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride reported by the field
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride reported by the field
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride reported by the field
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block: x = min(128, size0); y = min(1024, size1, 2*(128/x)); z = min(64, size2, 256/(x*y)). NOTE(review): a zero extent would divide by zero here - confirm ci is never empty
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per grid: each extent divided by the matching block dimension, rounded up
+ internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+ break;
+ }
+
+ case stencil::B: { // direction B: collect pointers, extents and strides, then launch the unpack_B kernel
+ float *RESTRICT const _data_buffer = buffer; // float buffer handed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci may not start below -(ghost layers) in x
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check in y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check in z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the pdfs entry at ci's minimum corner, last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // y extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // z extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride reported by the field, forwarded to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride reported by the field
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride reported by the field
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride reported by the field
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block: x = min(128, size0); y = min(1024, size1, 2*(128/x)); z = min(64, size2, 256/(x*y)). NOTE(review): a zero extent would divide by zero here - confirm ci is never empty
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per grid: each extent divided by the matching block dimension, rounded up
+ internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+ break;
+ }
+
+ case stencil::T: { // direction T: collect pointers, extents and strides, then launch the unpack_T kernel
+ float *RESTRICT const _data_buffer = buffer; // float buffer handed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci may not start below -(ghost layers) in x
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check in y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check in z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the pdfs entry at ci's minimum corner, last index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // x extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // y extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // z extent of ci must fit in the field incl. ghost layers
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride reported by the field, forwarded to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride reported by the field
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride reported by the field
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride reported by the field
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block: x = min(128, size0); y = min(1024, size1, 2*(128/x)); z = min(64, size2, 256/(x*y)). NOTE(review): a zero extent would divide by zero here - confirm ci is never empty
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per grid: each extent divided by the matching block dimension, rounded up
+ internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+ break;
+ }
+
+ case stencil::BN: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::N: { // Direction N: derive the pdfs window described by ci and launch internal_unpack_N::unpack_N on `stream`.
+ float *RESTRICT const _data_buffer = buffer; // passed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -nrOfGhostLayers()
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the minimum corner of ci at f index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field size incl. ghost layers
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same size check for y
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same size check for z
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of pdfs, forwarded to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of pdfs
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of pdfs
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride of pdfs
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size_0), by = min(1024, size_1, 2 * (128 / bx)), bz = min(64, size_2, 256 / (bx * min(size_1, 2 * (128 / bx)))). NOTE(review): a zero extent of ci produces a zero divisor in these expressions - confirm ci is never empty.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), written as "quotient if the remainder is 0, else quotient + 1", using the same block extents as _block
+ internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream`; third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::TN: { // Direction TN: derive the pdfs window described by ci and launch internal_unpack_TN::unpack_TN on `stream`.
+ float *RESTRICT const _data_buffer = buffer; // passed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -nrOfGhostLayers()
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the minimum corner of ci at f index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field size incl. ghost layers
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same size check for y
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same size check for z
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of pdfs, forwarded to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of pdfs
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of pdfs
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride of pdfs
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size_0), by = min(1024, size_1, 2 * (128 / bx)), bz = min(64, size_2, 256 / (bx * min(size_1, 2 * (128 / bx)))). NOTE(review): a zero extent of ci produces a zero divisor in these expressions - confirm ci is never empty.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), written as "quotient if the remainder is 0, else quotient + 1", using the same block extents as _block
+ internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream`; third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::SE: { // Direction SE: derive the pdfs window described by ci and launch internal_unpack_SE::unpack_SE on `stream`.
+ float *RESTRICT const _data_buffer = buffer; // passed to the kernel as its first argument
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // ci.xMin() must be >= -nrOfGhostLayers()
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the minimum corner of ci at f index 0
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // ci's x extent must not exceed the field size incl. ghost layers
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same size check for y
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same size check for z
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of pdfs, forwarded to the kernel
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of pdfs
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of pdfs
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // f stride of pdfs
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with bx = min(128, size_0), by = min(1024, size_1, 2 * (128 / bx)), bz = min(64, size_2, 256 / (bx * min(size_1, 2 * (128 / bx)))). NOTE(review): a zero extent of ci produces a zero divisor in these expressions - confirm ci is never empty.
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size_i / block_i), written as "quotient if the remainder is 0, else quotient + 1", using the same block extents as _block
+ internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream`; third launch parameter (dynamic shared memory size) is 0
+ break;
+ }
+
+ case stencil::BE: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::E: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::TE: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ case stencil::NE: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+ float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+ const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+ const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+ const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+ dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+ internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+// Returns the byte size of the data exchanged for direction `dir`: number of
+// cells in the interval from getGhostRegion * elements per cell * sizeof(float).
+uint_t PackInfoPdfSinglePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+  auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
+  CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false);
+  uint_t elementsPerCell = 0;
+  // W, S, B, T, N, E: 5 elements per cell; two-letter directions: 1; other: 0.
+  switch (dir) {
+  case stencil::SW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::W:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BS:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::S:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TS:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::B:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::T:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::BN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::N:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::SE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::E:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NE:
+    elementsPerCell = 1;
+    break;
+
+  default:
+    elementsPerCell = 0;
+  }
+  return ci.numCells() * elementsPerCell * sizeof(float);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h
new file mode 100644
index 0000000000..c6ee2782b9
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfSinglePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+class PackInfoPdfSinglePrecisionCUDA
+    : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+  // Keeps the block-data id that the out-of-line members use to find the field.
+  PackInfoPdfSinglePrecisionCUDA(BlockDataID pdfsID_) : pdfsID(pdfsID_) {}
+  virtual ~PackInfoPdfSinglePrecisionCUDA() = default;
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+            gpuStream_t stream) override;
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+              gpuStream_t stream) override;
+  uint_t size(stencil::Direction dir, IBlock *block) override;
+  // Local communication is not supported: invokes WALBERLA_ABORT with a message.
+  void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */,
+                        IBlock * /* receiver */,
+                        gpuStream_t /* stream */) override {
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn of local "
+                   "communication in the Communication class")
+  }
+private:
+  BlockDataID pdfsID; // id handed to the constructor
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
index 6cbf3cb98d..da91325e5e 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
@@ -24,8 +24,6 @@
#include "core/cell/CellInterval.h"
#include "stencil/Directions.h"
-#include
-
#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu
new file mode 100644
index 0000000000..e9bae41971
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu
@@ -0,0 +1,243 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecDoublePrecisionCUDA.cu
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoVecDoublePrecisionCUDA.h"
+
+// Kernels in this file are declared with FUNC_PREFIX, i.e. as `__global__`
+// functions.
+#define FUNC_PREFIX __global__
+
+// Select the compiler-specific spelling of the restrict qualifier (RESTRICT)
+// and silence "unused variable" diagnostics for the code below.
+// The diagnostic state pushed here is not popped again within this file.
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
+// NOTE(review): when clang is used without __CUDA__ defined, this branch
+// leaves RESTRICT undefined.
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+// Unknown compiler: RESTRICT expands to nothing.
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
+// Copies three values per cell from the strided field region into a densely
+// packed buffer (three consecutive entries per cell, x index running fastest).
+// Each thread handles the cell whose coordinates equal its global thread index.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+  // Threads that fall outside the region have nothing to copy.
+  if (ctr_0 >= _size_field_0 || ctr_1 >= _size_field_1 || ctr_2 >= _size_field_2) {
+    return;
+  }
+  // Offset of this cell's first value in the packed buffer and in the field.
+  const int64_t buffer_offset = 3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0;
+  const int64_t field_offset = _stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2;
+  _data_buffer[buffer_offset] = _data_field[field_offset];
+  _data_buffer[buffer_offset + 1] = _data_field[field_offset + _stride_field_3];
+  _data_buffer[buffer_offset + 2] = _data_field[field_offset + 2 * _stride_field_3];
+}
+} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
+// Copies three values per cell from a densely packed buffer (three
+// consecutive entries per cell, x index running fastest) into the strided
+// field region. Each thread handles the cell whose coordinates equal its
+// global thread index.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+  // Threads that fall outside the region have nothing to copy.
+  if (ctr_0 >= _size_field_0 || ctr_1 >= _size_field_1 || ctr_2 >= _size_field_2) {
+    return;
+  }
+  // Offset of this cell's first value in the packed buffer and in the field.
+  const int64_t buffer_offset = 3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0;
+  const int64_t field_offset = _stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2;
+  _data_field[field_offset] = _data_buffer[buffer_offset];
+  _data_field[field_offset + _stride_field_3] = _data_buffer[buffer_offset + 1];
+  _data_field[field_offset + 2 * _stride_field_3] = _data_buffer[buffer_offset + 2];
+}
+} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+/**
+ * Packs field data of @p block for direction @p dir into @p byte_buffer.
+ *
+ * The region to pack is the one-cell-thick interval returned by
+ * getSliceBeforeGhostLayer(). For every direction listed in the switch the
+ * pack kernel is launched asynchronously on @p stream; for any other
+ * direction the function returns without doing anything.
+ *
+ * @param dir         communication direction
+ * @param byte_buffer destination buffer, reinterpreted as an array of double
+ * @param block       block holding the GPU field registered under `fieldID`
+ * @param stream      GPU stream the kernel is enqueued on
+ */
+void PackInfoVecDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+
+  auto field = block->getData<gpu::GPUField<double>>(fieldID);
+
+  CellInterval ci;
+  field->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+  switch (dir) {
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::W:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::S:
+  case stencil::TS:
+  case stencil::B:
+  case stencil::C:
+  case stencil::T:
+  case stencil::BN:
+  case stencil::N:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::E:
+  case stencil::TE:
+  case stencil::NE: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+    double *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_field_0 = int64_t(field->xStride());
+    const int64_t _stride_field_1 = int64_t(field->yStride());
+    const int64_t _stride_field_2 = int64_t(field->zStride());
+    const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
+
+    // Thread block shape, written out step by step (same arithmetic as the
+    // single expanded expression emitted by the code generator):
+    //   x: min(128, size_x)
+    //   y: min(size_y, 2 * (128 / block_x)), capped at 1024
+    //   z: min(size_z, 256 / (block_x * y extent)), capped at 64
+    // For non-empty regions the product never exceeds the 256 threads
+    // declared in the kernel's __launch_bounds__.
+    // NOTE(review): a region with a zero extent would divide by zero here,
+    // exactly as in the generated expression - confirm callers never pass one.
+    const int64_t _block_x = (128 < _size_field_0) ? 128 : _size_field_0;
+    const int64_t _fill_y = 2 * (int64_t(128) / _block_x);
+    const int64_t _extent_y = (_size_field_1 < _fill_y) ? _size_field_1 : _fill_y;
+    const int64_t _block_y = (1024 < _extent_y) ? 1024 : _extent_y;
+    const int64_t _fill_z = int64_t(256) / (_block_x * _extent_y);
+    const int64_t _extent_z = (_size_field_2 < _fill_z) ? _size_field_2 : _fill_z;
+    const int64_t _block_z = (64 < _extent_z) ? 64 : _extent_z;
+    dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+
+    // Number of blocks per axis: region size divided by block size, rounded up.
+    const int64_t _grid_x = (_size_field_0 % _block_x == 0) ? _size_field_0 / _block_x : _size_field_0 / _block_x + 1;
+    const int64_t _grid_y = (_size_field_1 % _block_y == 0) ? _size_field_1 / _block_y : _size_field_1 / _block_y + 1;
+    const int64_t _grid_z = (_size_field_2 % _block_z == 0) ? _size_field_2 / _block_z : _size_field_2 / _block_z + 1;
+    dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+
+    internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+/**
+ * Unpacks data from @p byte_buffer into the ghost region of @p block.
+ *
+ * The target region is the one-cell-thick interval returned by
+ * getGhostRegion() for @p dir. The kernel selection is based on the inverse
+ * of @p dir. For every direction listed in the switch the unpack kernel is
+ * launched asynchronously on @p stream; for any other direction the function
+ * returns without doing anything.
+ *
+ * @param dir         direction of the ghost region to fill
+ * @param byte_buffer source buffer, reinterpreted as an array of double
+ * @param block       block holding the GPU field registered under `fieldID`
+ * @param stream      GPU stream the kernel is enqueued on
+ */
+void PackInfoVecDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+
+  auto field = block->getData<gpu::GPUField<double>>(fieldID);
+
+  CellInterval ci;
+  field->getGhostRegion(dir, ci, 1, false);
+  auto communicationDirection = stencil::inverseDir[dir];
+
+  switch (communicationDirection) {
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::W:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::S:
+  case stencil::TS:
+  case stencil::B:
+  case stencil::C:
+  case stencil::T:
+  case stencil::BN:
+  case stencil::N:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::E:
+  case stencil::TE:
+  case stencil::NE: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+    double *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_field_0 = int64_t(field->xStride());
+    const int64_t _stride_field_1 = int64_t(field->yStride());
+    const int64_t _stride_field_2 = int64_t(field->zStride());
+    const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
+
+    // Thread block shape, written out step by step (same arithmetic as the
+    // single expanded expression emitted by the code generator):
+    //   x: min(128, size_x)
+    //   y: min(size_y, 2 * (128 / block_x)), capped at 1024
+    //   z: min(size_z, 256 / (block_x * y extent)), capped at 64
+    // For non-empty regions the product never exceeds the 256 threads
+    // declared in the kernel's __launch_bounds__.
+    // NOTE(review): a region with a zero extent would divide by zero here,
+    // exactly as in the generated expression - confirm callers never pass one.
+    const int64_t _block_x = (128 < _size_field_0) ? 128 : _size_field_0;
+    const int64_t _fill_y = 2 * (int64_t(128) / _block_x);
+    const int64_t _extent_y = (_size_field_1 < _fill_y) ? _size_field_1 : _fill_y;
+    const int64_t _block_y = (1024 < _extent_y) ? 1024 : _extent_y;
+    const int64_t _fill_z = int64_t(256) / (_block_x * _extent_y);
+    const int64_t _extent_z = (_size_field_2 < _fill_z) ? _size_field_2 : _fill_z;
+    const int64_t _block_z = (64 < _extent_z) ? 64 : _extent_z;
+    dim3 _block(uint32_c(_block_x), uint32_c(_block_y), uint32_c(_block_z));
+
+    // Number of blocks per axis: region size divided by block size, rounded up.
+    const int64_t _grid_x = (_size_field_0 % _block_x == 0) ? _size_field_0 / _block_x : _size_field_0 / _block_x + 1;
+    const int64_t _grid_y = (_size_field_1 % _block_y == 0) ? _size_field_1 / _block_y : _size_field_1 / _block_y + 1;
+    const int64_t _grid_z = (_size_field_2 % _block_z == 0) ? _size_field_2 / _block_z : _size_field_2 / _block_z + 1;
+    dim3 _grid(uint32_c(_grid_x), uint32_c(_grid_y), uint32_c(_grid_z));
+
+    internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+/**
+ * Returns the number of bytes exchanged for direction @p dir.
+ *
+ * The result is the cell count of the one-cell-thick ghost region in that
+ * direction, times the number of values per cell (3 for every direction
+ * listed in the switch, 0 otherwise), times sizeof(double).
+ *
+ * @param dir   communication direction
+ * @param block block holding the GPU field registered under `fieldID`
+ */
+uint_t PackInfoVecDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+  auto field = block->getData<gpu::GPUField<double>>(fieldID);
+
+  CellInterval ci;
+  field->getGhostRegion(dir, ci, 1, false);
+
+  uint_t elementsPerCell = 0;
+
+  switch (dir) {
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::W:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::S:
+  case stencil::TS:
+  case stencil::B:
+  case stencil::C:
+  case stencil::T:
+  case stencil::BN:
+  case stencil::N:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::E:
+  case stencil::TE:
+  case stencil::NE:
+    elementsPerCell = 3;
+    break;
+
+  default:
+    // Directions not handled by the pack/unpack kernels transfer nothing.
+    elementsPerCell = 0;
+  }
+  return ci.numCells() * elementsPerCell * sizeof(double);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h
new file mode 100644
index 0000000000..18884f6c9d
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecDoublePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+/**
+ * GPU pack info that stores the block data handle `fieldID` and implements
+ * the ::walberla::gpu::GeneratedGPUPackInfo interface.
+ *
+ * pack(), unpack() and size() are only declared here; their definitions live
+ * in a separate translation unit. communicateLocal() is not supported and
+ * aborts when called.
+ */
+class PackInfoVecDoublePrecisionCUDA
+    : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+  /** @param fieldID_ block data handle stored for later use by the pack info. */
+  PackInfoVecDoublePrecisionCUDA(BlockDataID fieldID_) : fieldID(fieldID_) {}
+  virtual ~PackInfoVecDoublePrecisionCUDA() {}
+
+  /** Packs data of @p block for direction @p dir into @p buffer on @p stream. */
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+            gpuStream_t stream) override;
+  /** Not implemented: always aborts via WALBERLA_ABORT. */
+  void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */,
+                        IBlock * /* receiver */,
+                        gpuStream_t /* stream */) override {
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn off local "
+                   "communication in the Communication class")
+  }
+  /** Unpacks data from @p buffer into @p block for direction @p dir on @p stream. */
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+              gpuStream_t stream) override;
+  /** Returns the buffer size needed for direction @p dir on @p block. */
+  uint_t size(stencil::Direction dir, IBlock *block) override;
+
+private:
+  // Handle of the block data this pack info operates on.
+  BlockDataID fieldID;
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp
index 3ddeee01b6..c3b718b2d4 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp
@@ -24,8 +24,6 @@
#include "core/cell/CellInterval.h"
#include "stencil/Directions.h"
-#include
-
#if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu
new file mode 100644
index 0000000000..c38b9e669b
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu
@@ -0,0 +1,243 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecSinglePrecisionCUDA.cu
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoVecSinglePrecisionCUDA.h"
+
+// Kernels in this file are declared with FUNC_PREFIX, i.e. as `__global__`
+// functions.
+#define FUNC_PREFIX __global__
+
+// Select the compiler-specific spelling of the restrict qualifier (RESTRICT)
+// and silence "unused variable" diagnostics for the code below.
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
+// NOTE(review): when clang is used without __CUDA__ defined, this branch
+// leaves RESTRICT undefined.
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+// Unknown compiler: RESTRICT expands to nothing.
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
+// Copies three values per cell from the strided field region into a densely
+// packed buffer (three consecutive entries per cell, x index running fastest).
+// Each thread handles the cell whose coordinates equal its global thread index.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+  // Threads that fall outside the region have nothing to copy.
+  if (ctr_0 >= _size_field_0 || ctr_1 >= _size_field_1 || ctr_2 >= _size_field_2) {
+    return;
+  }
+  // Offset of this cell's first value in the packed buffer and in the field.
+  const int64_t buffer_offset = 3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0;
+  const int64_t field_offset = _stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2;
+  _data_buffer[buffer_offset] = _data_field[field_offset];
+  _data_buffer[buffer_offset + 1] = _data_field[field_offset + _stride_field_3];
+  _data_buffer[buffer_offset + 2] = _data_field[field_offset + 2 * _stride_field_3];
+}
+} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+ if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) {
+ const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x;
+ const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+ const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+ _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0];
+ _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1];
+ _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2];
+ }
+}
+} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+void PackInfoVecSinglePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+ float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+ auto field = block->getData<gpu::GPUField<float>>(fieldID);
+
+ CellInterval ci;
+ field->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+ switch (dir) {
+ case stencil::SW:
+ case stencil::BW:
+ case stencil::W:
+ case stencil::TW:
+ case stencil::NW:
+ case stencil::BS:
+ case stencil::S:
+ case stencil::TS:
+ case stencil::B:
+ case stencil::C:
+ case stencil::T:
+ case stencil::BN:
+ case stencil::N:
+ case stencil::TN:
+ case stencil::SE:
+ case stencil::BE:
+ case stencil::E:
+ case stencil::TE:
+ case stencil::NE: {
+ float *RESTRICT _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+ float *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_field_0 = int64_t(field->xStride());
+ const int64_t _stride_field_1 = int64_t(field->yStride());
+ const int64_t _stride_field_2 = int64_t(field->zStride());
+ const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
+ dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))));
+ dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? 
_size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1)));
+ internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+void PackInfoVecSinglePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+ float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+ auto field = block->getData<gpu::GPUField<float>>(fieldID);
+
+ CellInterval ci;
+ field->getGhostRegion(dir, ci, 1, false);
+ auto communciationDirection = stencil::inverseDir[dir];
+
+ switch (communciationDirection) {
+ case stencil::SW:
+ case stencil::BW:
+ case stencil::W:
+ case stencil::TW:
+ case stencil::NW:
+ case stencil::BS:
+ case stencil::S:
+ case stencil::TS:
+ case stencil::B:
+ case stencil::C:
+ case stencil::T:
+ case stencil::BN:
+ case stencil::N:
+ case stencil::TN:
+ case stencil::SE:
+ case stencil::BE:
+ case stencil::E:
+ case stencil::TE:
+ case stencil::NE: {
+ float *RESTRICT const _data_buffer = buffer;
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+ WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+ float *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+ const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+ const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
+ WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+ const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
+ const int64_t _stride_field_0 = int64_t(field->xStride());
+ const int64_t _stride_field_1 = int64_t(field->yStride());
+ const int64_t _stride_field_2 = int64_t(field->zStride());
+ const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
+ dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))));
+ dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? 
_size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1)));
+ internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+uint_t PackInfoVecSinglePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+ auto field = block->getData<gpu::GPUField<float>>(fieldID);
+
+ CellInterval ci;
+ field->getGhostRegion(dir, ci, 1, false);
+
+ uint_t elementsPerCell = 0;
+
+ switch (dir) {
+ case stencil::SW:
+ case stencil::BW:
+ case stencil::W:
+ case stencil::TW:
+ case stencil::NW:
+ case stencil::BS:
+ case stencil::S:
+ case stencil::TS:
+ case stencil::B:
+ case stencil::C:
+ case stencil::T:
+ case stencil::BN:
+ case stencil::N:
+ case stencil::TN:
+ case stencil::SE:
+ case stencil::BE:
+ case stencil::E:
+ case stencil::TE:
+ case stencil::NE:
+ elementsPerCell = 3;
+ break;
+
+ default:
+ elementsPerCell = 0;
+ }
+ return ci.numCells() * elementsPerCell * sizeof(float);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h
new file mode 100644
index 0000000000..c1eb6d2be7
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecSinglePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+class PackInfoVecSinglePrecisionCUDA
+ : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+ PackInfoVecSinglePrecisionCUDA(BlockDataID fieldID_) : fieldID(fieldID_){};
+ virtual ~PackInfoVecSinglePrecisionCUDA() {}
+
+ void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+ gpuStream_t stream) override;
+ void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */,
+ IBlock * /* receiver */,
+ gpuStream_t /* stream */) override {
+ WALBERLA_ABORT("Local Communication not implemented yet for standard "
+ "PackInfos. To run your application turn of local "
+ "communication in the Communication class")
+ }
+ void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+ gpuStream_t stream) override;
+ uint_t size(stencil::Direction dir, IBlock *block) override;
+
+private:
+ BlockDataID fieldID;
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh
index f9dc9ae83d..a71202df38 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh
+++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh
@@ -29,6 +29,10 @@
#include "generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh"
#include "generated_kernels/InitialPDFsSetterDoublePrecisionCUDA.h"
#include "generated_kernels/InitialPDFsSetterSinglePrecisionCUDA.h"
+#include "generated_kernels/PackInfoPdfDoublePrecisionCUDA.h"
+#include "generated_kernels/PackInfoPdfSinglePrecisionCUDA.h"
+#include "generated_kernels/PackInfoVecDoublePrecisionCUDA.h"
+#include "generated_kernels/PackInfoVecSinglePrecisionCUDA.h"
#include "generated_kernels/StreamSweepDoublePrecisionCUDA.h"
#include "generated_kernels/StreamSweepSinglePrecisionCUDA.h"
@@ -49,6 +53,8 @@ template <> struct KernelTrait {
pystencils::CollideSweepDoublePrecisionLeesEdwardsCUDA;
using StreamSweep = pystencils::StreamSweepDoublePrecisionCUDA;
using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecisionCUDA;
+ using PackInfoPdf = pystencils::PackInfoPdfDoublePrecisionCUDA;
+ using PackInfoVec = pystencils::PackInfoVecDoublePrecisionCUDA;
};
template <> struct KernelTrait {
@@ -58,6 +64,8 @@ template <> struct KernelTrait {
pystencils::CollideSweepSinglePrecisionLeesEdwardsCUDA;
using StreamSweep = pystencils::StreamSweepSinglePrecisionCUDA;
using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecisionCUDA;
+ using PackInfoPdf = pystencils::PackInfoPdfSinglePrecisionCUDA;
+ using PackInfoVec = pystencils::PackInfoVecSinglePrecisionCUDA;
};
template <> struct BoundaryHandlingTrait {
diff --git a/src/walberla_bridge/tests/CMakeLists.txt b/src/walberla_bridge/tests/CMakeLists.txt
index 83a7d9d2ee..06342ac225 100644
--- a/src/walberla_bridge/tests/CMakeLists.txt
+++ b/src/walberla_bridge/tests/CMakeLists.txt
@@ -24,9 +24,11 @@ function(ESPRESSO_ADD_TEST)
espresso_unit_test(
SRC ${TEST_SRC} NAME ${TEST_NAME} NUM_PROC ${TEST_NUM_PROC} DEPENDS
${TEST_DEPENDS} espresso::walberla espresso::utils)
+ if(WALBERLA_BUILD_WITH_CUDA)
+ target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla_cuda)
+ endif()
if(${TEST_SRC} MATCHES ".*\.cu$")
- target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags
- espresso::walberla_cuda)
+ target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags)
else()
target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cpp_flags)
endif()
diff --git a/testsuite/scripts/benchmarks/CMakeLists.txt b/testsuite/scripts/benchmarks/CMakeLists.txt
index 47583fdb57..76ecaa4612 100644
--- a/testsuite/scripts/benchmarks/CMakeLists.txt
+++ b/testsuite/scripts/benchmarks/CMakeLists.txt
@@ -43,7 +43,7 @@ add_custom_target(
benchmark_test(FILE test_lj.py)
benchmark_test(FILE test_lb.py SUFFIX cpu)
-# benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu") # TODO WALBERLA
+benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu")
benchmark_test(FILE test_p3m.py SUFFIX cpu)
benchmark_test(FILE test_p3m.py SUFFIX gpu LABELS "gpu")
benchmark_test(FILE test_ferrofluid.py)