From 4758e3ccd42fe995770f66287b96acc52b01900e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Wed, 30 Oct 2024 23:17:23 +0100 Subject: [PATCH] Generate LB GPU PackInfo kernels --- maintainer/benchmarks/lb.py | 19 +- maintainer/walberla_kernels/Readme.md | 1 - .../custom_additional_extensions.py | 45 + .../walberla_kernels/generate_lb_kernels.py | 116 +- .../walberla_kernels/pystencils_espresso.py | 7 +- .../templates/preprocessor.tmpl.cuh | 114 ++ .../src/lattice_boltzmann/LBWalberlaImpl.hpp | 53 +- .../generated_kernels/CMakeLists.txt | 4 + .../Dynamic_UBB_double_precisionCUDA.cu | 24 +- .../Dynamic_UBB_single_precisionCUDA.cu | 24 +- .../PackInfoPdfDoublePrecision.cpp | 2 - .../PackInfoPdfDoublePrecisionCUDA.cu | 1423 +++++++++++++++++ .../PackInfoPdfDoublePrecisionCUDA.h | 64 + .../PackInfoPdfSinglePrecision.cpp | 2 - .../PackInfoPdfSinglePrecisionCUDA.cu | 1423 +++++++++++++++++ .../PackInfoPdfSinglePrecisionCUDA.h | 64 + .../PackInfoVecDoublePrecision.cpp | 2 - .../PackInfoVecDoublePrecisionCUDA.cu | 243 +++ .../PackInfoVecDoublePrecisionCUDA.h | 64 + .../PackInfoVecSinglePrecision.cpp | 2 - .../PackInfoVecSinglePrecisionCUDA.cu | 243 +++ .../PackInfoVecSinglePrecisionCUDA.h | 64 + .../src/lattice_boltzmann/lb_kernels.cuh | 8 + src/walberla_bridge/tests/CMakeLists.txt | 6 +- testsuite/scripts/benchmarks/CMakeLists.txt | 2 +- 25 files changed, 3927 insertions(+), 92 deletions(-) create mode 100644 maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h create mode 100644 
src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py index ea42b42005..04453c5ea7 100644 --- a/maintainer/benchmarks/lb.py +++ b/maintainer/benchmarks/lb.py @@ -31,7 +31,7 @@ parser.add_argument("--particles_per_core", metavar="N", action="store", type=int, default=125, required=False, help="Number of particles per core") -parser.add_argument("--box_l", action="store", +parser.add_argument("--box_l", action="store", nargs="+", type=int, default=argparse.SUPPRESS, required=False, help="Box length (cubic box)") parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store", @@ -45,6 +45,8 @@ help="Using single-precision floating point accuracy") parser.add_argument("--gpu", action=argparse.BooleanOptionalAction, default=False, required=False, help="Use GPU implementation") +parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction, + default=False, required=False, help="Use multi-GPU implementation") parser.add_argument("--output", metavar="FILEPATH", action="store", type=str, required=False, default="benchmarks.csv", help="Output file (default: benchmarks.csv)") @@ -83,9 +85,9 @@ n_proc = system.cell_system.get_state()["n_nodes"] n_part = n_proc * args.particles_per_core if n_part == 0: - box_l = args.box_l + box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l agrid = 1. 
- lb_grid = args.box_l + lb_grid = box_l measurement_steps = 80 else: # volume of N spheres with radius r: N * (4/3*pi*r^3) @@ -96,13 +98,16 @@ agrid = box_l / lb_grid measurement_steps = max(50, int(120**3 / lb_grid**3)) measurement_steps = 40 + lb_grid = 3 * [lb_grid] + box_l = 3 * [box_l] -print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]") +print(f"box length: {box_l}") +print(f"LB shape: {lb_grid}") print(f"LB agrid: {agrid:.3f}") # System ############################################################# -system.box_l = 3 * (box_l,) +system.box_l = box_l # Integration parameters ############################################################# @@ -135,8 +140,10 @@ # LB fluid setup ############################################################# lb_class = espressomd.lb.LBFluidWalberla -if args.gpu: +if args.gpu or args.multi_gpu: lb_class = espressomd.lb.LBFluidWalberlaGPU +if args.multi_gpu: + system.cuda_init_handle.call_method("set_device_id_per_rank") lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1., density=1., single_precision=args.single_precision) system.lb = lbf diff --git a/maintainer/walberla_kernels/Readme.md b/maintainer/walberla_kernels/Readme.md index ef7f2fb0c5..39f1a7e41a 100644 --- a/maintainer/walberla_kernels/Readme.md +++ b/maintainer/walberla_kernels/Readme.md @@ -49,7 +49,6 @@ generate_lb_kernels --single-precision generate_lb_kernels --gpu generate_lb_kernels --gpu --single-precision format_lb_kernels -git diff src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_*CUDA*.cu # verify pragmas # EK kernels cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/ diff --git a/maintainer/walberla_kernels/custom_additional_extensions.py b/maintainer/walberla_kernels/custom_additional_extensions.py index 19d8e3f22a..3115f7ffe7 100644 --- a/maintainer/walberla_kernels/custom_additional_extensions.py +++ b/maintainer/walberla_kernels/custom_additional_extensions.py @@ -349,3 
+349,48 @@ def generate_kernel_selector( "templates/ReactionKernelSelector.tmpl.h").render(**context) generation_context.write_file(f"{class_name}_all.h", header) + + +def generate_device_preprocessor(kernel, defines=()): + """ + Generate device preprocessor directives. + """ + pragmas = { + "packinfo": { + "nvcc": ["diag_suppress 177 // unused variable"], + "clang_host": ["-Wunused-variable"], + "clang_dev": ["-Wunused-variable"], + "gcc": ["-Wunused-variable"], + }, + "ubb_boundary": { + "nvcc": ["diag_suppress 177 // unused variable"], + "clang_host": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"], # nopep8 + "clang_dev": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"], # nopep8 + "gcc": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion"], + }, + } + + defines_table = { + "nvcc": {"RESTRICT": "__restrict__", "FUNC_PREFIX": "__global__"}, + "msvc": {"RESTRICT": "__restrict", "FUNC_PREFIX": ""}, + "clang_host": {"RESTRICT": "__restrict__", "FUNC_PREFIX": ""}, + "clang_dev": {"RESTRICT": "__restrict__", "FUNC_PREFIX": "__global__"}, + "gcc": {"RESTRICT": "__restrict__", "FUNC_PREFIX": ""}, + "other": {"RESTRICT": "", "FUNC_PREFIX": ""}, + } + + context = { + "pragmas": pragmas[kernel], + "defines_table": defines_table, + "defines": defines, + } + + custom_env = jinja2.Environment( + loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent), + undefined=jinja2.StrictUndefined + ) + + content = custom_env.get_template( + "templates/preprocessor.tmpl.cuh").render(**context) + + return content.split("\n/* section */\n")[1:] diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py index 21300af894..bef9badc54 100644 --- a/maintainer/walberla_kernels/generate_lb_kernels.py +++ b/maintainer/walberla_kernels/generate_lb_kernels.py @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2023 The ESPResSo project +# Copyright (C) 2020-2024 The ESPResSo project # 
# This file is part of ESPResSo. # @@ -17,6 +17,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # +import re import argparse import packaging.specifiers @@ -40,6 +41,7 @@ import relaxation_rates import walberla_lbm_generation import code_generation_context +import custom_additional_extensions parser = argparse.ArgumentParser(description="Generate the waLBerla kernels.") parser.add_argument("--single-precision", action="store_true", required=False, @@ -65,6 +67,24 @@ def paramlist(parameters, keys): yield parameters[key] +def get_ext_header(target_suffix): + return {"CUDA": "h"}.get(target_suffix, "h") + + +def get_ext_source(target_suffix): + return {"CUDA": "cu"}.get(target_suffix, "cpp") + + +def patch_file(class_name, extension, target_suffix, patch): + with open(f"{class_name}.{extension}", "r+") as f: + old_content = f.read() + new_content = patch(old_content, target_suffix) + if new_content != old_content: + f.seek(0) + f.truncate() + f.write(new_content) + + with code_generation_context.CodeGeneration() as ctx: ctx.double_accuracy = not args.single_precision if target == ps.Target.GPU: @@ -196,26 +216,57 @@ def paramlist(parameters, keys): # generate PackInfo assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments( fields, streaming_pattern="pull") - spec = pystencils_espresso.generate_pack_info_vector_field_specifications( + spec = pystencils_espresso.generate_pack_info_field_specifications( config, stencil, force_field.layout) - for params, target_suffix in paramlist(parameters, ["CPU"]): + + def patch_packinfo_header(content, target_suffix): + if target_suffix in ["", "AVX"]: + token = "\n //TODO: optimize by generating kernel for this case\n" + assert token in content + content = content.replace(token, "\n") + ft = "float" if "SinglePrecision" in content else "double" + token = " pack(dir, outBuffer.forward(dataSize)" + assert token in content + content = content.replace(token, f"{token[:-1]} + sizeof({ft}))") + token = " unpack(dir, 
buffer.skip(dataSize)" + assert token in content + content = content.replace(token, f"{token[:-1]} + sizeof({ft}))") + elif target_suffix in ["CUDA"]: + token = "#define FUNC_PREFIX __global__" + assert token in content + content = content.replace(token, "") + content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif\n\n", "", content) + return content + + def patch_packinfo_kernel(content, target_suffix): + if target_suffix in ["", "AVX"]: + # fix MPI buffer + m = re.search("(float|double) *\* *buffer = reinterpret_cast<(?:float|double) *\*>\(byte_buffer\);\n", content) # nopep8 + assert m is not None + content = content.replace(m.group(0), f"byte_buffer += sizeof({m.group(1)}) - (reinterpret_cast(byte_buffer) - (reinterpret_cast(byte_buffer) / sizeof({m.group(1)})) * sizeof({m.group(1)}));\n {m.group(0)}") # nopep8 + if target_suffix in ["CUDA"]: + token = "#define FUNC_PREFIX __global__" + assert token in content + push, _ = custom_additional_extensions.generate_device_preprocessor( + "packinfo", defines=("RESTRICT",)) + content = content.replace(token, f"{token}\n{push}") + token = '#include "PackInfo' + assert token in content + content = content.replace(token, f'#include "core/DataTypes.h"\n#include "core/cell/CellInterval.h"\n#include "domain_decomposition/IBlock.h"\n#include "stencil/Directions.h"\n\n{token}') # nopep8 + return content + + for params, target_suffix in paramlist(parameters, ["CPU", "GPU"]): pystencils_walberla.generate_pack_info_from_kernel( ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments, kind="pull", **params) pystencils_walberla.generate_pack_info( ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params) - if target_suffix == "CUDA": - continue - token = "\n //TODO: optimize by generating kernel for this case\n" - for field_suffix in ["Pdf", "Vec"]: - class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}" # nopep8 - with open(f"{class_name}.h", "r+") as f: - content = f.read() - assert token in 
content - content = content.replace(token, "\n") - f.seek(0) - f.truncate() - f.write(content) + for suffix in ["Pdf", "Vec"]: + class_name = f"PackInfo{suffix}{precision_prefix}{target_suffix}" + patch_file(class_name, get_ext_header(target_suffix), + target_suffix, patch_packinfo_header) + patch_file(class_name, get_ext_source(target_suffix), + target_suffix, patch_packinfo_kernel) # boundary conditions ubb_dynamic = lbmpy_espresso.UBB( @@ -223,17 +274,28 @@ def paramlist(parameters, keys): ubb_data_handler = lbmpy_espresso.BounceBackSlipVelocityUBB( method.stencil, ubb_dynamic) - for _, target_suffix in paramlist(parameters, ("GPU", "CPU")): + # pylint: disable=unused-argument + def patch_boundary_header(content, target_suffix): + return content.replace("real_t", config.data_type.default_factory().c_name) # nopep8 + + def patch_boundary_kernel(content, target_suffix): + if target_suffix in ["CUDA"]: + push, pop = custom_additional_extensions.generate_device_preprocessor( + "ubb_boundary", defines=("RESTRICT",)) + content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif(?=\n\n|\n//)", "", content) # nopep8 + content = re.sub(r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)", push, content, 1) # nopep8 + content = re.sub(r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)", pop, content, 1) # nopep8 + assert push in content + assert pop in content + return content + + for _, target_suffix in paramlist(parameters, ("CPU", "GPU")): + class_name = f"Dynamic_UBB_{precision_suffix}{target_suffix}" lbmpy_walberla.generate_boundary( - ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic, - method, additional_data_handler=ubb_data_handler, + ctx, class_name, ubb_dynamic, method, + additional_data_handler=ubb_data_handler, streaming_pattern=streaming_pattern, target=target) - - with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f: - content = f.read() - f.seek(0) - f.truncate(0) - # patch for floating point accuracy - content = content.replace("real_t", 
- config.data_type.default_factory().c_name) - f.write(content) + patch_file(class_name, get_ext_header(target_suffix), + target_suffix, patch_boundary_header) + patch_file(class_name, get_ext_source(target_suffix), + target_suffix, patch_boundary_kernel) diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py index 3cf6edfbf3..1fd3321aeb 100644 --- a/maintainer/walberla_kernels/pystencils_espresso.py +++ b/maintainer/walberla_kernels/pystencils_espresso.py @@ -239,7 +239,8 @@ def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern): return lbm_update_rule.all_assignments -def generate_pack_info_vector_field_specifications(config, stencil, layout): +def generate_pack_info_field_specifications( + config, stencil, layout, vec_len=3): import collections import itertools field = ps.Field.create_generic( @@ -248,7 +249,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout): data_type_np[config.data_type.default_factory().c_name], index_dimensions=1, layout=layout, - index_shape=(3,) + index_shape=(vec_len,) ) q = len(stencil) coord = itertools.product(*[(-1, 0, 1)] * 3) @@ -257,7 +258,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout): else: dirs = tuple((i, j, k) for i, j, k in coord) spec = collections.defaultdict(set) - spec[dirs] = {field[0, 0, 0](i) for i in range(3)} + spec[dirs] = {field[0, 0, 0](i) for i in range(vec_len)} return spec diff --git a/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh b/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh new file mode 100644 index 0000000000..16cefedbae --- /dev/null +++ b/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2024 The ESPResSo project + * Copyright (C) 2024 The waLBerla project + * + * This file is part of ESPResSo. 
+ * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* section */ + +#if defined(__NVCC__) +{% for name in defines -%} +#define {{name}} {{defines_table["nvcc"][name]}} +{% endfor -%} +{% if pragmas["nvcc"] -%} +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic push +{% for pragma in pragmas["nvcc"] -%} +#pragma nv_{{pragma}} +{% endfor -%} +#else +#pragma push +{% for pragma in pragmas["nvcc"] -%} +#pragma {{pragma}} +{% endfor -%} +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +{% endif -%} +#elif defined(__clang__) +#if defined(__CUDA__) +#if defined(__CUDA_ARCH__) +// clang compiling CUDA code in device mode +{% for name in defines -%} +#define {{name}} {{defines_table["clang_dev"][name]}} +{% endfor -%} +{% if pragmas["clang_dev"] -%} +#pragma clang diagnostic push +{% for pragma in pragmas["clang_dev"] -%} +#pragma clang diagnostic ignored "{{pragma}}" +{% endfor -%} +{% endif -%} +#else +// clang compiling CUDA code in host mode +{% for name in defines -%} +#define {{name}} {{defines_table["clang_host"][name]}} +{% endfor -%} +{% if pragmas["clang_host"] -%} +#pragma clang diagnostic push +{% for pragma in pragmas["clang_host"] -%} +#pragma clang diagnostic ignored "{{pragma}}" +{% endfor -%} +{% endif -%} +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) +#elif defined(__GNUC__) or defined(__GNUG__) +{% for name in defines -%} +#define {{name}} 
{{defines_table["gcc"][name]}} +{% endfor -%} +{% if pragmas["gcc"] -%} +#pragma GCC diagnostic push +{% for pragma in pragmas["gcc"] -%} +#pragma GCC diagnostic ignored "{{pragma}}" +{% endfor -%} +{% endif -%} +#elif defined(_MSC_VER) +{% for name in defines -%} +#define {{name}} {{defines_table["msvc"][name]}} +{% endfor -%} +#else +{% for name in defines -%} +#define {{name}} {{defines_table["other"][name]}} +{% endfor -%} +#endif + +/* section */ + +#if defined(__NVCC__) +{% if pragmas["nvcc"] -%} +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic pop +#else +#pragma pop +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +{% endif -%} +#elif defined(__clang__) +#if defined(__CUDA__) +#if defined(__CUDA_ARCH__) +// clang compiling CUDA code in device mode +{% if pragmas["clang_dev"] -%} +#pragma clang diagnostic pop +{% endif -%} +#else +{% if pragmas["clang_host"] -%} +// clang compiling CUDA code in host mode +#pragma clang diagnostic pop +{% endif -%} +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) +#elif defined(__GNUC__) or defined(__GNUG__) +{% if pragmas["gcc"] -%} +#pragma GCC diagnostic pop +{% endif -%} +#endif diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 6f1fedae10..8986fed7b4 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -119,11 +119,10 @@ class LBWalberlaImpl : public LBWalberlaBase { using VectorField = field::GhostLayerField; template using PackInfo = field::communication::PackInfo; - template - using PackInfoStreaming = - std::conditional_t, - typename detail::KernelTrait::PackInfoPdf, - typename detail::KernelTrait::PackInfoVec>; + using PackInfoStreamingPdf = + typename detail::KernelTrait::PackInfoPdf; + using PackInfoStreamingVec = + typename detail::KernelTrait::PackInfoVec; template using RegularCommScheme = 
blockforest::communication::UniformBufferedScheme; @@ -134,14 +133,30 @@ class LBWalberlaImpl : public LBWalberlaBase { #if defined(__CUDACC__) template struct FieldTrait { + private: + static auto constexpr AT = lbmpy::Arch::GPU; + template + using MemcpyPackInfo = gpu::communication::MemcpyPackInfo; + + public: + template + class UniformGPUScheme + : public gpu::communication::UniformGPUScheme { + public: + explicit UniformGPUScheme(auto const &bf) + : gpu::communication::UniformGPUScheme( + bf, /* sendDirectlyFromGPU */ false, + /* useLocalCommunication */ false) {} + }; using PdfField = gpu::GPUField; using VectorField = gpu::GPUField; - template - using PackInfo = gpu::communication::MemcpyPackInfo; - template - using PackInfoStreaming = gpu::communication::MemcpyPackInfo; + template using PackInfo = MemcpyPackInfo; + using PackInfoStreamingPdf = + typename detail::KernelTrait::PackInfoPdf; + using PackInfoStreamingVec = + typename detail::KernelTrait::PackInfoVec; template - using RegularCommScheme = gpu::communication::UniformGPUScheme; + using RegularCommScheme = UniformGPUScheme; template using BoundaryCommScheme = blockforest::communication::UniformBufferedScheme; @@ -315,10 +330,6 @@ class LBWalberlaImpl : public LBWalberlaBase { template using PackInfo = typename FieldTrait::template PackInfo; - template - using PackInfoStreaming = - typename FieldTrait::template PackInfoStreaming; // communicators std::shared_ptr m_boundary_communicator; @@ -427,20 +438,22 @@ class LBWalberlaImpl : public LBWalberlaBase { } void setup_streaming_communicator() { - auto const setup = [this]() { + auto const setup = [this]() { auto const &blocks = m_lattice->get_blocks(); m_pdf_streaming_communicator = std::make_shared(blocks); m_pdf_streaming_communicator->addPackInfo( - std::make_shared(m_pdf_field_id)); + std::make_shared(m_pdf_field_id)); m_pdf_streaming_communicator->addPackInfo( - std::make_shared>( - m_last_applied_force_field_id)); + 
std::make_shared(m_last_applied_force_field_id)); }; + using FieldTrait = FieldTrait; + using PackInfoPdf = typename FieldTrait::PackInfoStreamingPdf; + using PackInfoVec = typename FieldTrait::PackInfoStreamingVec; if (m_has_boundaries or (m_collision_model and has_lees_edwards_bc())) { - setup.template operator()>(); + setup.template operator(), PackInfoVec>(); } else { - setup.template operator()>(); + setup.template operator()(); } } diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt index 434d968d52..eadeb04c2f 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt @@ -54,6 +54,10 @@ if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA) StreamSweepSinglePrecisionCUDA.cu InitialPDFsSetterDoublePrecisionCUDA.cu InitialPDFsSetterSinglePrecisionCUDA.cu + PackInfoPdfSinglePrecisionCUDA.cu + PackInfoPdfDoublePrecisionCUDA.cu + PackInfoVecSinglePrecisionCUDA.cu + PackInfoVecDoublePrecisionCUDA.cu Dynamic_UBB_double_precisionCUDA.cu Dynamic_UBB_single_precisionCUDA.cu) endif() diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu index 8d35b5d929..0fd77e065c 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu @@ -39,7 +39,7 @@ namespace lbm { #else #pragma push #pragma diag_suppress 177 // unused variable -#endif +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) #elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) @@ -58,8 +58,8 @@ namespace lbm { #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang 
diagnostic ignored "-Wconversion" #pragma clang diagnostic ignored "-Wsign-compare" -#endif -#endif +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) #elif defined(__GNUC__) or defined(__GNUG__) #define RESTRICT __restrict__ #pragma GCC diagnostic push @@ -128,7 +128,13 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_double_precisioncuda_ // NOLINTEND(readability-non-const-parameter*) -#if defined(__clang__) +#if defined(__NVCC__) +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic pop +#else +#pragma pop +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) // clang compiling CUDA code in device mode @@ -136,16 +142,10 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_double_precisioncuda_ #else // clang compiling CUDA code in host mode #pragma clang diagnostic pop -#endif -#endif +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) #elif defined(__GNUC__) or defined(__GNUG__) #pragma GCC diagnostic pop -#elif defined(__CUDACC__) -#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) -#pragma nv_diagnostic pop -#else -#pragma pop -#endif #endif void Dynamic_UBB_double_precisionCUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) { diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu index a046e6f9a4..4ed013d81e 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu @@ -39,7 +39,7 @@ namespace lbm { #else #pragma push #pragma diag_suppress 177 // unused variable -#endif +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) #elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) @@ -58,8 +58,8 @@ namespace 
lbm { #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wconversion" #pragma clang diagnostic ignored "-Wsign-compare" -#endif -#endif +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) #elif defined(__GNUC__) or defined(__GNUG__) #define RESTRICT __restrict__ #pragma GCC diagnostic push @@ -128,7 +128,13 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_ // NOLINTEND(readability-non-const-parameter*) -#if defined(__clang__) +#if defined(__NVCC__) +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic pop +#else +#pragma pop +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) // clang compiling CUDA code in device mode @@ -136,16 +142,10 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_ #else // clang compiling CUDA code in host mode #pragma clang diagnostic pop -#endif -#endif +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) #elif defined(__GNUC__) or defined(__GNUG__) #pragma GCC diagnostic pop -#elif defined(__CUDACC__) -#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) -#pragma nv_diagnostic pop -#else -#pragma pop -#endif #endif void Dynamic_UBB_single_precisionCUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) { diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp index 1ab45417dc..6503551664 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp @@ -24,8 +24,6 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" -#include - #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC 
diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu new file mode 100644 index 0000000000..5636dad6a3 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu @@ -0,0 +1,1423 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file PackInfoPdfDoublePrecisionCUDA.cpp +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "stencil/Directions.h" + +#include "PackInfoPdfDoublePrecisionCUDA.h" + +#define FUNC_PREFIX __global__ + +#if defined(__NVCC__) +#define RESTRICT __restrict__ +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 // unused variable +#else +#pragma push +#pragma diag_suppress 177 // unused variable +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#elif defined(__clang__) +#if defined(__CUDA__) +#if defined(__CUDA_ARCH__) +// clang compiling CUDA code in device mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#else +// clang compiling CUDA code in host mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) +#elif defined(__GNUC__) or defined(__GNUG__) +#define RESTRICT __restrict__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#elif defined(_MSC_VER) +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW { +static FUNC_PREFIX __launch_bounds__(256) void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const 
_stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_SW + +namespace internal_pack_BW { +static FUNC_PREFIX __launch_bounds__(256) void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_BW + +namespace internal_pack_W { +static FUNC_PREFIX __launch_bounds__(256) void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y 
* blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_W + +namespace internal_pack_TW { +static FUNC_PREFIX __launch_bounds__(256) void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y 
* blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_TW + +namespace internal_pack_NW { +static FUNC_PREFIX __launch_bounds__(256) void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_NW + +namespace internal_pack_BS { +static FUNC_PREFIX __launch_bounds__(256) void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + 
threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_BS + +namespace internal_pack_S { +static FUNC_PREFIX __launch_bounds__(256) void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * 
_stride_pdfs_3]; + } +} +} // namespace internal_pack_S + +namespace internal_pack_TS { +static FUNC_PREFIX __launch_bounds__(256) void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_TS + +namespace internal_pack_B { +static FUNC_PREFIX __launch_bounds__(256) void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * 
_size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_B + +namespace internal_pack_T { +static FUNC_PREFIX __launch_bounds__(256) void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * 
ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_T + +namespace internal_pack_BN { +static FUNC_PREFIX __launch_bounds__(256) void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_BN + +namespace internal_pack_N { +static FUNC_PREFIX __launch_bounds__(256) void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && 
blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_N + +namespace internal_pack_TN { +static FUNC_PREFIX __launch_bounds__(256) void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = 
blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_TN + +namespace internal_pack_SE { +static FUNC_PREFIX __launch_bounds__(256) void pack_SE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_SE + +namespace internal_pack_BE { +static FUNC_PREFIX __launch_bounds__(256) void pack_BE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z 
+ threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_BE + +namespace internal_pack_E { +static FUNC_PREFIX __launch_bounds__(256) void pack_E(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * 
_stride_pdfs_3]; + } +} +} // namespace internal_pack_E + +namespace internal_pack_TE { +static FUNC_PREFIX __launch_bounds__(256) void pack_TE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_TE + +namespace internal_pack_NE { +static FUNC_PREFIX __launch_bounds__(256) void pack_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_NE + +namespace internal_unpack_SW 
{ +static FUNC_PREFIX __launch_bounds__(256) void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_SW + +namespace internal_unpack_BW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BW + +namespace internal_unpack_W { +static FUNC_PREFIX __launch_bounds__(256) void unpack_W(double *RESTRICT 
const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_W + +namespace internal_unpack_TW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, 
int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TW + +namespace internal_unpack_NW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_NW + +namespace internal_unpack_BS { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if 
(blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BS + +namespace internal_unpack_S { +static FUNC_PREFIX __launch_bounds__(256) void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * 
ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_S + +namespace internal_unpack_TS { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TS + +namespace internal_unpack_B { +static FUNC_PREFIX __launch_bounds__(256) void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * 
blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_B + +namespace internal_unpack_T { +static FUNC_PREFIX __launch_bounds__(256) void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * 
_stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_T + +namespace internal_unpack_BN { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BN + +namespace internal_unpack_N { +static 
FUNC_PREFIX __launch_bounds__(256) void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_N +/* unpack_TN: each thread handles one cell (ctr_0, ctr_1, ctr_2) inside the _size_pdfs_0/1/2 bounds and copies the single buffer entry of that cell into _data_pdfs at offset 11 times _stride_pdfs_3. */ +namespace internal_unpack_TN { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const
_size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TN +/* unpack_SE: each thread handles one cell (ctr_0, ctr_1, ctr_2) inside the _size_pdfs_0/1/2 bounds and copies the single buffer entry of that cell into _data_pdfs at offset 10 times _stride_pdfs_3. */ +namespace internal_unpack_SE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_SE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_SE +/* unpack_BE: each thread handles one cell (ctr_0, ctr_1, ctr_2) inside the _size_pdfs_0/1/2 bounds and copies the single buffer entry of that cell into _data_pdfs at offset 18 times _stride_pdfs_3. */ +namespace internal_unpack_BE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1,
int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BE +/* unpack_E: each thread handles one cell (ctr_0, ctr_1, ctr_2) inside the _size_pdfs_0/1/2 bounds and copies the 5 consecutive buffer entries of that cell into _data_pdfs at offsets 10, 14, 18, 4 and 8 times _stride_pdfs_3. */ +namespace internal_unpack_E { +static FUNC_PREFIX __launch_bounds__(256) void unpack_E(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 *
ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_E +/* unpack_TE: each thread handles one cell (ctr_0, ctr_1, ctr_2) inside the _size_pdfs_0/1/2 bounds and copies the single buffer entry of that cell into _data_pdfs at offset 14 times _stride_pdfs_3. */ +namespace internal_unpack_TE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TE +/* unpack_NE: each thread handles one cell (ctr_0, ctr_1, ctr_2) inside the _size_pdfs_0/1/2 bounds and copies the single buffer entry of that cell into _data_pdfs at offset 8 times _stride_pdfs_3. */ +namespace internal_unpack_NE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 =
blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_NE + +void PackInfoPdfDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + double *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 
1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const 
int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) 
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + 
const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) 
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BE::pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) 
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + return; + } +} + +void PackInfoPdfDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + double *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 
1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + 
const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 
0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 
0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 
0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 
0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + return; + } +} + +uint_t PackInfoPdfDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) { /* Returns the byte count exchanged for direction dir: cells of the ghost region times values per cell times sizeof(double). */ + auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID); /* template argument restored (it had been stripped, leaving invalid syntax); unpack() reads double* data from this same field */ + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); /* fills ci with the ghost region for dir; NOTE(review): the literal 1 is presumably the region thickness, confirm against the waLBerla field API */ + + uint_t elementsPerCell = 0; + + switch (dir) { /* 5 values per cell for W, S, B, T, N, E; 1 value per cell for the twelve two-letter directions; 0 for anything else */ + case stencil::SW: + elementsPerCell = 1; + break; + + case stencil::BW: + elementsPerCell = 1; + break; + + case stencil::W: + elementsPerCell = 5; + break; + + case stencil::TW: + elementsPerCell = 1; + break; + + case stencil::NW: + elementsPerCell = 1; + break; + + case stencil::BS: + elementsPerCell = 1; + break; + + case stencil::S: + elementsPerCell = 5; + break; + + case stencil::TS: + elementsPerCell = 1; + break; + + case stencil::B: + elementsPerCell = 5; + break; + + case stencil::T: + elementsPerCell = 5; + break; + + case stencil::BN: + elementsPerCell = 1; + break; + + case stencil::N: + elementsPerCell = 5; + break; + + case stencil::TN: + elementsPerCell = 1; + break; + + case stencil::SE: + elementsPerCell = 1; + break; + + case stencil::BE: + elementsPerCell = 1; + break; + + case stencil::E: + elementsPerCell = 5; + break; + + case stencil::TE: + elementsPerCell = 1; + break; + + case stencil::NE: + elementsPerCell = 1; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(double); /* size in bytes, not in elements */ +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h new file mode 100644 index 0000000000..256f03be49 --- /dev/null +++ 
b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file PackInfoPdfDoublePrecisionCUDA.h +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" + +#include "domain_decomposition/IBlock.h" + +#include "stencil/Directions.h" + +#include "gpu/GPUField.h" +#include "gpu/GPUWrapper.h" +#include "gpu/communication/GeneratedGPUPackInfo.h" + +namespace walberla { +namespace pystencils { + +class PackInfoPdfDoublePrecisionCUDA + : public ::walberla::gpu::GeneratedGPUPackInfo { /* Generated pack info for the field registered under pdfsID: pack(), unpack() and size() are only declared here; communicateLocal() is defined inline and always aborts. */ +public: + PackInfoPdfDoublePrecisionCUDA(BlockDataID pdfsID_) : pdfsID(pdfsID_){}; /* only stores the block data ID */ + virtual ~PackInfoPdfDoublePrecisionCUDA() {} + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */, + 
IBlock * /* receiver */, + gpuStream_t /* stream */) override { /* not implemented: every call ends in WALBERLA_ABORT with the message below; all arguments are ignored */ + WALBERLA_ABORT("Local Communication not implemented yet for standard " + "PackInfos. To run your application turn of local " + "communication in the Communication class") + } + void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + uint_t size(stencil::Direction dir, IBlock *block) override; /* defined in the matching .cu file; returns a byte count */ + +private: + BlockDataID pdfsID; /* ID passed to block->getData(...) to fetch the pdfs field */ +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp index e55017ab21..b1ac86db8e 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp @@ -24,8 +24,6 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" -#include - #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu new file mode 100644 index 0000000000..51b2b40cd9 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu @@ -0,0 +1,1423 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \\file PackInfoPdfSinglePrecisionCUDA.cu +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "stencil/Directions.h" + +#include "PackInfoPdfSinglePrecisionCUDA.h" + +#define FUNC_PREFIX __global__ /* every generated kernel below is declared with FUNC_PREFIX, i.e. as a __global__ function */ + +#if defined(__NVCC__) /* this chain picks the compiler-specific spelling of RESTRICT and silences unused-variable diagnostics */ +#define RESTRICT __restrict__ +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 // unused variable +#else +#pragma push +#pragma diag_suppress 177 // unused variable +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#elif defined(__clang__) /* NOTE(review): in this branch RESTRICT is only defined when __CUDA__ is defined */ +#if defined(__CUDA__) +#if defined(__CUDA_ARCH__) +// clang compiling CUDA code in device mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#else +// clang compiling CUDA code in host mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) +#elif defined(__GNUC__) or defined(__GNUG__) +#define RESTRICT __restrict__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#elif defined(_MSC_VER) +#define RESTRICT __restrict +#else +#define RESTRICT /* unknown compiler: RESTRICT expands to nothing */ +#endif + +namespace walberla { +namespace 
pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW { /* pack_SW: one thread per cell; copies PDF entry 9 (offset 9 * _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; /* dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_SW + +namespace internal_pack_BW { /* pack_BW: one thread per cell; copies PDF entry 17 (offset 17 * _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; /* dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_BW + 
+namespace internal_pack_W { /* pack_W: one thread per cell; copies PDF entries 13, 17, 3, 7, 9 (multiples of _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; /* five consecutive buffer slots per cell, hence the factor 5 on the cell index */ + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_W + +namespace internal_pack_TW { /* pack_TW: one thread per cell; copies PDF entry 13 into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const 
_size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; /* pack_TW: PDF entry 13 into the dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_TW + +namespace internal_pack_NW { /* pack_NW: one thread per cell; copies PDF entry 7 (offset 7 * _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; /* dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_NW + +namespace internal_pack_BS { /* pack_BS: one thread per cell; copies PDF entry 16 into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const 
_stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; /* pack_BS: PDF entry 16 into the dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_BS + +namespace internal_pack_S { /* pack_S: one thread per cell; copies PDF entries 10, 12, 16, 2, 9 (multiples of _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; /* five consecutive buffer slots per cell, hence the factor 5 on the cell index */ + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * 
_stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; /* pack_S, slot 3 of 5 for this cell */ + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; /* pack_S, slot 4 of 5 for this cell */ + } +} +} // namespace internal_pack_S + +namespace internal_pack_TS { /* pack_TS: one thread per cell; copies PDF entry 12 (offset 12 * _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_TS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; /* dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_TS + +namespace internal_pack_B { /* pack_B: one thread per cell; copies PDF entries 15, 16, 17, 18, 6 (multiples of _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * 
blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; /* pack_B: five consecutive buffer slots per cell, hence the factor 5 on the cell index */ + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_B + +namespace internal_pack_T { /* pack_T: one thread per cell; copies PDF entries 11, 12, 13, 14, 5 (multiples of _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * 
ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; /* pack_T: five consecutive buffer slots per cell, hence the factor 5 on the cell index */ + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_T + +namespace internal_pack_BN { /* pack_BN: one thread per cell; copies PDF entry 15 (offset 15 * _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_BN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; /* dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_BN + +namespace 
internal_pack_N { /* pack_N: one thread per cell; copies PDF entries 1, 11, 15, 7, 8 (multiples of _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3]; /* five consecutive buffer slots per cell; the bare _stride_pdfs_3 term selects PDF entry 1 */ + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_N + +namespace internal_pack_TN { /* pack_TN: one thread per cell; copies PDF entry 11 into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t 
const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; /* pack_TN: PDF entry 11 into the dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_TN + +namespace internal_pack_SE { /* pack_SE: one thread per cell; copies PDF entry 10 (offset 10 * _stride_pdfs_3) into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_SE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { /* guard: threads outside the _size_pdfs_0/1/2 extents do nothing */ + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; /* dense buffer, one value per cell, ctr_0 runs fastest */ + } +} +} // namespace internal_pack_SE + +namespace internal_pack_BE { /* pack_BE: one thread per cell; copies PDF entry 18 into _data_buffer */ +static FUNC_PREFIX __launch_bounds__(256) void pack_BE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t 
const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_BE + +namespace internal_pack_E { +static FUNC_PREFIX __launch_bounds__(256) void pack_E(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; + 
_data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3]; + _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_E + +namespace internal_pack_TE { +static FUNC_PREFIX __launch_bounds__(256) void pack_TE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_TE + +namespace internal_pack_NE { +static FUNC_PREFIX __launch_bounds__(256) void pack_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + 
threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; + } +} +} // namespace internal_pack_NE + +namespace internal_unpack_SW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_SW + +namespace internal_unpack_BW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const 
int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BW + +namespace internal_unpack_W { +static FUNC_PREFIX __launch_bounds__(256) void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * 
ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_W + +namespace internal_unpack_TW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TW + +namespace internal_unpack_NW { +static FUNC_PREFIX __launch_bounds__(256) void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace 
internal_unpack_NW + +namespace internal_unpack_BS { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BS + +namespace internal_unpack_S { +static FUNC_PREFIX __launch_bounds__(256) void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * 
_stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_S + +namespace internal_unpack_TS { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TS + +namespace internal_unpack_B { +static FUNC_PREFIX __launch_bounds__(256) void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const 
_stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_B + +namespace internal_unpack_T { +static FUNC_PREFIX __launch_bounds__(256) void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + 
threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_T + +namespace internal_unpack_BN { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * 
blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BN + +namespace internal_unpack_N { +static FUNC_PREFIX __launch_bounds__(256) void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * 
_size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_N + +namespace internal_unpack_TN { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TN + +namespace internal_unpack_SE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_SE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; 
+ } +} +} // namespace internal_unpack_SE + +namespace internal_unpack_BE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_BE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_BE + +namespace internal_unpack_E { +static FUNC_PREFIX __launch_bounds__(256) void unpack_E(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + 
_stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; + } +} +} // namespace internal_unpack_E + +namespace internal_unpack_TE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_TE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_TE + +namespace internal_unpack_NE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const 
_size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; + } +} +} // namespace internal_unpack_NE + +void PackInfoPdfSinglePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + float *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = 
int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); 
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + 
const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); 
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + 
const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); 
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + 
const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_BE::pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); 
+ dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + 
const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + return; + } +} + +void PackInfoPdfSinglePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + float *buffer = reinterpret_cast(byte_buffer); + + auto pdfs = block->getData>(pdfsID); + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = 
int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::W: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NW: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 
0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BS: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::S: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TS: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::B: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 
0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::T: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BN: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::N: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TN: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) 
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::SE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::BE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::E: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * 
int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 
64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::TE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) 
+ const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? 
_size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + case stencil::NE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) + float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); + const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); + const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); + const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); + dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); + dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? 
_size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? 
(int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); + internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); + break; + } + + default: + return; + } +} + +uint_t PackInfoPdfSinglePrecisionCUDA::size(stencil::Direction dir, IBlock *block) { /* Byte count for direction dir: number of cells in the region filled by getGhostRegion, times the per-cell value count chosen below, times sizeof(float). */ + auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID); /* template argument restored after extraction stripped it; the field is accessed through float pointers elsewhere in this file */ + + CellInterval ci; + pdfs->getGhostRegion(dir, ci, 1, false); /* NOTE(review): the arguments 1 and false presumably select a one-cell-thick region without the full slice; confirm against gpu::GPUField */ + + uint_t elementsPerCell = 0; + + switch (dir) { /* 5 values per cell for W, S, B, T, N, E; 1 for each of the twelve two-letter directions; 0 for anything else (looks consistent with a D3Q19 stencil; confirm against the generator) */ + case stencil::SW: + elementsPerCell = 1; + break; + + case stencil::BW: + elementsPerCell = 1; + break; + + case stencil::W: + elementsPerCell = 5; + break; + + case stencil::TW: + elementsPerCell = 1; + break; + + case stencil::NW: + elementsPerCell = 1; + break; + + case stencil::BS: + elementsPerCell = 1; + break; + + case stencil::S: + elementsPerCell = 5; + break; + + case stencil::TS: + elementsPerCell = 1; + break; + + case stencil::B: + elementsPerCell = 5; + break; + + case stencil::T: + elementsPerCell = 5; + break; + + case stencil::BN: + elementsPerCell = 1; + break; + + case stencil::N: + elementsPerCell = 5; + break; + + case stencil::TN: + elementsPerCell = 1; + break; + + case stencil::SE: + elementsPerCell = 1; + break; + + case stencil::BE: + elementsPerCell = 1; + break; + + case stencil::E: + elementsPerCell = 5; + break; + + case stencil::TE: + elementsPerCell = 1; + break; + + case stencil::NE: + elementsPerCell = 1; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(float); /* result is in bytes, not in float elements */ +} + +} // namespace pystencils +} //
namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h new file mode 100644 index 0000000000..c6ee2782b9 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoPdfSinglePrecisionCUDA.h +//! 
\\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" + +#include "domain_decomposition/IBlock.h" + +#include "stencil/Directions.h" + +#include "gpu/GPUField.h" +#include "gpu/GPUWrapper.h" +#include "gpu/communication/GeneratedGPUPackInfo.h" + +namespace walberla { +namespace pystencils { + +class PackInfoPdfSinglePrecisionCUDA + : public ::walberla::gpu::GeneratedGPUPackInfo { +public: + PackInfoPdfSinglePrecisionCUDA(BlockDataID pdfsID_) : pdfsID(pdfsID_){}; + virtual ~PackInfoPdfSinglePrecisionCUDA() {} + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */, + IBlock * /* receiver */, + gpuStream_t /* stream */) override { + WALBERLA_ABORT("Local Communication not implemented yet for standard " + "PackInfos. 
To run your application turn of local " + "communication in the Communication class") + } + void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + uint_t size(stencil::Direction dir, IBlock *block) override; + +private: + BlockDataID pdfsID; +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp index 6cbf3cb98d..da91325e5e 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp @@ -24,8 +24,6 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" -#include - #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu new file mode 100644 index 0000000000..e9bae41971 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu @@ -0,0 +1,243 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecDoublePrecisionCUDA.cpp +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "stencil/Directions.h" + +#include "PackInfoVecDoublePrecisionCUDA.h" + +#define FUNC_PREFIX __global__ + +#if defined(__NVCC__) +#define RESTRICT __restrict__ +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 // unused variable +#else +#pragma push +#pragma diag_suppress 177 // unused variable +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#elif defined(__clang__) +#if defined(__CUDA__) +#if defined(__CUDA_ARCH__) +// clang compiling CUDA code in device mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#else +// clang compiling CUDA code in host mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) +#elif defined(__GNUC__) or defined(__GNUG__) +#define RESTRICT __restrict__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#elif defined(_MSC_VER) +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace 
pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE { +static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; + } +} +} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE + +namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, 
int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; + } +} +} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE + +void PackInfoVecDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + double *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + double *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + double *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? 
_size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))); + dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? 
_size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? 
_size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1))); + internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + return; + } +} + +void PackInfoVecDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + double *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + double *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + double *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), 
int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))); + dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? 
_size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0)))))))))) + 1))); + internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + return; + } +} + +uint_t PackInfoVecDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) { + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: + elementsPerCell = 3; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(double); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h new file mode 100644 index 0000000000..18884f6c9d --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecDoublePrecisionCUDA.h +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" + +#include "domain_decomposition/IBlock.h" + +#include "stencil/Directions.h" + +#include "gpu/GPUField.h" +#include "gpu/GPUWrapper.h" +#include "gpu/communication/GeneratedGPUPackInfo.h" + +namespace walberla { +namespace pystencils { + +class PackInfoVecDoublePrecisionCUDA + : public ::walberla::gpu::GeneratedGPUPackInfo { +public: + PackInfoVecDoublePrecisionCUDA(BlockDataID fieldID_) : fieldID(fieldID_){}; + virtual ~PackInfoVecDoublePrecisionCUDA() {} + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */, + IBlock * /* receiver */, + gpuStream_t /* stream */) override { + WALBERLA_ABORT("Local Communication not implemented yet for standard " + "PackInfos. 
To run your application turn of local " + "communication in the Communication class") + } + void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + uint_t size(stencil::Direction dir, IBlock *block) override; + +private: + BlockDataID fieldID; +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp index 3ddeee01b6..c3b718b2d4 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp @@ -24,8 +24,6 @@ #include "core/cell/CellInterval.h" #include "stencil/Directions.h" -#include - #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wfloat-equal" diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu new file mode 100644 index 0000000000..c38b9e669b --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu @@ -0,0 +1,243 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecSinglePrecisionCUDA.cpp +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "domain_decomposition/IBlock.h" +#include "stencil/Directions.h" + +#include "PackInfoVecSinglePrecisionCUDA.h" + +#define FUNC_PREFIX __global__ + +#if defined(__NVCC__) +#define RESTRICT __restrict__ +#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#pragma nv_diagnostic push +#pragma nv_diag_suppress 177 // unused variable +#else +#pragma push +#pragma diag_suppress 177 // unused variable +#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__) +#elif defined(__clang__) +#if defined(__CUDA__) +#if defined(__CUDA_ARCH__) +// clang compiling CUDA code in device mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#else +// clang compiling CUDA code in host mode +#define RESTRICT __restrict__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-variable" +#endif // defined(__CUDA_ARCH__) +#endif // defined(__CUDA__) +#elif defined(__GNUC__) or defined(__GNUG__) +#define RESTRICT __restrict__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#elif defined(_MSC_VER) +#define RESTRICT __restrict +#else +#define RESTRICT +#endif + +namespace walberla { +namespace 
pystencils { + +using walberla::cell::CellInterval; +using walberla::stencil::Direction; + +namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE { +static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; + _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; + } +} +} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE + +namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE { +static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t 
const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) { + if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) { + const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; + const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; + const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; + _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; + } +} +} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE + +void PackInfoVecSinglePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + float *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getSliceBeforeGhostLayer(dir, ci, 1, false); + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + float *RESTRICT _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + 
WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + float *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? 
_size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))); + dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? 
_size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? 
_size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1))); + internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + return; + } +} + +void PackInfoVecSinglePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { + float *buffer = reinterpret_cast(byte_buffer); + + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + auto communciationDirection = stencil::inverseDir[dir]; + + switch (communciationDirection) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: { + float *RESTRICT const _data_buffer = buffer; + WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers())) + WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers())) + float *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) + const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), 
int64_t(int64_c(ci.ySize()) + 0)) + const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0); + WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) + const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0); + const int64_t _stride_field_0 = int64_t(field->xStride()); + const int64_t _stride_field_1 = int64_t(field->yStride()); + const int64_t _stride_field_2 = int64_t(field->zStride()); + const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); + dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))); + dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? 
_size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0)))))))))) + 1))); + internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3); + break; + } + + default: + return; + } +} + +uint_t PackInfoVecSinglePrecisionCUDA::size(stencil::Direction dir, IBlock *block) { + auto field = block->getData>(fieldID); + + CellInterval ci; + field->getGhostRegion(dir, ci, 1, false); + + uint_t elementsPerCell = 0; + + switch (dir) { + case stencil::SW: + case stencil::BW: + case stencil::W: + case stencil::TW: + case stencil::NW: + case stencil::BS: + case stencil::S: + case stencil::TS: + case stencil::B: + case stencil::C: + case stencil::T: + case stencil::BN: + case stencil::N: + case stencil::TN: + case stencil::SE: + case stencil::BE: + case stencil::E: + case stencil::TE: + case stencil::NE: + elementsPerCell = 3; + break; + + default: + elementsPerCell = 0; + } + return ci.numCells() * elementsPerCell * sizeof(float); +} + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h new file mode 100644 index 0000000000..c1eb6d2be7 --- /dev/null +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h @@ -0,0 +1,64 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \\file PackInfoVecSinglePrecisionCUDA.h +//! \\author pystencils +//====================================================================================================================== + +// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, +// lbmpy_walberla/pystencils_walberla from waLBerla commit +// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f + +#pragma once + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" + +#include "domain_decomposition/IBlock.h" + +#include "stencil/Directions.h" + +#include "gpu/GPUField.h" +#include "gpu/GPUWrapper.h" +#include "gpu/communication/GeneratedGPUPackInfo.h" + +namespace walberla { +namespace pystencils { + +class PackInfoVecSinglePrecisionCUDA + : public ::walberla::gpu::GeneratedGPUPackInfo { +public: + PackInfoVecSinglePrecisionCUDA(BlockDataID fieldID_) : fieldID(fieldID_){}; + virtual ~PackInfoVecSinglePrecisionCUDA() {} + + void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */, + IBlock * /* receiver */, + gpuStream_t /* stream */) override { + WALBERLA_ABORT("Local Communication not implemented yet for standard " + "PackInfos. 
To run your application turn of local " + "communication in the Communication class") + } + void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block, + gpuStream_t stream) override; + uint_t size(stencil::Direction dir, IBlock *block) override; + +private: + BlockDataID fieldID; +}; + +} // namespace pystencils +} // namespace walberla diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh index f9dc9ae83d..a71202df38 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh +++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh @@ -29,6 +29,10 @@ #include "generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh" #include "generated_kernels/InitialPDFsSetterDoublePrecisionCUDA.h" #include "generated_kernels/InitialPDFsSetterSinglePrecisionCUDA.h" +#include "generated_kernels/PackInfoPdfDoublePrecisionCUDA.h" +#include "generated_kernels/PackInfoPdfSinglePrecisionCUDA.h" +#include "generated_kernels/PackInfoVecDoublePrecisionCUDA.h" +#include "generated_kernels/PackInfoVecSinglePrecisionCUDA.h" #include "generated_kernels/StreamSweepDoublePrecisionCUDA.h" #include "generated_kernels/StreamSweepSinglePrecisionCUDA.h" @@ -49,6 +53,8 @@ template <> struct KernelTrait { pystencils::CollideSweepDoublePrecisionLeesEdwardsCUDA; using StreamSweep = pystencils::StreamSweepDoublePrecisionCUDA; using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecisionCUDA; + using PackInfoPdf = pystencils::PackInfoPdfDoublePrecisionCUDA; + using PackInfoVec = pystencils::PackInfoVecDoublePrecisionCUDA; }; template <> struct KernelTrait { @@ -58,6 +64,8 @@ template <> struct KernelTrait { pystencils::CollideSweepSinglePrecisionLeesEdwardsCUDA; using StreamSweep = pystencils::StreamSweepSinglePrecisionCUDA; using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecisionCUDA; + using PackInfoPdf = pystencils::PackInfoPdfSinglePrecisionCUDA; + using PackInfoVec = 
pystencils::PackInfoVecSinglePrecisionCUDA; }; template <> struct BoundaryHandlingTrait { diff --git a/src/walberla_bridge/tests/CMakeLists.txt b/src/walberla_bridge/tests/CMakeLists.txt index 83a7d9d2ee..06342ac225 100644 --- a/src/walberla_bridge/tests/CMakeLists.txt +++ b/src/walberla_bridge/tests/CMakeLists.txt @@ -24,9 +24,11 @@ function(ESPRESSO_ADD_TEST) espresso_unit_test( SRC ${TEST_SRC} NAME ${TEST_NAME} NUM_PROC ${TEST_NUM_PROC} DEPENDS ${TEST_DEPENDS} espresso::walberla espresso::utils) + if(WALBERLA_BUILD_WITH_CUDA) + target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla_cuda) + endif() if(${TEST_SRC} MATCHES ".*\.cu$") - target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags - espresso::walberla_cuda) + target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags) else() target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cpp_flags) endif() diff --git a/testsuite/scripts/benchmarks/CMakeLists.txt b/testsuite/scripts/benchmarks/CMakeLists.txt index 47583fdb57..76ecaa4612 100644 --- a/testsuite/scripts/benchmarks/CMakeLists.txt +++ b/testsuite/scripts/benchmarks/CMakeLists.txt @@ -43,7 +43,7 @@ add_custom_target( benchmark_test(FILE test_lj.py) benchmark_test(FILE test_lb.py SUFFIX cpu) -# benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu") # TODO WALBERLA +benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu") benchmark_test(FILE test_p3m.py SUFFIX cpu) benchmark_test(FILE test_p3m.py SUFFIX gpu LABELS "gpu") benchmark_test(FILE test_ferrofluid.py)