From 0f6054dc1be34b23a411c5746120cf253a5919ef Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Thu, 22 Jun 2023 10:45:46 -0400 Subject: [PATCH 1/5] CarpetX: Support openPMD 0.14 --- CarpetX/src/io_openpmd.cxx | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/CarpetX/src/io_openpmd.cxx b/CarpetX/src/io_openpmd.cxx index 9b02c575f..c7de25a93 100644 --- a/CarpetX/src/io_openpmd.cxx +++ b/CarpetX/src/io_openpmd.cxx @@ -67,19 +67,27 @@ openPMD::Format get_format() { return openPMD::Format::HDF5; if (CCTK_EQUALS(openpmd_format, "ADIOS1")) return openPMD::Format::ADIOS1; +#if OPENPMDAPI_VERSION_GE(0, 15, 0) if (CCTK_EQUALS(openpmd_format, "ADIOS2_BP")) return openPMD::Format::ADIOS2_BP; if (CCTK_EQUALS(openpmd_format, "ADIOS2_BP4")) return openPMD::Format::ADIOS2_BP4; if (CCTK_EQUALS(openpmd_format, "ADIOS2_BP5")) return openPMD::Format::ADIOS2_BP5; +#else + if (CCTK_EQUALS(openpmd_format, "ADIOS2")) + return openPMD::Format::ADIOS2; +#endif if (CCTK_EQUALS(openpmd_format, "ADIOS2_SST")) return openPMD::Format::ADIOS2_SST; if (CCTK_EQUALS(openpmd_format, "ADIOS2_SSC")) return openPMD::Format::ADIOS2_SSC; if (CCTK_EQUALS(openpmd_format, "JSON")) return openPMD::Format::JSON; - CCTK_ERROR("Internal error"); + CCTK_VERROR("The openPMD format \"%s\" is not supported in version %d.%d.%d " + "of the openPMD_api library", + openpmd_format, OPENPMDAPI_VERSION_MAJOR, + OPENPMDAPI_VERSION_MINOR, OPENPMDAPI_VERSION_PATCH); } // - fileBased: One file per iteration. Needs templated file name to encode @@ -888,9 +896,12 @@ void carpetx_openpmd_t::InputOpenPMD(const cGH *const cctkGH, if (output_ghosts || intbox == extbox) { CCTK_REAL *const ptr = fab.dataPtr() + vi * np; - // record_components.at(vi).loadChunk(openPMD::shareRaw(ptr), - // start, count); +#if OPENPMDAPI_VERSION_GE(0, 15, 0) record_components.at(vi).loadChunkRaw(ptr, start, count); +#else + record_components.at(vi).loadChunk(openPMD::shareRaw(ptr), + start, count); +#endif } else { const int amrex_size = extbox.size(); @@ -1323,9 +1334,12 @@ void carpetx_openpmd_t::OutputOpenPMD(const cGH *const cctkGH, for (int vi = 0; vi < numvars; ++vi) { if (output_ghosts || intbox == extbox) { const CCTK_REAL *const ptr = fab.dataPtr() + vi * np; - // record_components.at(vi).storeChunk(openPMD::shareRaw(ptr), - // start, count); +#if OPENPMDAPI_VERSION_GE(0, 15, 0) record_components.at(vi).storeChunkRaw(ptr, start, count); +#else + record_components.at(vi).storeChunk(openPMD::shareRaw(ptr), + start, count); +#endif } else { std::shared_ptr ptr( new CCTK_REAL[np], std::default_delete()); From 40de80a44d2ff4a1c9d6afe4d8f95433e27ea4c1 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Tue, 27 Jun 2023 11:41:55 -0400 Subject: [PATCH 2/5] CarpetX: Correct grid spacing in openPMD output --- CarpetX/src/io_openpmd.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CarpetX/src/io_openpmd.cxx b/CarpetX/src/io_openpmd.cxx index c7de25a93..753e6ea6b 100644 --- a/CarpetX/src/io_openpmd.cxx +++ b/CarpetX/src/io_openpmd.cxx @@ -1227,7 +1227,7 @@ void carpetx_openpmd_t::OutputOpenPMD(const cGH *const cctkGH, mesh.setAxisLabels(reversed(std::vector{"x", "y", "z"})); mesh.setGridSpacing(to_vector( reversed(fmap([](auto x, auto y) { return x / CCTK_REAL(y); }, - rdomain.hi - rdomain.lo, idomain.shape())))); + rdomain.hi - rdomain.lo, idomain.shape() - 1)))); mesh.setGridGlobalOffset(to_vector(reversed(rdomain.lo))); mesh.setGridUnitSI(Unit::length); // const std::map unitDimension{ From 1ab8c93d4e08763bacb5beebd7f963f17199ca72 Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 28 Jun 2023 15:46:46 -0400 Subject: [PATCH 3/5] CarpetX: Pull GPU synchronization out of grid function loop --- CarpetX/src/driver.cxx | 2 +- CarpetX/src/schedule.cxx | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CarpetX/src/driver.cxx b/CarpetX/src/driver.cxx index 9dc06f098..699682a03 100644 --- a/CarpetX/src/driver.cxx +++ b/CarpetX/src/driver.cxx @@ -1511,7 +1511,7 @@ void GHExt::PatchData::LevelData::GroupData::apply_boundary_conditions( if (!gdomain.contains(box)) BoundaryCondition(*this, box, dest).apply(); }); - synchronize(); + // synchronize(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/CarpetX/src/schedule.cxx b/CarpetX/src/schedule.cxx index 1510804df..60478d185 100644 --- a/CarpetX/src/schedule.cxx +++ b/CarpetX/src/schedule.cxx @@ -2359,14 +2359,17 @@ int SyncGroupsByDirI(const cGH *restrict cctkGH, int numgroups, } // for gi }); + synchronize(); for (auto task1 : tasks1) task1(); tasks1.clear(); + synchronize(); for (auto task2 : tasks2) task2(); tasks2.clear(); + synchronize(); if (CCTK_IsFunctionAliased("MultiPatch_Interpolate")) { std::vector cactusvarinds; From 56c63af49b0b6dc237b76b99906dda3a16d1b47b Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 28 Jun 2023 20:44:16 -0400 Subject: [PATCH 4/5] CarpetX: Use fewer kernels to apply boundary conditions --- CarpetX/src/boundaries_impl.hxx | 233 +++++++++++++++++--------------- 1 file changed, 127 insertions(+), 106 deletions(-) diff --git a/CarpetX/src/boundaries_impl.hxx b/CarpetX/src/boundaries_impl.hxx index 7ea9a6290..fd313da98 100644 --- a/CarpetX/src/boundaries_impl.hxx +++ b/CarpetX/src/boundaries_impl.hxx @@ -3,6 +3,7 @@ #include "boundaries.hxx" +#include #include namespace CarpetX { @@ -219,7 +220,9 @@ void BoundaryCondition::apply_on_face_symbcxyz( // TODO: Move loop over components to the far outside + constexpr int maxncomps = 10; const int ncomps = dest.nComp(); + assert(ncomps <= maxncomps); // Periodic symmetries should have been translated to `none` static_assert(!any(symmetries == symmetry_t::periodic), ""); @@ -235,25 +238,30 @@ void BoundaryCondition::apply_on_face_symbcxyz( // If we can apply Dirichlet boundary conditions, do so. // They are the easiest and cheapest. - for (int comp = 0; comp < ncomps; ++comp) { - const CCTK_REAL dirichlet_value = groupdata.dirichlet_values.at(comp); - Loop::GF3D2 var(layout, destptr + comp * layout.np); - loop_region( - [ + std::array dirichlet_values; + for (int comp = 0; comp < ncomps; ++comp) + dirichlet_values[comp] = groupdata.dirichlet_values.at(comp); + + loop_region( + [ #ifdef CCTK_DEBUG - amin = amin, amax = amax, + amin = amin, amax = amax, #endif - dirichlet_value, - var] CCTK_DEVICE(const Arith::vect &dst) - CCTK_ATTRIBUTE_ALWAYS_INLINE { + dirichlet_values, ncomps, destptr = destptr, + layout = layout] CCTK_DEVICE(const Arith::vect &dst) + CCTK_ATTRIBUTE_ALWAYS_INLINE { #ifdef CCTK_DEBUG - for (int d = 0; d < dim; ++d) - assert(dst[d] >= amin[d] && dst[d] < amax[d]); + for (int d = 0; d < dim; ++d) + assert(dst[d] >= amin[d] && dst[d] < amax[d]); #endif + for (int comp = 0; comp < ncomps; ++comp) { + const CCTK_REAL dirichlet_value = dirichlet_values[comp]; + const Loop::GF3D2 var(layout, + destptr + comp * layout.np); var.store(dst, dirichlet_value); - }, - bmin, bmax); - } + } + }, + bmin, bmax); } else { // This is the generic case for applying boundary conditions. @@ -320,115 +328,128 @@ void BoundaryCondition::apply_on_face_symbcxyz( } } - for (int comp = 0; comp < ncomps; ++comp) { - const CCTK_REAL robin_value = groupdata.robin_values.at(comp); + std::array robin_values; + for (int comp = 0; comp < ncomps; ++comp) + robin_values[comp] = groupdata.robin_values.at(comp); + std::array reflection_parities; + for (int comp = 0; comp < ncomps; ++comp) { CCTK_REAL reflection_parity = +1; for (int d = 0; d < dim; ++d) if (symmetries[d] == symmetry_t::reflection) reflection_parity *= groupdata.parities.at(comp).at(d); using std::fabs; assert(fabs(reflection_parity) == 1); + reflection_parities[comp] = reflection_parity; + } - Loop::GF3D2 var(layout, destptr + comp * layout.np); - loop_region( - [ + loop_region( + [ #ifdef CCTK_DEBUG - amin = amin, amax = amax, dmin = dmin, dmax = dmax, + amin = amin, amax = amax, dmin = dmin, dmax = dmax, #endif - neumann_source, linear_extrapolation_source, robin_source, - robin_value, reflection_offset, reflection_parity, xmin = xmin, - dx = dx, var] CCTK_DEVICE(const Arith::vect - &dst) CCTK_ATTRIBUTE_ALWAYS_INLINE { - constexpr Arith::vect inormal{NI, NJ, NK}; - constexpr Arith::vect boundaries{BCI, BCJ, BCK}; - constexpr Arith::vect symmetries{SCI, SCJ, SCK}; - - // `src` is the point at which we are looking to determine - // the boundary value - Arith::vect src = dst; - // `delta` (if nonzero) describes a second point at which - // we are looking, so that we can calculate a gradient for - // the boundary value - Arith::vect delta{0, 0, 0}; - for (int d = 0; d < dim; ++d) { - if (boundaries[d] == boundary_t::linear_extrapolation) { - // Same slope: - // f'(0) = f'(h) - // f(h) - f(0) = f(2h) - f(h) - // f(0) = 2 f(h) - f(2h) - // f(0) is the boundary point - src[d] = linear_extrapolation_source[d]; - delta[d] = -inormal[d]; - } else if (boundaries[d] == boundary_t::neumann) { - // Same value: - // f(0) = f(h) - // f(0) is the boundary point - src[d] = neumann_source[d]; - } else if (boundaries[d] == boundary_t::robin) { - // Robin condition, specialized to 1/r fall-off: - // f(r) = finf + C/r - // Determine fall-off constant `C`: - // C = r * (f(r) - finf) - // Solve for value at boundary: - // f(r+h) = finf + C / (r + h) - // = finf + r / (r + h) * (f(r) - finf) - // Rewrite using Cartesian coordinates: - // C = |x| * (f(x) - finf) - // f(x') = finf + C / |x'| - // = finf + |x| / |x'| * (f(x) - finf) - // f(x') is the boundary point - src[d] = robin_source[d]; - } else if (symmetries[d] == symmetry_t::reflection) { - src[d] = reflection_offset[d] - dst[d]; - } else if (symmetries[d] == symmetry_t::none && - boundaries[d] == boundary_t::none) { - // this direction is not a boundary; do nothing - } else { - // std::cerr << " dst=" << dst << " d=" << d - // << " boundaries=" << boundaries - // << " symmetries=" << symmetries << - // "\n"; - assert(0); + neumann_source, linear_extrapolation_source, robin_source, + robin_values, reflection_offset, reflection_parities, xmin = xmin, + dx = dx, ncomps, destptr = destptr, + layout = layout] CCTK_DEVICE(const Arith::vect &dst) + CCTK_ATTRIBUTE_ALWAYS_INLINE { + constexpr Arith::vect inormal{NI, NJ, NK}; + constexpr Arith::vect boundaries{BCI, BCJ, BCK}; + constexpr Arith::vect symmetries{SCI, SCJ, SCK}; + + // `src` is the point at which we are looking to determine + // the boundary value + Arith::vect src = dst; + // `delta` (if nonzero) describes a second point at which + // we are looking, so that we can calculate a gradient for + // the boundary value + Arith::vect delta{0, 0, 0}; + for (int d = 0; d < dim; ++d) { + if (boundaries[d] == boundary_t::linear_extrapolation) { + // Same slope: + // f'(0) = f'(h) + // f(h) - f(0) = f(2h) - f(h) + // f(0) = 2 f(h) - f(2h) + // f(0) is the boundary point + src[d] = linear_extrapolation_source[d]; + delta[d] = -inormal[d]; + } else if (boundaries[d] == boundary_t::neumann) { + // Same value: + // f(0) = f(h) + // f(0) is the boundary point + src[d] = neumann_source[d]; + } else if (boundaries[d] == boundary_t::robin) { + // Robin condition, specialized to 1/r fall-off: + // f(r) = finf + C/r + // Determine fall-off constant `C`: + // C = r * (f(r) - finf) + // Solve for value at boundary: + // f(r+h) = finf + C / (r + h) + // = finf + r / (r + h) * (f(r) - finf) + // Rewrite using Cartesian coordinates: + // C = |x| * (f(x) - finf) + // f(x') = finf + C / |x'| + // = finf + |x| / |x'| * (f(x) - finf) + // f(x') is the boundary point + src[d] = robin_source[d]; + } else if (symmetries[d] == symmetry_t::reflection) { + src[d] = reflection_offset[d] - dst[d]; + } else if (symmetries[d] == symmetry_t::none && + boundaries[d] == boundary_t::none) { + // this direction is not a boundary; do nothing + } else { + // std::cerr << " dst=" << dst << " d=" << d + // << " boundaries=" << boundaries + // << " symmetries=" << symmetries << + // "\n"; + assert(0); + } } - } #ifdef CCTK_DEBUG - for (int d = 0; d < dim; ++d) - assert(src[d] >= dmin[d] && src[d] < dmax[d]); - assert(!all(src == dst)); + for (int d = 0; d < dim; ++d) + assert(src[d] >= dmin[d] && src[d] < dmax[d]); + assert(!all(src == dst)); #endif - CCTK_REAL val = var(src); - if constexpr (any(boundaries == boundary_t::robin)) { - for (int d = 0; d < dim; ++d) { - if (boundaries[d] == boundary_t::robin) { + + for (int comp = 0; comp < ncomps; ++comp) { + const CCTK_REAL robin_value = robin_values[comp]; + const CCTK_REAL reflection_parity = reflection_parities[comp]; + const Loop::GF3D2 var(layout, + destptr + comp * layout.np); + + CCTK_REAL val = var(src); + if constexpr (any(boundaries == boundary_t::robin)) { + for (int d = 0; d < dim; ++d) { + if (boundaries[d] == boundary_t::robin) { + using std::sqrt; + // boundary point + const auto xb = xmin + dst * dx; + const auto rb = sqrt(sum(pow2(xb))); + // interior point + const auto xi = xmin + src * dx; + const auto ri = sqrt(sum(pow2(xi))); + const auto q = ri / rb; + val = robin_value + q * (val - robin_value); + } + } + } + if constexpr (any(boundaries == + boundary_t::linear_extrapolation)) { + // Calculate gradient + const CCTK_REAL grad = val - var(src + delta); using std::sqrt; - // boundary point - const auto xb = xmin + dst * dx; - const auto rb = sqrt(sum(pow2(xb))); - // interior point - const auto xi = xmin + src * dx; - const auto ri = sqrt(sum(pow2(xi))); - const auto q = ri / rb; - val = robin_value + q * (val - robin_value); + val += sqrt(sum(pow2(dst - src)) / sum(pow2(delta))) * grad; } - } - } - if constexpr (any(boundaries == boundary_t::linear_extrapolation)) { - // Calculate gradient - const CCTK_REAL grad = val - var(src + delta); - using std::sqrt; - val += sqrt(sum(pow2(dst - src)) / sum(pow2(delta))) * grad; - } #ifdef CCTK_DEBUG - for (int d = 0; d < dim; ++d) - assert(dst[d] >= amin[d] && dst[d] < amax[d]); + for (int d = 0; d < dim; ++d) + assert(dst[d] >= amin[d] && dst[d] < amax[d]); #endif - if constexpr (any(symmetries == symmetry_t::reflection)) - val *= reflection_parity; - var.store(dst, val); - }, - bmin, bmax); - } + if constexpr (any(symmetries == symmetry_t::reflection)) + val *= reflection_parity; + var.store(dst, val); + } + }, + bmin, bmax); } } From ebfc590294fdf3280892362b105bbb7ac4821eac Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Thu, 29 Jun 2023 07:22:10 -0400 Subject: [PATCH 5/5] CarpetX: Do not synchronize GPU after prolongation stencils --- CarpetX/src/prolongate_3d_rf2_impl.hxx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CarpetX/src/prolongate_3d_rf2_impl.hxx b/CarpetX/src/prolongate_3d_rf2_impl.hxx index c2a5941c5..a04fbacfe 100644 --- a/CarpetX/src/prolongate_3d_rf2_impl.hxx +++ b/CarpetX/src/prolongate_3d_rf2_impl.hxx @@ -1468,10 +1468,10 @@ void prolongate_3d_rf2