Skip to content

Commit

Permalink
Merge pull request #153 from eschnett/eschnett/crusher
Browse files Browse the repository at this point in the history
Incorporate performance improvements
  • Loading branch information
eschnett authored Jul 3, 2023
2 parents 3391793 + ebfc590 commit b606009
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 117 deletions.
233 changes: 127 additions & 106 deletions CarpetX/src/boundaries_impl.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "boundaries.hxx"

#include <array>
#include <type_traits>

namespace CarpetX {
Expand Down Expand Up @@ -219,7 +220,9 @@ void BoundaryCondition::apply_on_face_symbcxyz(

// TODO: Move loop over components to the far outside

constexpr int maxncomps = 10;
const int ncomps = dest.nComp();
assert(ncomps <= maxncomps);

// Periodic symmetries should have been translated to `none`
static_assert(!any(symmetries == symmetry_t::periodic), "");
Expand All @@ -235,25 +238,30 @@ void BoundaryCondition::apply_on_face_symbcxyz(
// If we can apply Dirichlet boundary conditions, do so.
// They are the easiest and cheapest.

for (int comp = 0; comp < ncomps; ++comp) {
const CCTK_REAL dirichlet_value = groupdata.dirichlet_values.at(comp);
Loop::GF3D2<CCTK_REAL> var(layout, destptr + comp * layout.np);
loop_region(
[
std::array<CCTK_REAL, maxncomps> dirichlet_values;
for (int comp = 0; comp < ncomps; ++comp)
dirichlet_values[comp] = groupdata.dirichlet_values.at(comp);

loop_region(
[
#ifdef CCTK_DEBUG
amin = amin, amax = amax,
amin = amin, amax = amax,
#endif
dirichlet_value,
var] CCTK_DEVICE(const Arith::vect<int, dim> &dst)
CCTK_ATTRIBUTE_ALWAYS_INLINE {
dirichlet_values, ncomps, destptr = destptr,
layout = layout] CCTK_DEVICE(const Arith::vect<int, dim> &dst)
CCTK_ATTRIBUTE_ALWAYS_INLINE {
#ifdef CCTK_DEBUG
for (int d = 0; d < dim; ++d)
assert(dst[d] >= amin[d] && dst[d] < amax[d]);
for (int d = 0; d < dim; ++d)
assert(dst[d] >= amin[d] && dst[d] < amax[d]);
#endif
for (int comp = 0; comp < ncomps; ++comp) {
const CCTK_REAL dirichlet_value = dirichlet_values[comp];
const Loop::GF3D2<CCTK_REAL> var(layout,
destptr + comp * layout.np);
var.store(dst, dirichlet_value);
},
bmin, bmax);
}
}
},
bmin, bmax);

} else {
// This is the generic case for applying boundary conditions.
Expand Down Expand Up @@ -320,115 +328,128 @@ void BoundaryCondition::apply_on_face_symbcxyz(
}
}

for (int comp = 0; comp < ncomps; ++comp) {
const CCTK_REAL robin_value = groupdata.robin_values.at(comp);
std::array<CCTK_REAL, maxncomps> robin_values;
for (int comp = 0; comp < ncomps; ++comp)
robin_values[comp] = groupdata.robin_values.at(comp);

std::array<CCTK_REAL, maxncomps> reflection_parities;
for (int comp = 0; comp < ncomps; ++comp) {
CCTK_REAL reflection_parity = +1;
for (int d = 0; d < dim; ++d)
if (symmetries[d] == symmetry_t::reflection)
reflection_parity *= groupdata.parities.at(comp).at(d);
using std::fabs;
assert(fabs(reflection_parity) == 1);
reflection_parities[comp] = reflection_parity;
}

Loop::GF3D2<CCTK_REAL> var(layout, destptr + comp * layout.np);
loop_region(
[
loop_region(
[
#ifdef CCTK_DEBUG
amin = amin, amax = amax, dmin = dmin, dmax = dmax,
amin = amin, amax = amax, dmin = dmin, dmax = dmax,
#endif
neumann_source, linear_extrapolation_source, robin_source,
robin_value, reflection_offset, reflection_parity, xmin = xmin,
dx = dx, var] CCTK_DEVICE(const Arith::vect<int, dim>
&dst) CCTK_ATTRIBUTE_ALWAYS_INLINE {
constexpr Arith::vect<int, dim> inormal{NI, NJ, NK};
constexpr Arith::vect<boundary_t, dim> boundaries{BCI, BCJ, BCK};
constexpr Arith::vect<symmetry_t, dim> symmetries{SCI, SCJ, SCK};

// `src` is the point at which we are looking to determine
// the boundary value
Arith::vect<int, dim> src = dst;
// `delta` (if nonzero) describes a second point at which
// we are looking, so that we can calculate a gradient for
// the boundary value
Arith::vect<int, dim> delta{0, 0, 0};
for (int d = 0; d < dim; ++d) {
if (boundaries[d] == boundary_t::linear_extrapolation) {
// Same slope:
// f'(0) = f'(h)
// f(h) - f(0) = f(2h) - f(h)
// f(0) = 2 f(h) - f(2h)
// f(0) is the boundary point
src[d] = linear_extrapolation_source[d];
delta[d] = -inormal[d];
} else if (boundaries[d] == boundary_t::neumann) {
// Same value:
// f(0) = f(h)
// f(0) is the boundary point
src[d] = neumann_source[d];
} else if (boundaries[d] == boundary_t::robin) {
// Robin condition, specialized to 1/r fall-off:
// f(r) = finf + C/r
// Determine fall-off constant `C`:
// C = r * (f(r) - finf)
// Solve for value at boundary:
// f(r+h) = finf + C / (r + h)
// = finf + r / (r + h) * (f(r) - finf)
// Rewrite using Cartesian coordinates:
// C = |x| * (f(x) - finf)
// f(x') = finf + C / |x'|
// = finf + |x| / |x'| * (f(x) - finf)
// f(x') is the boundary point
src[d] = robin_source[d];
} else if (symmetries[d] == symmetry_t::reflection) {
src[d] = reflection_offset[d] - dst[d];
} else if (symmetries[d] == symmetry_t::none &&
boundaries[d] == boundary_t::none) {
// this direction is not a boundary; do nothing
} else {
// std::cerr << " dst=" << dst << " d=" << d
// << " boundaries=" << boundaries
// << " symmetries=" << symmetries <<
// "\n";
assert(0);
neumann_source, linear_extrapolation_source, robin_source,
robin_values, reflection_offset, reflection_parities, xmin = xmin,
dx = dx, ncomps, destptr = destptr,
layout = layout] CCTK_DEVICE(const Arith::vect<int, dim> &dst)
CCTK_ATTRIBUTE_ALWAYS_INLINE {
constexpr Arith::vect<int, dim> inormal{NI, NJ, NK};
constexpr Arith::vect<boundary_t, dim> boundaries{BCI, BCJ, BCK};
constexpr Arith::vect<symmetry_t, dim> symmetries{SCI, SCJ, SCK};

// `src` is the point at which we are looking to determine
// the boundary value
Arith::vect<int, dim> src = dst;
// `delta` (if nonzero) describes a second point at which
// we are looking, so that we can calculate a gradient for
// the boundary value
Arith::vect<int, dim> delta{0, 0, 0};
for (int d = 0; d < dim; ++d) {
if (boundaries[d] == boundary_t::linear_extrapolation) {
// Same slope:
// f'(0) = f'(h)
// f(h) - f(0) = f(2h) - f(h)
// f(0) = 2 f(h) - f(2h)
// f(0) is the boundary point
src[d] = linear_extrapolation_source[d];
delta[d] = -inormal[d];
} else if (boundaries[d] == boundary_t::neumann) {
// Same value:
// f(0) = f(h)
// f(0) is the boundary point
src[d] = neumann_source[d];
} else if (boundaries[d] == boundary_t::robin) {
// Robin condition, specialized to 1/r fall-off:
// f(r) = finf + C/r
// Determine fall-off constant `C`:
// C = r * (f(r) - finf)
// Solve for value at boundary:
// f(r+h) = finf + C / (r + h)
// = finf + r / (r + h) * (f(r) - finf)
// Rewrite using Cartesian coordinates:
// C = |x| * (f(x) - finf)
// f(x') = finf + C / |x'|
// = finf + |x| / |x'| * (f(x) - finf)
// f(x') is the boundary point
src[d] = robin_source[d];
} else if (symmetries[d] == symmetry_t::reflection) {
src[d] = reflection_offset[d] - dst[d];
} else if (symmetries[d] == symmetry_t::none &&
boundaries[d] == boundary_t::none) {
// this direction is not a boundary; do nothing
} else {
// std::cerr << " dst=" << dst << " d=" << d
// << " boundaries=" << boundaries
// << " symmetries=" << symmetries <<
// "\n";
assert(0);
}
}
}
#ifdef CCTK_DEBUG
for (int d = 0; d < dim; ++d)
assert(src[d] >= dmin[d] && src[d] < dmax[d]);
assert(!all(src == dst));
for (int d = 0; d < dim; ++d)
assert(src[d] >= dmin[d] && src[d] < dmax[d]);
assert(!all(src == dst));
#endif
CCTK_REAL val = var(src);
if constexpr (any(boundaries == boundary_t::robin)) {
for (int d = 0; d < dim; ++d) {
if (boundaries[d] == boundary_t::robin) {

for (int comp = 0; comp < ncomps; ++comp) {
const CCTK_REAL robin_value = robin_values[comp];
const CCTK_REAL reflection_parity = reflection_parities[comp];
const Loop::GF3D2<CCTK_REAL> var(layout,
destptr + comp * layout.np);

CCTK_REAL val = var(src);
if constexpr (any(boundaries == boundary_t::robin)) {
for (int d = 0; d < dim; ++d) {
if (boundaries[d] == boundary_t::robin) {
using std::sqrt;
// boundary point
const auto xb = xmin + dst * dx;
const auto rb = sqrt(sum(pow2(xb)));
// interior point
const auto xi = xmin + src * dx;
const auto ri = sqrt(sum(pow2(xi)));
const auto q = ri / rb;
val = robin_value + q * (val - robin_value);
}
}
}
if constexpr (any(boundaries ==
boundary_t::linear_extrapolation)) {
// Calculate gradient
const CCTK_REAL grad = val - var(src + delta);
using std::sqrt;
// boundary point
const auto xb = xmin + dst * dx;
const auto rb = sqrt(sum(pow2(xb)));
// interior point
const auto xi = xmin + src * dx;
const auto ri = sqrt(sum(pow2(xi)));
const auto q = ri / rb;
val = robin_value + q * (val - robin_value);
val += sqrt(sum(pow2(dst - src)) / sum(pow2(delta))) * grad;
}
}
}
if constexpr (any(boundaries == boundary_t::linear_extrapolation)) {
// Calculate gradient
const CCTK_REAL grad = val - var(src + delta);
using std::sqrt;
val += sqrt(sum(pow2(dst - src)) / sum(pow2(delta))) * grad;
}
#ifdef CCTK_DEBUG
for (int d = 0; d < dim; ++d)
assert(dst[d] >= amin[d] && dst[d] < amax[d]);
for (int d = 0; d < dim; ++d)
assert(dst[d] >= amin[d] && dst[d] < amax[d]);
#endif
if constexpr (any(symmetries == symmetry_t::reflection))
val *= reflection_parity;
var.store(dst, val);
},
bmin, bmax);
}
if constexpr (any(symmetries == symmetry_t::reflection))
val *= reflection_parity;
var.store(dst, val);
}
},
bmin, bmax);
}
}

Expand Down
2 changes: 1 addition & 1 deletion CarpetX/src/driver.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,7 @@ void GHExt::PatchData::LevelData::GroupData::apply_boundary_conditions(
if (!gdomain.contains(box))
BoundaryCondition(*this, box, dest).apply();
});
synchronize();
// synchronize();
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
26 changes: 20 additions & 6 deletions CarpetX/src/io_openpmd.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -67,19 +67,27 @@ openPMD::Format get_format() {
return openPMD::Format::HDF5;
if (CCTK_EQUALS(openpmd_format, "ADIOS1"))
return openPMD::Format::ADIOS1;
#if OPENPMDAPI_VERSION_GE(0, 15, 0)
if (CCTK_EQUALS(openpmd_format, "ADIOS2_BP"))
return openPMD::Format::ADIOS2_BP;
if (CCTK_EQUALS(openpmd_format, "ADIOS2_BP4"))
return openPMD::Format::ADIOS2_BP4;
if (CCTK_EQUALS(openpmd_format, "ADIOS2_BP5"))
return openPMD::Format::ADIOS2_BP5;
#else
if (CCTK_EQUALS(openpmd_format, "ADIOS2"))
return openPMD::Format::ADIOS2;
#endif
if (CCTK_EQUALS(openpmd_format, "ADIOS2_SST"))
return openPMD::Format::ADIOS2_SST;
if (CCTK_EQUALS(openpmd_format, "ADIOS2_SSC"))
return openPMD::Format::ADIOS2_SSC;
if (CCTK_EQUALS(openpmd_format, "JSON"))
return openPMD::Format::JSON;
CCTK_ERROR("Internal error");
CCTK_VERROR("The openPMD format \"%s\" is not supported in version %d.%d.%d "
"of the openPMD_api library",
openpmd_format, OPENPMDAPI_VERSION_MAJOR,
OPENPMDAPI_VERSION_MINOR, OPENPMDAPI_VERSION_PATCH);
}

// - fileBased: One file per iteration. Needs templated file name to encode
Expand Down Expand Up @@ -888,9 +896,12 @@ void carpetx_openpmd_t::InputOpenPMD(const cGH *const cctkGH,

if (output_ghosts || intbox == extbox) {
CCTK_REAL *const ptr = fab.dataPtr() + vi * np;
// record_components.at(vi).loadChunk(openPMD::shareRaw(ptr),
// start, count);
#if OPENPMDAPI_VERSION_GE(0, 15, 0)
record_components.at(vi).loadChunkRaw(ptr, start, count);
#else
record_components.at(vi).loadChunk(openPMD::shareRaw(ptr),
start, count);
#endif

} else {
const int amrex_size = extbox.size();
Expand Down Expand Up @@ -1216,7 +1227,7 @@ void carpetx_openpmd_t::OutputOpenPMD(const cGH *const cctkGH,
mesh.setAxisLabels(reversed(std::vector<std::string>{"x", "y", "z"}));
mesh.setGridSpacing(to_vector<CCTK_REAL>(
reversed(fmap([](auto x, auto y) { return x / CCTK_REAL(y); },
rdomain.hi - rdomain.lo, idomain.shape()))));
rdomain.hi - rdomain.lo, idomain.shape() - 1))));
mesh.setGridGlobalOffset(to_vector<double>(reversed(rdomain.lo)));
mesh.setGridUnitSI(Unit::length);
// const std::map<openPMD::UnitDimension, double> unitDimension{
Expand Down Expand Up @@ -1323,9 +1334,12 @@ void carpetx_openpmd_t::OutputOpenPMD(const cGH *const cctkGH,
for (int vi = 0; vi < numvars; ++vi) {
if (output_ghosts || intbox == extbox) {
const CCTK_REAL *const ptr = fab.dataPtr() + vi * np;
// record_components.at(vi).storeChunk(openPMD::shareRaw(ptr),
// start, count);
#if OPENPMDAPI_VERSION_GE(0, 15, 0)
record_components.at(vi).storeChunkRaw(ptr, start, count);
#else
record_components.at(vi).storeChunk(openPMD::shareRaw(ptr),
start, count);
#endif
} else {
std::shared_ptr<CCTK_REAL> ptr(
new CCTK_REAL[np], std::default_delete<CCTK_REAL[]>());
Expand Down
8 changes: 4 additions & 4 deletions CarpetX/src/prolongate_3d_rf2_impl.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1468,10 +1468,10 @@ void prolongate_3d_rf2<CENTI, CENTJ, CENTK, INTPI, INTPJ, INTPK, ORDERI, ORDERJ,

} // for comp

#ifdef __CUDACC__
amrex::Gpu::synchronize();
AMREX_GPU_ERROR_CHECK();
#endif
// #ifdef __CUDACC__
// amrex::Gpu::synchronize();
// AMREX_GPU_ERROR_CHECK();
// #endif
}

template <centering_t CENTI, centering_t CENTJ, centering_t CENTK,
Expand Down
3 changes: 3 additions & 0 deletions CarpetX/src/schedule.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2359,14 +2359,17 @@ int SyncGroupsByDirI(const cGH *restrict cctkGH, int numgroups,

} // for gi
});
synchronize();

for (auto task1 : tasks1)
task1();
tasks1.clear();
synchronize();

for (auto task2 : tasks2)
task2();
tasks2.clear();
synchronize();

if (CCTK_IsFunctionAliased("MultiPatch_Interpolate")) {
std::vector<CCTK_INT> cactusvarinds;
Expand Down

0 comments on commit b606009

Please sign in to comment.