Skip to content

Commit

Permalink
Merge pull request #167 from eschnett/eschnett/summit-cuda
Browse files Browse the repository at this point in the history
Avoid run-time errors on Summit
  • Loading branch information
eschnett authored Jul 5, 2023
2 parents 881b314 + 535c9ef commit abb51d8
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 42 deletions.
50 changes: 29 additions & 21 deletions Arith/src/defs.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -191,79 +191,83 @@ constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST auto if_else(bool c, const T &x,
////////////////////////////////////////////////////////////////////////////////

// bitsign(i) = (-1)^i, the converse to signbit
constexpr ARITH_DEVICE ARITH_HOST int bitsign(bool c) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST int bitsign(bool c) {
return if_else(c, -1, 1);
}
constexpr ARITH_DEVICE ARITH_HOST int bitsign(int i) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST int bitsign(int i) {
return bitsign(i % 2 != 0);
}

// Return x if y>0, -x if y<0
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T flipsign(const T &x, const T &y) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T flipsign(const T &x,
const T &y) {
using std::copysign;
return copysign(T(1), y) * x;
}

// A max function that returns nan when any argument is nan
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T max1(const T &x, const T &y) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T max1(const T &x, const T &y) {
using std::max;
return if_else(x != x, x, if_else(y != y, y, max(x, y)));
}

// The maximum of the absolute values. This is reduces over containers.
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T maxabs(const T &x) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T maxabs(const T &x) {
using std::abs;
return abs(x);
}

template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T sumabs(const T &x) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T sumabs(const T &x) {
using std::abs;
return abs(x);
}

// A min function that returns nan when any argument is nan
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T min1(const T &x, const T &y) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T min1(const T &x, const T &y) {
using std::min;
return if_else(x != x, x, if_else(y != y, y, min(x, y)));
}

// Multiply-add
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T muladd(const T &x, const T &y,
const T &z) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T muladd(const T &x, const T &y,
const T &z) {
return x * y + z;
}
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T mulsub(const T &x, const T &y,
const T &z) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T mulsub(const T &x, const T &y,
const T &z) {
return x * y - z;
}
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T negmuladd(const T &x, const T &y,
const T &z) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T negmuladd(const T &x,
const T &y,
const T &z) {
return -(x * y) + z;
}
template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST T negmulsub(const T &x, const T &y,
const T &z) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T negmulsub(const T &x,
const T &y,
const T &z) {
return -(x * y) - z;
}

// Factorial
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST int factorial(int n) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST int factorial(int n) {
int r = 1;
while (n > 0)
r *= n--;
return r;
}

namespace detail {
template <typename T> constexpr T pown(const T &x, int n) {
template <typename T>
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T pown(const T &x, int n) {
T r{1};
T y{x};
// invariant: initial(x^n) == r * y^n
Expand All @@ -278,18 +282,22 @@ template <typename T> constexpr T pown(const T &x, int n) {
} // namespace detail

// Raise a value to an integer power, calculated efficiently
template <typename T> constexpr T pown(const T &x, const int n) {
template <typename T>
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T pown(const T &x, const int n) {
return n >= 0 ? detail::pown(x, n) : 1 / detail::pown(x, -n);
}

template <typename T> constexpr T pow2(const T &x) { return x * x; }
template <typename T>
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T pow2(const T &x) {
return x * x;
}

////////////////////////////////////////////////////////////////////////////////

// Linear combination
template <typename T, typename I>
constexpr T lincom(const I &i0, const T &x0, const I &i1, const T &x1,
const I &i) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST T
lincom(const I &i0, const T &x0, const I &i1, const T &x1, const I &i) {
return T(i - i1) / T(i0 - i1) * x0 + T(i - i0) / T(i1 - i0) * x1;
}

Expand Down
19 changes: 10 additions & 9 deletions Arith/src/sum.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ using namespace std;
// summand.

template <int D, typename F>
constexpr ARITH_INLINE remove_cv_t<remove_reference_t<result_of_t<F(int)> > >
sum(F f) {
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST
remove_cv_t<remove_reference_t<result_of_t<F(int)> > >
sum(F f) {
typedef remove_cv_t<remove_reference_t<result_of_t<F(int)> > > R;
R s = zero<R>();
for (int x = 0; x < D; ++x)
Expand All @@ -28,8 +29,8 @@ sum(F f) {
}

template <int D, typename F>
constexpr
ARITH_INLINE remove_cv_t<remove_reference_t<result_of_t<F(int, int)> > >
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST
remove_cv_t<remove_reference_t<result_of_t<F(int, int)> > >
sum(F f) {
typedef remove_cv_t<remove_reference_t<result_of_t<F(int, int)> > > R;
R s = zero<R>();
Expand All @@ -40,7 +41,7 @@ constexpr
}

template <int D, typename F>
constexpr ARITH_INLINE
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST
remove_cv_t<remove_reference_t<result_of_t<F(int, int, int)> > >
sum(F f) {
typedef remove_cv_t<remove_reference_t<result_of_t<F(int, int, int)> > > R;
Expand All @@ -53,7 +54,7 @@ constexpr ARITH_INLINE
}

template <int D, typename F>
constexpr ARITH_INLINE
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST
remove_cv_t<remove_reference_t<result_of_t<F(int, int, int, int)> > >
sum(F f) {
typedef remove_cv_t<remove_reference_t<result_of_t<F(int, int, int, int)> > >
Expand All @@ -68,8 +69,8 @@ constexpr ARITH_INLINE
}

template <int D, typename F>
constexpr
ARITH_INLINE remove_cv_t<remove_reference_t<result_of_t<F(int, int)> > >
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST
remove_cv_t<remove_reference_t<result_of_t<F(int, int)> > >
sum_symm(F f) {
typedef remove_cv_t<remove_reference_t<result_of_t<F(int, int)> > > R;
R s = zero<R>();
Expand All @@ -80,7 +81,7 @@ constexpr
}

template <int D, typename F>
constexpr ARITH_INLINE
constexpr ARITH_INLINE ARITH_DEVICE ARITH_HOST
remove_cv_t<remove_reference_t<result_of_t<F(int, int, int)> > >
sum_symm(F f) {
typedef remove_cv_t<remove_reference_t<result_of_t<F(int, int, int)> > > R;
Expand Down
12 changes: 6 additions & 6 deletions CarpetX/src/boundaries.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,17 @@ struct BoundaryKernel {

static constexpr int maxncomps = 10;

std::array<CCTK_REAL, maxncomps> dirichlet_values;
Arith::vect<CCTK_REAL, maxncomps> dirichlet_values;

Arith::vect<int, dim> neumann_source;

Arith::vect<int, dim> linear_extrapolation_source;

Arith::vect<int, dim> robin_source;
std::array<CCTK_REAL, maxncomps> robin_values;
Arith::vect<CCTK_REAL, maxncomps> robin_values;

Arith::vect<int, dim> reflection_offset;
std::array<CCTK_REAL, maxncomps> reflection_parities;
Arith::vect<CCTK_REAL, maxncomps> reflection_parities;

inline CCTK_ATTRIBUTE_ALWAYS_INLINE CCTK_DEVICE void
operator()(const Arith::vect<int, dim> &dst, int cmin, int cmax) const;
Expand Down Expand Up @@ -158,13 +158,13 @@ struct BoundaryCondition {
const Arith::vect<symmetry_t, dim> symmetries,
const Arith::vect<boundary_t, dim> boundaries,
//
const std::array<CCTK_REAL, BoundaryKernel::maxncomps> &dirichlet_values,
const Arith::vect<CCTK_REAL, BoundaryKernel::maxncomps> &dirichlet_values,
const Arith::vect<int, dim> &neumann_source,
const Arith::vect<int, dim> &linear_extrapolation_source,
const Arith::vect<int, dim> &robin_source,
const std::array<CCTK_REAL, BoundaryKernel::maxncomps> &robin_values,
const Arith::vect<CCTK_REAL, BoundaryKernel::maxncomps> &robin_values,
const Arith::vect<int, dim> &reflection_offset,
const std::array<CCTK_REAL, BoundaryKernel::maxncomps>
const Arith::vect<CCTK_REAL, BoundaryKernel::maxncomps>
&reflection_parities) const {
return BoundaryKernel{
#ifdef CCTK_DEBUG
Expand Down
15 changes: 9 additions & 6 deletions CarpetX/src/boundaries_impl.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ void BoundaryCondition::apply_on_face_symbcxyz(
} else {
// This is the generic case for applying boundary conditions.

std::array<CCTK_REAL, BoundaryKernel::maxncomps> dirichlet_values;
Arith::vect<CCTK_REAL, BoundaryKernel::maxncomps> dirichlet_values;
for (int comp = 0; comp < ncomps; ++comp)
dirichlet_values[comp] = groupdata.dirichlet_values.at(comp);

Expand Down Expand Up @@ -269,7 +269,7 @@ void BoundaryCondition::apply_on_face_symbcxyz(
}
}

std::array<CCTK_REAL, BoundaryKernel::maxncomps> robin_values;
Arith::vect<CCTK_REAL, BoundaryKernel::maxncomps> robin_values;
for (int comp = 0; comp < ncomps; ++comp)
robin_values[comp] = groupdata.robin_values.at(comp);

Expand All @@ -289,7 +289,7 @@ void BoundaryCondition::apply_on_face_symbcxyz(
}
}

std::array<CCTK_REAL, BoundaryKernel::maxncomps> reflection_parities;
Arith::vect<CCTK_REAL, BoundaryKernel::maxncomps> reflection_parities;
for (int comp = 0; comp < ncomps; ++comp) {
CCTK_REAL reflection_parity = +1;
for (int d = 0; d < dim; ++d)
Expand All @@ -300,12 +300,16 @@ void BoundaryCondition::apply_on_face_symbcxyz(
reflection_parities[comp] = reflection_parity;
}

// We cannot capture `destptr` directly (on Summit, with CUDA 11.5.2)
// We cannot use a `restrict` declaration either.
CCTK_REAL *const destptr1 = destptr;

const auto kernel =
[
#ifdef CCTK_DEBUG
amin = amin, amax = amax, dmin = dmin, dmax = dmax,
#endif
xmin = xmin, dx = dx, layout = layout, destptr = destptr,
xmin = xmin, dx = dx, layout = layout, destptr = destptr1,
//
cmin, cmax, dirichlet_values, neumann_source,
linear_extrapolation_source, robin_source, robin_values,
Expand Down Expand Up @@ -367,8 +371,7 @@ void BoundaryCondition::apply_on_face_symbcxyz(
}
}
#ifdef CCTK_DEBUG
for (int d = 0; d < dim; ++d)
assert(src[d] >= dmin[d] && src[d] < dmax[d]);
assert(all(src >= dmin && src < dmax));
assert(!all(src == dst));
#endif

Expand Down

0 comments on commit abb51d8

Please sign in to comment.