From f972b623813b19033fc65e9440b9e08c32faaf4a Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Tue, 24 Dec 2024 15:37:20 +0100 Subject: [PATCH] [RM] math_ref separate vec / marray overloads --- util/math_reference.h | 1043 +++++++++++++++++++++-------------------- 1 file changed, 531 insertions(+), 512 deletions(-) diff --git a/util/math_reference.h b/util/math_reference.h index 3ca887d34..38246e4b0 100644 --- a/util/math_reference.h +++ b/util/math_reference.h @@ -28,92 +28,6 @@ #include "./math_helper.h" #include -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP - -#define MAKE_VEC_AND_MARRAY_VERSIONS(func) \ - template \ - sycl::vec func(sycl::vec a) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x) { return func(x); }, a); \ - } \ - template \ - sycl::marray func(sycl::marray a) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x) { return func(x); }, a); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } \ - template \ - sycl::marray func(sycl::marray a, sycl::marray b) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b, \ - sycl::vec c) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } \ - template \ - sycl::marray func(sycl::marray a, sycl::marray b, \ - sycl::marray c) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(func) \ - template \ - sycl::vec func(sycl::vec a, T b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } \ - template \ - sycl::marray func(sycl::marray a, T b) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#else // definitions without marray for AdaptiveCpp - -#define MAKE_VEC_AND_MARRAY_VERSIONS(func) \ - template \ - sycl::vec func(sycl::vec a) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x) { return func(x); }, a); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b, \ - sycl::vec c) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(func) \ - template \ - sycl::vec func(sycl::vec a, T b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#endif - namespace reference { /* two argument relational reference */ @@ -890,11 +804,39 @@ sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1); #endif // SYCL_CTS_ENABLE_DOUBLE_TESTS -// Generic vec / marray versions of these fuctions must be defined after any -// non-templated conditional half / double overloads for correct resolution -// with DPC++. +// sycl::vec overloads of the above. These must be defined after any non-templated +// conditional half / double overloads for correct resolution with DPC++. + +#define MAKE_VEC_VERSION(func) \ + template \ + sycl::vec func(sycl::vec a) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x) { return func(x); }, a); \ + } + +#define MAKE_VEC_VERSION_2ARGS(func) \ + template \ + sycl::vec func(sycl::vec a, sycl::vec b) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +#define MAKE_VEC_VERSION_3ARGS(func) \ + template \ + sycl::vec func(sycl::vec a, sycl::vec b, \ + sycl::vec c) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ + } + +#define MAKE_VEC_VERSION_WITH_SCALAR(func) \ + template \ + sycl::vec func(sycl::vec a, T b) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } -// Integer functions +// Common functions template int any(sycl::vec a) { @@ -903,16 +845,6 @@ int any(sycl::vec a) { } return false; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -bool any(sycl::marray a) { - for (size_t i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; - } - return false; -} -#endif template int all(sycl::vec a) { @@ -921,18 +853,8 @@ int all(sycl::vec a) { } return true; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -bool all(sycl::marray a) { - for (size_t i = 0; i < N; i++) { - if (all(a[i]) == 0) return false; - } - return true; -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(bitselect) +MAKE_VEC_VERSION_3ARGS(bitselect) template sycl::vec select(sycl::vec a, sycl::vec b, @@ -946,31 +868,12 @@ sycl::vec select(sycl::vec a, sycl::vec b, } return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray select(sycl::marray a, sycl::marray b, - sycl::marray c) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = c[i] ? b[i] : a[i]; - } - return res; -} -#endif template sycl_cts::resultRef> abs(sycl::vec a) { return sycl_cts::math::run_func_on_vector_result_ref( [](T x) { return abs(x); }, a); } -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs(sycl::marray a) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x) { return abs(x); }, a); -} -#endif template sycl_cts::resultRef> abs_diff(sycl::vec a, @@ -978,19 +881,10 @@ sycl_cts::resultRef> abs_diff(sycl::vec a, return sycl_cts::math::run_func_on_vector_result_ref( [](T x, T y) { return abs_diff(x, y); }, a, b); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs_diff(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(add_sat) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hadd) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rhadd) +MAKE_VEC_VERSION_2ARGS(add_sat) +MAKE_VEC_VERSION_2ARGS(hadd) +MAKE_VEC_VERSION_2ARGS(rhadd) template sycl_cts::resultRef> clamp(sycl::vec a, sycl::vec b, @@ -1011,34 +905,11 @@ sycl_cts::resultRef> clamp(sycl::vec a, T b, T c) { } return sycl_cts::resultRef>(res, undefined); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> clamp(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = clamp(a[i], b, c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(clz) -MAKE_VEC_AND_MARRAY_VERSIONS(ctz) +MAKE_VEC_VERSION(clz) +MAKE_VEC_VERSION(ctz) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_sat) +MAKE_VEC_VERSION_3ARGS(mad_sat) template sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { @@ -1050,20 +921,6 @@ sycl_cts::resultRef> max(sycl::vec a, T b) { return sycl_cts::math::run_func_on_vector_result_ref( [](T x, T y) { return max(x, y); }, a, b); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> max(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -#endif template sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { @@ -1075,25 +932,11 @@ sycl_cts::resultRef> min(sycl::vec a, T b) { return sycl_cts::math::run_func_on_vector_result_ref( [](T x, T y) { return min(x, y); }, a, b); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> min(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(mul_hi) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_hi) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rotate) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(sub_sat) +MAKE_VEC_VERSION_2ARGS(mul_hi) +MAKE_VEC_VERSION_3ARGS(mad_hi) +MAKE_VEC_VERSION_2ARGS(rotate) +MAKE_VEC_VERSION_2ARGS(sub_sat) template sycl::vec::type, N> upsample( @@ -1101,18 +944,8 @@ sycl::vec::type, N> upsample( return sycl_cts::math::run_func_on_vector::type, T, N>( [](T x, T y) { return upsample(x, y); }, a, b); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray::type, N> upsample( - sycl::marray a, - sycl::marray::type, N> b) { - return sycl_cts::math::run_func_on_marray::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(popcount) +MAKE_VEC_VERSION(popcount) template sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, @@ -1120,16 +953,6 @@ sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, return sycl_cts::math::run_func_on_vector_result_ref( [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mad24(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -#endif template sycl_cts::resultRef> mul24(sycl::vec a, @@ -1137,17 +960,8 @@ sycl_cts::resultRef> mul24(sycl::vec a, return sycl_cts::math::run_func_on_vector_result_ref( [](T x, T y) { return mul24(x, y); }, a, b); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mul24(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(degrees) +MAKE_VEC_VERSION(degrees) template sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, @@ -1169,32 +983,8 @@ sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, } return sycl_cts::resultRef>(res, undefined); } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(radians) +MAKE_VEC_VERSION(radians) template sycl::vec step(T a, sycl::vec b) { @@ -1204,17 +994,6 @@ sycl::vec step(T a, sycl::vec b) { } return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray step(T a, sycl::marray b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -#endif template sycl_cts::resultRef> smoothstep(sycl::vec a, @@ -1236,8 +1015,426 @@ sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { } return sycl_cts::resultRef>(res, undefined); } -// FIXME: AdaptiveCpp does not support marray + +// Math functions + +template +struct higher_accuracy> { + using type = sycl::vec::type, N>; +}; + +MAKE_VEC_VERSION(acos) +MAKE_VEC_VERSION(acosh) +MAKE_VEC_VERSION(acospi) +MAKE_VEC_VERSION(asin) +MAKE_VEC_VERSION(asinh) +MAKE_VEC_VERSION(asinpi) +MAKE_VEC_VERSION(atan) +MAKE_VEC_VERSION_2ARGS(atan2) +MAKE_VEC_VERSION(atanh) +MAKE_VEC_VERSION(atanpi) +MAKE_VEC_VERSION_2ARGS(atan2pi) +MAKE_VEC_VERSION(cbrt) +MAKE_VEC_VERSION(ceil) +MAKE_VEC_VERSION_2ARGS(copysign) +MAKE_VEC_VERSION(cos) +MAKE_VEC_VERSION(cosh) +MAKE_VEC_VERSION(cospi) +MAKE_VEC_VERSION(erfc) +MAKE_VEC_VERSION(erf) +MAKE_VEC_VERSION(exp) +MAKE_VEC_VERSION(exp2) +MAKE_VEC_VERSION(exp10) +MAKE_VEC_VERSION(expm1) +MAKE_VEC_VERSION(fabs) +MAKE_VEC_VERSION_2ARGS(fdim) +MAKE_VEC_VERSION(floor) +MAKE_VEC_VERSION_3ARGS(fma) +MAKE_VEC_VERSION_2ARGS(fmax) +MAKE_VEC_VERSION_WITH_SCALAR(fmax) +MAKE_VEC_VERSION_2ARGS(fmin) +MAKE_VEC_VERSION_WITH_SCALAR(fmin) +MAKE_VEC_VERSION_2ARGS(fmod) + +template +sycl::vec fract(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::vec frexp(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::frexp(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION_2ARGS(hypot) + +template +sycl::vec ilogb(sycl::vec a) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ilogb(a[i]); + } + return res; +} + +template +sycl::vec ldexp(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b[i]); + } + return res; +} +template +sycl::vec ldexp(sycl::vec a, int b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b); + } + return res; +} + +MAKE_VEC_VERSION(lgamma) + +template +sycl::vec lgamma_r(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::lgamma_r(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION(log) +MAKE_VEC_VERSION(log2) +MAKE_VEC_VERSION(log10) +MAKE_VEC_VERSION(log1p) +MAKE_VEC_VERSION(logb) + +MAKE_VEC_VERSION_3ARGS(mad) +MAKE_VEC_VERSION_2ARGS(maxmag) +MAKE_VEC_VERSION_2ARGS(minmag) + +template +sycl::vec modf(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::modf(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::vec nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](unsigned int x) { return nan(x); }, a); +} + +MAKE_VEC_VERSION_2ARGS(nextafter) +MAKE_VEC_VERSION_2ARGS(pow) + +template +sycl::vec pown(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::pown(a[i], b[i]); + } + return res; +} + +template +sycl_cts::resultRef> powr(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return reference::powr(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(remainder) + +template +sycl::vec remquo(sycl::vec a, sycl::vec b, + sycl::vec *c) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::remquo(a[i], b[i], &value); + resPtr[i] = value; + } + *c = resPtr; + return res; +} + +MAKE_VEC_VERSION(rint) + +template +sycl::vec rootn(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::rootn(a[i], b[i]); + } + return res; +} + +MAKE_VEC_VERSION(round) +MAKE_VEC_VERSION(rsqrt) +MAKE_VEC_VERSION(sign) + +template +sycl::vec sincos(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::sincos(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION(sin) +MAKE_VEC_VERSION(sinh) +MAKE_VEC_VERSION(sinpi) +MAKE_VEC_VERSION(sqrt) +MAKE_VEC_VERSION_2ARGS(step) +MAKE_VEC_VERSION(tan) +MAKE_VEC_VERSION(tanh) +MAKE_VEC_VERSION(tanpi) +MAKE_VEC_VERSION(tgamma) +MAKE_VEC_VERSION(trunc) +MAKE_VEC_VERSION(recip) +MAKE_VEC_VERSION_2ARGS(divide) + +// Geometric functions + +template +T dot(sycl::vec a, sycl::vec b) { + T res = 0; + for (int i = 0; i < N; i++) res += a[i] * b[i]; + return res; +} + + +// sycl::marray overloads of the above. +// Not supported by AdaptiveCpp. #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +#define MAKE_MARRAY_VERSION(func) \ + template \ + sycl::marray func(sycl::marray a) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x) { return func(x); }, a); \ + } + +#define MAKE_MARRAY_VERSION_2ARGS(func) \ + template \ + sycl::marray func(sycl::marray a, sycl::marray b) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +#define MAKE_MARRAY_VERSION_3ARGS(func) \ + template \ + sycl::marray func(sycl::marray a, sycl::marray b, \ + sycl::marray c) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ + } + +#define MAKE_MARRAY_VERSION_WITH_SCALAR(func) \ + template \ + sycl::marray func(sycl::marray a, T b) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +// Common functions. + +template +bool any(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; +} + +template +bool all(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (all(a[i]) == 0) return false; + } + return true; +} + +MAKE_MARRAY_VERSION_3ARGS(bitselect) + +template +sycl::marray select(sycl::marray a, sycl::marray b, + sycl::marray c) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = c[i] ? b[i] : a[i]; + } + return res; +} + +template +sycl_cts::resultRef> abs(sycl::marray a) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x) { return abs(x); }, a); +} + +template +sycl_cts::resultRef> abs_diff(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION_2ARGS(add_sat) +MAKE_MARRAY_VERSION_2ARGS(hadd) +MAKE_MARRAY_VERSION_2ARGS(rhadd) + +template +sycl_cts::resultRef> clamp(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = clamp(a[i], b, c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_MARRAY_VERSION(clz) +MAKE_MARRAY_VERSION(ctz) + +MAKE_MARRAY_VERSION_3ARGS(mad_sat) + +template +sycl_cts::resultRef> max(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} + +template +sycl_cts::resultRef> min(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION_2ARGS(mul_hi) +MAKE_MARRAY_VERSION_3ARGS(mad_hi) +MAKE_MARRAY_VERSION_2ARGS(rotate) +MAKE_MARRAY_VERSION_2ARGS(sub_sat) + +template +sycl::marray::type, N> upsample( + sycl::marray a, + sycl::marray::type, N> b) { + return sycl_cts::math::run_func_on_marray::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION(popcount) + +template +sycl_cts::resultRef> mad24(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} + +template +sycl_cts::resultRef> mul24(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION(degrees) + +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_MARRAY_VERSION(radians) + +template +sycl::marray step(T a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = step(a, b[i]); + } + return res; +} + template sycl_cts::resultRef> smoothstep(sycl::marray a, sycl::marray b, @@ -1259,69 +1456,47 @@ sycl_cts::resultRef> smoothstep(T a, T b, } return sycl_cts::resultRef>(res, undefined); } -#endif // Math functions -template -struct higher_accuracy> { - using type = sycl::vec::type, N>; -}; -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template struct higher_accuracy> { using type = sycl::marray::type, N>; }; -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(acos) -MAKE_VEC_AND_MARRAY_VERSIONS(acosh) -MAKE_VEC_AND_MARRAY_VERSIONS(acospi) -MAKE_VEC_AND_MARRAY_VERSIONS(asin) -MAKE_VEC_AND_MARRAY_VERSIONS(asinh) -MAKE_VEC_AND_MARRAY_VERSIONS(asinpi) -MAKE_VEC_AND_MARRAY_VERSIONS(atan) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2) -MAKE_VEC_AND_MARRAY_VERSIONS(atanh) -MAKE_VEC_AND_MARRAY_VERSIONS(atanpi) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2pi) -MAKE_VEC_AND_MARRAY_VERSIONS(cbrt) -MAKE_VEC_AND_MARRAY_VERSIONS(ceil) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(copysign) -MAKE_VEC_AND_MARRAY_VERSIONS(cos) -MAKE_VEC_AND_MARRAY_VERSIONS(cosh) -MAKE_VEC_AND_MARRAY_VERSIONS(cospi) -MAKE_VEC_AND_MARRAY_VERSIONS(erfc) -MAKE_VEC_AND_MARRAY_VERSIONS(erf) -MAKE_VEC_AND_MARRAY_VERSIONS(exp) -MAKE_VEC_AND_MARRAY_VERSIONS(exp2) -MAKE_VEC_AND_MARRAY_VERSIONS(exp10) -MAKE_VEC_AND_MARRAY_VERSIONS(expm1) -MAKE_VEC_AND_MARRAY_VERSIONS(fabs) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fdim) -MAKE_VEC_AND_MARRAY_VERSIONS(floor) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(fma) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmax) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmax) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmin) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmin) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmod) +MAKE_MARRAY_VERSION(acos) +MAKE_MARRAY_VERSION(acosh) +MAKE_MARRAY_VERSION(acospi) +MAKE_MARRAY_VERSION(asin) +MAKE_MARRAY_VERSION(asinh) +MAKE_MARRAY_VERSION(asinpi) +MAKE_MARRAY_VERSION(atan) +MAKE_MARRAY_VERSION_2ARGS(atan2) +MAKE_MARRAY_VERSION(atanh) +MAKE_MARRAY_VERSION(atanpi) +MAKE_MARRAY_VERSION_2ARGS(atan2pi) +MAKE_MARRAY_VERSION(cbrt) +MAKE_MARRAY_VERSION(ceil) +MAKE_MARRAY_VERSION_2ARGS(copysign) +MAKE_MARRAY_VERSION(cos) +MAKE_MARRAY_VERSION(cosh) +MAKE_MARRAY_VERSION(cospi) +MAKE_MARRAY_VERSION(erfc) +MAKE_MARRAY_VERSION(erf) +MAKE_MARRAY_VERSION(exp) +MAKE_MARRAY_VERSION(exp2) +MAKE_MARRAY_VERSION(exp10) +MAKE_MARRAY_VERSION(expm1) +MAKE_MARRAY_VERSION(fabs) +MAKE_MARRAY_VERSION_2ARGS(fdim) +MAKE_MARRAY_VERSION(floor) +MAKE_MARRAY_VERSION_3ARGS(fma) +MAKE_MARRAY_VERSION_2ARGS(fmax) +MAKE_MARRAY_VERSION_WITH_SCALAR(fmax) +MAKE_MARRAY_VERSION_2ARGS(fmin) +MAKE_MARRAY_VERSION_WITH_SCALAR(fmin) +MAKE_MARRAY_VERSION_2ARGS(fmod) -template -sycl::vec fract(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = reference::fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray fract(sycl::marray a, sycl::marray *b) { sycl::marray res; @@ -1334,22 +1509,7 @@ sycl::marray fract(sycl::marray a, sycl::marray *b) { *b = resPtr; return res; } -#endif -template -sycl::vec frexp(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = reference::frexp(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray frexp(sycl::marray a, sycl::marray *b) { sycl::marray res; @@ -1362,20 +1522,9 @@ sycl::marray frexp(sycl::marray a, sycl::marray *b) { *b = resPtr; return res; } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hypot) +MAKE_MARRAY_VERSION_2ARGS(hypot) -template -sycl::vec ilogb(sycl::vec a) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = reference::ilogb(a[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray ilogb(sycl::marray a) { sycl::marray res; @@ -1384,18 +1533,7 @@ sycl::marray ilogb(sycl::marray a) { } return res; } -#endif -template -sycl::vec ldexp(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = reference::ldexp(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray ldexp(sycl::marray a, sycl::marray b) { sycl::marray res; @@ -1404,17 +1542,6 @@ sycl::marray ldexp(sycl::marray a, sycl::marray b) { } return res; } -#endif -template -sycl::vec ldexp(sycl::vec a, int b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = reference::ldexp(a[i], b); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray ldexp(sycl::marray a, int b) { sycl::marray res; @@ -1423,24 +1550,9 @@ sycl::marray ldexp(sycl::marray a, int b) { } return res; } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(lgamma) +MAKE_MARRAY_VERSION(lgamma) -template -sycl::vec lgamma_r(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = reference::lgamma_r(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray lgamma_r(sycl::marray a, sycl::marray *b) { sycl::marray res; @@ -1453,32 +1565,17 @@ sycl::marray lgamma_r(sycl::marray a, sycl::marray *b) { *b = resPtr; return res; } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(log) -MAKE_VEC_AND_MARRAY_VERSIONS(log2) -MAKE_VEC_AND_MARRAY_VERSIONS(log10) -MAKE_VEC_AND_MARRAY_VERSIONS(log1p) -MAKE_VEC_AND_MARRAY_VERSIONS(logb) +MAKE_MARRAY_VERSION(log) +MAKE_MARRAY_VERSION(log2) +MAKE_MARRAY_VERSION(log10) +MAKE_MARRAY_VERSION(log1p) +MAKE_MARRAY_VERSION(logb) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(maxmag) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(minmag) +MAKE_MARRAY_VERSION_3ARGS(mad) +MAKE_MARRAY_VERSION_2ARGS(maxmag) +MAKE_MARRAY_VERSION_2ARGS(minmag) -template -sycl::vec modf(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = reference::modf(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray modf(sycl::marray a, sycl::marray *b) { sycl::marray res; @@ -1491,35 +1588,16 @@ sycl::marray modf(sycl::marray a, sycl::marray *b) { *b = resPtr; return res; } -#endif -template -sycl::vec nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](unsigned int x) { return nan(x); }, a); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray nan(sycl::marray a) { return sycl_cts::math::run_func_on_marray( [](unsigned int x) { return nan(x); }, a); } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(nextafter) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(pow) +MAKE_MARRAY_VERSION_2ARGS(nextafter) +MAKE_MARRAY_VERSION_2ARGS(pow) -template -sycl::vec pown(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = reference::pown(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray pown(sycl::marray a, sycl::marray b) { sycl::marray res; @@ -1528,41 +1606,16 @@ sycl::marray pown(sycl::marray a, sycl::marray b) { } return res; } -#endif -template -sycl_cts::resultRef> powr(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return reference::powr(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl_cts::resultRef> powr(sycl::marray a, sycl::marray b) { return sycl_cts::math::run_func_on_marray_result_ref( [](T x, T y) { return reference::powr(x, y); }, a, b); } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(remainder) +MAKE_MARRAY_VERSION_2ARGS(remainder) -template -sycl::vec remquo(sycl::vec a, sycl::vec b, - sycl::vec *c) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = reference::remquo(a[i], b[i], &value); - resPtr[i] = value; - } - *c = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray remquo(sycl::marray a, sycl::marray b, sycl::marray *c) { @@ -1576,20 +1629,9 @@ sycl::marray remquo(sycl::marray a, sycl::marray b, *c = resPtr; return res; } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(rint) +MAKE_MARRAY_VERSION(rint) -template -sycl::vec rootn(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = reference::rootn(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray rootn(sycl::marray a, sycl::marray b) { sycl::marray res; @@ -1598,26 +1640,11 @@ sycl::marray rootn(sycl::marray a, sycl::marray b) { } return res; } -#endif -MAKE_VEC_AND_MARRAY_VERSIONS(round) -MAKE_VEC_AND_MARRAY_VERSIONS(rsqrt) -MAKE_VEC_AND_MARRAY_VERSIONS(sign) +MAKE_MARRAY_VERSION(round) +MAKE_MARRAY_VERSION(rsqrt) +MAKE_MARRAY_VERSION(sign) -template -sycl::vec sincos(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = reference::sincos(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray sincos(sycl::marray a, sycl::marray *b) { sycl::marray res; @@ -1630,40 +1657,32 @@ sycl::marray sincos(sycl::marray a, sycl::marray *b) { *b = resPtr; return res; } -#endif - -MAKE_VEC_AND_MARRAY_VERSIONS(sin) -MAKE_VEC_AND_MARRAY_VERSIONS(sinh) -MAKE_VEC_AND_MARRAY_VERSIONS(sinpi) -MAKE_VEC_AND_MARRAY_VERSIONS(sqrt) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(step) -MAKE_VEC_AND_MARRAY_VERSIONS(tan) -MAKE_VEC_AND_MARRAY_VERSIONS(tanh) -MAKE_VEC_AND_MARRAY_VERSIONS(tanpi) -MAKE_VEC_AND_MARRAY_VERSIONS(tgamma) -MAKE_VEC_AND_MARRAY_VERSIONS(trunc) -MAKE_VEC_AND_MARRAY_VERSIONS(recip) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(divide) -// Geometric functions +MAKE_MARRAY_VERSION(sin) +MAKE_MARRAY_VERSION(sinh) +MAKE_MARRAY_VERSION(sinpi) +MAKE_MARRAY_VERSION(sqrt) +MAKE_MARRAY_VERSION_2ARGS(step) +MAKE_MARRAY_VERSION(tan) +MAKE_MARRAY_VERSION(tanh) +MAKE_MARRAY_VERSION(tanpi) +MAKE_MARRAY_VERSION(tgamma) +MAKE_MARRAY_VERSION(trunc) +MAKE_MARRAY_VERSION(recip) +MAKE_MARRAY_VERSION_2ARGS(divide) -template -T dot(sycl::vec a, sycl::vec b) { - T res = 0; - for (int i = 0; i < N; i++) res += a[i] * b[i]; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template T dot(sycl::marray a, sycl::marray b) { T res = 0; for (size_t i = 0; i < N; i++) res += a[i] * b[i]; return res; } -#endif -// Generic functions over both scalars and vec / marray types - these need to be defined last +#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + + +// Generic functions over both scalars and vec / marray types. +// These need to be defined last. template auto length(T p) {