diff --git a/util/math_reference.h b/util/math_reference.h index 51fa1e15d..3ca887d34 100644 --- a/util/math_reference.h +++ b/util/math_reference.h @@ -115,6 +115,7 @@ #endif namespace reference { + /* two argument relational reference */ template auto isequal(T a, T b) { @@ -406,7 +407,6 @@ T sub_sat(T x, T y) { } } } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(sub_sat) /* upsample */ uint16_t upsample(uint8_t h, uint8_t l); @@ -449,23 +449,6 @@ struct upsample_t { using type = int64_t; }; -template -sycl::vec::type, N> upsample( - sycl::vec a, sycl::vec::type, N> b) { - return sycl_cts::math::run_func_on_vector::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray::type, N> upsample( - sycl::marray a, - sycl::marray::type, N> b) { - return sycl_cts::math::run_func_on_marray::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -#endif - /* return number of non zero bits in x */ template T popcount(T x) { @@ -474,49 +457,15 @@ T popcount(T x) { if (x & (1ull << i)) lz++; return lz; } -MAKE_VEC_AND_MARRAY_VERSIONS(popcount) /* fast multiply add 24bits */ sycl_cts::resultRef mad24(int32_t x, int32_t y, int32_t z); sycl_cts::resultRef mad24(uint32_t x, uint32_t y, uint32_t z); -template -sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mad24(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -#endif - /* fast multiply 24bits */ sycl_cts::resultRef mul24(int32_t x, int32_t y); sycl_cts::resultRef mul24(uint32_t x, uint32_t y); -template -sycl_cts::resultRef> mul24(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mul24(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -#endif - // Common functions float degrees(float a); @@ -536,18 +485,6 @@ struct higher_accuracy { using type = double; }; -template -struct higher_accuracy> { - using type = sycl::vec::type, N>; -}; -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -struct higher_accuracy> { - using type = sycl::marray::type, N>; -}; -#endif - template T acos(T a) { return std::acos(static_cast::type>(a)); @@ -661,6 +598,12 @@ using std::ilogb; using std::ldexp; using std::lgamma; +template +T lgamma_r(T a, int *b) { + *b = (std::tgamma(a) > 0) ? 1 : -1; + return std::lgamma(a); +} + template T log(T a) { return std::log(static_cast::type>(a)); @@ -707,9 +650,7 @@ T minmag(T a, T b) { } using std::modf; - float nan(unsigned int a); - using std::nextafter; template @@ -739,7 +680,6 @@ T remquo(T x, T y, int *quo) { } using std::rint; -MAKE_VEC_AND_MARRAY_VERSIONS(rint) template T rootn(T a, int b) { @@ -817,6 +757,17 @@ sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1); sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1); #endif +template +T dot(T p0, T p1) { + return p0 * p1; +} + +template +T normalize(T p) { + if (p < 0) return -1; + return 1; +} + #if SYCL_CTS_ENABLE_HALF_TESTS @@ -943,38 +894,26 @@ sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1); // non-templated conditional half / double overloads for correct resolution // with DPC++. -template -sycl_cts::resultRef> abs(sycl::vec a) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x) { return abs(x); }, a); -} -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs(sycl::marray a) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x) { return abs(x); }, a); -} -#endif +// Integer functions template -sycl_cts::resultRef> abs_diff(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); +int any(sycl::vec a) { + for (int i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; } // FIXME: AdaptiveCpp does not support marray #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template -sycl_cts::resultRef> abs_diff(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); +bool any(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; } #endif -MAKE_VEC_AND_MARRAY_VERSIONS(acospi) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(add_sat) - template int all(sycl::vec a) { for (int i = 0; i < N; i++) { @@ -993,37 +932,65 @@ bool all(sycl::marray a) { } #endif -template -int any(sycl::vec a) { +MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(bitselect) + +template +sycl::vec select(sycl::vec a, sycl::vec b, + sycl::vec c) { + sycl::vec res; for (int i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; + if (any(c[i]) == 1) + res[i] = b[i]; + else + res[i] = a[i]; } - return false; + return res; } // FIXME: AdaptiveCpp does not support marray #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template -bool any(sycl::marray a) { +sycl::marray select(sycl::marray a, sycl::marray b, + sycl::marray c) { + sycl::marray res; for (size_t i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; + res[i] = c[i] ? b[i] : a[i]; } - return false; + return res; } #endif -MAKE_VEC_AND_MARRAY_VERSIONS(acos) -MAKE_VEC_AND_MARRAY_VERSIONS(acosh) -MAKE_VEC_AND_MARRAY_VERSIONS(asin) -MAKE_VEC_AND_MARRAY_VERSIONS(asinh) -MAKE_VEC_AND_MARRAY_VERSIONS(atan) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2pi) -MAKE_VEC_AND_MARRAY_VERSIONS(atanh) -MAKE_VEC_AND_MARRAY_VERSIONS(asinpi) -MAKE_VEC_AND_MARRAY_VERSIONS(atanpi) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(bitselect) -MAKE_VEC_AND_MARRAY_VERSIONS(cbrt) -MAKE_VEC_AND_MARRAY_VERSIONS(ceil) +template +sycl_cts::resultRef> abs(sycl::vec a) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x) { return abs(x); }, a); +} +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> abs(sycl::marray a) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x) { return abs(x); }, a); +} +#endif + +template +sycl_cts::resultRef> abs_diff(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> abs_diff(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} +#endif + +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(add_sat) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hadd) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rhadd) template sycl_cts::resultRef> clamp(sycl::vec a, sycl::vec b, @@ -1070,67 +1037,316 @@ sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { MAKE_VEC_AND_MARRAY_VERSIONS(clz) MAKE_VEC_AND_MARRAY_VERSIONS(ctz) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(copysign) -MAKE_VEC_AND_MARRAY_VERSIONS(cos) -MAKE_VEC_AND_MARRAY_VERSIONS(cosh) -MAKE_VEC_AND_MARRAY_VERSIONS(cospi) -MAKE_VEC_AND_MARRAY_VERSIONS(degrees) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(divide) -MAKE_VEC_AND_MARRAY_VERSIONS(erf) -MAKE_VEC_AND_MARRAY_VERSIONS(erfc) -MAKE_VEC_AND_MARRAY_VERSIONS(exp) -MAKE_VEC_AND_MARRAY_VERSIONS(exp2) -MAKE_VEC_AND_MARRAY_VERSIONS(exp10) -MAKE_VEC_AND_MARRAY_VERSIONS(expm1) -MAKE_VEC_AND_MARRAY_VERSIONS(fabs) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fdim) -MAKE_VEC_AND_MARRAY_VERSIONS(floor) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(fma) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmax) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmax) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmin) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmin) -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmod) + +MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_sat) template -sycl::vec fract(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = reference::fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; +sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::vec a, T b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return max(x, y); }, a, b); } // FIXME: AdaptiveCpp does not support marray #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template -sycl::marray fract(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - T value; - res[i] = reference::fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; +sycl_cts::resultRef> max(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); } #endif template -sycl::vec frexp(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = reference::frexp(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; +sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::vec a, T b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> min(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +#endif + +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(mul_hi) +MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_hi) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rotate) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(sub_sat) + +template +sycl::vec::type, N> upsample( + sycl::vec a, sycl::vec::type, N> b) { + return sycl_cts::math::run_func_on_vector::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray::type, N> upsample( + sycl::marray a, + sycl::marray::type, N> b) { + return sycl_cts::math::run_func_on_marray::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} +#endif + +MAKE_VEC_AND_MARRAY_VERSIONS(popcount) + +template +sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> mad24(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} +#endif + +template +sycl_cts::resultRef> mul24(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> mul24(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} +#endif + +MAKE_VEC_AND_MARRAY_VERSIONS(degrees) + +template +sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, + T c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} +#endif + +MAKE_VEC_AND_MARRAY_VERSIONS(radians) + +template +sycl::vec step(T a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = step(a, b[i]); + } + return res; +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray step(T a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = step(a, b[i]); + } + return res; +} +#endif + +template +sycl_cts::resultRef> smoothstep(sycl::vec a, + sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = smoothstep(a, b, c[i]); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl_cts::resultRef> smoothstep(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> smoothstep(T a, T b, + sycl::marray c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = smoothstep(a, b, c[i]); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} +#endif + +// Math functions + +template +struct higher_accuracy> { + using type = sycl::vec::type, N>; +}; +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +struct higher_accuracy> { + using type = sycl::marray::type, N>; +}; +#endif + +MAKE_VEC_AND_MARRAY_VERSIONS(acos) +MAKE_VEC_AND_MARRAY_VERSIONS(acosh) +MAKE_VEC_AND_MARRAY_VERSIONS(acospi) +MAKE_VEC_AND_MARRAY_VERSIONS(asin) +MAKE_VEC_AND_MARRAY_VERSIONS(asinh) +MAKE_VEC_AND_MARRAY_VERSIONS(asinpi) +MAKE_VEC_AND_MARRAY_VERSIONS(atan) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2) +MAKE_VEC_AND_MARRAY_VERSIONS(atanh) +MAKE_VEC_AND_MARRAY_VERSIONS(atanpi) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2pi) +MAKE_VEC_AND_MARRAY_VERSIONS(cbrt) +MAKE_VEC_AND_MARRAY_VERSIONS(ceil) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(copysign) +MAKE_VEC_AND_MARRAY_VERSIONS(cos) +MAKE_VEC_AND_MARRAY_VERSIONS(cosh) +MAKE_VEC_AND_MARRAY_VERSIONS(cospi) +MAKE_VEC_AND_MARRAY_VERSIONS(erfc) +MAKE_VEC_AND_MARRAY_VERSIONS(erf) +MAKE_VEC_AND_MARRAY_VERSIONS(exp) +MAKE_VEC_AND_MARRAY_VERSIONS(exp2) +MAKE_VEC_AND_MARRAY_VERSIONS(exp10) +MAKE_VEC_AND_MARRAY_VERSIONS(expm1) +MAKE_VEC_AND_MARRAY_VERSIONS(fabs) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fdim) +MAKE_VEC_AND_MARRAY_VERSIONS(floor) +MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(fma) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmax) +MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmax) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmin) +MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmin) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmod) + +template +sycl::vec fract(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray fract(sycl::marray a, sycl::marray *b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} +#endif + +template +sycl::vec frexp(sycl::vec a, sycl::vec *b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::frexp(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; } // FIXME: AdaptiveCpp does not support marray #ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP @@ -1148,7 +1364,6 @@ sycl::marray frexp(sycl::marray a, sycl::marray *b) { } #endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hadd) MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hypot) template @@ -1212,11 +1427,6 @@ sycl::marray ldexp(sycl::marray a, int b) { MAKE_VEC_AND_MARRAY_VERSIONS(lgamma) -template -T lgamma_r(T a, int *b) { - *b = (std::tgamma(a) > 0) ? 1 : -1; - return std::lgamma(a); -} template sycl::vec lgamma_r(sycl::vec a, sycl::vec *b) { sycl::vec res; @@ -1250,107 +1460,9 @@ MAKE_VEC_AND_MARRAY_VERSIONS(log2) MAKE_VEC_AND_MARRAY_VERSIONS(log10) MAKE_VEC_AND_MARRAY_VERSIONS(log1p) MAKE_VEC_AND_MARRAY_VERSIONS(logb) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_hi) -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_sat) - -template -sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::vec a, T b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> max(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -#endif +MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad) MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(maxmag) - -template -sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::vec a, T b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> min(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -#endif - -template -sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, - T c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif - MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(minmag) template @@ -1381,8 +1493,6 @@ sycl::marray modf(sycl::marray a, sycl::marray *b) { } #endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(mul_hi) - template sycl::vec nan(sycl::vec a) { return sycl_cts::math::run_func_on_vector( @@ -1398,7 +1508,6 @@ sycl::marray nan(sycl::marray a) { #endif MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(nextafter) - MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(pow) template @@ -1437,8 +1546,6 @@ sycl_cts::resultRef> powr(sycl::marray a, } #endif -MAKE_VEC_AND_MARRAY_VERSIONS(radians) -MAKE_VEC_AND_MARRAY_VERSIONS(recip) MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(remainder) template @@ -1471,7 +1578,7 @@ sycl::marray remquo(sycl::marray a, sycl::marray b, } #endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rhadd) +MAKE_VEC_AND_MARRAY_VERSIONS(rint) template sycl::vec rootn(sycl::vec a, sycl::vec b) { @@ -1493,36 +1600,8 @@ sycl::marray rootn(sycl::marray a, sycl::marray b) { } #endif - -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rotate) MAKE_VEC_AND_MARRAY_VERSIONS(round) MAKE_VEC_AND_MARRAY_VERSIONS(rsqrt) - -template -sycl::vec select(sycl::vec a, sycl::vec b, - sycl::vec c) { - sycl::vec res; - for (int i = 0; i < N; i++) { - if (any(c[i]) == 1) - res[i] = b[i]; - else - res[i] = a[i]; - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray select(sycl::marray a, sycl::marray b, - sycl::marray c) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = c[i] ? b[i] : a[i]; - } - return res; -} -#endif - MAKE_VEC_AND_MARRAY_VERSIONS(sign) template @@ -1556,89 +1635,18 @@ sycl::marray sincos(sycl::marray a, sycl::marray *b) { MAKE_VEC_AND_MARRAY_VERSIONS(sin) MAKE_VEC_AND_MARRAY_VERSIONS(sinh) MAKE_VEC_AND_MARRAY_VERSIONS(sinpi) - -template -sycl_cts::resultRef> smoothstep(sycl::vec a, - sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = smoothstep(a, b, c[i]); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> smoothstep(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> smoothstep(T a, T b, - sycl::marray c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = smoothstep(a, b, c[i]); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif - MAKE_VEC_AND_MARRAY_VERSIONS(sqrt) - -template -sycl::vec step(T a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray step(T a, sycl::marray b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -#endif - MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(step) MAKE_VEC_AND_MARRAY_VERSIONS(tan) MAKE_VEC_AND_MARRAY_VERSIONS(tanh) MAKE_VEC_AND_MARRAY_VERSIONS(tanpi) MAKE_VEC_AND_MARRAY_VERSIONS(tgamma) MAKE_VEC_AND_MARRAY_VERSIONS(trunc) - +MAKE_VEC_AND_MARRAY_VERSIONS(recip) +MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(divide) // Geometric functions -template -T dot(T p0, T p1) { - return p0 * p1; -} - template T dot(sycl::vec a, sycl::vec b) { T res = 0; @@ -1655,6 +1663,8 @@ T dot(sycl::marray a, sycl::marray b) { } #endif +// Generic functions over both scalars and vec / marray types - these need to be defined last + template auto length(T p) { return reference::sqrt(reference::dot(p, p)); @@ -1665,27 +1675,6 @@ auto distance(T p0, T p1) { return reference::length(p0 - p1); } -template -T normalize(T p) { - if (p < 0) return -1; - return 1; -} - -template -float fast_length(T p0) { - return reference::sqrt(fast_dot(p0)); -} - -template -float fast_distance(T p0, T p1) { - return reference::fast_length(p0 - p1); -} - -template -T fast_normalize(T p0) { - return p0 * reference::rsqrt(fast_dot(p0)); -} - template sycl::vec normalize(sycl::vec a) { sycl::vec res; @@ -1706,6 +1695,21 @@ sycl::marray normalize(sycl::marray a) { } #endif +template +float fast_length(T p0) { + return reference::sqrt(fast_dot(p0)); +} + +template +float fast_distance(T p0, T p1) { + return reference::fast_length(p0 - p1); +} + +template +T fast_normalize(T p0) { + return p0 * reference::rsqrt(fast_dot(p0)); +} + } // namespace reference #endif // __SYCLCTS_UTIL_MATH_REFERENCE_H