From b9e29dbe81914ef096ebf4179780dd644d88dcb0 Mon Sep 17 00:00:00 2001 From: Fabian Knorr Date: Wed, 18 Dec 2024 10:09:22 +0100 Subject: [PATCH] Re-group functions in math_reference according to feature macros --- cmake/FindDPCPP.cmake | 4 +- util/math_reference.cpp | 301 +++--- util/math_reference.h | 2059 ++++++++++++++++++++------------------- 3 files changed, 1182 insertions(+), 1182 deletions(-) diff --git a/cmake/FindDPCPP.cmake b/cmake/FindDPCPP.cmake index f9f285d0e..ee399bbc5 100644 --- a/cmake/FindDPCPP.cmake +++ b/cmake/FindDPCPP.cmake @@ -23,7 +23,7 @@ set(DPCPP_FLAGS "-fsycl;-sycl-std=2020;${DPCPP_FLAGS}") # -fsycl-id-queries-fit-in-int is an optimization enabled by default, but # adds non-conformant behavior that limits the number of work-items in an # invocation of a kernel, so we disable this behavior here. -set(DPCPP_FLAGS "${DPCPP_FLAGS};-fno-sycl-id-queries-fit-in-int") +# set(DPCPP_FLAGS "${DPCPP_FLAGS};-fno-sycl-id-queries-fit-in-int") # Set target triple(s) if specified if(DEFINED DPCPP_TARGET_TRIPLES) @@ -62,7 +62,7 @@ endif() option(DPCPP_SYCL2020_CONFORMANT_APIS "Comply with the SYCL 2020 specification" ON) if(DPCPP_SYCL2020_CONFORMANT_APIS) - set(CMAKE_CXX_FLAGS "-DSYCL2020_CONFORMANT_APIS -fpreview-breaking-changes ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "-DSYCL2020_CONFORMANT_APIS ${CMAKE_CXX_FLAGS}") endif() add_library(DPCPP::Runtime INTERFACE IMPORTED GLOBAL) diff --git a/util/math_reference.cpp b/util/math_reference.cpp index bb84a698a..f182c7738 100644 --- a/util/math_reference.cpp +++ b/util/math_reference.cpp @@ -68,14 +68,6 @@ T bitselect_f_t(T x, T y, T z) { float bitselect(float a, float b, float c) { return bitselect_f_t(a, b, c); } -double bitselect(double a, double b, double c) { - return bitselect_f_t(a, b, c); -} -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c) { - return bitselect_f_t(a, b, c); -} -#endif /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- DEGREES * @@ -86,14 +78,8 @@ T degrees_t(T a) { return a * (180.0 / M_PI); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half degrees(sycl::half a) { return degrees_t(a); } -#endif - float degrees(float a) { return degrees_t(a); } -double degrees(double a) { return degrees_t(a); } - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- RADIANS * */ @@ -103,13 +89,8 @@ T radians_t(T a) { return a * (M_PI / 180.0); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half radians(sycl::half a) { return radians_t(a); } -#endif - float radians(float a) { return radians_t(a); } -double radians(double a) { return radians_t(a); } /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- STEP * */ @@ -120,14 +101,8 @@ T step_t(T a, T b) { return 1.0; } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half step(sycl::half a, sycl::half b) { return step_t(a, b); } -#endif - float step(float a, float b) { return step_t(a, b); } -double step(double a, double b) { return step_t(a, b); } - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- SMOOTHSTEP * */ @@ -139,18 +114,9 @@ sycl_cts::resultRef smoothstep_t(T a, T b, T c) { return t * t * (3 - 2 * t); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, - sycl::half c) { - return smoothstep_t(a, b, c); -} -#endif sycl_cts::resultRef smoothstep(float a, float b, float c) { return smoothstep_t(a, b, c); } -sycl_cts::resultRef smoothstep(double a, double b, double c) { - return smoothstep_t(a, b, c); -} /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- SIGN * @@ -165,14 +131,8 @@ T sign_t(T a) { return +0.0; } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half sign(sycl::half a) { return sign_t(a); } -#endif - float sign(float a) { return sign_t(a); } -double sign(double a) { return sign_t(a); } - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- MAD_SAT * */ @@ -270,22 +230,10 @@ sycl_cts::resultRef mix_t(T x, T y, T a) { return sycl_cts::resultRef(T(), true); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, - const sycl::half c) { - return mix_t(a, b, c); -} -#endif - sycl_cts::resultRef mix(const float a, const float b, const float c) { return mix_t(a, b, c); } -sycl_cts::resultRef mix(const double a, const double b, - const double c) { - return mix_t(a, b, c); -} - /* ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- MUL_HI * */ @@ -472,49 +420,105 @@ sycl_cts::resultRef mul24(uint32_t x, uint32_t y) { * */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half acospi(sycl::half a) { return reference_acospi(a); } -#endif float acospi(float a) { return reference_acospi(a); } -double acospi(double a) { return reference_acospil(a); } - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half asinpi(sycl::half a) { return reference_asinpi(a); } -#endif float asinpi(float a) { return reference_asinpi(a); } -double asinpi(double a) { return reference_asinpil(a); } +float atanpi(float a) { return reference_atanpi(a); } +float atan2pi(float a, float b) { return reference_atan2pi(a, b); } +float cospi(float a) { return reference_cospi(a); } +float fma(float a, float b, float c) { return reference_fma(a, b, c, 0); } + +float fract(float a, float* b) { + *b = std::floor(a); + return std::fmin(a - *b, nextafter(1.0f, 0.0f)); +} + +float nan(unsigned int a) { return std::nanf(std::to_string(a).c_str()); } +float sinpi(float a) { return reference_sinpi(a); } +float tanpi(float a) { return reference_tanpi(a); } + +// Geometric functions + +template +sycl::vec cross_t(sycl::vec a, sycl::vec b) { + sycl::vec res; + std::vector temp_res(4); + std::vector av({a.x(), a.y(), a.z()}); + std::vector bv({b.x(), b.y(), b.z()}); + temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; + temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; + temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; + temp_res[3] = 0.0; + for (int i = 0; i < N; i++) res[i] = temp_res[i]; + + return res; +} + +sycl::float4 cross(sycl::float4 p0, sycl::float4 p1) { return cross_t(p0, p1); } +sycl::float3 cross(sycl::float3 p0, sycl::float3 p1) { return cross_t(p0, p1); } + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray cross_t(sycl::marray a, sycl::marray b) { + sycl::marray res; + std::vector temp_res(4); + std::vector av({a[0], a[1], a[2]}); + std::vector bv({b[0], b[1], b[2]}); + temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; + temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; + temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; + temp_res[3] = 0.0; + for (size_t i = 0; i < N; i++) res[i] = temp_res[i]; + return res; +} + +sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1) { + return cross_t(p0, p1); +} +sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1) { + return cross_t(p0, p1); +} +#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP #if SYCL_CTS_ENABLE_HALF_TESTS + +sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c) { + return bitselect_f_t(a, b, c); +} + +sycl::half degrees(sycl::half a) { return degrees_t(a); } +sycl::half radians(sycl::half a) { return radians_t(a); } +sycl::half step(sycl::half a, sycl::half b) { return step_t(a, b); } + +sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, + sycl::half c) { + return smoothstep_t(a, b, c); +} + +sycl::half sign(sycl::half a) { return sign_t(a); } + +sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, + const sycl::half c) { + return mix_t(a, b, c); +} + +sycl::half acospi(sycl::half a) { return reference_acospi(a); } +sycl::half asinpi(sycl::half a) { return reference_asinpi(a); } sycl::half atanpi(sycl::half a) { return reference_atanpi(a); } -#endif -float atanpi(float a) { return reference_atanpi(a); } -double atanpi(double a) { return reference_atanpil(a); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half atan2pi(sycl::half a, sycl::half b) { return reference_atan2pi(a, b); } -#endif -float atan2pi(float a, float b) { return reference_atan2pi(a, b); } -double atan2pi(double a, double b) { return reference_atan2pil(a, b); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half cospi(sycl::half a) { return reference_cospi(a); } -#endif -float cospi(float a) { return reference_cospi(a); } -double cospi(double a) { return reference_cospil(a); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fma(sycl::half a, sycl::half b, sycl::half c) { return reference_fma(a, b, c, 0); } -#endif -float fma(float a, float b, float c) { return reference_fma(a, b, c, 0); } -double fma(double a, double b, double c) { return reference_fmal(a, b, c); } // AdaptiveCpp does not yet support sycl::bit_cast, which is used in // `nextafter`. -#if SYCL_CTS_ENABLE_HALF_TESTS && !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP sycl::half fdim(sycl::half a, sycl::half b) { if (a > b) { // to get rounding to nearest even @@ -533,39 +537,22 @@ sycl::half fdim(sycl::half a, sycl::half b) { } #endif -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fract(sycl::half a, sycl::half *b) { *b = std::floor(a); return std::fmin(a - *b, nextafter(sycl::half(1.0), sycl::half(0.0))); } -#endif -float fract(float a, float *b) { - *b = std::floor(a); - return std::fmin(a - *b, nextafter(1.0f, 0.0f)); -} -double fract(double a, double *b) { - *b = std::floor(a); - return std::fmin(a - *b, nextafter(1.0, 0.0)); -} -float nan(unsigned int a) { return std::nanf(std::to_string(a).c_str()); } -double nan(unsigned long a) { return std::nan(std::to_string(a).c_str()); } -double nan(unsigned long long a) { return std::nan(std::to_string(a).c_str()); } -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half nan(unsigned short a) { return nan(unsigned(a)); } -#endif -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half modf(sycl::half a, sycl::half *b) { float resPtr; float res = modf(static_cast(a), &resPtr); *b = static_cast(resPtr); return res; } -#endif // AdaptiveCpp does not yet support sycl::bit_cast -#if SYCL_CTS_ENABLE_HALF_TESTS && !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +#if !SYCL_CTS_COMPILING_WITH_ADAPTIVECPP sycl::half nextafter(sycl::half x, sycl::half y) { if (std::isnan(x)) return x; @@ -593,75 +580,9 @@ sycl::half nextafter(sycl::half x, sycl::half y) { } #endif -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half sinpi(sycl::half a) { return reference_sinpi(a); } -#endif -float sinpi(float a) { return reference_sinpi(a); } -double sinpi(double a) { return reference_sinpil(a); } - -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half tanpi(sycl::half a) { return reference_tanpi(a); } -#endif -float tanpi(float a) { return reference_tanpi(a); } -double tanpi(double a) { return reference_tanpil(a); } - -// Geometric functions -template -sycl::vec cross_t(sycl::vec a, sycl::vec b) { - sycl::vec res; - std::vector temp_res(4); - std::vector av({a.x(), a.y(), a.z()}); - std::vector bv({b.x(), b.y(), b.z()}); - temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; - temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; - temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; - temp_res[3] = 0.0; - for (int i = 0; i < N; i++) res[i] = temp_res[i]; - - return res; -} - -sycl::float4 cross(sycl::float4 p0, sycl::float4 p1) { return cross_t(p0, p1); } -sycl::float3 cross(sycl::float3 p0, sycl::float3 p1) { return cross_t(p0, p1); } -sycl::double4 cross(sycl::double4 p0, sycl::double4 p1) { - return cross_t(p0, p1); -} -sycl::double3 cross(sycl::double3 p0, sycl::double3 p1) { - return cross_t(p0, p1); -} - -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray cross_t(sycl::marray a, sycl::marray b) { - sycl::marray res; - std::vector temp_res(4); - std::vector av({a[0], a[1], a[2]}); - std::vector bv({b[0], b[1], b[2]}); - temp_res[0] = av[1] * bv[2] - av[2] * bv[1]; - temp_res[1] = av[2] * bv[0] - av[0] * bv[2]; - temp_res[2] = av[0] * bv[1] - av[1] * bv[0]; - temp_res[3] = 0.0; - for (size_t i = 0; i < N; i++) res[i] = temp_res[i]; - return res; -} - -sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1) { - return cross_t(p0, p1); -} -sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1) { - return cross_t(p0, p1); -} -sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1) { - return cross_t(p0, p1); -} -sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1) { - return cross_t(p0, p1); -} -#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP - -#if SYCL_CTS_ENABLE_HALF_TESTS sycl::half fast_dot(float p0) { return std::pow(p0, 2); } sycl::half fast_dot(sycl::float2 p0) { return std::pow(p0.x(), 2) + std::pow(p0.y(), 2); @@ -686,6 +607,64 @@ sycl::half fast_dot(sycl::mfloat4 p0) { std::pow(p0[3], 2); } #endif + +#endif // SYCL_CTS_ENABLE_HALF_TESTS + +#if SYCL_CTS_ENABLE_DOUBLE_TESTS + +double bitselect(double a, double b, double c) { + return bitselect_f_t(a, b, c); +} + +double degrees(double a) { return degrees_t(a); } +double radians(double a) { return radians_t(a); } +double step(double a, double b) { return step_t(a, b); } + +sycl_cts::resultRef smoothstep(double a, double b, double c) { + return smoothstep_t(a, b, c); +} + +double sign(double a) { return sign_t(a); } + +sycl_cts::resultRef mix(const double a, const double b, + const double c) { + return mix_t(a, b, c); +} + +double acospi(double a) { return reference_acospil(a); } +double asinpi(double a) { return reference_asinpil(a); } +double atanpi(double a) { return reference_atanpil(a); } +double atan2pi(double a, double b) { return reference_atan2pil(a, b); } +double cospi(double a) { return reference_cospil(a); } +double fma(double a, double b, double c) { return reference_fmal(a, b, c); } + +double fract(double a, double* b) { + *b = std::floor(a); + return std::fmin(a - *b, nextafter(1.0, 0.0)); +} + +double nan(unsigned long a) { return std::nan(std::to_string(a).c_str()); } +double nan(unsigned long long a) { return std::nan(std::to_string(a).c_str()); } + +double sinpi(double a) { return reference_sinpil(a); } +double tanpi(double a) { return reference_tanpil(a); } + +sycl::double4 cross(sycl::double4 p0, sycl::double4 p1) { + return cross_t(p0, p1); +} +sycl::double3 cross(sycl::double3 p0, sycl::double3 p1) { + return cross_t(p0, p1); +} + +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1) { + return cross_t(p0, p1); +} +sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1) { + return cross_t(p0, p1); +} #endif +#endif // SYCL_CTS_ENABLE_DOUBLE_TESTS + } /* namespace reference */ diff --git a/util/math_reference.h b/util/math_reference.h index 38d1cad37..2324de19a 100644 --- a/util/math_reference.h +++ b/util/math_reference.h @@ -28,93 +28,8 @@ #include "./math_helper.h" #include -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP - -#define MAKE_VEC_AND_MARRAY_VERSIONS(func) \ - template \ - sycl::vec func(sycl::vec a) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x) { return func(x); }, a); \ - } \ - template \ - sycl::marray func(sycl::marray a) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x) { return func(x); }, a); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } \ - template \ - sycl::marray func(sycl::marray a, sycl::marray b) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b, \ - sycl::vec c) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } \ - template \ - sycl::marray func(sycl::marray a, sycl::marray b, \ - sycl::marray c) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(func) \ - template \ - sycl::vec func(sycl::vec a, T b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } \ - template \ - sycl::marray func(sycl::marray a, T b) { \ - return sycl_cts::math::run_func_on_marray( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#else // definitions without marray for AdaptiveCpp - -#define MAKE_VEC_AND_MARRAY_VERSIONS(func) \ - template \ - sycl::vec func(sycl::vec a) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x) { return func(x); }, a); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(func) \ - template \ - sycl::vec func(sycl::vec a, sycl::vec b, \ - sycl::vec c) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ - } - -#define MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(func) \ - template \ - sycl::vec func(sycl::vec a, T b) { \ - return sycl_cts::math::run_func_on_vector( \ - [](T x, T y) { return func(x, y); }, a, b); \ - } - -#endif - namespace reference { + /* two argument relational reference */ template auto isequal(T a, T b) { @@ -205,85 +120,22 @@ template bool any(T x) { return sycl_cts::math::if_msb_set(x); } -template -int any(sycl::vec a) { - for (int i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; - } - return false; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -bool any(sycl::marray a) { - for (size_t i = 0; i < N; i++) { - if (any(a[i]) == 1) return true; - } - return false; -} -#endif template bool all(T x) { return sycl_cts::math::if_msb_set(x); } -template -int all(sycl::vec a) { - for (int i = 0; i < N; i++) { - if (all(a[i]) == 0) return false; - } - return true; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -bool all(sycl::marray a) { - for (size_t i = 0; i < N; i++) { - if (all(a[i]) == 0) return false; - } - return true; -} -#endif template T bitselect(T a, T b, T c) { return (c & b) | (~c & a); } -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c); -#endif float bitselect(float a, float b, float c); -double bitselect(double a, double b, double c); -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(bitselect) template T select(T a, T b, bool c) { return c ? b : a; } -template -sycl::vec select(sycl::vec a, sycl::vec b, - sycl::vec c) { - sycl::vec res; - for (int i = 0; i < N; i++) { - if (any(c[i]) == 1) - res[i] = b[i]; - else - res[i] = a[i]; - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray select(sycl::marray a, sycl::marray b, - sycl::marray c) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = c[i] ? b[i] : a[i]; - } - return res; -} -#endif /* absolute value */ template @@ -292,18 +144,6 @@ sycl_cts::resultRef abs(T x) { T result = x < 0 ? T(-U(x)) : x; return result < 0 ? sycl_cts::resultRef(0, true) : result; } -template -sycl_cts::resultRef> abs(sycl::vec a) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x) { return abs(x); }, a); -} -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs(sycl::marray a) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x) { return abs(x); }, a); -} -#endif /* absolute difference */ template @@ -319,21 +159,6 @@ sycl_cts::resultRef abs_diff(T a, T b) { ? sycl_cts::resultRef(0, true) : T(result); } -template -sycl_cts::resultRef> abs_diff(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> abs_diff(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return abs_diff(x, y); }, a, b); -} -#endif /* add with saturation */ template @@ -353,7 +178,6 @@ T add_sat(T a, T b) { return r; } } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(add_sat) /* half add */ template @@ -361,14 +185,12 @@ T hadd(T a, T b) { if (std::is_unsigned::value) return (a >> 1) + (b >> 1) + ((a & b) & 0x1); return (a >> 1) + (b >> 1) + (a & b & 1); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hadd) /* round up half add */ template T rhadd(T a, T b) { return (a >> 1) + (b >> 1) + ((a & 1) | (b & 1)); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rhadd) /* clamp */ template @@ -376,48 +198,6 @@ sycl_cts::resultRef clamp(T v, T minv, T maxv) { if (minv > maxv) return sycl_cts::resultRef(T(), true); return (v < minv) ? minv : ((v > maxv) ? maxv : v); } -template -sycl_cts::resultRef> clamp(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> clamp(sycl::vec a, T b, T c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = clamp(a[i], b, c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> clamp(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = clamp(a[i], b, c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif /* count leading zeros */ template @@ -430,7 +210,6 @@ T clz(T x) { lz++; return static_cast(lz); } -MAKE_VEC_AND_MARRAY_VERSIONS(clz) /* count trailing zeros */ template @@ -445,7 +224,6 @@ T ctz(T x) { tz++; return static_cast(tz); } -MAKE_VEC_AND_MARRAY_VERSIONS(ctz) // mad_hi is after mul_hi @@ -463,8 +241,6 @@ int mad_sat(int, int, int); long mad_sat(long, long, long); long long mad_sat(long long, long long, long long); -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_sat) - /* maximum value */ template sycl_cts::resultRef max(T a, T b) { @@ -474,30 +250,6 @@ sycl_cts::resultRef max(T a, T b) { return (a < b) ? b : a; return sycl_cts::resultRef(T(), true); } -template -sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::vec a, T b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> max(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -template -sycl_cts::resultRef> max(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return max(x, y); }, a, b); -} -#endif /* minimum value */ template @@ -508,30 +260,6 @@ sycl_cts::resultRef min(T a, T b) { return (b < a) ? b : a; return sycl_cts::resultRef(T(), true); } -template -sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::vec a, T b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> min(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -template -sycl_cts::resultRef> min(sycl::marray a, T b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return min(x, y); }, a, b); -} -#endif /* multiply and return high part */ unsigned char mul_hi(unsigned char, unsigned char); @@ -545,14 +273,12 @@ short mul_hi(short, short); int mul_hi(int, int); long mul_hi(long, long); long long mul_hi(long long, long long); -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(mul_hi) /* multiply add, get high part */ template T mad_hi(T x, T y, T z) { return mul_hi(x, y) + z; } -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad_hi) /* bitwise rotate */ template @@ -570,7 +296,6 @@ T rotate(T v, T i) { size_t nBits = sycl_cts::math::num_bits(v) - size_t(i_mod); return T((v << i_mod) | ((v >> nBits) & mask)); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(rotate) /* substract with saturation */ template @@ -596,7 +321,6 @@ T sub_sat(T x, T y) { } } } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(sub_sat) /* upsample */ uint16_t upsample(uint8_t h, uint8_t l); @@ -639,23 +363,6 @@ struct upsample_t { using type = int64_t; }; -template -sycl::vec::type, N> upsample( - sycl::vec a, sycl::vec::type, N> b) { - return sycl_cts::math::run_func_on_vector::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray::type, N> upsample( - sycl::marray a, - sycl::marray::type, N> b) { - return sycl_cts::math::run_func_on_marray::type, T, N>( - [](T x, T y) { return upsample(x, y); }, a, b); -} -#endif - /* return number of non zero bits in x */ template T popcount(T x) { @@ -664,613 +371,179 @@ T popcount(T x) { if (x & (1ull << i)) lz++; return lz; } -MAKE_VEC_AND_MARRAY_VERSIONS(popcount) /* fast multiply add 24bits */ sycl_cts::resultRef mad24(int32_t x, int32_t y, int32_t z); sycl_cts::resultRef mad24(uint32_t x, uint32_t y, uint32_t z); -template -sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mad24(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); -} -#endif - /* fast multiply 24bits */ sycl_cts::resultRef mul24(int32_t x, int32_t y); sycl_cts::resultRef mul24(uint32_t x, uint32_t y); -template -sycl_cts::resultRef> mul24(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mul24(sycl::marray a, - sycl::marray b) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return mul24(x, y); }, a, b); -} -#endif - // Common functions -// clamp is in Integer functions - -/* degrees */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half degrees(sycl::half); -#endif float degrees(float a); -double degrees(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(degrees) - -// max and min are in Integer functions - -/* mix */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, - const sycl::half c); -#endif sycl_cts::resultRef mix(const float a, const float b, const float c); -sycl_cts::resultRef mix(const double a, const double b, const double c); - -template -sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, - T c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> mix(sycl::marray a, - sycl::marray b, T c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = mix(a[i], b[i], c); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif - -/* radians */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half radians(sycl::half); -#endif float radians(float a); -double radians(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(radians) - -/* step */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half step(sycl::half a, sycl::half b); -#endif float step(float a, float b); -double step(double a, double b); -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(step) - -template -sycl::vec step(T a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray step(T a, sycl::marray b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = step(a, b[i]); - } - return res; -} -#endif - -/* smoothstep */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, - sycl::half c); -#endif sycl_cts::resultRef smoothstep(float a, float b, float c); -sycl_cts::resultRef smoothstep(double a, double b, double c); - -template -sycl_cts::resultRef> smoothstep(sycl::vec a, - sycl::vec b, - sycl::vec c) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { - sycl::vec res; - std::map undefined; - for (int i = 0; i < N; i++) { - sycl_cts::resultRef element = smoothstep(a, b, c[i]); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl_cts::resultRef> smoothstep(sycl::marray a, - sycl::marray b, - sycl::marray c) { - return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); -} -template -sycl_cts::resultRef> smoothstep(T a, T b, - sycl::marray c) { - sycl::marray res; - std::map undefined; - for (size_t i = 0; i < N; i++) { - sycl_cts::resultRef element = smoothstep(a, b, c[i]); - if (element.undefined.empty()) - res[i] = element.res; - else - undefined[i] = true; - } - return sycl_cts::resultRef>(res, undefined); -} -#endif - -/* sign */ -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half sign(sycl::half a); -#endif float sign(float a); -double sign(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(sign) // Math Functions template struct higher_accuracy; -#if SYCL_CTS_ENABLE_HALF_TESTS -template <> -struct higher_accuracy { - using type = float; -}; -#endif template <> struct higher_accuracy { using type = double; }; -template <> -struct higher_accuracy { - using type = long double; -}; - -template -struct higher_accuracy> { - using type = sycl::vec::type, N>; -}; -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -struct higher_accuracy> { - using type = sycl::marray::type, N>; -}; -#endif template T acos(T a) { return std::acos(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(acos) template T acosh(T a) { return std::acosh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(acosh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half acospi(sycl::half a); -#endif float acospi(float a); -double acospi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(acospi) template T asin(T a) { return std::asin(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(asin) template T asinh(T a) { return std::asinh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(asinh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half asinpi(sycl::half a); -#endif float asinpi(float a); -double asinpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(asinpi) template T atan(T a) { return std::atan(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(atan) template T atan2(T a, T b) { return std::atan2(static_cast::type>(a), b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2) template T atanh(T a) { return std::atanh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(atanh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half atanpi(sycl::half a); -#endif float atanpi(float a); -double atanpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(atanpi) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half atan2pi(sycl::half a, sycl::half b); -#endif float atan2pi(float a, float b); -double atan2pi(double a, double b); -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(atan2pi) template T cbrt(T a) { return std::cbrt(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(cbrt) using std::ceil; -MAKE_VEC_AND_MARRAY_VERSIONS(ceil) - using std::copysign; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(copysign) template T cos(T a) { return std::cos(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(cos) template T cosh(T a) { return std::cosh(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(cosh) -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half cospi(sycl::half a); -#endif float cospi(float a); -double cospi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(cospi) template T erfc(T a) { return std::erfc(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(erfc) template T erf(T a) { return std::erf(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(erf) template T exp(T a) { return std::exp(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(exp) template T exp2(T a) { return std::exp2(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(exp2) template T exp10(T a) { return std::pow(static_cast::type>(10), static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(exp10) template T expm1(T a) { return std::expm1(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(expm1) using std::fabs; -MAKE_VEC_AND_MARRAY_VERSIONS(fabs) - using std::fdim; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fdim(sycl::half a, sycl::half b); -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fdim) - using std::floor; -MAKE_VEC_AND_MARRAY_VERSIONS(floor) - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fma(sycl::half a, sycl::half b, sycl::half c); -#endif float fma(float a, float b, float c); -double fma(double a, double b, double c); - -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(fma) - using std::fmax; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmax) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmax) - using std::fmin; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmin) -MAKE_VEC_AND_MARRAY_VERSIONS_WITH_SCALAR(fmin) - using std::fmod; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(fmod) - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fract(sycl::half a, sycl::half *b); -#endif float fract(float a, float *b); -double fract(double a, double *b); - -template -sycl::vec fract(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray fract(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - T value; - res[i] = fract(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -#endif - -using std::frexp; -template -sycl::vec frexp(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = frexp(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray frexp(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - int value; - res[i] = frexp(a[i], &value); - ; - resPtr[i] = value; - } - *b = resPtr; - return res; -} -#endif +using std::frexp; template T hypot(T a, T b) { return std::hypot(static_cast::type>(a), b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(hypot) using std::ilogb; -template -sycl::vec ilogb(sycl::vec a) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = ilogb(a[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray ilogb(sycl::marray a) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = ilogb(a[i]); - } - return res; -} -#endif - using std::ldexp; -template -sycl::vec ldexp(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = ldexp(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray ldexp(sycl::marray a, sycl::marray b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = ldexp(a[i], b[i]); - } - return res; -} -#endif -template -sycl::vec ldexp(sycl::vec a, int b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = ldexp(a[i], b); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray ldexp(sycl::marray a, int b) { - sycl::marray res; - for (size_t i = 0; i < N; i++) { - res[i] = ldexp(a[i], b); - } - return res; -} -#endif - using std::lgamma; -MAKE_VEC_AND_MARRAY_VERSIONS(lgamma) template -T lgamma_r(T a, int *b) { +T lgamma_r(T a, int* b) { *b = (std::tgamma(a) > 0) ? 1 : -1; return std::lgamma(a); } -template -sycl::vec lgamma_r(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = lgamma_r(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -template -sycl::marray lgamma_r(sycl::marray a, sycl::marray *b) { - sycl::marray res; - sycl::marray resPtr; - for (size_t i = 0; i < N; i++) { - int value; - res[i] = lgamma_r(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -#endif template T log(T a) { return std::log(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log) template T log2(T a) { return std::log2(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log2) template T log10(T a) { return std::log10(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log10) template T log1p(T a) { return std::log1p(static_cast::type>(a)); } -MAKE_VEC_AND_MARRAY_VERSIONS(log1p) using std::logb; -MAKE_VEC_AND_MARRAY_VERSIONS(logb) template T mad(T a, T b, T c) { return a * b + c; } -MAKE_VEC_AND_MARRAY_VERSIONS_3ARGS(mad) template T maxmag(T a, T b) { @@ -1280,7 +553,6 @@ T maxmag(T a, T b) { return b; return fmax(a, b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(maxmag) template T minmag(T a, T b) { @@ -1290,173 +562,1058 @@ T minmag(T a, T b) { return b; return fmin(a, b); } -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(minmag) using std::modf; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half modf(sycl::half a, sycl::half *b); +float nan(unsigned int a); +using std::nextafter; + +template +T pow(T a, T b) { + return std::pow(static_cast::type>(a), + static_cast::type>(b)); +} + +template +T pown(T a, int b) { + return std::pow(static_cast::type>(a), + static_cast::type>(b)); +} + +template +sycl_cts::resultRef powr(T a, T b) { + if (a < 0) return sycl_cts::resultRef(T(), true); + return std::pow(static_cast::type>(a), + static_cast::type>(b)); +} + +using std::remainder; + +template +T remquo(T x, T y, int* quo) { + return reference_remquol(x, y, quo); +} + +using std::rint; + +template +T rootn(T a, int b) { + return std::pow(static_cast::type>(a), + static_cast::type>(1.0 / b)); +} + +using std::round; + +template +T rsqrt(T a) { + return 1 / std::sqrt(static_cast::type>(a)); +} + +template +T sincos(T a, T* b) { + *b = std::cos(static_cast::type>(a)); + return std::sin(static_cast::type>(a)); +} + +template +T sin(T a) { + return std::sin(static_cast::type>(a)); +} + +template +T sinh(T a) { + return std::sinh(static_cast::type>(a)); +} + +float sinpi(float a); + +template +T sqrt(T a) { + return std::sqrt(static_cast::type>(a)); +} + +template +T tan(T a) { + return std::tan(static_cast::type>(a)); +} + +template +T tanh(T a) { + return std::tanh(static_cast::type>(a)); +} + +float tanpi(float a); + +template +T tgamma(T a) { + return std::tgamma(static_cast::type>(a)); +} + +using std::trunc; + +template +T recip(T a) { + return 1.0 / a; +} + +template +T divide(T a, T b) { + return a / b; +} + +// Geometric functions + +sycl::float4 cross(sycl::float4 p0, sycl::float4 p1); +sycl::float3 cross(sycl::float3 p0, sycl::float3 p1); + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1); +sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1); #endif -template -sycl::vec modf(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = modf(a[i], &value); + +template +T dot(T p0, T p1) { + return p0 * p1; +} + +template +T normalize(T p) { + if (p < 0) return -1; + return 1; +} + +#if SYCL_CTS_ENABLE_HALF_TESTS + +template <> +struct higher_accuracy { + using type = float; +}; + +sycl::half bitselect(sycl::half a, sycl::half b, sycl::half c); +sycl::half degrees(sycl::half); +sycl_cts::resultRef mix(const sycl::half a, const sycl::half b, + const sycl::half c); +sycl::half radians(sycl::half); +sycl::half step(sycl::half a, sycl::half b); +sycl_cts::resultRef smoothstep(sycl::half a, sycl::half b, + sycl::half c); +sycl::half sign(sycl::half a); +sycl::half acospi(sycl::half a); +sycl::half asinpi(sycl::half a); +sycl::half atanpi(sycl::half a); +sycl::half atan2pi(sycl::half a, sycl::half b); +sycl::half cospi(sycl::half a); +sycl::half fdim(sycl::half a, sycl::half b); +sycl::half fma(sycl::half a, sycl::half b, sycl::half c); +sycl::half fract(sycl::half a, sycl::half* b); +sycl::half modf(sycl::half a, sycl::half* b); +sycl::half nan(unsigned short a); + +template +sycl::vec nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](unsigned short x) { return nan(x); }, a); +} + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +sycl::marray nan(sycl::marray a) { + return sycl_cts::math::run_func_on_marray( + [](unsigned short x) { return nan(x); }, a); +} +#endif + +sycl::half nextafter(sycl::half a, sycl::half b); +sycl::half sinpi(sycl::half a); +sycl::half tanpi(sycl::half a); + +sycl::half fast_dot(float p0); +sycl::half fast_dot(sycl::float2 p0); +sycl::half fast_dot(sycl::float3 p0); +sycl::half fast_dot(sycl::float4 p0); + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::half fast_dot(sycl::mfloat2 p0); +sycl::half fast_dot(sycl::mfloat3 p0); +sycl::half fast_dot(sycl::mfloat4 p0); +#endif + +#endif // SYCL_CTS_ENABLE_HALF_TESTS + +#if SYCL_CTS_ENABLE_DOUBLE_TESTS + +template <> +struct higher_accuracy { + using type = long double; +}; + +double bitselect(double a, double b, double c); +double degrees(double a); +sycl_cts::resultRef mix(const double a, const double b, const double c); +double radians(double a); +double step(double a, double b); +sycl_cts::resultRef smoothstep(double a, double b, double c); +double sign(double a); + +double acospi(double a); +double asinpi(double a); +double atanpi(double a); +double atan2pi(double a, double b); +double cospi(double a); +double fma(double a, double b, double c); +double fract(double a, double* b); +double nan(unsigned long a); +double nan(unsigned long long a); + +template +std::enable_if_t || + std::is_same_v, + sycl::vec> +nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](T x) { return nan(x); }, a); +} + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +template +std::enable_if_t || + std::is_same_v, + sycl::marray> +nan(sycl::marray a) { + return sycl_cts::math::run_func_on_marray( + [](T x) { return nan(x); }, a); +} +#endif + +double sinpi(double a); +double tanpi(double a); + +sycl::double4 cross(sycl::double4 p0, sycl::double4 p1); +sycl::double3 cross(sycl::double3 p0, sycl::double3 p1); + +// FIXME: AdaptiveCpp does not support marray +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP +sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1); +sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1); +#endif + +#endif // SYCL_CTS_ENABLE_DOUBLE_TESTS + +// sycl::vec overloads of the above. These must be defined after any +// non-templated conditional half / double overloads for correct resolution with +// DPC++. + +#define MAKE_VEC_VERSION(func) \ + template \ + sycl::vec func(sycl::vec a) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x) { return func(x); }, a); \ + } + +#define MAKE_VEC_VERSION_2ARGS(func) \ + template \ + sycl::vec func(sycl::vec a, sycl::vec b) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +#define MAKE_VEC_VERSION_3ARGS(func) \ + template \ + sycl::vec func(sycl::vec a, sycl::vec b, \ + sycl::vec c) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ + } + +#define MAKE_VEC_VERSION_WITH_SCALAR(func) \ + template \ + sycl::vec func(sycl::vec a, T b) { \ + return sycl_cts::math::run_func_on_vector( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +// Common functions + +template +int any(sycl::vec a) { + for (int i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; +} + +template +int all(sycl::vec a) { + for (int i = 0; i < N; i++) { + if (all(a[i]) == 0) return false; + } + return true; +} + +MAKE_VEC_VERSION_3ARGS(bitselect) + +template +sycl::vec select(sycl::vec a, sycl::vec b, + sycl::vec c) { + sycl::vec res; + for (int i = 0; i < N; i++) { + if (any(c[i]) == 1) + res[i] = b[i]; + else + res[i] = a[i]; + } + return res; +} + +template +sycl_cts::resultRef> abs(sycl::vec a) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x) { return abs(x); }, a); +} + +template +sycl_cts::resultRef> abs_diff(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(add_sat) +MAKE_VEC_VERSION_2ARGS(hadd) +MAKE_VEC_VERSION_2ARGS(rhadd) + +template +sycl_cts::resultRef> clamp(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> clamp(sycl::vec a, T b, T c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = clamp(a[i], b, c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_VEC_VERSION(clz) +MAKE_VEC_VERSION(ctz) + +MAKE_VEC_VERSION_3ARGS(mad_sat) + +template +sycl_cts::resultRef> max(sycl::vec a, sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::vec a, T b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} + +template +sycl_cts::resultRef> min(sycl::vec a, sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::vec a, T b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(mul_hi) +MAKE_VEC_VERSION_3ARGS(mad_hi) +MAKE_VEC_VERSION_2ARGS(rotate) +MAKE_VEC_VERSION_2ARGS(sub_sat) + +template +sycl::vec::type, N> upsample( + sycl::vec a, sycl::vec::type, N> b) { + return sycl_cts::math::run_func_on_vector::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} + +MAKE_VEC_VERSION(popcount) + +template +sycl_cts::resultRef> mad24(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} + +template +sycl_cts::resultRef> mul24(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} + +MAKE_VEC_VERSION(degrees) + +template +sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::vec a, sycl::vec b, + T c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_VEC_VERSION(radians) + +template +sycl::vec step(T a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = step(a, b[i]); + } + return res; +} + +template +sycl_cts::resultRef> smoothstep(sycl::vec a, + sycl::vec b, + sycl::vec c) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> smoothstep(T a, T b, sycl::vec c) { + sycl::vec res; + std::map undefined; + for (int i = 0; i < N; i++) { + sycl_cts::resultRef element = smoothstep(a, b, c[i]); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +// Math functions + +template +struct higher_accuracy> { + using type = sycl::vec::type, N>; +}; + +MAKE_VEC_VERSION(acos) +MAKE_VEC_VERSION(acosh) +MAKE_VEC_VERSION(acospi) +MAKE_VEC_VERSION(asin) +MAKE_VEC_VERSION(asinh) +MAKE_VEC_VERSION(asinpi) +MAKE_VEC_VERSION(atan) +MAKE_VEC_VERSION_2ARGS(atan2) +MAKE_VEC_VERSION(atanh) +MAKE_VEC_VERSION(atanpi) +MAKE_VEC_VERSION_2ARGS(atan2pi) +MAKE_VEC_VERSION(cbrt) +MAKE_VEC_VERSION(ceil) +MAKE_VEC_VERSION_2ARGS(copysign) +MAKE_VEC_VERSION(cos) +MAKE_VEC_VERSION(cosh) +MAKE_VEC_VERSION(cospi) +MAKE_VEC_VERSION(erfc) +MAKE_VEC_VERSION(erf) +MAKE_VEC_VERSION(exp) +MAKE_VEC_VERSION(exp2) +MAKE_VEC_VERSION(exp10) +MAKE_VEC_VERSION(expm1) +MAKE_VEC_VERSION(fabs) +MAKE_VEC_VERSION_2ARGS(fdim) +MAKE_VEC_VERSION(floor) +MAKE_VEC_VERSION_3ARGS(fma) +MAKE_VEC_VERSION_2ARGS(fmax) +MAKE_VEC_VERSION_WITH_SCALAR(fmax) +MAKE_VEC_VERSION_2ARGS(fmin) +MAKE_VEC_VERSION_WITH_SCALAR(fmin) +MAKE_VEC_VERSION_2ARGS(fmod) + +template +sycl::vec fract(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::vec frexp(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::frexp(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION_2ARGS(hypot) + +template +sycl::vec ilogb(sycl::vec a) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ilogb(a[i]); + } + return res; +} + +template +sycl::vec ldexp(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b[i]); + } + return res; +} +template +sycl::vec ldexp(sycl::vec a, int b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b); + } + return res; +} + +MAKE_VEC_VERSION(lgamma) + +template +sycl::vec lgamma_r(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::lgamma_r(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION(log) +MAKE_VEC_VERSION(log2) +MAKE_VEC_VERSION(log10) +MAKE_VEC_VERSION(log1p) +MAKE_VEC_VERSION(logb) + +MAKE_VEC_VERSION_3ARGS(mad) +MAKE_VEC_VERSION_2ARGS(maxmag) +MAKE_VEC_VERSION_2ARGS(minmag) + +template +sycl::vec modf(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::modf(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::vec nan(sycl::vec a) { + return sycl_cts::math::run_func_on_vector( + [](unsigned int x) { return nan(x); }, a); +} + +MAKE_VEC_VERSION_2ARGS(nextafter) +MAKE_VEC_VERSION_2ARGS(pow) + +template +sycl::vec pown(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::pown(a[i], b[i]); + } + return res; +} + +template +sycl_cts::resultRef> powr(sycl::vec a, + sycl::vec b) { + return sycl_cts::math::run_func_on_vector_result_ref( + [](T x, T y) { return reference::powr(x, y); }, a, b); +} + +MAKE_VEC_VERSION_2ARGS(remainder) + +template +sycl::vec remquo(sycl::vec a, sycl::vec b, + sycl::vec* c) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + int value; + res[i] = reference::remquo(a[i], b[i], &value); + resPtr[i] = value; + } + *c = resPtr; + return res; +} + +MAKE_VEC_VERSION(rint) + +template +sycl::vec rootn(sycl::vec a, sycl::vec b) { + sycl::vec res; + for (int i = 0; i < N; i++) { + res[i] = reference::rootn(a[i], b[i]); + } + return res; +} + +MAKE_VEC_VERSION(round) +MAKE_VEC_VERSION(rsqrt) +MAKE_VEC_VERSION(sign) + +template +sycl::vec sincos(sycl::vec a, sycl::vec* b) { + sycl::vec res; + sycl::vec resPtr; + for (int i = 0; i < N; i++) { + T value; + res[i] = reference::sincos(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_VEC_VERSION(sin) +MAKE_VEC_VERSION(sinh) +MAKE_VEC_VERSION(sinpi) +MAKE_VEC_VERSION(sqrt) +MAKE_VEC_VERSION_2ARGS(step) +MAKE_VEC_VERSION(tan) +MAKE_VEC_VERSION(tanh) +MAKE_VEC_VERSION(tanpi) +MAKE_VEC_VERSION(tgamma) +MAKE_VEC_VERSION(trunc) +MAKE_VEC_VERSION(recip) +MAKE_VEC_VERSION_2ARGS(divide) + +// Geometric functions + +template +T dot(sycl::vec a, sycl::vec b) { + T res = 0; + for (int i = 0; i < N; i++) res += a[i] * b[i]; + return res; +} + +// sycl::marray overloads of the above. +// Not supported by AdaptiveCpp. +#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +#define MAKE_MARRAY_VERSION(func) \ + template \ + sycl::marray func(sycl::marray a) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x) { return func(x); }, a); \ + } + +#define MAKE_MARRAY_VERSION_2ARGS(func) \ + template \ + sycl::marray func(sycl::marray a, sycl::marray b) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +#define MAKE_MARRAY_VERSION_3ARGS(func) \ + template \ + sycl::marray func(sycl::marray a, sycl::marray b, \ + sycl::marray c) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y, T z) { return func(x, y, z); }, a, b, c); \ + } + +#define MAKE_MARRAY_VERSION_WITH_SCALAR(func) \ + template \ + sycl::marray func(sycl::marray a, T b) { \ + return sycl_cts::math::run_func_on_marray( \ + [](T x, T y) { return func(x, y); }, a, b); \ + } + +// Common functions. + +template +bool any(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (any(a[i]) == 1) return true; + } + return false; +} + +template +bool all(sycl::marray a) { + for (size_t i = 0; i < N; i++) { + if (all(a[i]) == 0) return false; + } + return true; +} + +MAKE_MARRAY_VERSION_3ARGS(bitselect) + +template +sycl::marray select(sycl::marray a, sycl::marray b, + sycl::marray c) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = c[i] ? b[i] : a[i]; + } + return res; +} + +template +sycl_cts::resultRef> abs(sycl::marray a) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x) { return abs(x); }, a); +} + +template +sycl_cts::resultRef> abs_diff(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return abs_diff(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION_2ARGS(add_sat) +MAKE_MARRAY_VERSION_2ARGS(hadd) +MAKE_MARRAY_VERSION_2ARGS(rhadd) + +template +sycl_cts::resultRef> clamp(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return clamp(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> clamp(sycl::marray a, T b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = clamp(a[i], b, c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_MARRAY_VERSION(clz) +MAKE_MARRAY_VERSION(ctz) + +MAKE_MARRAY_VERSION_3ARGS(mad_sat) + +template +sycl_cts::resultRef> max(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} +template +sycl_cts::resultRef> max(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return max(x, y); }, a, b); +} + +template +sycl_cts::resultRef> min(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} +template +sycl_cts::resultRef> min(sycl::marray a, T b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return min(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION_2ARGS(mul_hi) +MAKE_MARRAY_VERSION_3ARGS(mad_hi) +MAKE_MARRAY_VERSION_2ARGS(rotate) +MAKE_MARRAY_VERSION_2ARGS(sub_sat) + +template +sycl::marray::type, N> upsample( + sycl::marray a, + sycl::marray::type, N> b) { + return sycl_cts::math::run_func_on_marray::type, T, N>( + [](T x, T y) { return upsample(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION(popcount) + +template +sycl_cts::resultRef> mad24(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mad24(x, y, z); }, a, b, c); +} + +template +sycl_cts::resultRef> mul24(sycl::marray a, + sycl::marray b) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y) { return mul24(x, y); }, a, b); +} + +MAKE_MARRAY_VERSION(degrees) + +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return mix(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> mix(sycl::marray a, + sycl::marray b, T c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = mix(a[i], b[i], c); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +MAKE_MARRAY_VERSION(radians) + +template +sycl::marray step(T a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = step(a, b[i]); + } + return res; +} + +template +sycl_cts::resultRef> smoothstep(sycl::marray a, + sycl::marray b, + sycl::marray c) { + return sycl_cts::math::run_func_on_marray_result_ref( + [](T x, T y, T z) { return smoothstep(x, y, z); }, a, b, c); +} +template +sycl_cts::resultRef> smoothstep(T a, T b, + sycl::marray c) { + sycl::marray res; + std::map undefined; + for (size_t i = 0; i < N; i++) { + sycl_cts::resultRef element = smoothstep(a, b, c[i]); + if (element.undefined.empty()) + res[i] = element.res; + else + undefined[i] = true; + } + return sycl_cts::resultRef>(res, undefined); +} + +// Math functions + +template +struct higher_accuracy> { + using type = sycl::marray::type, N>; +}; + +MAKE_MARRAY_VERSION(acos) +MAKE_MARRAY_VERSION(acosh) +MAKE_MARRAY_VERSION(acospi) +MAKE_MARRAY_VERSION(asin) +MAKE_MARRAY_VERSION(asinh) +MAKE_MARRAY_VERSION(asinpi) +MAKE_MARRAY_VERSION(atan) +MAKE_MARRAY_VERSION_2ARGS(atan2) +MAKE_MARRAY_VERSION(atanh) +MAKE_MARRAY_VERSION(atanpi) +MAKE_MARRAY_VERSION_2ARGS(atan2pi) +MAKE_MARRAY_VERSION(cbrt) +MAKE_MARRAY_VERSION(ceil) +MAKE_MARRAY_VERSION_2ARGS(copysign) +MAKE_MARRAY_VERSION(cos) +MAKE_MARRAY_VERSION(cosh) +MAKE_MARRAY_VERSION(cospi) +MAKE_MARRAY_VERSION(erfc) +MAKE_MARRAY_VERSION(erf) +MAKE_MARRAY_VERSION(exp) +MAKE_MARRAY_VERSION(exp2) +MAKE_MARRAY_VERSION(exp10) +MAKE_MARRAY_VERSION(expm1) +MAKE_MARRAY_VERSION(fabs) +MAKE_MARRAY_VERSION_2ARGS(fdim) +MAKE_MARRAY_VERSION(floor) +MAKE_MARRAY_VERSION_3ARGS(fma) +MAKE_MARRAY_VERSION_2ARGS(fmax) +MAKE_MARRAY_VERSION_WITH_SCALAR(fmax) +MAKE_MARRAY_VERSION_2ARGS(fmin) +MAKE_MARRAY_VERSION_WITH_SCALAR(fmin) +MAKE_MARRAY_VERSION_2ARGS(fmod) + +template +sycl::marray fract(sycl::marray a, sycl::marray* b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + T value; + res[i] = reference::fract(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +template +sycl::marray frexp(sycl::marray a, sycl::marray* b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + int value; + res[i] = reference::frexp(a[i], &value); + resPtr[i] = value; + } + *b = resPtr; + return res; +} + +MAKE_MARRAY_VERSION_2ARGS(hypot) + +template +sycl::marray ilogb(sycl::marray a) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::ilogb(a[i]); + } + return res; +} + +template +sycl::marray ldexp(sycl::marray a, sycl::marray b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b[i]); + } + return res; +} +template +sycl::marray ldexp(sycl::marray a, int b) { + sycl::marray res; + for (size_t i = 0; i < N; i++) { + res[i] = reference::ldexp(a[i], b); + } + return res; +} + +MAKE_MARRAY_VERSION(lgamma) + +template +sycl::marray lgamma_r(sycl::marray a, sycl::marray* b) { + sycl::marray res; + sycl::marray resPtr; + for (size_t i = 0; i < N; i++) { + int value; + res[i] = reference::lgamma_r(a[i], &value); resPtr[i] = value; } *b = resPtr; return res; } -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +MAKE_MARRAY_VERSION(log) +MAKE_MARRAY_VERSION(log2) +MAKE_MARRAY_VERSION(log10) +MAKE_MARRAY_VERSION(log1p) +MAKE_MARRAY_VERSION(logb) + +MAKE_MARRAY_VERSION_3ARGS(mad) +MAKE_MARRAY_VERSION_2ARGS(maxmag) +MAKE_MARRAY_VERSION_2ARGS(minmag) + template sycl::marray modf(sycl::marray a, sycl::marray *b) { sycl::marray res; sycl::marray resPtr; for (int i = 0; i < N; i++) { T value; - res[i] = modf(a[i], &value); + res[i] = reference::modf(a[i], &value); resPtr[i] = value; } *b = resPtr; return res; } -#endif -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half nan(unsigned short a); -#endif -float nan(unsigned int a); -double nan(unsigned long a); -double nan(unsigned long long a); -#if SYCL_CTS_ENABLE_HALF_TESTS -template -sycl::vec nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](unsigned short x) { return nan(x); }, a); -} -#endif -template -sycl::vec nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](unsigned int x) { return nan(x); }, a); -} -template -std::enable_if_t || - std::is_same_v, - sycl::vec> -nan(sycl::vec a) { - return sycl_cts::math::run_func_on_vector( - [](T x) { return nan(x); }, a); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -#if SYCL_CTS_ENABLE_HALF_TESTS -template -sycl::marray nan(sycl::marray a) { - return sycl_cts::math::run_func_on_marray( - [](unsigned short x) { return nan(x); }, a); -} -#endif template sycl::marray nan(sycl::marray a) { return sycl_cts::math::run_func_on_marray( [](unsigned int x) { return nan(x); }, a); } -template -std::enable_if_t || - std::is_same_v, - sycl::marray> -nan(sycl::marray a) { - return sycl_cts::math::run_func_on_marray( - [](T x) { return nan(x); }, a); -} -#endif - -using std::nextafter; -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half nextafter(sycl::half a, sycl::half b); -#endif -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(nextafter) -template -T pow(T a, T b) { - return std::pow(static_cast::type>(a), - static_cast::type>(b)); -} -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(pow) +MAKE_MARRAY_VERSION_2ARGS(nextafter) +MAKE_MARRAY_VERSION_2ARGS(pow) -template -T pown(T a, int b) { - return std::pow(static_cast::type>(a), - static_cast::type>(b)); -} -template -sycl::vec pown(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = pown(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray pown(sycl::marray a, sycl::marray b) { sycl::marray res; for (size_t i = 0; i < N; i++) { - res[i] = pown(a[i], b[i]); + res[i] = reference::pown(a[i], b[i]); } return res; } -#endif -template -sycl_cts::resultRef powr(T a, T b) { - if (a < 0) return sycl_cts::resultRef(T(), true); - return std::pow(static_cast::type>(a), - static_cast::type>(b)); -} -template -sycl_cts::resultRef> powr(sycl::vec a, - sycl::vec b) { - return sycl_cts::math::run_func_on_vector_result_ref( - [](T x, T y) { return powr(x, y); }, a, b); -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl_cts::resultRef> powr(sycl::marray a, sycl::marray b) { return sycl_cts::math::run_func_on_marray_result_ref( - [](T x, T y) { return powr(x, y); }, a, b); + [](T x, T y) { return reference::powr(x, y); }, a, b); } -#endif - -using std::remainder; -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(remainder) -template -T remquo(T x, T y, int *quo) { - return reference_remquol(x, y, quo); -} +MAKE_MARRAY_VERSION_2ARGS(remainder) -template -sycl::vec remquo(sycl::vec a, sycl::vec b, - sycl::vec *c) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - int value; - res[i] = remquo(a[i], b[i], &value); - resPtr[i] = value; - } - *c = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray remquo(sycl::marray a, sycl::marray b, sycl::marray *c) { @@ -1464,187 +1621,69 @@ sycl::marray remquo(sycl::marray a, sycl::marray b, sycl::marray resPtr; for (size_t i = 0; i < N; i++) { int value; - res[i] = remquo(a[i], b[i], &value); + res[i] = reference::remquo(a[i], b[i], &value); resPtr[i] = value; } *c = resPtr; return res; } -#endif -using std::rint; -MAKE_VEC_AND_MARRAY_VERSIONS(rint) +MAKE_MARRAY_VERSION(rint) -template -T rootn(T a, int b) { - return std::pow(static_cast::type>(a), - static_cast::type>(1.0 / b)); -} -template -sycl::vec rootn(sycl::vec a, sycl::vec b) { - sycl::vec res; - for (int i = 0; i < N; i++) { - res[i] = rootn(a[i], b[i]); - } - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray rootn(sycl::marray a, sycl::marray b) { sycl::marray res; for (size_t i = 0; i < N; i++) { - res[i] = rootn(a[i], b[i]); + res[i] = reference::rootn(a[i], b[i]); } return res; } -#endif -using std::round; -MAKE_VEC_AND_MARRAY_VERSIONS(round) - -template -T rsqrt(T a) { - return 1 / std::sqrt(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(rsqrt) +MAKE_MARRAY_VERSION(round) +MAKE_MARRAY_VERSION(rsqrt) +MAKE_MARRAY_VERSION(sign) -template -T sincos(T a, T *b) { - *b = std::cos(static_cast::type>(a)); - return std::sin(static_cast::type>(a)); -} -template -sycl::vec sincos(sycl::vec a, sycl::vec *b) { - sycl::vec res; - sycl::vec resPtr; - for (int i = 0; i < N; i++) { - T value; - res[i] = sincos(a[i], &value); - resPtr[i] = value; - } - *b = resPtr; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template sycl::marray sincos(sycl::marray a, sycl::marray *b) { sycl::marray res; sycl::marray resPtr; for (size_t i = 0; i < N; i++) { T value; - res[i] = sincos(a[i], &value); + res[i] = reference::sincos(a[i], &value); resPtr[i] = value; } *b = resPtr; return res; } -#endif - -template -T sin(T a) { - return std::sin(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(sin) - -template -T sinh(T a) { - return std::sinh(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(sinh) - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half sinpi(sycl::half a); -#endif -float sinpi(float a); -double sinpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(sinpi) - -template -T sqrt(T a) { - return std::sqrt(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(sqrt) - -template -T tan(T a) { - return std::tan(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(tan) - -template -T tanh(T a) { - return std::tanh(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(tanh) - -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half tanpi(sycl::half a); -#endif -float tanpi(float a); -double tanpi(double a); -MAKE_VEC_AND_MARRAY_VERSIONS(tanpi) - -template -T tgamma(T a) { - return std::tgamma(static_cast::type>(a)); -} -MAKE_VEC_AND_MARRAY_VERSIONS(tgamma) - -using std::trunc; -MAKE_VEC_AND_MARRAY_VERSIONS(trunc) - -template -T recip(T a) { - return 1.0 / a; -} -MAKE_VEC_AND_MARRAY_VERSIONS(recip) - -template -T divide(T a, T b) { - return a / b; -} -MAKE_VEC_AND_MARRAY_VERSIONS_2ARGS(divide) - -// Geometric functions - -sycl::float4 cross(sycl::float4 p0, sycl::float4 p1); -sycl::float3 cross(sycl::float3 p0, sycl::float3 p1); -sycl::double4 cross(sycl::double4 p0, sycl::double4 p1); -sycl::double3 cross(sycl::double3 p0, sycl::double3 p1); -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -sycl::mfloat4 cross(sycl::mfloat4 p0, sycl::mfloat4 p1); -sycl::mfloat3 cross(sycl::mfloat3 p0, sycl::mfloat3 p1); -sycl::mdouble4 cross(sycl::mdouble4 p0, sycl::mdouble4 p1); -sycl::mdouble3 cross(sycl::mdouble3 p0, sycl::mdouble3 p1); -#endif +MAKE_MARRAY_VERSION(sin) +MAKE_MARRAY_VERSION(sinh) +MAKE_MARRAY_VERSION(sinpi) +MAKE_MARRAY_VERSION(sqrt) +MAKE_MARRAY_VERSION_2ARGS(step) +MAKE_MARRAY_VERSION(tan) +MAKE_MARRAY_VERSION(tanh) +MAKE_MARRAY_VERSION(tanpi) +MAKE_MARRAY_VERSION(tgamma) +MAKE_MARRAY_VERSION(trunc) +MAKE_MARRAY_VERSION(recip) +MAKE_MARRAY_VERSION_2ARGS(divide) -template -T dot(T p0, T p1) { - return p0 * p1; -} -template -T dot(sycl::vec a, sycl::vec b) { - T res = 0; - for (int i = 0; i < N; i++) res += a[i] * b[i]; - return res; -} -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP template T dot(sycl::marray a, sycl::marray b) { T res = 0; for (size_t i = 0; i < N; i++) res += a[i] * b[i]; return res; } -#endif + +#endif // SYCL_CTS_COMPILING_WITH_ADAPTIVECPP + +// Generic functions over both scalars and vec / marray types. +// These need to be defined last. template auto length(T p) { - return sqrt(reference::dot(p, p)); + return reference::sqrt(reference::dot(p, p)); } template @@ -1652,11 +1691,6 @@ auto distance(T p0, T p1) { return reference::length(p0 - p1); } -template -T normalize(T p) { - if (p < 0) return -1; - return 1; -} template sycl::vec normalize(sycl::vec a) { sycl::vec res; @@ -1677,22 +1711,9 @@ sycl::marray normalize(sycl::marray a) { } #endif -#if SYCL_CTS_ENABLE_HALF_TESTS -sycl::half fast_dot(float p0); -sycl::half fast_dot(sycl::float2 p0); -sycl::half fast_dot(sycl::float3 p0); -sycl::half fast_dot(sycl::float4 p0); -// FIXME: AdaptiveCpp does not support marray -#ifndef SYCL_CTS_COMPILING_WITH_ADAPTIVECPP -sycl::half fast_dot(sycl::mfloat2 p0); -sycl::half fast_dot(sycl::mfloat3 p0); -sycl::half fast_dot(sycl::mfloat4 p0); -#endif -#endif - template float fast_length(T p0) { - return sqrt(fast_dot(p0)); + return reference::sqrt(fast_dot(p0)); } template @@ -1702,7 +1723,7 @@ float fast_distance(T p0, T p1) { template T fast_normalize(T p0) { - return p0 * rsqrt(fast_dot(p0)); + return p0 * reference::rsqrt(fast_dot(p0)); } } // namespace reference