diff --git a/libcudacxx/include/cuda/std/__complex/nvbf16.h b/libcudacxx/include/cuda/std/__complex/nvbf16.h index d90c30e2210..612ebba3352 100644 --- a/libcudacxx/include/cuda/std/__complex/nvbf16.h +++ b/libcudacxx/include/cuda/std/__complex/nvbf16.h @@ -63,6 +63,39 @@ struct __libcpp_complex_overload_traits<__nv_bfloat16, false, false> typedef complex<__nv_bfloat16> _ComplexType; }; +// This is a workaround against the user defining macros __CUDA_NO_BFLOAT16_CONVERSIONS__ __CUDA_NO_BFLOAT16_OPERATORS__ +template <> +struct __complex_can_implicitly_construct<__nv_bfloat16, float> : true_type +{}; + +template <> +struct __complex_can_implicitly_construct<__nv_bfloat16, double> : true_type +{}; + +template <> +struct __complex_can_implicitly_construct : true_type +{}; + +template <> +struct __complex_can_implicitly_construct : true_type +{}; + +template +inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 __convert_to_bfloat16(const _Tp& __value) noexcept +{ + return __value; +} + +inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 __convert_to_bfloat16(const float& __value) noexcept +{ + return __float2bfloat16(__value); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 __convert_to_bfloat16(const double& __value) noexcept +{ + return __double2bfloat16(__value); +} + template <> class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__nv_bfloat162)) complex<__nv_bfloat16> { @@ -80,14 +113,14 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__nv_bfloat162)) complex<__ template ::value, int> = 0> _LIBCUDACXX_INLINE_VISIBILITY complex(const complex<_Up>& __c) - : __repr_(static_cast(__c.real()), static_cast(__c.imag())) + : __repr_(__convert_to_bfloat16(__c.real()), __convert_to_bfloat16(__c.imag())) {} template ::value, int> = 0, __enable_if_t<_CCCL_TRAIT(is_constructible, value_type, _Up), int> = 0> _LIBCUDACXX_INLINE_VISIBILITY explicit complex(const complex<_Up>& __c) - : __repr_(static_cast(__c.real()), static_cast(__c.imag())) + : __repr_(__convert_to_bfloat16(__c.real()), __convert_to_bfloat16(__c.imag())) {} _LIBCUDACXX_INLINE_VISIBILITY complex& operator=(const value_type& __re) @@ -100,8 +133,8 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__nv_bfloat162)) complex<__ template _LIBCUDACXX_INLINE_VISIBILITY complex& operator=(const complex<_Up>& __c) { - __repr_.x = __c.real(); - __repr_.y = __c.imag(); + __repr_.x = __convert_to_bfloat16(__c.real()); + __repr_.y = __convert_to_bfloat16(__c.imag()); return *this; } @@ -155,24 +188,24 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__nv_bfloat162)) complex<__ _LIBCUDACXX_INLINE_VISIBILITY complex& operator+=(const value_type& __re) { - __repr_.x += __re; + __repr_.x = __hadd(__repr_.x, __re); return *this; } _LIBCUDACXX_INLINE_VISIBILITY complex& operator-=(const value_type& __re) { - __repr_.x -= __re; + __repr_.x = __hsub(__repr_.x, __re); return *this; } _LIBCUDACXX_INLINE_VISIBILITY complex& operator*=(const value_type& __re) { - __repr_.x *= __re; - __repr_.y *= __re; + __repr_.x = __hmul(__repr_.x, __re); + __repr_.y = __hmul(__repr_.y, __re); return *this; } _LIBCUDACXX_INLINE_VISIBILITY complex& operator/=(const value_type& __re) { - __repr_.x /= __re; - __repr_.y /= __re; + __repr_.x = __hdiv(__repr_.x, __re); + __repr_.y = __hdiv(__repr_.y, __re); return *this; } @@ -195,9 +228,41 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__nv_bfloat162)) complex<__ } }; +template <> // complex +template <> // complex<__half> +inline _LIBCUDACXX_INLINE_VISIBILITY complex::complex(const complex<__nv_bfloat16>& __c) + : __re_(__bfloat162float(__c.real())) + , __im_(__bfloat162float(__c.imag())) +{} + +template <> // complex +template <> // complex<__half> +inline _LIBCUDACXX_INLINE_VISIBILITY complex::complex(const complex<__nv_bfloat16>& __c) + : __re_(__bfloat162float(__c.real())) + , __im_(__bfloat162float(__c.imag())) +{} + +template <> // complex +template <> // complex<__nv_bfloat16> +inline _LIBCUDACXX_INLINE_VISIBILITY complex& complex::operator=(const complex<__nv_bfloat16>& __c) +{ + __re_ = __bfloat162float(__c.real()); + __im_ = __bfloat162float(__c.imag()); + return *this; +} + +template <> // complex +template <> // complex<__nv_bfloat16> +inline _LIBCUDACXX_INLINE_VISIBILITY complex& complex::operator=(const complex<__nv_bfloat16>& __c) +{ + __re_ = __bfloat162float(__c.real()); + __im_ = __bfloat162float(__c.imag()); + return *this; +} + inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 arg(__nv_bfloat16 __re) { - return _CUDA_VSTD::atan2f(__nv_bfloat16(0), __re); + return _CUDA_VSTD::atan2(__int2bfloat16_rn(0), __re); } // We have performance issues with some trigonometric functions with __nv_bfloat16 diff --git a/libcudacxx/include/cuda/std/__complex/nvfp16.h b/libcudacxx/include/cuda/std/__complex/nvfp16.h index 7bd0ea02776..b3154a4b233 100644 --- a/libcudacxx/include/cuda/std/__complex/nvfp16.h +++ b/libcudacxx/include/cuda/std/__complex/nvfp16.h @@ -60,6 +60,39 @@ struct __libcpp_complex_overload_traits<__half, false, false> typedef complex<__half> _ComplexType; }; +// This is a workaround against the user defining macros __CUDA_NO_HALF_CONVERSIONS__ __CUDA_NO_HALF_OPERATORS__ +template <> +struct __complex_can_implicitly_construct<__half, float> : true_type +{}; + +template <> +struct __complex_can_implicitly_construct<__half, double> : true_type +{}; + +template <> +struct __complex_can_implicitly_construct : true_type +{}; + +template <> +struct __complex_can_implicitly_construct : true_type +{}; + +template +inline _LIBCUDACXX_INLINE_VISIBILITY __half __convert_to_half(const _Tp& __value) noexcept +{ + return __value; +} + +inline _LIBCUDACXX_INLINE_VISIBILITY __half __convert_to_half(const float& __value) noexcept +{ + return __float2half(__value); +} + +inline _LIBCUDACXX_INLINE_VISIBILITY __half __convert_to_half(const double& __value) noexcept +{ + return __double2half(__value); +} + template <> class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__half2)) complex<__half> { @@ -77,14 +110,14 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__half2)) complex<__half> template ::value, int> = 0> _LIBCUDACXX_INLINE_VISIBILITY complex(const complex<_Up>& __c) - : __repr_(static_cast(__c.real()), static_cast(__c.imag())) + : __repr_(__convert_to_half(__c.real()), __convert_to_half(__c.imag())) {} template ::value, int> = 0, __enable_if_t<_CCCL_TRAIT(is_constructible, value_type, _Up), int> = 0> _LIBCUDACXX_INLINE_VISIBILITY explicit complex(const complex<_Up>& __c) - : __repr_(static_cast(__c.real()), static_cast(__c.imag())) + : __repr_(__convert_to_half(__c.real()), __convert_to_half(__c.imag())) {} _LIBCUDACXX_INLINE_VISIBILITY complex& operator=(const value_type& __re) @@ -97,8 +130,8 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__half2)) complex<__half> template _LIBCUDACXX_INLINE_VISIBILITY complex& operator=(const complex<_Up>& __c) { - __repr_.x = __c.real(); - __repr_.y = __c.imag(); + __repr_.x = __convert_to_half(__c.real()); + __repr_.y = __convert_to_half(__c.imag()); return *this; } @@ -152,24 +185,24 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__half2)) complex<__half> _LIBCUDACXX_INLINE_VISIBILITY complex& operator+=(const value_type& __re) { - __repr_.x += __re; + __repr_.x = __hadd(__repr_.x, __re); return *this; } _LIBCUDACXX_INLINE_VISIBILITY complex& operator-=(const value_type& __re) { - __repr_.x -= __re; + __repr_.x = __hsub(__repr_.x, __re); return *this; } _LIBCUDACXX_INLINE_VISIBILITY complex& operator*=(const value_type& __re) { - __repr_.x *= __re; - __repr_.y *= __re; + __repr_.x = __hmul(__repr_.x, __re); + __repr_.y = __hmul(__repr_.y, __re); return *this; } _LIBCUDACXX_INLINE_VISIBILITY complex& operator/=(const value_type& __re) { - __repr_.x /= __re; - __repr_.y /= __re; + __repr_.x = __hdiv(__repr_.x, __re); + __repr_.y = __hdiv(__repr_.y, __re); return *this; } @@ -192,9 +225,41 @@ class _LIBCUDACXX_TEMPLATE_VIS _CCCL_ALIGNAS(alignof(__half2)) complex<__half> } }; +template <> // complex +template <> // complex<__half> +inline _LIBCUDACXX_INLINE_VISIBILITY complex::complex(const complex<__half>& __c) + : __re_(__half2float(__c.real())) + , __im_(__half2float(__c.imag())) +{} + +template <> // complex +template <> // complex<__half> +inline _LIBCUDACXX_INLINE_VISIBILITY complex::complex(const complex<__half>& __c) + : __re_(__half2float(__c.real())) + , __im_(__half2float(__c.imag())) +{} + +template <> // complex +template <> // complex<__half> +inline _LIBCUDACXX_INLINE_VISIBILITY complex& complex::operator=(const complex<__half>& __c) +{ + __re_ = __half2float(__c.real()); + __im_ = __half2float(__c.imag()); + return *this; +} + +template <> // complex +template <> // complex<__half> +inline _LIBCUDACXX_INLINE_VISIBILITY complex& complex::operator=(const complex<__half>& __c) +{ + __re_ = __half2float(__c.real()); + __im_ = __half2float(__c.imag()); + return *this; +} + inline _LIBCUDACXX_INLINE_VISIBILITY __half arg(__half __re) { - return _CUDA_VSTD::atan2f(__half(0), __re); + return _CUDA_VSTD::atan2(__int2half_rn(0), __re); } // We have performance issues with some trigonometric functions with __half diff --git a/libcudacxx/include/cuda/std/__cuda/cmath_nvbf16.h b/libcudacxx/include/cuda/std/__cuda/cmath_nvbf16.h index 506438bf120..2186393409c 100644 --- a/libcudacxx/include/cuda/std/__cuda/cmath_nvbf16.h +++ b/libcudacxx/include/cuda/std/__cuda/cmath_nvbf16.h @@ -37,47 +37,47 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD // trigonometric functions inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 sin(__nv_bfloat16 __v) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsin(__v);), (return __nv_bfloat16(::sin(float(__v)));)) + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsin(__v);), (return __float2bfloat16(::sin(__bfloat162float(__v)));)) } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 sinh(__nv_bfloat16 __v) { - return __nv_bfloat16(::sinh(float(__v))); + return __float2bfloat16(::sinh(__bfloat162float(__v))); } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 cos(__nv_bfloat16 __v) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hcos(__v);), (return __nv_bfloat16(::cos(float(__v)));)) + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hcos(__v);), (return __float2bfloat16(::cos(__bfloat162float(__v)));)) } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 cosh(__nv_bfloat16 __v) { - return __nv_bfloat16(::cosh(float(__v))); + return __float2bfloat16(::cosh(__bfloat162float(__v))); } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 exp(__nv_bfloat16 __v) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hexp(__v);), (return __nv_bfloat16(::exp(float(__v)));)) + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hexp(__v);), (return __float2bfloat16(::exp(__bfloat162float(__v)));)) } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 hypot(__nv_bfloat16 __x, __nv_bfloat16 __y) { - return __nv_bfloat16(::hypot(float(__x), float(__y))); + return __float2bfloat16(::hypot(__bfloat162float(__x), __bfloat162float(__y))); } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 atan2(__nv_bfloat16 __x, __nv_bfloat16 __y) { - return __nv_bfloat16(::atan2(float(__x), float(__y))); + return __float2bfloat16(::atan2(__bfloat162float(__x), __bfloat162float(__y))); } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 log(__nv_bfloat16 __x) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hlog(__x);), (return __nv_bfloat16(::log(float(__x)));)) + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hlog(__x);), (return __float2bfloat16(::log(__bfloat162float(__x)));)) } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 sqrt(__nv_bfloat16 __x) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __nv_bfloat16(::sqrt(float(__x)));)) + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2bfloat16(::sqrt(__bfloat162float(__x)));)) } // floating point helper @@ -123,7 +123,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY bool isfinite(__nv_bfloat16 __v) inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 __constexpr_copysign(__nv_bfloat16 __x, __nv_bfloat16 __y) noexcept { - return __nv_bfloat16(::copysignf(float(__x), float(__y))); + return __float2bfloat16(::copysignf(__bfloat162float(__x), __bfloat162float(__y))); } inline _LIBCUDACXX_INLINE_VISIBILITY __nv_bfloat16 copysign(__nv_bfloat16 __x, __nv_bfloat16 __y) diff --git a/libcudacxx/include/cuda/std/__cuda/cmath_nvfp16.h b/libcudacxx/include/cuda/std/__cuda/cmath_nvfp16.h index 1eea9afe333..8706b65475a 100644 --- a/libcudacxx/include/cuda/std/__cuda/cmath_nvfp16.h +++ b/libcudacxx/include/cuda/std/__cuda/cmath_nvfp16.h @@ -35,7 +35,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD inline _LIBCUDACXX_INLINE_VISIBILITY __half sin(__half __v) { NV_IF_ELSE_TARGET(NV_PROVIDES_SM_53, (return ::hsin(__v);), ({ - float __vf = __v; + float __vf = __half2float(__v); __vf = ::sin(__vf); __half_raw __ret_repr = ::__float2half_rn(__vf); @@ -61,7 +61,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half sin(__half __v) inline _LIBCUDACXX_INLINE_VISIBILITY __half sinh(__half __v) { - return __half(::sinh(float(__v))); + return __float2half(::sinh(__half2float(__v))); } // clang-format off @@ -71,7 +71,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half cos(__half __v) return ::hcos(__v); ), ( { - float __vf = __v; + float __vf = __half2float(__v); __vf = ::cos(__vf); __half_raw __ret_repr = ::__float2half_rn(__vf); @@ -94,7 +94,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half cos(__half __v) inline _LIBCUDACXX_INLINE_VISIBILITY __half cosh(__half __v) { - return __half(::cosh(float(__v))); + return __float2half(::cosh(__half2float(__v))); } // clang-format off @@ -104,7 +104,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half exp(__half __v) return ::hexp(__v); ), ( { - float __vf = __v; + float __vf = __half2float(__v); __vf = ::exp(__vf); __half_raw __ret_repr = ::__float2half_rn(__vf); @@ -127,12 +127,12 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half exp(__half __v) inline _LIBCUDACXX_INLINE_VISIBILITY __half hypot(__half __x, __half __y) { - return __half(::hypot(float(__x), float(__y))); + return __float2half(::hypot(__half2float(__x), __half2float(__y))); } inline _LIBCUDACXX_INLINE_VISIBILITY __half atan2(__half __x, __half __y) { - return __half(::atan2(float(__x), float(__y))); + return __float2half(::atan2(__half2float(__x), __half2float(__y))); } // clang-format off @@ -142,7 +142,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half log(__half __x) return ::hlog(__x); ), ( { - float __vf = __x; + float __vf = __half2float(__x); __vf = ::log(__vf); __half_raw __ret_repr = ::__float2half_rn(__vf); @@ -164,7 +164,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY __half log(__half __x) inline _LIBCUDACXX_INLINE_VISIBILITY __half sqrt(__half __x) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __half(::sqrt(float(__x)));)) + NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2half(::sqrt(__half2float(__x)));)) } // floating point helper @@ -210,7 +210,7 @@ inline _LIBCUDACXX_INLINE_VISIBILITY bool isfinite(__half __v) inline _LIBCUDACXX_INLINE_VISIBILITY __half __constexpr_copysign(__half __x, __half __y) noexcept { - return __half(::copysignf(float(__x), float(__y))); + return __float2half(::copysignf(__half2float(__x), __half2float(__y))); } inline _LIBCUDACXX_INLINE_VISIBILITY __half copysign(__half __x, __half __y) diff --git a/libcudacxx/test/libcudacxx/cuda/complex/half_bfloat/complex.bad_macros.pass.cpp b/libcudacxx/test/libcudacxx/cuda/complex/half_bfloat/complex.bad_macros.pass.cpp new file mode 100644 index 00000000000..0bd9da2fad4 --- /dev/null +++ b/libcudacxx/test/libcudacxx/cuda/complex/half_bfloat/complex.bad_macros.pass.cpp @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#define __CUDA_NO_HALF_CONVERSIONS__ 1 +#define __CUDA_NO_HALF_OPERATORS__ 1 +#define __CUDA_NO_BFLOAT16_CONVERSIONS__ 1 +#define __CUDA_NO_BFLOAT16_OPERATORS__ 1 +#define __CUDA_NO_HALF2_OPERATORS__ 1 +#define __CUDA_NO_BFLOAT162_OPERATORS__ 1 + +#include +#include + +#include "test_macros.h" + +template +__host__ __device__ void test_assignment(cuda::std::complex v = {}) +{ + cuda::std::complex converting(v); + + cuda::std::complex assigning{}; + assigning = v; +} + +__host__ __device__ void test() +{ +#ifdef _LIBCUDACXX_HAS_NVFP16 + test_assignment<__half, float>(); + test_assignment<__half, double>(); + test_assignment(); + test_assignment(); +#endif // _LIBCUDACXX_HAS_NVFP16 +#ifdef _LIBCUDACXX_HAS_NVBF16 + test_assignment<__nv_bfloat16, float>(); + test_assignment<__nv_bfloat16, double>(); + test_assignment(); + test_assignment(); +#endif // _LIBCUDACXX_HAS_NVBF16 +} + +int main(int arg, char** argv) +{ + test(); + return 0; +}