From 52f7ee2b94d3dbf9175d0c9de2e4ce1545d3dddf Mon Sep 17 00:00:00 2001
From: Mikael Simberg
Date: Wed, 25 Sep 2024 11:55:34 +0300
Subject: [PATCH] Patch addAlpha definitions to work around buggy HIP complex operator overloads

The generic addAlpha computes b + alpha * a through operator overloads.
When DLAF_WITH_HIP is defined, add explicit specializations for
hipFloatComplex and hipDoubleComplex that perform the multiplication with
hipCmulf / hipCmul instead of operator*. The addition still goes through
operator+.
---
Review notes (this section is ignored when the patch is applied):
- The template parameter list on the trailing context line was missing and
  has been reconstructed as `template <class T>`; confirm it against the
  pre-image blob 9e9f3e97e7 of src/lapack/gpu/add.cu.
- Indentation of the context lines (two spaces) was reconstructed as well.
- The From: header carries no e-mail address.

 src/lapack/gpu/add.cu | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/lapack/gpu/add.cu b/src/lapack/gpu/add.cu
index 9e9f3e97e7..4ef3329b64 100644
--- a/src/lapack/gpu/add.cu
+++ b/src/lapack/gpu/add.cu
@@ -32,6 +32,20 @@ __device__ inline void addAlpha(const T& alpha, const T& a, T& b) {
   b = b + alpha * a;
 }
 
+#ifdef DLAF_WITH_HIP
+template <>
+__device__ inline void addAlpha(const hipFloatComplex& alpha, const hipFloatComplex& a,
+                                hipFloatComplex& b) {
+  b = b + hipCmulf(alpha, a);
+}
+
+template <>
+__device__ inline void addAlpha(const hipDoubleComplex& alpha,
+                                const hipDoubleComplex& a, hipDoubleComplex& b) {
+  b = b + hipCmul(alpha, a);
+}
+#endif
+
 template <class T>
 __device__ inline void sum(const T& /*alpha*/, const T& a, T& b) {
   b = b + a;