From 020b3e168279873ce462d09420479ca9f2606a38 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 1 Jun 2024 00:51:18 +0200 Subject: [PATCH 01/18] fix handling of INF arguments --- kernel/x86_64/dscal.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index 05c5c7f16b..0b028b7829 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -169,8 +169,12 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS while(j < n1) { - x[i]=0.0; - x[i+inc_x]=0.0; + if (isinf(x[i])||isnan(x[i])) + x[i]=NAN; + else x[i]=0.0; + if (isinf(x[i+inc_x])||isnan(x[i+inc_x])) + x[i+inc_x]=NAN; + else x[i+inc_x]=0.0; i += 2*inc_x ; j+=2; @@ -179,7 +183,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS while(j < n) { - x[i]=0.0; + if (isinf(x[i])||isnan(x[i])) + x[i]=NAN; + else x[i]=0.0; i += inc_x ; j++; @@ -213,9 +219,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS BLASLONG n1 = n & -8; if ( n1 > 0 ) { - if ( da == 0.0 ) - dscal_kernel_8_zero(n1 , &da , x); - else +// if ( da == 0.0 ) +// dscal_kernel_8_zero(n1 , &da , x); +// else dscal_kernel_8(n1 , &da , x); } @@ -223,7 +229,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS { for ( i=n1 ; i Date: Thu, 6 Jun 2024 23:58:16 +0200 Subject: [PATCH 02/18] Test corner cases of all SCAL variants --- utest/test_zscal.c | 448 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 446 insertions(+), 2 deletions(-) diff --git a/utest/test_zscal.c b/utest/test_zscal.c index 22642630c7..09e63752c2 100644 --- a/utest/test_zscal.c +++ b/utest/test_zscal.c @@ -1,5 +1,449 @@ #include "openblas_utest.h" #include +#ifdef BUILD_SINGLE + +#ifndef NAN +#define NAN 0.0/0.0 +#endif +#ifndef INFINITY +#define INFINITY 1.0/0.0 +#endif + +CTEST(sscal, 0_nan) +{ + blasint N=9; + blasint incX=1; + float i = 0.0; + float x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, 0_nan_inc_2) +{ + blasint N=9; + blasint incX=2; + float i = 0.0; + float x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, + NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, nan_0) +{ + blasint N=9; + blasint incX=1; + float i = NAN; + float x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, nan_0_inc_2) +{ + blasint N=9; + blasint incX=2; + float i = NAN; + float x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, 0_inf) +{ + blasint N=9; + blasint incX=1; + float i = 0.0; + float x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, 0_inf_inc_2) +{ + blasint N=9; + blasint incX=2; + float i = 0.0; + float x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, + INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, inf_0) +{ + blasint N=9; + blasint incX=1; + float i = INFINITY; + float x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, inf_0_inc_2) +{ + blasint N=9; + blasint incX=2; + float i = INFINITY; + float x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, nan_inf) +{ + blasint N=9; + blasint incX=1; + float i = NAN; + float x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, nan_inf_inc_2) +{ + blasint N=9; + blasint incX=2; + float i = NAN; + float x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, + INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, inf_nan) +{ + blasint N=9; + blasint incX=1; + float i = INFINITY; + float x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(sscal, inf_nan_inc_2) +{ + blasint N=9; + blasint incX=2; + float i = INFINITY; + float x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, + NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(sscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +#endif + +#ifdef BUILD_DOUBLE + +#ifndef NAN +#define NAN 0.0/0.0 +#endif +#ifndef INFINITY +#define INFINITY 1.0/0.0 +#endif + +CTEST(dscal, 0_nan) +{ + blasint N=9; + blasint incX=1; + double i = 0.0; + double x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, 0_nan_inc_2) +{ + blasint N=9; + blasint incX=2; + double i = 0.0; + double x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, + NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, nan_0) +{ + blasint N=9; + blasint incX=1; + double i = NAN; + double x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, nan_0_inc_2) +{ + blasint N=9; + blasint incX=2; + double i = NAN; + double x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, 0_inf) +{ + blasint N=9; + blasint incX=1; + double i = 0.0; + double x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, 0_inf_inc_2) +{ + blasint N=9; + blasint incX=2; + double i = 0.0; + double x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, + INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, inf_0) +{ + blasint N=9; + blasint incX=1; + double i = INFINITY; + double x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, inf_0_inc_2) +{ + blasint N=9; + blasint incX=2; + double i = INFINITY; + double x[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, nan_inf) +{ + blasint N=9; + blasint incX=1; + double i = NAN; + double x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, nan_inf_inc_2) +{ + blasint N=9; + blasint incX=2; + double i = NAN; + double x[] = {INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, + INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY, INFINITY}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, inf_nan) +{ + blasint N=9; + blasint incX=1; + double i = INFINITY; + double x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +CTEST(dscal, inf_nan_inc_2) +{ + blasint N=9; + blasint incX=2; + double i = INFINITY; + double x[] = {NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, + NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN}; + BLASFUNC(dscal)(&N, &i, x, &incX); + ASSERT_TRUE(isnan(x[0])); + ASSERT_TRUE(isnan(x[8])); +} + +#endif + +#ifdef BUILD_COMPLEX + +#ifndef NAN +#define NAN 0.0/0.0 +#endif +#ifndef INFINITY +#define INFINITY 1.0/0.0 +#endif + +CTEST(cscal, i_nan) +{ + blasint N=9; + blasint incX=1; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; + BLASFUNC(cscal)(&N, i, nan, &incX); + ASSERT_TRUE(isnan(nan[0])); + ASSERT_TRUE(isnan(nan[1])); + ASSERT_TRUE(isnan(nan[16])); + ASSERT_TRUE(isnan(nan[17])); +} + +CTEST(cscal, i_nan_inc_2) +{ + blasint N=9; + blasint incX=2; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, + NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; + BLASFUNC(cscal)(&N, i, nan, &incX); + ASSERT_TRUE(isnan(nan[0])); + ASSERT_TRUE(isnan(nan[1])); + ASSERT_TRUE(isnan(nan[16])); + ASSERT_TRUE(isnan(nan[17])); +} + +CTEST(cscal, nan_i) +{ + blasint N=9; + blasint incX=1; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; + BLASFUNC(cscal)(&N, nan, i, &incX); + ASSERT_TRUE(isnan(i[0])); + ASSERT_TRUE(isnan(i[1])); + ASSERT_TRUE(isnan(i[16])); + ASSERT_TRUE(isnan(i[17])); +} + +CTEST(cscal, nan_i_inc_2) +{ + blasint N=9; + blasint incX=2; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, + 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; + BLASFUNC(cscal)(&N, nan, i, &incX); + ASSERT_TRUE(isnan(i[0])); + ASSERT_TRUE(isnan(i[1])); + ASSERT_TRUE(isnan(i[16])); + ASSERT_TRUE(isnan(i[17])); +} + +CTEST(cscal, i_inf) +{ + blasint N=9; + blasint incX=1; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float inf[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0}; + BLASFUNC(cscal)(&N, i, inf, &incX); + ASSERT_TRUE(isnan(inf[0])); + ASSERT_TRUE(isinf(inf[1])); + ASSERT_TRUE(isnan(inf[16])); + ASSERT_TRUE(isinf(inf[17])); +} + +CTEST(cscal, i_inf_inc_2) +{ + blasint N=9; + blasint incX=2; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float inf[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, + INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0}; + BLASFUNC(cscal)(&N, i, inf, &incX); + ASSERT_TRUE(isnan(inf[0])); + ASSERT_TRUE(isinf(inf[1])); + ASSERT_TRUE(isnan(inf[16])); + ASSERT_TRUE(isinf(inf[17])); +} + +CTEST(cscal, inf_i) +{ + blasint N=9; + blasint incX=1; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float inf[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0}; + BLASFUNC(cscal)(&N, inf, i, &incX); + ASSERT_TRUE(isnan(i[0])); + ASSERT_TRUE(isinf(i[1])); + ASSERT_TRUE(isnan(i[16])); + ASSERT_TRUE(isinf(i[17])); +} + +CTEST(cscal, inf_i_inc_2) +{ + blasint N=9; + blasint incX=2; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, + 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float inf[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0}; + BLASFUNC(cscal)(&N, inf, i, &incX); + ASSERT_TRUE(isnan(i[0])); + ASSERT_TRUE(isinf(i[1])); + ASSERT_TRUE(isnan(i[16])); + ASSERT_TRUE(isinf(i[17])); +} + +CTEST(cscal, i_0inf) +{ + blasint N=9; + blasint incX=1; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float inf[] = {0,INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY}; + BLASFUNC(cscal)(&N, i, inf, &incX); + ASSERT_TRUE(isinf(inf[0])); + ASSERT_TRUE(isnan(inf[1])); + ASSERT_TRUE(isinf(inf[16])); + ASSERT_TRUE(isnan(inf[17])); +} + +CTEST(cscal, i_0inf_inc_2) +{ + blasint N=9; + blasint incX=2; + float i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; + float inf[] = {0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, + 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY, 0,INFINITY}; + BLASFUNC(cscal)(&N, i, inf, &incX); + ASSERT_TRUE(isinf(inf[0])); + ASSERT_TRUE(isnan(inf[1])); + ASSERT_TRUE(isinf(inf[16])); + ASSERT_TRUE(isnan(inf[17])); +} + +#endif + #ifdef BUILD_COMPLEX16 #ifndef NAN @@ -25,7 +469,7 @@ CTEST(zscal, i_nan) CTEST(zscal, i_nan_inc_2) { blasint N=9; - blasint incX=1; + blasint incX=2; double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; double nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; @@ -52,7 +496,7 @@ CTEST(zscal, nan_i) CTEST(zscal, nan_i_inc_2) { blasint N=9; - blasint incX=1; + blasint incX=2; double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 }; double nan[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0}; From 1abafcd9b2c32a07a67f9db7b57485c464828bad Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 6 Jun 2024 23:59:43 +0200 Subject: [PATCH 03/18] handle corner cases involving NAN and/or INF --- kernel/x86_64/cscal.c | 59 ++++++++++++++++++++++++++++++------------- kernel/x86_64/dscal.c | 5 ++++ kernel/x86_64/sscal.c | 44 +++++++++++++++++++++----------- 3 files changed, 76 insertions(+), 32 deletions(-) diff --git a/kernel/x86_64/cscal.c b/kernel/x86_64/cscal.c index 95a99b8b97..212a215941 100644 --- a/kernel/x86_64/cscal.c +++ b/kernel/x86_64/cscal.c @@ -259,11 +259,22 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, while(j < n1) { + if (isnan(x[i]) || isinf(x[i])) + temp0 = NAN; + else temp0 = -da_i * x[i+1]; + if (!isinf(x[i+1])) x[i+1] = da_i * x[i]; + else + x[i+1] = NAN; x[i] = temp0; + if (isnan(x[i+inc_x]) || isinf(x[i+inc_x])) + temp1 = NAN; + else temp1 = -da_i * x[i+1+inc_x]; - x[i+1+inc_x] = da_i * x[i+inc_x]; + if (!isinf(x[i+1+inc_x])) + x[i+1+inc_x] = da_i * x[i+inc_x]; + else x[i+1+inc_x] = NAN; x[i+inc_x] = temp1; i += 2*inc_x ; j+=2; @@ -272,9 +283,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, while(j < n) { - - temp0 = -da_i * x[i+1]; + + if (isnan(x[i]) || isinf(x[i])) + temp0 = NAN; + else + temp0 = -da_i * x[i+1]; + if (!isinf(x[i+1])) x[i+1] = da_i * x[i]; + else x[i+1] = NAN; x[i] = temp0; i += inc_x ; j++; @@ -365,42 +381,51 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, else cscal_kernel_16_zero_r(n1 , alpha , x); else - if ( da_i == 0 ) - cscal_kernel_16_zero_i(n1 , alpha , x); - else cscal_kernel_16(n1 , alpha , x); i = n1 << 1; j = n1; } - - if ( da_r == 0.0 ) + if ( da_r == 0.0 || isnan(da_r) ) { - if ( da_i == 0.0 ) { - + FLOAT res=0.0; + if (isnan(da_r)) res= da_r; while(j < n) { - - x[i]=0.0; - x[i+1]=0.0; + x[i]=res; + x[i+1]=res; i += 2 ; j++; } } - else + else if (isinf(da_r)) { + while(j < n) + { + x[i]= NAN; + x[i+1] = da_r; + i += 2 ; + j++; + + } + + } else { while(j < n) { - temp0 = -da_i * x[i+1]; - x[i+1] = da_i * x[i]; - x[i] = temp0; + if (isinf(x[i])) + temp0 = NAN; + if (!isinf(x[i+1])) + x[i+1] = da_i * x[i]; + else x[i+1] = NAN; + if ( x[i] == x[i]) //preserve NaN + x[i] = temp0; i += 2 ; j++; diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index 0b028b7829..e7182c5ce8 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -234,6 +234,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else x[i] = 0.0; } } + else if (isinf(da)){ + for ( i=n1 ; i 0 ) { @@ -151,10 +154,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS i = n1 * inc_x; j = n1; } - +#endif while(j < n) { - x[i] *= da; i += inc_x ; j++; @@ -162,16 +164,15 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS } } - return(0); } BLASLONG n1 = n & -16; if ( n1 > 0 ) { - if ( da == 0.0 ) - sscal_kernel_16_zero(n1 , &da , x); - else + //if ( da == 0.0 ) + // sscal_kernel_16_zero(n1 , &da , x); + //else sscal_kernel_16(n1 , &da , x); } @@ -179,7 +180,18 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS { for ( i=n1 ; i Date: Fri, 7 Jun 2024 09:39:08 +0200 Subject: [PATCH 04/18] Handle corner cases with INF and NAN arguments --- kernel/mips/scal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/mips/scal.c b/kernel/mips/scal.c index 01f708b1d9..d51fd9ccdd 100644 --- a/kernel/mips/scal.c +++ b/kernel/mips/scal.c @@ -35,7 +35,12 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS { if ( da == 0.0 ) - x[i]=0.0; + if (isnan(x[i])||isinf(x[i])) + x[i]=NAN; + else + x[i]=0.0; + else if (isnan(da)) + x[i]=NAN; else x[i] = da * x[i] ; From c7cacd9b3897c5df725905905c2ef92b111dec39 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 7 Jun 2024 13:48:56 +0200 Subject: [PATCH 05/18] disable the shortcut for da=0 to ensure proper handling of INF and NAN --- kernel/arm64/scal.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/arm64/scal.S b/kernel/arm64/scal.S index 09c41cdaab..5029890f67 100644 --- a/kernel/arm64/scal.S +++ b/kernel/arm64/scal.S @@ -168,8 +168,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, xzr ble .Lscal_kernel_L999 - fcmp DA, #0.0 - beq .Lscal_kernel_zero + //fcmp DA, #0.0 + //beq .Lscal_kernel_zero cmp INC_X, #1 bne .Lscal_kernel_S_BEGIN From 6ffaf998179ab4fbf17c140f84e244ab02dc72f4 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 7 Jun 2024 14:46:58 +0200 Subject: [PATCH 06/18] disable da=0 shortcut to handle NAN and INF correctly --- kernel/x86/scal.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/x86/scal.S b/kernel/x86/scal.S index 377d4ef616..a24c832d35 100644 --- a/kernel/x86/scal.S +++ b/kernel/x86/scal.S @@ -71,8 +71,9 @@ je .L300 # Alpha != ZERO /* Alpha == ZERO */ - cmpl $1,%esi - jne .L104 +// cmpl $1,%esi +// jne .L104 + jmp .L104 movl %edx, %ecx # ecx = n sarl $3, %ecx # (n >> 3) From f1248b849d11d6a436a6e19983c9c6695c143bc7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 22 Jun 2024 15:55:29 +0200 Subject: [PATCH 07/18] handle INF and NAN in input --- kernel/power/dscal.c | 57 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/kernel/power/dscal.c b/kernel/power/dscal.c index 96c4e51bc5..2bbc1ea6d7 100644 --- a/kernel/power/dscal.c +++ b/kernel/power/dscal.c @@ -73,14 +73,38 @@ static void dscal_kernel_8_zero (BLASLONG n, FLOAT *x) for( i=0; i> 3) & 0x3; for (j = 0; j < align; j++) { - x[j] = 0.0; + if (isfinite(x[j])) + x[j] = 0.0; + else + x[j] = NAN; } } BLASLONG n1 = (n-j) & -16; @@ -127,8 +154,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS while(j < n) { - - x[j]=0.0; + if (!isfinite(x[j])) + x[j]=NAN; + else + x[j]=0.0; j++; } @@ -176,8 +205,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS while(j < n) { - - x[i]=0.0; + if (!isfinite(x[i])) + x[i]=NAN; + else + x[i]=0.0; i += inc_x ; j++; } From 7f8f037a36c05d3d8cf0ffc74db2202e64b95a22 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 22 Jun 2024 16:03:30 +0200 Subject: [PATCH 08/18] handle INF and NAN in input --- kernel/power/sscal.c | 57 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/kernel/power/sscal.c b/kernel/power/sscal.c index 65572a8c1f..12246b0a38 100644 --- a/kernel/power/sscal.c +++ b/kernel/power/sscal.c @@ -74,14 +74,38 @@ static void sscal_kernel_16_zero( BLASLONG n, FLOAT *x ) for( i=0; i> 2) & 0x7; for (j = 0; j < align; j++) { - x[j] = 0.0; + if (isfinite(x[j])) + x[j] = 0.0; + else + x[j] = NAN; } } BLASLONG n1 = (n-j) & -32; @@ -129,8 +156,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS while(j < n) { - - x[j]=0.0; + if (isfinite(x[j])) + x[j]=0.0; + else + x[j]=NAN; j++; } @@ -178,8 +207,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS while(j < n) { - - x[i]=0.0; + if (isfinite(x[i])) + x[i]=0.0; + else + x[i]=NAN; i += inc_x ; j++; } From 0a744a939a8d52827b23724364d61ac9dca9e06d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 22 Jun 2024 21:07:43 +0200 Subject: [PATCH 09/18] temporarily(?) disable the alpha=0 branch to handle NaN/Inf in x --- kernel/x86_64/scal_sse2.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/scal_sse2.S b/kernel/x86_64/scal_sse2.S index 20dd7fa2d8..b778895ba3 100644 --- a/kernel/x86_64/scal_sse2.S +++ b/kernel/x86_64/scal_sse2.S @@ -75,7 +75,7 @@ comisd %xmm0, %xmm1 jne .L100 # Alpha != ZERO jp .L100 # For Alpha = NaN - + je .L100 # disable the Alpha=zero path as it does not handle x=inf or nan /* Alpha == ZERO */ cmpq $SIZE, INCX jne .L50 From 68f2501958181da615837874f55fb113558da4e0 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 22 Jun 2024 21:08:57 +0200 Subject: [PATCH 10/18] temporarily(?) disable the alpha=0 branch to handle Inf/NaN in x --- kernel/x86_64/scal_sse.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/scal_sse.S b/kernel/x86_64/scal_sse.S index b92688d9e4..91149af3ff 100644 --- a/kernel/x86_64/scal_sse.S +++ b/kernel/x86_64/scal_sse.S @@ -76,7 +76,7 @@ shufps $0, %xmm0, %xmm0 jne .L100 # Alpha != ZERO - + je .L100 /* Alpha == ZERO */ cmpq $SIZE, INCX jne .L50 From bd47630bcfb7c2f974d4b21e9c64ea9571929c35 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 23 Jun 2024 00:54:39 +0200 Subject: [PATCH 11/18] exclude the alpha=0 branch as it does not handle NaN or Inf in x --- kernel/mips64/scal.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/mips64/scal.S b/kernel/mips64/scal.S index b28b8a309d..e392a9c6a5 100644 --- a/kernel/mips64/scal.S +++ b/kernel/mips64/scal.S @@ -79,6 +79,9 @@ bc1f $fcc0, .L50 NOP + bc1t $fcc0, .L50 + NOP + bne INCX, TEMP, .L20 dsra I, N, 3 From c08113c2798b907dce8efbea41b5f6545c71d913 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 23 Jun 2024 01:12:33 +0200 Subject: [PATCH 12/18] fix special cases of x= NAN or INF --- kernel/mips/dscal_msa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/mips/dscal_msa.c b/kernel/mips/dscal_msa.c index 2e41d8bef2..e95f0a6552 100644 --- a/kernel/mips/dscal_msa.c +++ b/kernel/mips/dscal_msa.c @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, if (1 == inc_x) { - if (0.0 == da) + if (0) //if (0.0 == da ) { v2f64 zero_v = {0.0, 0.0}; @@ -243,9 +243,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, if (da == 0.0) { for (i = n; i--;) - { - *x = 0.0; - + { + if (isfinite(*x)) + *x = 0.0; + else + *x = NAN; x += inc_x; } } From 541e1b6959790f2af0955a6f9540788b68a0abee Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 23 Jun 2024 10:37:55 +0200 Subject: [PATCH 13/18] disable the fast path for inc=1, alpha=0 as it does not handle x=NaN or Inf --- kernel/mips/sscal_msa.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/mips/sscal_msa.c b/kernel/mips/sscal_msa.c index 66e17b8446..a1c7df4528 100644 --- a/kernel/mips/sscal_msa.c +++ b/kernel/mips/sscal_msa.c @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, if (1 == inc_x) { - if (0.0 == da) + if (0) // if (0.0 == da) { v4f32 zero_v = {0.0, 0.0, 0.0, 0.0}; @@ -259,7 +259,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, { for (i = n; i--;) { - *x = 0; + if (isfinite(*x) + *x = 0; + else + *x = NAN; x += inc_x; } } From a11f086c1717c074c34a9232f49338ed94f81b57 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 23 Jun 2024 12:55:19 +0200 Subject: [PATCH 14/18] Update sscal_msa.c --- kernel/mips/sscal_msa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/mips/sscal_msa.c b/kernel/mips/sscal_msa.c index a1c7df4528..bfd477b6a5 100644 --- a/kernel/mips/sscal_msa.c +++ b/kernel/mips/sscal_msa.c @@ -259,7 +259,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, { for (i = n; i--;) { - if (isfinite(*x) + if (isfinite(*x)) *x = 0; else *x = NAN; From 9e24121e7e3bb983a468e115e514c408c9fb0cb7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 23 Jun 2024 17:48:18 +0200 Subject: [PATCH 15/18] temporarily(?) disable da=0 shortcut to handle x=Inf or NAN --- kernel/x86/scal.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/x86/scal.S b/kernel/x86/scal.S index a24c832d35..b0c232b1bd 100644 --- a/kernel/x86/scal.S +++ b/kernel/x86/scal.S @@ -68,12 +68,12 @@ ftst fnstsw %ax andb $68, %ah - je .L300 # Alpha != ZERO +// je .L300 # Alpha != ZERO + jmp .L300 /* Alpha == ZERO */ -// cmpl $1,%esi -// jne .L104 - jmp .L104 + cmpl $1,%esi + jne .L104 movl %edx, %ecx # ecx = n sarl $3, %ecx # (n >> 3) From c1019d583248c0ffbf85a33cd7eaa232acacfca9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Jun 2024 10:58:59 +0200 Subject: [PATCH 16/18] Handle INF and NAN in inputs --- kernel/riscv64/scal.c | 5 ++++- kernel/riscv64/scal_vector.c | 4 ++-- kernel/riscv64/zscal.c | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/riscv64/scal.c b/kernel/riscv64/scal.c index 4ef49e2934..6c713aa18c 100644 --- a/kernel/riscv64/scal.c +++ b/kernel/riscv64/scal.c @@ -48,7 +48,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS { if ( da == 0.0 ) - x[i]=0.0; + if (isfinite(x[i])) + x[i]=0.0; + else + x[i]=NAN; else x[i] = da * x[i] ; diff --git a/kernel/riscv64/scal_vector.c b/kernel/riscv64/scal_vector.c index 8fa9315f6b..a1ba41c4f6 100644 --- a/kernel/riscv64/scal_vector.c +++ b/kernel/riscv64/scal_vector.c @@ -71,7 +71,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS FLOAT_V_T v0, v1; unsigned int gvl = 0; if(inc_x == 1){ - if(da == 0.0){ + if (0){ //if(da == 0.0){ memset(&x[0], 0, n * sizeof(FLOAT)); }else{ gvl = VSETVL(n); @@ -96,7 +96,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS } } }else{ - if(da == 0.0){ + if (0) { //if(da == 0.0){ BLASLONG stride_x = inc_x * sizeof(FLOAT); BLASLONG ix = 0; gvl = VSETVL(n); diff --git a/kernel/riscv64/zscal.c b/kernel/riscv64/zscal.c index c4855f73ea..8499145f47 100644 --- a/kernel/riscv64/zscal.c +++ b/kernel/riscv64/zscal.c @@ -61,9 +61,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F { temp = - da_i * x[ip+1] ; if (isnan(x[ip]) || isinf(x[ip])) temp = NAN; - if (!isinf(x[ip+1])) + if (!isinf(x[ip+1])) x[ip+1] = da_i * x[ip] ; - else x[ip+1] = NAN; + else x[ip+1] = NAN; } } else From 2a5fe97e3b01c13682764ec61e297a6252265887 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Jun 2024 16:21:57 +0200 Subject: [PATCH 17/18] temporarily(?) disable the alpha=0 branch as it does not handle INF,NAN --- kernel/power/scal.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/power/scal.S b/kernel/power/scal.S index 19fdd32aba..7d3e232450 100644 --- a/kernel/power/scal.S +++ b/kernel/power/scal.S @@ -84,8 +84,9 @@ cmpwi cr0, N, 0 blelr- cr0 - fcmpu cr0, FZERO, ALPHA - bne- cr0, LL(A1I1) +// fcmpu cr0, FZERO, ALPHA +// bne- cr0, LL(A1I1) + b LL(A1I1) cmpwi cr0, INCX, SIZE bne- cr0, LL(A0IN) From f3c364c2cc71b80584baa70e1ba5e24bd70d8435 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Jun 2024 22:18:27 +0200 Subject: [PATCH 18/18] temporarily(?) disable the alpha=0 branch as it fails to handle INF,NAN --- kernel/sparc/scal.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/sparc/scal.S b/kernel/sparc/scal.S index 36d9ce2abd..fd61c82018 100644 --- a/kernel/sparc/scal.S +++ b/kernel/sparc/scal.S @@ -120,8 +120,10 @@ FCLR(29) - FCMP ALPHA, FZERO - fbne .LL100 +// FCMP ALPHA, FZERO +// fbne .LL100 + b .LL100 + sll INCX, BASE_SHIFT, INCX cmp INCX, SIZE