From f2a9b50cd33c11ee95373e465e1089acf3e25378 Mon Sep 17 00:00:00 2001
From: Jake Lishman
Date: Thu, 28 Sep 2023 20:42:29 +0100
Subject: [PATCH] BUG, SIMD: use scalar cmul on bad Apple clang x86_64 (#24828)

* BUG, SIMD: use scalar cmul on bad Apple clang x86_64

Apple clang 14.0.0 outputs code with non-deterministic behaviour for the
AVX2-accelerated `multiply` ufunc on `complex64` and `complex128` for
x86_64 on macOS with AVX2 enabled. This bug is fixed by Apple clang
14.0.3, but 14.0.0 is still commonly the available toolchain on CI
images.

In order to not output unsound code, this simply skips the SIMD version
of the ufunc when using an affected compiler.

* MAINT: Formatting cleanup.

---------

Co-authored-by: Charles Harris
---
 numpy/core/src/umath/loops_arithm_fp.dispatch.c.src | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src b/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
index c8bcedb6bbdc..30111258d646 100644
--- a/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
@@ -367,6 +367,16 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
     }
 #endif
 #if @VECTOR@
+    // Certain versions of Apple clang (commonly used in CI images) produce
+    // non-deterministic output in the mul path with AVX2 enabled on x86_64.
+    // Work around by scalarising.
+    #if @is_mul@ \
+        && defined(NPY_CPU_AMD64) && defined(__clang__) \
+        && defined(__apple_build_version__) \
+        && __apple_build_version__ >= 14000000 \
+        && __apple_build_version__ < 14030000
+        goto loop_scalar;
+    #endif // end affected Apple clang.
     if (is_mem_overlap(b_src0, b_ssrc0, b_dst, b_sdst, len) ||
         is_mem_overlap(b_src1, b_ssrc1, b_dst, b_sdst, len) ||
         b_sdst % sizeof(@ftype@) != 0 || b_sdst == 0 ||