From 00daba7bfe2b7f74717acf7f460f2b3f6d258068 Mon Sep 17 00:00:00 2001 From: Colin Vanden Heuvel Date: Sun, 17 Apr 2022 12:23:02 -0500 Subject: [PATCH] SIMD detection for Clang on arm64 and restructuring (#384) * Merge SIMD detection into one find_package call * Revert submodule updates in 20853a94e Co-authored-by: Colin Vanden Heuvel --- cmake/FindAVX.cmake | 157 -------------- cmake/FindFMA.cmake | 92 -------- cmake/FindNEON.cmake | 81 ------- cmake/FindSIMD.cmake | 505 +++++++++++++++++++++++++++++++++++++++++++ cmake/FindSSE.cmake | 312 -------------------------- src/CMakeLists.txt | 79 +++---- 6 files changed, 535 insertions(+), 691 deletions(-) delete mode 100644 cmake/FindAVX.cmake delete mode 100644 cmake/FindFMA.cmake delete mode 100644 cmake/FindNEON.cmake create mode 100644 cmake/FindSIMD.cmake delete mode 100644 cmake/FindSSE.cmake diff --git a/cmake/FindAVX.cmake b/cmake/FindAVX.cmake deleted file mode 100644 index 3e786acd49..0000000000 --- a/cmake/FindAVX.cmake +++ /dev/null @@ -1,157 +0,0 @@ -# This script checks for the highest level of AVX support on the host -# by compiling and running small C++ programs that use AVX intrinsics. -# -# You can invoke this module using the following command: -# -# FIND_PACKAGE(AVX [major[.minor]] [EXACT] [QUIET|REQUIRED]) -# -# where the version string is one of: -# -# 1.0 for AVX support -# 2.0 for AVX2 support -# -# Note that any ".0" in the above version string is optional. -# -# If any AVX support is detected, the following variables are set: -# -# AVX_FOUND = 1 -# AVX_VERSION = the requested version, if EXACT is true, or -# the highest AVX version found. -# AVX_FLAGS = compile flags for the version of AVX found -# -# If AVX is not supported on the host platform, these variables are -# not set. If QUIET is true, the module does not print a message if -# AVX if missing. If REQUIRED is true, the module produces a fatal -# error if AVX support is missing. -# -set(AVX_FLAGS) -set(AVX_FOUND) -set(DETECTED_AVX_10) -set(DETECTED_AVX_20) - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} "-dumpversion" OUTPUT_VARIABLE GCC_VERSION_STRING) - if(GCC_VERSION_STRING VERSION_GREATER 4.2 AND NOT APPLE AND NOT CMAKE_CROSSCOMPILING) - SET(AVX_FLAGS "${AVX_FLAGS} -march=native") - message(STATUS "Using CPU native flags for AVX optimization: ${AVX_FLAGS}") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CROSSCOMPILING) - SET(AVX_FLAGS "${AVX_FLAGS} -march=native") - message(STATUS "Using CPU native flags for AVX optimization: ${AVX_FLAGS}") -endif() - -include(CheckCXXSourceRuns) -set(CMAKE_REQUIRED_FLAGS) - - -# Generate a list of AVX versions to test. -if(AVX_FIND_VERSION_EXACT) - if(AVX_FIND_VERSION VERSION_EQUAL "2.0") - set(_AVX_TEST_20 1) - elseif(AVX_FIND_VERSION VERSION_EQUAL "1.0") - set(_AVX_TEST_10 1) - endif() -else() - if(NOT AVX_FIND_VERSION VERSION_GREATER "2.0") - set(_AVX_TEST_20 1) - endif() - if(NOT AVX_FIND_VERSION VERSION_GREATER "1.0") - set(_AVX_TEST_10 1) - endif() -endif() - -# Check for AVX2 support. -if(_AVX_TEST_20) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-mavx2") - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - set(CMAKE_REQUIRED_FLAGS "-xHost") - elseif(MSVC AND NOT CMAKE_CL_64) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX2") - endif() - check_cxx_source_runs(" - #include - int main() - { - __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); - __m256i result = _mm256_abs_epi32 (a); - return 0; - }" DETECTED_AVX_20) -endif() - -# Check for AVX support. -if(_AVX_TEST_10) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-mavx") - elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - set(CMAKE_REQUIRED_FLAGS "-xHost") - elseif(MSVC AND NOT CMAKE_CL_64) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX") - endif() - check_cxx_source_runs(" - #include - int main() - { - __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); - __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - __m256 result = _mm256_add_ps (a, b); - return 0; - }" DETECTED_AVX_10) -endif() - -set(CMAKE_REQUIRED_FLAGS) - - -if(DETECTED_AVX_20) - set(AVX_VERSION "2.0") - set(AVX_STR "2_0") - set(AVX_FOUND 1) -elseif(DETECTED_AVX_10) - set(AVX_VERSION "1.0") - set(AVX_STR "1_0") - set(AVX_FOUND 1) -endif() - - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - if(DETECTED_AVX_20) - SET(AVX_FLAGS "${AVX_FLAGS} -mavx2") - elseif(DETECTED_AVX_10) - SET(AVX_FLAGS "${AVX_FLAGS} -mavx") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - set(AVX_FLAGS "-xHost") -elseif(MSVC) - if(DETECTED_AVX_20) - SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2") - elseif(DETECTED_AVX_10) - SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX") - endif() -endif() - -if(AVX_FOUND) - message(STATUS " Found AVX ${AVX_VERSION} extensions, using flags: ${AVX_FLAGS}") -else() - message(STATUS " No AVX support found") - set(AVX_FLAGS "") -endif() - -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${AVX_FLAGS}") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${AVX_FLAGS}") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${AVX_FLAGS}") - -return() -#------------------------------------- - -# If no AVX support is found, print an error message. -if(AVX_FIND_VERSION) - set(_AVX_ERROR_MESSAGE "AVX ${AVX_FIND_VERSION} support is not found on this architecture") -else() - set(_AVX_ERROR_MESSAGE "AVX support is not found on this architecture") -endif() - -if(AVX_FIND_REQUIRED) - message(FATAL_ERROR "${_AVX_ERROR_MESSAGE}") -elseif(NOT AVX_FIND_QUIETLY) - message(STATUS "${_AVX_ERROR_MESSAGE}") -endif() diff --git a/cmake/FindFMA.cmake b/cmake/FindFMA.cmake deleted file mode 100644 index d0cec9c5b8..0000000000 --- a/cmake/FindFMA.cmake +++ /dev/null @@ -1,92 +0,0 @@ -# This script checks for the highest level of FMA support on the host -# by compiling and running small C++ programs that uses FMA intrinsics. -# -# You can invoke this module using the following command: -# -# FIND_PACKAGE(FMA [QUIET|REQUIRED]) -# -# If any FMA support is detected, the following variables are set: -# -# FMA_FOUND = 1 -# FMA_FLAGS = compile flags for the version of FMA found -# -# If FMA is not supported on the host platform, these variables are -# not set. If QUIET is true, the module does not print a message if -# FMA if missing. If REQUIRED is true, the module produces a fatal -# error if FMA support is missing. -# -set(FMA_FLAGS) -set(FMA_FOUND) -set(DETECTED_FMA) - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} "-dumpversion" OUTPUT_VARIABLE GCC_VERSION_STRING) - if(GCC_VERSION_STRING VERSION_GREATER 4.2 AND NOT APPLE AND NOT CMAKE_CROSSCOMPILING) - SET(FMA_FLAGS "${FMA_FLAGS} -march=native") - message(STATUS "Using CPU native flags for FMA optimization: ${FMA_FLAGS}") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CROSSCOMPILING) - SET(FMA_FLAGS "${FMA_FLAGS} -march=native") - message(STATUS "Using CPU native flags for FMA optimization: ${FMA_FLAGS}") -endif() - - -include(CheckCXXSourceRuns) -set(CMAKE_REQUIRED_FLAGS) - -# Generate a list of FMA versions to test. -set(_FMA_TEST 1) - -# Check for FMA support. -if(_FMA_TEST) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-mavx2 -mfma") - elseif(MSVC AND NOT CMAKE_CL_64) - set(CMAKE_REQUIRED_FLAGS "/arch:AVX2") - endif() - check_cxx_source_runs(" - #include - int main() - { - __m256d a = _mm256_set_pd (-1, 2, -3, 4); - __m256d b = _mm256_set_pd (-2, 3, -4, 1); - __m256d c = _mm256_set_pd (-11, 6, 4, -1); - - __m256d result = _mm256_fmsub_pd (a, b, c); - return 0; - }" DETECTED_FMA) -endif() - -set(CMAKE_REQUIRED_FLAGS) - -if(DETECTED_FMA) - SET(FMA_FOUND 1) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - SET(FMA_FLAGS "${FMA_FLAGS} -mfma") - endif() -endif() - -if(FMA_FOUND) - message(STATUS " Found FMA extensions, using flags: ${FMA_FLAGS}") -else() - message(STATUS " No FMA support found") - set(FMA_FLAGS "") -endif() - -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${FMA_FLAGS}") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${FMA_FLAGS}") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${FMA_FLAGS}") - -return() -#----------------------- - -# If no FMA support is found, print an error message. -if(FMA_FIND_REQUIRED) - set(_FMA_ERROR_MESSAGE "FMA support is not found on this architecture") -endif() - -if(FMA_FIND_REQUIRED) - message(FATAL_ERROR "${_FMA_ERROR_MESSAGE}") -elseif(NOT FMA_FIND_QUIETLY) - message(STATUS "${_FMA_ERROR_MESSAGE}") -endif() diff --git a/cmake/FindNEON.cmake b/cmake/FindNEON.cmake deleted file mode 100644 index 19aae80ccb..0000000000 --- a/cmake/FindNEON.cmake +++ /dev/null @@ -1,81 +0,0 @@ -# This script checks for the highest level of NEON support on the host -# by compiling and running small C++ programs that uses NEON intrinsics. -# -# You can invoke this module using the following command: -# -# FIND_PACKAGE(NEON) -# -# If any NEON support is detected, the following variables are set: -# -# NEON_FOUND = 1 -# NEON_VERSION = 2_0 (assumes Advanced SIMD 2.0) -# NEON_FLAGS = compile flags for the version of NEON found -# -# If NEON is not supported on the host platform, these variables are -# not set. -# -set(NEON_FLAGS) -set(NEON_FOUND) -set(DETECTED_NEON) - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} "-dumpversion" OUTPUT_VARIABLE GCC_VERSION_STRING) - if(GCC_VERSION_STRING VERSION_GREATER 4.2 AND NOT APPLE AND NOT CMAKE_CROSSCOMPILING) - SET(NEON_FLAGS "${NEON_FLAGS} -march=native") - message(STATUS "Using CPU native flags for NEON optimization: ${NEON_FLAGS}") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CROSSCOMPILING) - SET(NEON_FLAGS "${NEON_FLAGS} -march=native") - message(STATUS "Using CPU native flags for NEON optimization: ${NEON_FLAGS}") -endif() - -include(CheckCXXSourceRuns) -set(CMAKE_REQUIRED_FLAGS) -#set(CMAKE_REQUIRED_INCLUDES arm_neon.h) - -# Check for NEON support. -check_cxx_source_runs(" -#include -int main() -{ - float64_t a[2] = { 1., 2. }; - float64_t b[2] = { -1., 3. }; - float64_t c[2]; - - float64x2_t va = vld1q_f64(&a[0]); - float64x2_t vb = vld1q_f64(&b[0]); - float64x2_t vc = vaddq_f64(va, vb); - vst1q_f64(&c[0], vc); - - if (c[0] == 0. && c[1] == 5.) - return 0; - else - return 0; -} -" DETECTED_NEON) - -set(CMAKE_REQUIRED_FLAGS) - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - if(DETECTED_NEON) - SET(NEON_FLAGS "${NEON_FLAGS}") - set(NEON_STR "2_0") - SET(NEON_FOUND 1) - else() - # Setting -ffloat-store to alleviate 32bit vs 64bit discrepancies on non-SIMD platforms. - set(NEON_FLAGS "-ffloat-store") - endif() -endif() - -if(NEON_FOUND) - message(STATUS " Found NEON extensions, using flags: ${NEON_FLAGS}") -else() - message(STATUS " No NEON support found") - set(NEON_FLAGS "") -endif() - -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${NEON_FLAGS}") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${NEON_FLAGS}") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${NEON_FLAGS}") - -return() diff --git a/cmake/FindSIMD.cmake b/cmake/FindSIMD.cmake new file mode 100644 index 0000000000..7226e6e2b4 --- /dev/null +++ b/cmake/FindSIMD.cmake @@ -0,0 +1,505 @@ +# SPDX-License-Identifier: BSD-3-Clause +# +# This script combines the search routines for various SIMD technologies into one place. +# + + +# This script checks for the highest level of SSE support on the host +# by compiling and running small C++ programs that uses SSE intrinsics. +# +# If any SSE support is detected, the following variables are set: +# +# SSE_FOUND = 1 +# SSE_VERSION = the highest SSE version found. +# SSE_FLAGS = compile flags for the version of SSE found +# +# If SSE is not supported on the host platform, these variables are +# not set. +# +# NOTE: 64-bit x86 architectures provide SSE 2.0 or support by default so it is not tested here. + +function (test_sse_availability) + + set(SSE_FLAGS) + set(SSE_FOUND) + set(DETECTED_SSE_41) + set(DETECTED_SSE_42) + set(DETECTED_SSE_30) + + include(CheckCXXSourceRuns) + set(CMAKE_REQUIRED_FLAGS) + + set(_SSE_TEST_42 1) + set(_SSE_TEST_41 1) + set(_SSE_TEST_30 1) + +# Check for SSE 4.2 support. + if(_SSE_TEST_42) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_REQUIRED_FLAGS "-msse4.2") + endif() + check_cxx_source_runs(" + #include + #include + int main() + { + long long a[2] = { 1, 2 }; + long long b[2] = { -1, 3 }; + long long c[2]; + __m128i va = _mm_loadu_si128((__m128i*)a); + __m128i vb = _mm_loadu_si128((__m128i*)b); + __m128i vc = _mm_cmpgt_epi64(va, vb); + + _mm_storeu_si128((__m128i*)c, vc); + if (c[0] == -1LL && c[1] == 0LL) + return 0; + else + return 1; + }" + DETECTED_SSE_42) + endif() + +# Check for SSE 4.1 support. + if(_SSE_TEST_41) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_REQUIRED_FLAGS "-msse4.1") + endif() + check_cxx_source_runs(" + #include + #include + int main() + { + long long a[2] = { 1, 2 }; + long long b[2] = { -1, 2 }; + long long c[2]; + __m128i va = _mm_loadu_si128((__m128i*)a); + __m128i vb = _mm_loadu_si128((__m128i*)b); + __m128i vc = _mm_cmpeq_epi64(va, vb); + + _mm_storeu_si128((__m128i*)c, vc); + if (c[0] == 0LL && c[1] == -1LL) + return 0; + else + return 1; + }" DETECTED_SSE_41) + endif() + +# Check for SSE 3 support. + if(_SSE_TEST_30) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_REQUIRED_FLAGS "-msse3") + endif() + check_cxx_source_runs(" + #include + #ifdef _WIN32 + #include + #else + #include + #endif + + int main() + { + float a[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; + float b[4] = { 3.0f, 5.0f, 7.0f, 9.0f }; + float c[4]; + + __m128 va = _mm_loadu_ps(a); + __m128 vb = _mm_loadu_ps(b); + __m128 vc = _mm_hadd_ps(va, vb); + + _mm_storeu_ps(c, vc); + if (c[0] == 3.0f && c[1] == 7.0f && c[2] == 8.0f && c[3] == 16.0f) + return 0; + else + return 1; + }" DETECTED_SSE_30) + endif() + + + set(CMAKE_REQUIRED_FLAGS) + + if(DETECTED_SSE_42) + set(SSE_VERSION "4.2") + set(SSE_STR "4_2") + set(SSE_FOUND 1) + elseif(DETECTED_SSE_41) + set(SSE_VERSION "4.1") + set(SSE_STR "4_1") + set(SSE_FOUND 1) + elseif(DETECTED_SSE_30) + set(SSE_VERSION "3.0") + set(SSE_STR "3_0") + set(SSE_FOUND 1) + endif() + + + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) + if(DETECTED_SSE_42) + set(SSE_FLAGS "${SSE_FLAGS} -msse4.2 -mfpmath=sse") + endif() + if(DETECTED_SSE_41) + set(SSE_FLAGS "${SSE_FLAGS} -msse4.1 -mfpmath=sse") + endif() + if(DETECTED_SSE_30) + set(SSE_FLAGS "${SSE_FLAGS} -msse3 -mfpmath=sse") + endif() + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # clang does not require "-mfpmath" which is automatic + if(DETECTED_SSE_42) + set(SSE_FLAGS "${SSE_FLAGS} -msse4.2") + endif() + if(DETECTED_SSE_41) + set(SSE_FLAGS "${SSE_FLAGS} -msse4.1") + endif() + if(DETECTED_SSE_30) + set(SSE_FLAGS "${SSE_FLAGS} -msse3") + endif() + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + set(SSE_FLAGS "-xHost") + elseif(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) + endif() + + # Export flags to caller scope + if(SSE_FOUND) + set(SSE_FOUND TRUE PARENT_SCOPE) + set(SSE_FLAGS "${SSE_FLAGS}" PARENT_SCOPE) + set(SSE_VERSION "${SSE_VERSION}" PARENT_SCOPE) + else() + set(SSE_FOUND FALSE PARENT_SCOPE) + set(SSE_FLAGS "") + endif() + + return() + +endfunction() + +# This script checks for the highest level of FMA support on the host +# by compiling and running small C++ programs that uses FMA intrinsics. + +# If any FMA support is detected, the following variables are set: +# +# FMA_FOUND = 1 +# FMA_FLAGS = compile flags for the version of FMA found +# +# If FMA is not supported on the host platform, these variables are +# not set. + +function (test_fma_availability) + set(FMA_FLAGS) + set(FMA_FOUND) + set(DETECTED_FMA) + + include(CheckCXXSourceRuns) + set(CMAKE_REQUIRED_FLAGS) + +# Generate a list of FMA versions to test. + set(_FMA_TEST 1) + +# Check for FMA support. + if(_FMA_TEST) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_REQUIRED_FLAGS "-mavx2 -mfma") + elseif(MSVC AND NOT CMAKE_CL_64) + set(CMAKE_REQUIRED_FLAGS "/arch:AVX2") + endif() + check_cxx_source_runs(" + #include + int main() + { + __m256d a = _mm256_set_pd (-1, 2, -3, 4); + __m256d b = _mm256_set_pd (-2, 3, -4, 1); + __m256d c = _mm256_set_pd (-11, 6, 4, -1); + + __m256d result = _mm256_fmsub_pd (a, b, c); + return 0; + }" DETECTED_FMA) + endif() + + set(CMAKE_REQUIRED_FLAGS) + + if(DETECTED_FMA) + SET(FMA_FOUND 1) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + SET(FMA_FLAGS "${FMA_FLAGS} -mfma") + endif() + endif() + + if(FMA_FOUND) + set(FMA_FOUND TRUE PARENT_SCOPE) + set(FMA_FLAGS "${FMA_FLAGS}" PARENT_SCOPE) + else() + set(FMA_FOUND FALSE PARENT_SCOPE) + set(FMA_FLAGS "") + endif() + + + return() +endfunction() + + +# This script checks for the highest level of AVX support on the host +# by compiling and running small C++ programs that use AVX intrinsics. +# +# If any AVX support is detected, the following variables are set: +# +# AVX_FOUND = 1 +# AVX_VERSION = the requested version, if EXACT is true, or +# the highest AVX version found. +# AVX_FLAGS = compile flags for the version of AVX found +# +# If AVX is not supported on the host platform, these variables are +# not set. + +function(test_avx_availability) + + set(AVX_FLAGS) + set(AVX_FOUND) + set(DETECTED_AVX_10) + set(DETECTED_AVX_20) + + include(CheckCXXSourceRuns) + set(CMAKE_REQUIRED_FLAGS) + + set(_AVX_TEST_20 1) + set(_AVX_TEST_10 1) + +# Check for AVX2 support. + if(_AVX_TEST_20) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_REQUIRED_FLAGS "-mavx2") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + set(CMAKE_REQUIRED_FLAGS "-xHost") + elseif(MSVC AND NOT CMAKE_CL_64) + set(CMAKE_REQUIRED_FLAGS "/arch:AVX2") + endif() + check_cxx_source_runs(" + #include + int main() + { + __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); + __m256i result = _mm256_abs_epi32 (a); + return 0; + }" DETECTED_AVX_20) + endif() + +# Check for AVX support. + if(_AVX_TEST_10) + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_REQUIRED_FLAGS "-mavx") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + set(CMAKE_REQUIRED_FLAGS "-xHost") + elseif(MSVC AND NOT CMAKE_CL_64) + set(CMAKE_REQUIRED_FLAGS "/arch:AVX") + endif() + check_cxx_source_runs(" + #include + int main() + { + __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); + __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + __m256 result = _mm256_add_ps (a, b); + return 0; + }" DETECTED_AVX_10) + endif() + + set(CMAKE_REQUIRED_FLAGS) + + + if(DETECTED_AVX_20) + set(AVX_VERSION "2.0") + set(AVX_STR "2_0") + set(AVX_FOUND 1) + elseif(DETECTED_AVX_10) + set(AVX_VERSION "1.0") + set(AVX_STR "1_0") + set(AVX_FOUND 1) + endif() + + + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(DETECTED_AVX_20) + SET(AVX_FLAGS "${AVX_FLAGS} -mavx2") + endif() + if(DETECTED_AVX_10) + SET(AVX_FLAGS "${AVX_FLAGS} -mavx") + endif() + elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") + set(AVX_FLAGS "-xHost") + elseif(MSVC) + if(DETECTED_AVX_20) + SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2") + endif() + if(DETECTED_AVX_10) + SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX") + endif() + endif() + + if(AVX_FOUND) + set(AVX_FOUND TRUE PARENT_SCOPE) + set(AVX_FLAGS "${AVX_FLAGS}" PARENT_SCOPE) + set(AVX_VERSION "${AVX_VERSION}" PARENT_SCOPE) + else() + set(AVX_FOUND FALSE PARENT_SCOPE) + set(AVX_FLAGS "") + endif() + + + return() + +endfunction() + +# This script checks for the highest level of NEON support on the host +# by compiling and running small C++ programs that uses NEON intrinsics. +# +# If any NEON support is detected, the following variables are set: +# +# NEON_FOUND = 1 +# NEON_VERSION = 2_0 (assumes Advanced SIMD 2.0) +# NEON_FLAGS = compile flags for the version of NEON found +# +# If NEON is not supported on the host platform, these variables are +# not set. +# + +function(test_neon_availability) + + set(NEON_FLAGS) + set(NEON_FOUND) + set(DETECTED_NEON) + + include(CheckCXXSourceRuns) + + set(CMAKE_REQUIRED_FLAGS "-march=armv8-a") +# Check for NEON support. + check_cxx_source_runs(" +#include + int main() + { + float64_t a[2] = { 1., 2. }; + float64_t b[2] = { -1., 3. }; + float64_t c[2]; + + float64x2_t va = vld1q_f64(&a[0]); + float64x2_t vb = vld1q_f64(&b[0]); + float64x2_t vc = vaddq_f64(va, vb); + vst1q_f64(&c[0], vc); + + if (c[0] == 0. && c[1] == 5.) + return 0; + else + return 0; + } + " DETECTED_NEON) + + set(CMAKE_REQUIRED_FLAGS) + + if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + if(DETECTED_NEON) + SET(NEON_FLAGS "-march=armv8-a") + set(NEON_STR "2_0") + SET(NEON_FOUND 1) + else() + # Setting -ffloat-store to alleviate 32bit vs 64bit discrepancies on non-SIMD platforms. + set(NEON_FLAGS "-ffloat-store") + endif() + endif() + + if(NEON_FOUND) + set(NEON_FOUND TRUE PARENT_SCOPE) + set(NEON_FLAGS "${NEON_FLAGS}" PARENT_SCOPE) + else() + set(NEON_FOUND FALSE PARENT_SCOPE) + set(NEON_FLAGS "") + endif() + + return() + +endfunction() + + + +### +# +# Perform SIMD checks as defined above +# +### + +set(SIMD_FLAGS "") + +set(SIMD_SSE "FALSE" CACHE STRING "Any detected SSE SIMD version, else FALSE") +set(SIMD_AVX "FALSE" CACHE STRING "Any detected AVX SIMD version, else FALSE") +set(SIMD_FMA ${FMA_FOUND} CACHE BOOL "Whether AVX2 FMA extensions were a detected SIMD feature") +set(SIMD_NEON ${NEON_FOUND} CACHE BOOL "Whether NEON was a detected SIMD feature") + +# Check availability of SSE instructions +test_sse_availability() +if (SSE_FOUND) + if (NOT ${SIMD_FIND_QUIETLY}) + message(STATUS "Target supports SSE instructions") + endif() + + set(SIMD_FLAGS "${SIMD_FLAGS} ${SSE_FLAGS}") + set(SIMD_SSE "${SSE_VERSION}") +endif() + +test_avx_availability() +if (AVX_FOUND) + if (NOT ${SIMD_FIND_QUIETLY}) + message(STATUS "Target supports AVX instructions") + endif() + + set(SIMD_FLAGS "${SIMD_FLAGS} ${AVX_FLAGS}") + set(SIMD_AVX "${AVX_VERSION}") +endif() + +test_fma_availability() +if (FMA_FOUND) + if (NOT ${SIMD_FIND_QUIETLY}) + message(STATUS "Target supports AVX2 FMA instructions") + endif() + + set(SIMD_FLAGS "${SIMD_FLAGS} ${FMA_FLAGS}") +endif() + +test_neon_availability() +if (NEON_FOUND) + if (NOT ${SIMD_FIND_QUIETLY}) + message(STATUS "Target supports NEON instructions") + endif() + + set(SIMD_FLAGS "${SIMD_FLAGS} ${NEON_FLAGS}") +endif() + +# Determine whether to use SIMD flags or automatic detection +if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} "-dumpversion" OUTPUT_VARIABLE GCC_VERSION_STRING) + if(GCC_VERSION_STRING VERSION_GREATER 4.2 AND NOT APPLE AND NOT CMAKE_CROSSCOMPILING) + SET(SIMD_FLAGS "-march=native") + if (NOT SIMD_FIND_QUIETLY) + message(STATUS "Using automatic native flag for SIMD optimization") + endif() + endif() +elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CROSSCOMPILING) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} "-dumpversion" OUTPUT_VARIABLE CLANG_VERSION_STRING) + if(CLANG_VERSION_STRING VERSION_GREATER_EQUAL 15.0 AND NOT CMAKE_CROSSCOMPILING) + SET(SIMD_FLAGS "-march=native") + if (NOT SIMD_FIND_QUIETLY) + message(STATUS "Using automatic native flag for SIMD optimization") + endif() + elseif(CMAKE_HOST_UNIX) + execute_process(COMMAND uname -m OUTPUT_VARIABLE UNIX_MACHINE_ARCH) + if(UNIX_MACHINE_ARCH MATCHES "x86_64|x86|amd64") + SET(SIMD_FLAGS "-march=native") + if (NOT SIMD_FIND_QUIETLY) + message(STATUS "Using automatic native flag for SIMD optimization") + endif() + endif() + endif() +endif() + + +set(SIMD_C_FLAGS "${SIMD_FLAGS}" CACHE STRING "Flags used for compiling C programs with SIMD support") +set(SIMD_CXX_FLAGS "${SIMD_FLAGS}" CACHE STRING "Flags used for compiling C++ programs with SIMD support") + +mark_as_advanced(SIMD_SSE SIMD_AVX SIMD_FMA SIMD_NEON SIMD_C_FLAGS SIMD_CXX_FLAGS) + diff --git a/cmake/FindSSE.cmake b/cmake/FindSSE.cmake deleted file mode 100644 index 10d36da7c4..0000000000 --- a/cmake/FindSSE.cmake +++ /dev/null @@ -1,312 +0,0 @@ -# This script checks for the highest level of SSE support on the host -# by compiling and running small C++ programs that uses SSE intrinsics. -# -# You can invoke this module using the following command: -# -# FIND_PACKAGE(SSE [major[.minor]] [EXACT] [QUIET|REQUIRED]) -# -# where the version string is one of: -# -# 1.0 for SSE support -# 2.0 for SSE2 support -# 3.0 for SSE3 support -# 3.1 for SSSE3 support -# 4.1 for SSE 4.1 support -# 4.2 for SSE 4.2 support -# -# Note that any ".0" in the above version string is optional. -# -# If any SSE support is detected, the following variables are set: -# -# SSE_FOUND = 1 -# SSE_VERSION = the requested version, if EXACT is true, or -# the highest SSE version found. -# SSE_FLAGS = compile flags for the version of SSE found -# -# If SSE is not supported on the host platform, these variables are -# not set. If QUIET is true, the module does not print a message if -# SSE if missing. If REQUIRED is true, the module produces a fatal -# error if SSE support is missing. -# -set(SSE_FLAGS) -set(SSE_FOUND) -set(DETECTED_SSE_41) -set(DETECTED_SSE_42) -set(DETECTED_SSE_10) -set(DETECTED_SSE_20) -set(DETECTED_SSE_30) - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} "-dumpversion" OUTPUT_VARIABLE GCC_VERSION_STRING) - if(GCC_VERSION_STRING VERSION_GREATER 4.2 AND NOT APPLE AND NOT CMAKE_CROSSCOMPILING) - SET(SSE_FLAGS "${SSE_FLAGS} -march=native") - message(STATUS "Using CPU native flags for SSE optimization: ${SSE_FLAGS}") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT CMAKE_CROSSCOMPILING) - SET(SSE_FLAGS "${SSE_FLAGS} -march=native") - message(STATUS "Using CPU native flags for SSE optimization: ${SSE_FLAGS}") -endif() - -include(CheckCXXSourceRuns) -set(CMAKE_REQUIRED_FLAGS) - - -# Generate a list of SSE versions to test. -if(SSE_FIND_VERSION_EXACT) - if(SSE_FIND_VERSION VERSION_EQUAL "4.2") - set(_SSE_TEST_42 1) - elseif(SSE_FIND_VERSION VERSION_EQUAL "4.1") - set(_SSE_TEST_41 1) - elseif(SSE_FIND_VERSION VERSION_EQUAL "3.0") - set(_SSE_TEST_30 1) - elseif(SSE_FIND_VERSION VERSION_EQUAL "2.0") - set(_SSE_TEST_20 1) - elseif(SSE_FIND_VERSION VERSION_EQUAL "1.0") - set(_SSE_TEST_10 1) - endif() -else() - if(NOT SSE_FIND_VERSION VERSION_GREATER "4.2") - set(_SSE_TEST_42 1) - endif() - if(NOT SSE_FIND_VERSION VERSION_GREATER "4.1") - set(_SSE_TEST_41 1) - endif() - if(NOT SSE_FIND_VERSION VERSION_GREATER "3.0") - set(_SSE_TEST_30 1) - endif() - if(NOT SSE_FIND_VERSION VERSION_GREATER "2.0") - set(_SSE_TEST_20 1) - endif() - if(NOT SSE_FIND_VERSION VERSION_GREATER "1.0") - set(_SSE_TEST_10 1) - endif() -endif() - - -# Check for SSE 4.2 support. -if(_SSE_TEST_42) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-msse4.2") - endif() - check_cxx_source_runs(" - #include - #include - int main() - { - long long a[2] = { 1, 2 }; - long long b[2] = { -1, 3 }; - long long c[2]; - __m128i va = _mm_loadu_si128((__m128i*)a); - __m128i vb = _mm_loadu_si128((__m128i*)b); - __m128i vc = _mm_cmpgt_epi64(va, vb); - - _mm_storeu_si128((__m128i*)c, vc); - if (c[0] == -1LL && c[1] == 0LL) - return 0; - else - return 1; - }" - DETECTED_SSE_42) -endif() - -# Check for SSE 4.1 support. -if(_SSE_TEST_41) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-msse4.1") - endif() - check_cxx_source_runs(" - #include - #include - int main() - { - long long a[2] = { 1, 2 }; - long long b[2] = { -1, 2 }; - long long c[2]; - __m128i va = _mm_loadu_si128((__m128i*)a); - __m128i vb = _mm_loadu_si128((__m128i*)b); - __m128i vc = _mm_cmpeq_epi64(va, vb); - - _mm_storeu_si128((__m128i*)c, vc); - if (c[0] == 0LL && c[1] == -1LL) - return 0; - else - return 1; - }" DETECTED_SSE_41) -endif() - -# Check for SSE 3 support. -if(_SSE_TEST_30) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-msse3") - endif() - check_cxx_source_runs(" - #include - #ifdef _WIN32 - #include - #else - #include - #endif - - int main() - { - float a[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; - float b[4] = { 3.0f, 5.0f, 7.0f, 9.0f }; - float c[4]; - - __m128 va = _mm_loadu_ps(a); - __m128 vb = _mm_loadu_ps(b); - __m128 vc = _mm_hadd_ps(va, vb); - - _mm_storeu_ps(c, vc); - if (c[0] == 3.0f && c[1] == 7.0f && c[2] == 8.0f && c[3] == 16.0f) - return 0; - else - return 1; - }" DETECTED_SSE_30) -endif() - -# Check for SSE2 support. -if(_SSE_TEST_20) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-msse2") - elseif(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) - set(CMAKE_REQUIRED_FLAGS "/arch:SSE2") - endif() - check_cxx_source_runs(" - #include - int main() - { - int a[4] = { 1, 2, 3, 4 }; - int b[4] = { 3, 6, -4, -4 }; - int c[4]; - - __m128i va = _mm_loadu_si128((__m128i*)a); - __m128i vb = _mm_loadu_si128((__m128i*)b); - __m128i vc = _mm_add_epi32(va, vb); - - _mm_storeu_si128((__m128i*)c, vc); - if (c[0] == 4 && c[1] == 8 && c[2] == -1 && c[3] == 0) - return 0; - else - return 1; - }" DETECTED_SSE_20) -endif() - -# Check for SSE support. -if(_SSE_TEST_10) - if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CMAKE_REQUIRED_FLAGS "-msse") - elseif(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) - set(CMAKE_REQUIRED_FLAGS "/arch:SSE") - endif() - check_cxx_source_runs(" - #include - int main() - { - float a[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; - float b[4] = { 2.0f, 3.0f, 4.0f, 5.0f }; - float c[4]; - __m128 va = _mm_loadu_ps(a); - __m128 vb = _mm_loadu_ps(b); - __m128 vc = _mm_add_ps(va, vb); - - _mm_storeu_ps(c, vc); - if (c[0] == 3.0f && c[1] == 5.0f && c[2] == 7.0f && c[3] == 9.0f) - return 0; - else - return 1; - }" DETECTED_SSE_10) -endif() - -set(CMAKE_REQUIRED_FLAGS) - -if(DETECTED_SSE_42) - set(SSE_VERSION "4.2") - set(SSE_STR "4_2") - set(SSE_FOUND 1) -elseif(DETECTED_SSE_41) - set(SSE_VERSION "4.1") - set(SSE_STR "4_1") - set(SSE_FOUND 1) -elseif(DETECTED_SSE_30) - set(SSE_VERSION "3.0") - set(SSE_STR "3_0") - set(SSE_FOUND 1) -elseif(DETECTED_SSE_20) - set(SSE_VERSION "2.0") - set(SSE_STR "2_0") - set(SSE_FOUND 1) -elseif(DETECTED_SSE_10) - set(SSE_VERSION "1.0") - set(SSE_STR "1_0") - set(SSE_FOUND 1) -endif() - - -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) - if(DETECTED_SSE_42) - set(SSE_FLAGS "${SSE_FLAGS} -msse4.2 -mfpmath=sse") - elseif(DETECTED_SSE_41) - set(SSE_FLAGS "${SSE_FLAGS} -msse4.1 -mfpmath=sse") - elseif(DETECTED_SSE_30) - set(SSE_FLAGS "${SSE_FLAGS} -msse3 -mfpmath=sse") - elseif(DETECTED_SSE_20) - set(SSE_FLAGS "${SSE_FLAGS} -msse2 -mfpmath=sse") - elseif(DETECTED_SSE_10) - set(SSE_FLAGS "${SSE_FLAGS} -msse -mfpmath=sse") - else() - # Setting -ffloat-store to alleviate 32bit vs 64bit discrepancies on non-SSE platforms. - set(SSE_FLAGS "-ffloat-store") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # clang does not require "-mfpmath" which is automatic - if(DETECTED_SSE_42) - set(SSE_FLAGS "${SSE_FLAGS} -msse4.2") - elseif(DETECTED_SSE_41) - set(SSE_FLAGS "${SSE_FLAGS} -msse4.1") - elseif(DETECTED_SSE_30) - set(SSE_FLAGS "${SSE_FLAGS} -msse3") - elseif(DETECTED_SSE_20) - set(SSE_FLAGS "${SSE_FLAGS} -msse2") - elseif(DETECTED_SSE_10) - set(SSE_FLAGS "${SSE_FLAGS} -msse") - else() - # Setting -ffloat-store to alleviate 32bit vs 64bit discrepancies on non-SSE platforms. - set(SSE_FLAGS "-ffloat-store") - endif() -elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - set(SSE_FLAGS "-xHost") -elseif(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 4) - if(DETECTED_SSE_20) - set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE2") - elseif(DETECTED_SSE_10) - set(SSE_FLAGS "${SSE_FLAGS} /arch:SSE") - endif() -endif() - -if(SSE_FOUND) - message(STATUS " Found SSE ${SSE_VERSION} extensions, using flags: ${SSE_FLAGS}") -else() - message(STATUS " No SSE support found") - set(SSE_FLAGS "") -endif() - -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${SSE_FLAGS}") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${SSE_FLAGS}") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${SSE_FLAGS}") - -return() -#------------------------------------- - -# If no SSE support is found, print an error message. -if(SSE_FIND_VERSION) - set(_SSE_ERROR_MESSAGE "SSE ${SSE_FIND_VERSION} support is not found on this architecture") -else() - set(_SSE_ERROR_MESSAGE "SSE support is not found on this architecture") -endif() - -if(SSE_FIND_REQUIRED) - message(FATAL_ERROR "${_SSE_ERROR_MESSAGE}") -elseif(NOT SSE_FIND_QUIETLY) - message(STATUS "${_SSE_ERROR_MESSAGE}") -endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9a7aa5c625..73d127f585 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -174,89 +174,70 @@ endif() option(USE_SIMD "Enable use of SIMD if supported (SSE, AVX, NEON)" ON) if(USE_SIMD) - - # Figure out SIMD level support - message(STATUS "Searching for SSE...") - find_package(SSE) + + # Figure out SIMD support + message(STATUS "Testing SIMD capabilities...") + find_package(SIMD) # Set substitution variables for configuration file - if(SSE_FOUND) + if(SIMD_SSE) set(CHRONO_HAS_SSE "#define CHRONO_HAS_SSE") - set(CHRONO_SSE_LEVEL "#define CHRONO_SSE_LEVEL \"${SSE_VERSION}\"") - if(DETECTED_SSE_10) - set(CHRONO_SSE_1_0 "#define CHRONO_SSE_1_0") - endif() - if(DETECTED_SSE_20) - set(CHRONO_SSE_2_0 "#define CHRONO_SSE_2_0") - endif() - if(DETECTED_SSE_30) + set(CHRONO_SSE_LEVEL "#define CHRONO_SSE_LEVEL \"${SIMD_SSE}\"") + + # SSE up to and including 2.0 is supported on all 64-bit x86 systems + set(CHRONO_SSE_1_0 "#define CHRONO_SSE_1_0") + set(CHRONO_SSE_2_0 "#define CHRONO_SSE_2_0") + + if(${SIMD_SSE} VERSION_GREATER_EQUAL 3.0) set(CHRONO_SSE_3_0 "#define CHRONO_SSE_3_0") endif() - if(DETECTED_SSE_41) + if(${SIMD_SSE} VERSION_GREATER_EQUAL 4.1) set(CHRONO_SSE_4_1 "#define CHRONO_SSE_4_1") endif() - if(DETECTED_SSE_42) + if(${SIMD_SSE} VERSION_GREATER_EQUAL 4.2) set(CHRONO_SSE_4_2 "#define CHRONO_SSE_4_2") endif() endif() - # Figure out AVX level support - message(STATUS "Searching for AVX...") - find_package(AVX) + # Figure out AVX support # Set substitution variables for configuration file - if(AVX_FOUND) + if(SIMD_AVX) set(CHRONO_HAS_AVX "#define CHRONO_HAS_AVX") - set(CHRONO_AVX_LEVEL "#define CHRONO_AVX_LEVEL \"${AVX_VERSION}\"") - if(DETECTED_AVX_10) + set(CHRONO_AVX_LEVEL "#define CHRONO_AVX_LEVEL \"${SIMD_AVX}\"") + if(${SIMD_AVX} VERSION_GREATER_EQUAL 1.0) set(CHRONO_AVX_1_0 "#define CHRONO_AVX_1_0") endif() - if(DETECTED_AVX_20) + if(${SIMD_AVX} VERSION_GREATER_EQUAL 2.0) set(CHRONO_AVX_2_0 "#define CHRONO_AVX_2_0") endif() endif() - # Figure out FMA level support - set(CHECK_FMA TRUE) + # Figure out FMA support + set(ALLOW_FMA TRUE) if(MSVC) if(NOT CH_WHOLE_PROG_OPT) message(STATUS "FMA requires enabling whole program optimization. FMA check disabled.") - set(CHECK_FMA FALSE) - endif() - if(NOT DETECTED_AVX_20) - message(STATU "FMA requires AVX2 support. FMA check disabled.") - set(CHECK_FMA FALSE) + set(ALLOW_FMA FALSE) endif() endif() - if(CHECK_FMA) - message(STATUS "Searching for FMA...") - find_package(FMA) - + if(SIMD_FMA AND ALLOW_FMA) # Set substitution variables for configuration file - if(FMA_FOUND) - set(CHRONO_HAS_FMA "#define CHRONO_HAS_FMA") - endif() + set(CHRONO_HAS_FMA "#define CHRONO_HAS_FMA") endif() - # Add SSE, AVX, and FMA flags to Chrono compiler flags - # Note that these flags are already added to CMake compiler flags - set(CH_CXX_FLAGS "${CH_CXX_FLAGS} ${SSE_FLAGS} ${AVX_FLAGS} ${FMA_FLAGS}") - set(CH_C_FLAGS "${CH_C_FLAGS} ${SSE_FLAGS} ${AVX_FLAGS} ${FMA_FLAGS}") - - # Check if the system has NEON support - message(STATUS "Searching for NEON...") - find_package(NEON) - + # Figure out NEON support + # Set substitution variables for configuration file. - if (NEON_FOUND) + if(SIMD_NEON) set (CHRONO_HAS_NEON "#define CHRONO_HAS_NEON") endif() - # Add NEON flags to Chrono compiler flags + # Add SIMD flags to Chrono compiler flags # Note that these flags are already added to CMake compiler flags - set(CH_CXX_FLAGS "${CH_CXX_FLAGS} ${NEON_FLAGS}") - set(CH_C_FLAGS "${CH_C_FLAGS} ${NEON_FLAGS}") + set(CH_C_FLAGS "${CH_C_FLAGS} ${SIMD_C_FLAGS}") + set(CH_CXX_FLAGS "${CH_CXX_FLAGS} ${SIMD_CXX_FLAGS}") else()