Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable automatic function multiversioning #26

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 42 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,50 @@ target_compile_features (photospline
PUBLIC
cxx_constexpr
)
target_compile_options (photospline PUBLIC -O3)
target_compile_options (photospline PUBLIC -O3 -fPIC)
target_compile_options (photospline PRIVATE -Wall -Wextra)
IF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86_64)$")
target_compile_options (photospline PUBLIC -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mno-avx)
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
target_compile_options (photospline PUBLIC -maltivec)
ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^sparc")
target_compile_options (photospline PUBLIC -mvis)

# For newer compilers we would like to use 'target cloning'/'function multiversioning'
# to cover different instruction sets portably (using preprocessor logic so that
# downstream code picks it up 'for free'), but for old compilers which can't do that,
# we still want to make sure that a reasonable baseline instruction set is used.
# The logic here may be overridden by manually setting USE_TARGET_CLONING.
IF (NOT DEFINED USE_TARGET_CLONING)
	# This logic must mirror what is in include/photospline/detail/simd.h;
	# if the two disagree, a build can end up with neither cloned functions
	# nor the baseline instruction set flags.
	IF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86_64)$")
		IF (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
			set(USE_TARGET_CLONING FALSE)
		ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
			# simd.h enables cloning for __GNUC__ >= 9, or for 8.x with
			# __GNUC_MINOR__ > 3, i.e. for 8.4 and later. The compiler version
			# seen here carries a patch level, so testing 'greater than 8.3'
			# would wrongly accept 8.3.x patch releases (e.g. 8.3.1), which the
			# header still treats as 8.3. Compare against 8.4 instead.
			IF (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.4)
				set(USE_TARGET_CLONING FALSE)
			ELSE ()
				set(USE_TARGET_CLONING TRUE)
			ENDIF ()
		ELSE ()
			# Assume other compilers do not support this
			set(USE_TARGET_CLONING FALSE)
		ENDIF ()
	ELSE ()
		# No detailed treatment of non-x86 architectures at this time
		set(USE_TARGET_CLONING FALSE)
	ENDIF()
ENDIF (NOT DEFINED USE_TARGET_CLONING)

# Report the decision, and when target cloning is not in use pin a baseline
# vector instruction set for the architecture being built for.
IF (NOT USE_TARGET_CLONING)
	MESSAGE("-- Will NOT assume use of target cloning for vector instruction sets")

	IF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(i.86|x86_64)$")
		# AVX is switched off explicitly so that the resulting binaries
		# do not crash on machines without AVX support
		target_compile_options (photospline PUBLIC -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mno-avx)
	ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
		target_compile_options (photospline PUBLIC -maltivec)
	ELSEIF (CMAKE_SYSTEM_PROCESSOR MATCHES "^sparc")
		target_compile_options (photospline PUBLIC -mvis)
	ENDIF ()
ELSE ()
	# No explicit instruction set flags are added in this case
	MESSAGE("-- Will assume use of target cloning for vector instruction sets")
ENDIF ()

target_link_libraries (photospline
PUBLIC
${CFITSIO_LIBRARIES}
Expand Down
6 changes: 6 additions & 0 deletions include/photospline/detail/bspline_multi.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ namespace photospline{

template <typename Alloc>
template <typename Float>
PHOTOSPLINE_TARGET_CLONE
void splinetable<Alloc>::ndsplineeval_multibasis_core(const int *centers, const typename detail::simd_vector<Float>::type*** localbasis, typename detail::simd_vector<Float>::type* result) const{
#if (defined(__i386__) || defined (__x86_64__)) && defined(__ELF__)
/*
Expand Down Expand Up @@ -80,6 +81,7 @@ namespace{

template <typename Alloc>
template <typename Float, unsigned int D>
PHOTOSPLINE_TARGET_CLONE
void splinetable<Alloc>::ndsplineeval_multibasis_coreD(const int *centers, const typename detail::simd_vector<Float>::type*** localbasis, typename detail::simd_vector<Float>::type* result) const{
#if (defined(__i386__) || defined (__x86_64__)) && defined(__ELF__)
/*
Expand Down Expand Up @@ -150,6 +152,7 @@ void splinetable<Alloc>::ndsplineeval_multibasis_coreD(const int *centers, const

template <typename Alloc>
template <typename Float, unsigned int D, unsigned int Order>
PHOTOSPLINE_TARGET_CLONE
void splinetable<Alloc>::ndsplineeval_multibasis_coreD_FixedOrder(const int *centers, const typename detail::simd_vector<Float>::type*** localbasis, typename detail::simd_vector<Float>::type* result) const{
#if (defined(__i386__) || defined (__x86_64__)) && defined(__ELF__)
/*
Expand Down Expand Up @@ -223,6 +226,7 @@ void splinetable<Alloc>::ndsplineeval_multibasis_coreD_FixedOrder(const int *cen

template <typename Alloc>
template<typename Float, unsigned int ... Orders>
PHOTOSPLINE_TARGET_CLONE
void splinetable<Alloc>::ndsplineeval_multibasis_core_KnownOrder(const int *centers, const typename detail::simd_vector<Float>::type*** localbasis, typename detail::simd_vector<Float>::type* result) const{
#if (defined(__i386__) || defined (__x86_64__)) && defined(__ELF__)
/*
Expand Down Expand Up @@ -294,6 +298,7 @@ void splinetable<Alloc>::ndsplineeval_multibasis_core_KnownOrder(const int *cent

template<typename Alloc>
template<typename Float>
PHOTOSPLINE_TARGET_CLONE
void
splinetable<Alloc>::ndsplineeval_gradient(const double* x, const int* centers, double* evaluates) const
{
Expand Down Expand Up @@ -351,6 +356,7 @@ splinetable<Alloc>::ndsplineeval_gradient(const double* x, const int* centers, d

template<typename Alloc>
template<typename Float>
PHOTOSPLINE_TARGET_CLONE
void splinetable<Alloc>::evaluator_type<Float>::ndsplineeval_gradient(const double* x, const int* centers, double* evaluates) const
{
uint32_t maxdegree = *std::max_element(table.order,table.order+table.ndim) + 1;
Expand Down
30 changes: 30 additions & 0 deletions include/photospline/detail/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,34 @@ struct simd_vector {

}}

// Instruction-set variants handed to target_clones by PHOTOSPLINE_TARGET_CLONE.
// Only provided on x86, the one architecture family treated here.
#if defined(__i386__) || defined(__amd64__)
#define PHOTOSPLINE_VECTOR_ISN_VARIANTS "avx512f","avx2","avx","sse4.2","default"
#endif

// PHOTOSPLINE_USE_TARGET_CLONING is always defined, as 1 when target cloning
// is to be used and as 0 otherwise.
#if !(defined(__i386__) || defined(__amd64__))
  // Architectures other than x86 are left alone for now
  #define PHOTOSPLINE_USE_TARGET_CLONING 0
#elif defined(__clang__)
  // clang 14 has this feature and it works nicely, except that
  // "multiversioned functions do not yet support function templates",
  // and templates are most of the places we want to use it
  #define PHOTOSPLINE_USE_TARGET_CLONING 0
#elif defined(__GNUC__) && !defined(__INTEL_COMPILER) \
      && (__GNUC__ >= 9 || (__GNUC__ == 8 && __GNUC_MINOR__ > 3))
  // genuine gcc, new enough: turning this on crashes gcc 8.3, while 8.5 works
  #define PHOTOSPLINE_USE_TARGET_CLONING 1
#else
  // older gcc, and any other compiler: assume the feature is unavailable
  #define PHOTOSPLINE_USE_TARGET_CLONING 0
#endif

// PHOTOSPLINE_TARGET_CLONE is written in front of a function definition.
// It either requests one clone of the function per entry of
// PHOTOSPLINE_VECTOR_ISN_VARIANTS, or expands to nothing.
#if !PHOTOSPLINE_USE_TARGET_CLONING
  // cloning is not in use: the marker is a no-op
  #define PHOTOSPLINE_TARGET_CLONE
#else
  #define PHOTOSPLINE_TARGET_CLONE __attribute__ ((target_clones( PHOTOSPLINE_VECTOR_ISN_VARIANTS )))
#endif

#endif //PHOTOSPLINE_SIMD_H