Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable building on any 64-bit little-endian architecture #566

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ have_man_html=no
case $target in
aarch64*) target_aarch64="yes" ;;
powerpc64*) target_ppc="yes" ;;
x86_64*) target_x86_64="yes" ;;
esac

AC_CHECK_HEADERS([windows.h], [AM_CONDITIONAL(TARGET_WIN, true)], [AM_CONDITIONAL(TARGET_WIN, false)])
Expand All @@ -92,6 +93,7 @@ AM_CONDITIONAL(HAVE_PS2PDF, test "x${have_ps2pdf}" = "xyes")
AM_CONDITIONAL(HAVE_MAN_HTML, test "x${have_man_html}" = "xyes")
AM_CONDITIONAL(TARGET_PPC, test "x${target_ppc}" = "xyes")
AM_CONDITIONAL(TARGET_AARCH64, test "x${target_aarch64}" = "xyes")
AM_CONDITIONAL(TARGET_X86_64, test "x${target_x86_64}" = "xyes")
AM_PROG_CC_C_O

AC_CONFIG_FILES([Makefile
Expand Down
19 changes: 8 additions & 11 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ else
if TARGET_AARCH64
AM_CFLAGS += -march=armv8-a+simd -mtune=generic
else
if TARGET_X86_64
AM_CFLAGS += -march=x86-64 -mtune=generic
endif
endif
endif

# Conditionally set NDEBUG based on ENABLE_DEBUG
if ENABLE_DEBUG
Expand Down Expand Up @@ -86,20 +88,15 @@ util.h \
vsearch.h \
xstring.h

if TARGET_PPC
libcpu_a_SOURCES = cpu.cc $(VSEARCHHEADERS)
noinst_LIBRARIES = libcpu.a libcityhash.a
else
if TARGET_AARCH64
libcpu_a_SOURCES = cpu.cc $(VSEARCHHEADERS)
noinst_LIBRARIES = libcpu.a libcityhash.a
else
if TARGET_X86_64
libcpu_sse2_a_SOURCES = cpu.cc $(VSEARCHHEADERS)
libcpu_sse2_a_CXXFLAGS = $(AM_CXXFLAGS) -msse2
libcpu_ssse3_a_SOURCES = cpu.cc $(VSEARCHHEADERS)
libcpu_ssse3_a_CXXFLAGS = $(AM_CXXFLAGS) -mssse3 -DSSSE3
noinst_LIBRARIES = libcpu_sse2.a libcpu_ssse3.a libcityhash.a
endif
else
libcpu_a_SOURCES = cpu.cc $(VSEARCHHEADERS)
noinst_LIBRARIES = libcpu.a libcityhash.a
endif

libcityhash_a_SOURCES = city.cc city.h
Expand All @@ -114,8 +111,8 @@ else

libcityhash_a_CXXFLAGS = $(AM_CXXFLAGS) -Wno-sign-compare

if TARGET_PPC
__top_builddir__bin_vsearch_LDADD = libcityhash.a libcpu.a
if TARGET_X86_64
__top_builddir__bin_vsearch_LDADD = libcityhash.a libcpu_ssse3.a libcpu_sse2.a
else
if TARGET_AARCH64
__top_builddir__bin_vsearch_LDADD = libcityhash.a libcpu.a
Expand Down
2 changes: 1 addition & 1 deletion src/align_simd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ const uint16x8_t neon_mask =
#define v_shift_left(a) vextq_s16((v_zero), (a), 7)
#define v_mask_gt(a, b) vaddvq_u16(vandq_u16((vcgtq_s16((a), (b))), neon_mask))

#elif __x86_64__
#elif defined(__x86_64__) || defined(SIMDE_VERSION)

typedef __m128i VECTOR_SHORT;

Expand Down
17 changes: 13 additions & 4 deletions src/cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,20 @@ void increment_counters_from_bitmap(count_t * counters,
}
}

#elif __x86_64__
#elif __x86_64__ || defined(SIMDE_VERSION)

#ifdef __x86_64__
#include <emmintrin.h>
#else
#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/sse2.h>
#endif

#ifdef SSSE3
#if defined(SIMDE_VERSION)
void increment_counters_from_bitmap(count_t * counters,
unsigned char * bitmap,
unsigned int totalbits)
#elif defined(SSSE3)
void increment_counters_from_bitmap_ssse3(count_t * counters,
unsigned char * bitmap,
unsigned int totalbits)
Expand Down Expand Up @@ -189,7 +198,7 @@ void increment_counters_from_bitmap_sse2(count_t * counters,
// 0xf7fbfdfe -> 1111'0111'1111'1011'1111'1101'1111'1110 (32 bits)
static constexpr auto mask2 = static_cast<int32_t>(0xf7fbfdfe);

#ifdef SSSE3
#if defined(SSSE3) || defined(SIMDE_VERSION)
const auto c1 = _mm_set_epi32(0x01010101, 0x01010101, 0x00000000, 0x00000000);
#endif
const auto c2 = _mm_set_epi32(mask1, mask2, mask1, mask2);
Expand All @@ -202,7 +211,7 @@ void increment_counters_from_bitmap_sse2(count_t * counters,
for(auto j = 0U; j < r; j++)
{
const auto xmm0 = _mm_loadu_si128((__m128i *) p++);
#ifdef SSSE3
#if defined(SSSE3) || defined(SIMDE_VERSION)
const auto xmm1 = _mm_shuffle_epi8(xmm0, c1);
#else
const auto xmm6 = _mm_unpacklo_epi8(xmm0, xmm0);
Expand Down
2 changes: 1 addition & 1 deletion src/vsearch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ void cpu_features_detect()
}
}
#else
#error Unknown architecture
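// Any other architecture is built with SIMDe (see vsearch.h); no runtime CPU feature detection is performed here.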
#endif
}

Expand Down
13 changes: 7 additions & 6 deletions src/vsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,13 @@

#ifdef __x86_64__

#define PROG_CPU "x86_64"
#define PROG_CPU x86_64
#include <x86intrin.h>

#elif __PPC__

#ifdef __LITTLE_ENDIAN__
#define PROG_CPU "ppc64le"
#define PROG_CPU ppc64le
#include <altivec.h>
#undef bool
#else
Expand All @@ -121,13 +121,14 @@

#elif __aarch64__

#define PROG_CPU "aarch64"
#define PROG_CPU aarch64
#include <arm_neon.h>

#else

#error Unknown architecture (not ppc64le, aarch64 or x86_64)

#define PROG_CPU simde
#define SIMDE_ENABLE_NATIVE_ALIASES
#include <simde/x86/avx512.h>
#endif


Expand Down Expand Up @@ -190,7 +191,7 @@
#endif


#define PROG_ARCH PROG_OS "_" PROG_CPU
// PROG_CPU is defined as a bare token (e.g. x86_64), not a string literal.
// Writing "PROG_CPU" in quotes would embed the literal text PROG_CPU, because
// macro names are never expanded inside string literals. Two-level
// stringification expands PROG_CPU first and then converts the result to a
// string, which is concatenated with PROG_OS and the "_" separator.
#define PROG_STRINGIFY_(x) #x
#define PROG_STRINGIFY(x) PROG_STRINGIFY_(x)
#define PROG_ARCH PROG_OS "_" PROG_STRINGIFY(PROG_CPU)

#ifdef HAVE_DLFCN_H
#include <dlfcn.h>
Expand Down