diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp index b50a2c8bc8..846f542409 100644 --- a/faiss/impl/ScalarQuantizer.cpp +++ b/faiss/impl/ScalarQuantizer.cpp @@ -55,6 +55,15 @@ namespace faiss { #endif #endif +#if defined(__aarch64__) +#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 /* clang also defines __GNUC__ (4 by default), so exclude it: only genuine GCC older than 8 should lose the NEON paths */ +#warning \ + "Cannot enable NEON optimizations in scalar quantizer if the compiler is GCC<8" +#else +#define USE_NEON +#endif +#endif + namespace { typedef ScalarQuantizer::QuantizerType QuantizerType; @@ -105,7 +114,7 @@ struct Codec8bit { } #endif -#ifdef __aarch64__ +#ifdef USE_NEON static FAISS_ALWAYS_INLINE float32x4x2_t decode_8_components(const uint8_t* code, int i) { float32_t result[8] = {}; @@ -175,7 +184,7 @@ struct Codec4bit { } #endif -#ifdef __aarch64__ +#ifdef USE_NEON static FAISS_ALWAYS_INLINE float32x4x2_t decode_8_components(const uint8_t* code, int i) { float32_t result[8] = {}; @@ -336,7 +345,7 @@ struct Codec6bit { #endif -#ifdef __aarch64__ +#ifdef USE_NEON static FAISS_ALWAYS_INLINE float32x4x2_t decode_8_components(const uint8_t* code, int i) { float32_t result[8] = {}; @@ -437,7 +446,7 @@ struct QuantizerTemplate #endif -#ifdef __aarch64__ +#ifdef USE_NEON template struct QuantizerTemplate @@ -546,7 +555,7 @@ struct QuantizerTemplate #endif -#ifdef __aarch64__ +#ifdef USE_NEON template struct QuantizerTemplate @@ -634,7 +643,7 @@ struct QuantizerFP16<8> : QuantizerFP16<1> { #endif -#ifdef __aarch64__ +#ifdef USE_NEON template <> struct QuantizerFP16<8> : QuantizerFP16<1> { @@ -714,7 +723,7 @@ struct QuantizerBF16<8> : QuantizerBF16<1> { #endif -#ifdef __aarch64__ +#ifdef USE_NEON template <> struct QuantizerBF16<8> : QuantizerBF16<1> { @@ -795,7 +804,7 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> { #endif -#ifdef __aarch64__ +#ifdef USE_NEON template <> struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> { @@ -884,7 +893,7 @@ struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> { #endif -#ifdef __aarch64__ 
+#ifdef USE_NEON template <> struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> { @@ -1242,7 +1251,7 @@ struct SimilarityL2<8> { #endif -#ifdef __aarch64__ +#ifdef USE_NEON template <> struct SimilarityL2<8> { static constexpr int simdwidth = 8; @@ -1402,7 +1411,7 @@ struct SimilarityIP<8> { }; #endif -#ifdef __aarch64__ +#ifdef USE_NEON template <> struct SimilarityIP<8> { @@ -1599,7 +1608,7 @@ struct DCTemplate : SQDistanceComputer { #endif -#ifdef __aarch64__ +#ifdef USE_NEON template struct DCTemplate : SQDistanceComputer { @@ -1815,7 +1824,7 @@ struct DistanceComputerByte : SQDistanceComputer { #endif -#ifdef __aarch64__ +#ifdef USE_NEON template struct DistanceComputerByte : SQDistanceComputer { @@ -2041,7 +2050,7 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const { if (d % 16 == 0) { return select_quantizer_1<16>(qtype, d, trained); } else -#elif defined(USE_F16C) || defined(__aarch64__) +#elif defined(USE_F16C) || defined(USE_NEON) if (d % 8 == 0) { return select_quantizer_1<8>(qtype, d, trained); } else @@ -2082,7 +2091,7 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer( qtype, d, trained); } } else -#elif defined(USE_F16C) || defined(__aarch64__) +#elif defined(USE_F16C) || defined(USE_NEON) if (d % 8 == 0) { if (metric == METRIC_L2) { return select_distance_computer>(qtype, d, trained); @@ -2457,7 +2466,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner( return sel0_InvertedListScanner<16>( mt, this, quantizer, store_pairs, sel, by_residual); } else -#elif defined(USE_F16C) || defined(__aarch64__) +#elif defined(USE_F16C) || defined(USE_NEON) if (d % 8 == 0) { return sel0_InvertedListScanner<8>( mt, this, quantizer, store_pairs, sel, by_residual);