Disable the NEON optimisations on gcc < 8 (facebookresearch#3869)

Summary: GCC 7 doesn't support all of the necessary NEON intrinsics, which is really a shame. This means that, on aarch64, GCC 7 cannot compile faiss with NEON intrinsics, so we should avoid using them there. This is similar to facebookresearch#3860 (build issues on GCC 7, which I need). This one is a bit uglier, since GCC 7 does support NEON, just not all of the intrinsics. Pull Request resolved: facebookresearch#3869 Reviewed By: asadoughi Differential Revision: D63081962 Pulled By: junjieqi fbshipit-source-id: 69827cd447dd405b3ef70d651996f9ad00b6213e
shuchu · Sep 26, 2024 · 1ee7561 · 1ee7561
1 parent d8aec60
commit 1ee7561
Showing 1 changed file with 25 additions and 16 deletions.
diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
@@ -55,6 +55,15 @@ namespace faiss {
 #endif
 #endif
 
+// Clang also defines __GNUC__ (as 4); only real GCC < 8 lacks the intrinsics.
+#if defined(__aarch64__)
+#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
+#warning "Cannot enable NEON optimizations in scalar quantizer if the compiler is GCC<8"
+#else
+#define USE_NEON
+#endif
+#endif
+
 namespace {
 
 typedef ScalarQuantizer::QuantizerType QuantizerType;
@@ -105,7 +114,7 @@ struct Codec8bit {
     }
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
     static FAISS_ALWAYS_INLINE float32x4x2_t
     decode_8_components(const uint8_t* code, int i) {
         float32_t result[8] = {};
@@ -175,7 +184,7 @@ struct Codec4bit {
     }
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
     static FAISS_ALWAYS_INLINE float32x4x2_t
     decode_8_components(const uint8_t* code, int i) {
         float32_t result[8] = {};
@@ -336,7 +345,7 @@ struct Codec6bit {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
     static FAISS_ALWAYS_INLINE float32x4x2_t
     decode_8_components(const uint8_t* code, int i) {
         float32_t result[8] = {};
@@ -437,7 +446,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <class Codec>
 struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
@@ -546,7 +555,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <class Codec>
 struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
@@ -634,7 +643,7 @@ struct QuantizerFP16<8> : QuantizerFP16<1> {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <>
 struct QuantizerFP16<8> : QuantizerFP16<1> {
@@ -714,7 +723,7 @@ struct QuantizerBF16<8> : QuantizerBF16<1> {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <>
 struct QuantizerBF16<8> : QuantizerBF16<1> {
@@ -795,7 +804,7 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <>
 struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
@@ -884,7 +893,7 @@ struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <>
 struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
@@ -1242,7 +1251,7 @@ struct SimilarityL2<8> {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 template <>
 struct SimilarityL2<8> {
     static constexpr int simdwidth = 8;
@@ -1402,7 +1411,7 @@ struct SimilarityIP<8> {
 };
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <>
 struct SimilarityIP<8> {
@@ -1599,7 +1608,7 @@ struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <class Quantizer, class Similarity>
 struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
@@ -1815,7 +1824,7 @@ struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
 
 #endif
 
-#ifdef __aarch64__
+#ifdef USE_NEON
 
 template <class Similarity>
 struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
@@ -2041,7 +2050,7 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
     if (d % 16 == 0) {
         return select_quantizer_1<16>(qtype, d, trained);
     } else
-#elif defined(USE_F16C) || defined(__aarch64__)
+#elif defined(USE_F16C) || defined(USE_NEON)
     if (d % 8 == 0) {
         return select_quantizer_1<8>(qtype, d, trained);
     } else
@@ -2082,7 +2091,7 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer(
                     qtype, d, trained);
         }
     } else
-#elif defined(USE_F16C) || defined(__aarch64__)
+#elif defined(USE_F16C) || defined(USE_NEON)
     if (d % 8 == 0) {
         if (metric == METRIC_L2) {
             return select_distance_computer<SimilarityL2<8>>(qtype, d, trained);
@@ -2457,7 +2466,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
         return sel0_InvertedListScanner<16>(
                 mt, this, quantizer, store_pairs, sel, by_residual);
     } else
-#elif defined(USE_F16C) || defined(__aarch64__)
+#elif defined(USE_F16C) || defined(USE_NEON)
     if (d % 8 == 0) {
         return sel0_InvertedListScanner<8>(
                 mt, this, quantizer, store_pairs, sel, by_residual);