Remove the SIMD-specific (SSE/AVX/NEON) code paths; focus on the naive scalar implementation first

Wasted-Audio · Nov 17, 2023 · b57b589
1 parent c2b8a1b
commit b57b589
Showing 1 changed file with 0 additions and 45 deletions.
diff --git a/hvcc/generators/ir2c/static/HvSignalRFFT.c b/hvcc/generators/ir2c/static/HvSignalRFFT.c
@@ -84,27 +84,10 @@ void __hv_rfft_f(SignalRFFT *o, hv_bInf_t bIn, hv_bOutf_t bOut0, hv_bOutf_t bOut
   pffft_transform_ordered(o->setup, &bIn, bOut, work, PFFFT_FORWARD);
 
   // uninterleave result into the output buffers
-  #if HV_SIMD_SSE || HV_SIMD_AVX
-  for (int i = 0, j = 0; j < n; j += 4, i += 8) {
-    __m128 a = _mm_load_ps(bOut+i);                // LRLR
-    __m128 b = _mm_load_ps(bOut+4+i);              // LRLR
-    __m128 x = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2,0,2,0)); // LLLL
-    __m128 y = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3,1,3,1)); // RRRR
-    _mm_store_ps(bOut0+j, x);
-    _mm_store_ps(bOut1+j, y);
-  }
-  #elif HV_SIMD_NEON
-  for (int i = 0, j = 0; j < n; j += 4, i += 8) {
-    float32x4x2_t a = vld2q_f32(bOut+i); // load and uninterleave
-    vst1q_f32(bOut0+j, a.val[0]);
-    vst1q_f32(bOut1+j, a.val[1]);
-  }
-  #else // HV_SIMD_NONE
   for (int j = 0; j < n; ++j) {
     bOut0[n+j] = bOut[0+2*j];
     bOut1[n+j] = bOut[1+2*j];
   }
-  #endif
 
   __hv_store_f(inputs+h_orig, bIn); // store the new input to the inputs buffer
   hTable_setHead(&o->inputs, wrap(h_orig+HV_N_SIMD, m));
@@ -130,40 +113,12 @@ void __hv_rifft_f(SignalRFFT *o, hv_bInf_t bIn0, hv_bInf_t bIn1, hv_bOutf_t bOut
   float *const bIn = (float *)(hv_alloca(sizeof(bOut)));
 
   // interleave the input buffers into the transform buffer
-  #if HV_SIMD_AVX
-  for (int i = 0, j = 0; j < n; j += 8, i += 16) {
-    __m256 x = _mm256_load_ps(bIn00);     // LLLLLLLL
-    __m256 y = _mm256_load_ps(bIn10);     // RRRRRRRR
-    __m256 a = _mm256_unpacklo_ps(x, y);  // LRLRLRLR
-    __m256 b = _mm256_unpackhi_ps(x, y);  // LRLRLRLR
-    _mm256_store_ps(bIn+i, a);
-    _mm256_store_ps(bIn+8+i, b);
-  }
-  #elif HV_SIMD_SSE
-  for (int i = 0, j = 0; j < n4; j += 4, i += 8) {
-    __m128 x = _mm_load_ps(bIn00);     // LLLL
-    __m128 y = _mm_load_ps(bIn10);     // RRRR
-    __m128 a = _mm_unpacklo_ps(x, y);  // LRLR
-    __m128 b = _mm_unpackhi_ps(x, y);  // LRLR
-    _mm_store_ps(bIn+i, a);
-    _mm_store_ps(bIn+4+i, b);
-  }
-  #elif HV_SIMD_NEON
-  // https://community.arm.com/groups/processors/blog/2012/03/13/coding-for-neon--part-5-rearranging-vectors
-  for (int i = 0, j = 0; j < n4; j += 4, i += 8) {
-    float32x4_t x = vld1q_f32(bIn00);
-    float32x4_t y = vld1q_f32(bIn10);
-    float32x4x2_t z = {x, y};
-    vst2q_f32(bIn+i, z); // interleave and store
-  }
-  #else // HV_SIMD_NONE
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < n; ++j) {
       bIn[0+2*j] = bIn00[n+j];
       bIn[1+2*j] = bIn10[n+j];
     }
   }
-  #endif
 
   pffft_transform_ordered(o->setup, bIn, bOut, work, PFFFT_BACKWARD);