From 426a03c530d326acf15ca2586c6f4a7f13446259 Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Sat, 14 Oct 2023 10:48:10 -0400 Subject: [PATCH] Fix buffer overflow in volk_32fc_x2_square_dist_32f_a_sse3 Signed-off-by: Clayton Smith --- kernels/volk/volk_32fc_x2_square_dist_32f.h | 26 ++++----------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/kernels/volk/volk_32fc_x2_square_dist_32f.h b/kernels/volk/volk_32fc_x2_square_dist_32f.h index 558e25c91..4a93d5bf9 100644 --- a/kernels/volk/volk_32fc_x2_square_dist_32f.h +++ b/kernels/volk/volk_32fc_x2_square_dist_32f.h @@ -181,41 +181,25 @@ static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, xmm1 = _mm_setzero_ps(); xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0); - xmm2 = _mm_load_ps((float*)&points[0]); xmm1 = _mm_movelh_ps(xmm1, xmm1); - xmm3 = _mm_load_ps((float*)&points[2]); - for (; i < bound - 1; ++i) { + for (; i < bound; ++i) { + xmm2 = _mm_load_ps((float*)&points[0]); xmm4 = _mm_sub_ps(xmm1, xmm2); + xmm3 = _mm_load_ps((float*)&points[2]); xmm5 = _mm_sub_ps(xmm1, xmm3); - points += 4; + xmm6 = _mm_mul_ps(xmm4, xmm4); xmm7 = _mm_mul_ps(xmm5, xmm5); - xmm2 = _mm_load_ps((float*)&points[0]); - xmm4 = _mm_hadd_ps(xmm6, xmm7); - xmm3 = _mm_load_ps((float*)&points[2]); - _mm_store_ps(target, xmm4); + points += 4; target += 4; } - xmm4 = _mm_sub_ps(xmm1, xmm2); - xmm5 = _mm_sub_ps(xmm1, xmm3); - - points += 4; - xmm6 = _mm_mul_ps(xmm4, xmm4); - xmm7 = _mm_mul_ps(xmm5, xmm5); - - xmm4 = _mm_hadd_ps(xmm6, xmm7); - - _mm_store_ps(target, xmm4); - - target += 4; - if (num_bytes >> 4 & 1) { xmm2 = _mm_load_ps((float*)&points[0]);