From 3bcc87d92e80234858e0854359c3520db81d4217 Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Sat, 14 Oct 2023 20:33:04 -0400 Subject: [PATCH] Fix undefined behaviour in dot product kernels Signed-off-by: Clayton Smith --- kernels/volk/volk_16i_32fc_dot_prod_32fc.h | 120 ++++++--------- kernels/volk/volk_32fc_32f_dot_prod_32fc.h | 169 +++++++++------------ 2 files changed, 121 insertions(+), 168 deletions(-) diff --git a/kernels/volk/volk_16i_32fc_dot_prod_32fc.h b/kernels/volk/volk_16i_32fc_dot_prod_32fc.h index c49e5297..6dc672bc 100644 --- a/kernels/volk/volk_16i_32fc_dot_prod_32fc.h +++ b/kernels/volk/volk_16i_32fc_dot_prod_32fc.h @@ -147,8 +147,7 @@ static inline void volk_16i_32fc_dot_prod_32fc_u_sse(lv_32fc_t* result, unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const short* aPtr = input; const float* bPtr = (float*)taps; @@ -207,18 +206,17 @@ static inline void volk_16i_32fc_dot_prod_32fc_u_sse(lv_32fc_t* result, _mm_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); number = eighthPoints * 8; for (; number < num_points; number++) { - *realpt += ((*aPtr) * (*bPtr++)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[0] * bPtr[1]); + aPtr += 1; + bPtr += 2; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_SSE && LV_HAVE_MMX*/ @@ -235,8 +233,7 @@ static inline void volk_16i_32fc_dot_prod_32fc_u_avx2_fma(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const short* aPtr = input; const float* bPtr = (float*)taps; @@ -294,22 +291,19 @@ static inline void volk_16i_32fc_dot_prod_32fc_u_avx2_fma(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr) * (*bPtr++)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[0] * bPtr[1]); + aPtr += 1; + bPtr += 2; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_AVX2 && lV_HAVE_FMA*/ @@ -326,8 +320,7 @@ static inline void volk_16i_32fc_dot_prod_32fc_u_avx2(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const short* aPtr = input; const float* bPtr = (float*)taps; @@ -391,22 +384,19 @@ static inline void volk_16i_32fc_dot_prod_32fc_u_avx2(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr) * (*bPtr++)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[0] * bPtr[1]); + aPtr += 1; + bPtr += 2; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_AVX2*/ @@ -424,8 +414,7 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_sse(lv_32fc_t* result, unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const short* aPtr = input; const float* bPtr = (float*)taps; @@ -484,18 +473,17 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_sse(lv_32fc_t* result, _mm_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); number = eighthPoints * 8; for (; number < num_points; number++) { - *realpt += ((*aPtr) * (*bPtr++)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[0] * bPtr[1]); + aPtr += 1; + bPtr += 2; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_SSE && LV_HAVE_MMX*/ @@ -511,8 +499,7 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_avx2(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const short* aPtr = input; const float* bPtr = (float*)taps; @@ -576,22 +563,19 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_avx2(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr) * (*bPtr++)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[0] * bPtr[1]); + aPtr += 1; + bPtr += 2; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } @@ -608,8 +592,7 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_avx2_fma(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const short* aPtr = input; const float* bPtr = (float*)taps; @@ -667,22 +650,19 @@ static inline void volk_16i_32fc_dot_prod_32fc_a_avx2_fma(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr) * (*bPtr++)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[0] * bPtr[1]); + aPtr += 1; + bPtr += 2; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } diff --git a/kernels/volk/volk_32fc_32f_dot_prod_32fc.h b/kernels/volk/volk_32fc_32f_dot_prod_32fc.h index 8c74c259..a59a92c8 100644 --- a/kernels/volk/volk_32fc_32f_dot_prod_32fc.h +++ b/kernels/volk/volk_32fc_32f_dot_prod_32fc.h @@ -61,21 +61,18 @@ static inline void volk_32fc_32f_dot_prod_32fc_generic(lv_32fc_t* result, unsigned int num_points) { - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue = lv_cmake(0.0f, 0.0f); const float* aPtr = (float*)input; const float* bPtr = taps; unsigned int number = 0; - *realpt = 0; - *imagpt = 0; - for (number = 0; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_GENERIC*/ @@ -93,8 +90,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_avx2_fma(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* aPtr = (float*)input; const float* bPtr = taps; @@ -145,22 +141,19 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_avx2_fma(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_AVX2 && LV_HAVE_FMA*/ @@ -178,8 +171,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_avx(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* aPtr = (float*)input; const float* bPtr = taps; @@ -236,22 +228,19 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_avx(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_AVX*/ @@ -269,8 +258,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_sse(lv_32fc_t* result, unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* aPtr = (float*)input; const float* bPtr = taps; @@ -323,18 +311,17 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_sse(lv_32fc_t* result, _mm_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); number = eighthPoints * 8; for (; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_SSE*/ @@ -352,8 +339,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_avx2_fma(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* aPtr = (float*)input; const float* bPtr = taps; @@ -404,22 +390,19 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_avx2_fma(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_AVX2 && LV_HAVE_FMA*/ @@ -437,8 +420,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_avx(lv_32fc_t* result, unsigned int number = 0; const unsigned int sixteenthPoints = num_points / 16; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* aPtr = (float*)input; const float* bPtr = taps; @@ -495,22 +477,19 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_avx(lv_32fc_t* result, _mm256_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; - *realpt += dotProductVector[4]; - *imagpt += dotProductVector[5]; - *realpt += dotProductVector[6]; - *imagpt += dotProductVector[7]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); + returnValue += lv_cmake(dotProductVector[4], dotProductVector[5]); + returnValue += lv_cmake(dotProductVector[6], dotProductVector[7]); number = sixteenthPoints * 16; for (; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_AVX*/ @@ -527,8 +506,7 @@ volk_32fc_32f_dot_prod_32fc_neon_unroll(lv_32fc_t* __restrict result, unsigned int number; const unsigned int quarterPoints = num_points / 8; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* inputPtr = (float*)input; const float* tapsPtr = taps; float zero[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; @@ -581,19 +559,18 @@ volk_32fc_32f_dot_prod_32fc_neon_unroll(lv_32fc_t* __restrict result, // store results back to a complex (array of 2 floats) vst1q_f32(accVector_real, real_accumulator0); vst1q_f32(accVector_imag, imag_accumulator0); - *realpt = - accVector_real[0] + accVector_real[1] + accVector_real[2] + accVector_real[3]; - - *imagpt = - accVector_imag[0] + accVector_imag[1] + accVector_imag[2] + accVector_imag[3]; + returnValue = lv_cmake( + accVector_real[0] + accVector_real[1] + accVector_real[2] + accVector_real[3], + accVector_imag[0] + accVector_imag[1] + accVector_imag[2] + accVector_imag[3]); // clean up the remainder for (number = quarterPoints * 8; number < num_points; number++) { - *realpt += ((*inputPtr++) * (*tapsPtr)); - *imagpt += ((*inputPtr++) * (*tapsPtr++)); + returnValue += lv_cmake(inputPtr[0] * tapsPtr[0], inputPtr[1] * tapsPtr[0]); + inputPtr += 2; + tapsPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_NEON*/ @@ -610,8 +587,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_neon(lv_32fc_t* __restrict resu unsigned int number; const unsigned int quarterPoints = num_points / 4; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* inputPtr = (float*)input; const float* tapsPtr = taps; float zero[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; @@ -651,19 +627,18 @@ static inline void volk_32fc_32f_dot_prod_32fc_a_neon(lv_32fc_t* __restrict resu // store results back to a complex (array of 2 floats) vst1q_f32(accVector_real, real_accumulator); vst1q_f32(accVector_imag, imag_accumulator); - *realpt = - accVector_real[0] + accVector_real[1] + accVector_real[2] + accVector_real[3]; - - *imagpt = - accVector_imag[0] + accVector_imag[1] + accVector_imag[2] + accVector_imag[3]; + returnValue = lv_cmake( + accVector_real[0] + accVector_real[1] + accVector_real[2] + accVector_real[3], + accVector_imag[0] + accVector_imag[1] + accVector_imag[2] + accVector_imag[3]); // clean up the remainder for (number = quarterPoints * 4; number < num_points; number++) { - *realpt += ((*inputPtr++) * (*tapsPtr)); - *imagpt += ((*inputPtr++) * (*tapsPtr++)); + returnValue += lv_cmake(inputPtr[0] * tapsPtr[0], inputPtr[1] * tapsPtr[0]); + inputPtr += 2; + tapsPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_NEON*/ @@ -700,8 +675,7 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_sse(lv_32fc_t* result, unsigned int number = 0; const unsigned int eighthPoints = num_points / 8; - float res[2]; - float *realpt = &res[0], *imagpt = &res[1]; + lv_32fc_t returnValue; const float* aPtr = (float*)input; const float* bPtr = taps; @@ -754,18 +728,17 @@ static inline void volk_32fc_32f_dot_prod_32fc_u_sse(lv_32fc_t* result, _mm_store_ps(dotProductVector, dotProdVal0); // Store the results back into the dot product vector - *realpt = dotProductVector[0]; - *imagpt = dotProductVector[1]; - *realpt += dotProductVector[2]; - *imagpt += dotProductVector[3]; + returnValue = lv_cmake(dotProductVector[0], dotProductVector[1]); + returnValue += lv_cmake(dotProductVector[2], dotProductVector[3]); number = eighthPoints * 8; for (; number < num_points; number++) { - *realpt += ((*aPtr++) * (*bPtr)); - *imagpt += ((*aPtr++) * (*bPtr++)); + returnValue += lv_cmake(aPtr[0] * bPtr[0], aPtr[1] * bPtr[0]); + aPtr += 2; + bPtr += 1; } - *result = *(lv_32fc_t*)(&res[0]); + *result = returnValue; } #endif /*LV_HAVE_SSE*/