Skip to content

Commit

Permalink
convert 32f->32i: fix compiler warnings about loss of int precision
Browse files Browse the repository at this point in the history
Compilers warn that converting INT_MAX to float yields INT_MAX+1 as the
resulting float value.

That's absolutely not surprising: bits(single precision float mantissa)
< bits(int), so an int of value 2^N - 1 must inherently be
non-representable for N close to bits(int), and INT_MAX = 2³¹-1 (for a
32-bit int).

So, the rounding up that I'm doing explicitly here already happens
implicitly on every machine I could think of; silencing this compiler
warning therefore does not change the resulting behavior.

Finally makes VOLK a zero-warning compile on my clang 16 x86_64 AVX2
machine.

Signed-off-by: Marcus Müller <[email protected]>
  • Loading branch information
marcusmueller committed Oct 20, 2023
1 parent a26a1b8 commit ea6fcca
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions kernels/volk/volk_32f_s32f_convert_32i.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ static inline void volk_32f_s32f_convert_32i_u_avx(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;

float min_val = INT_MIN;
float max_val = INT_MAX;
float max_val = (uint32_t)INT_MAX + 1;
float r;

__m256 vScalar = _mm256_set1_ps(scalar);
Expand Down Expand Up @@ -127,7 +127,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;

float min_val = INT_MIN;
float max_val = INT_MAX;
float max_val = (uint32_t)INT_MAX + 1;
float r;

__m128 vScalar = _mm_set_ps1(scalar);
Expand Down Expand Up @@ -178,7 +178,7 @@ static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;

float min_val = INT_MIN;
float max_val = INT_MAX;
float max_val = (uint32_t)INT_MAX + 1;
float r;

__m128 vScalar = _mm_set_ps1(scalar);
Expand Down Expand Up @@ -225,7 +225,7 @@ static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;
const float* inputVectorPtr = inputVector;
const float min_val = (float)INT_MIN;
const float max_val = (float)INT_MAX;
const float max_val = (float)((uint32_t)INT_MAX + 1);

for (unsigned int number = 0; number < num_points; number++) {
const float r = *inputVectorPtr++ * scalar;
Expand Down Expand Up @@ -267,7 +267,7 @@ static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;

float min_val = INT_MIN;
float max_val = INT_MAX;
float max_val = (uint32_t)INT_MAX + 1;
float r;

__m256 vScalar = _mm256_set1_ps(scalar);
Expand Down Expand Up @@ -318,7 +318,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;

float min_val = INT_MIN;
float max_val = INT_MAX;
float max_val = (uint32_t)INT_MAX + 1;
float r;

__m128 vScalar = _mm_set_ps1(scalar);
Expand Down Expand Up @@ -369,7 +369,7 @@ static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector,
int32_t* outputVectorPtr = outputVector;

float min_val = INT_MIN;
float max_val = INT_MAX;
float max_val = (uint32_t)INT_MAX + 1;
float r;

__m128 vScalar = _mm_set_ps1(scalar);
Expand Down

0 comments on commit ea6fcca

Please sign in to comment.