diff --git a/sse2neon.h b/sse2neon.h index be74ca1d..73cf5a91 100644 --- a/sse2neon.h +++ b/sse2neon.h @@ -3019,9 +3019,9 @@ FORCE_INLINE __m128d _mm_add_sd(__m128d a, __m128d b) #if defined(__aarch64__) || defined(_M_ARM64) return _mm_move_sd(a, _mm_add_pd(a, b)); #else - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + double a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); double c[2]; c[0] = a0 + b0; @@ -3301,13 +3301,13 @@ FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b) return _mm_move_sd(a, _mm_cmpge_pd(a, b)); #else // expand "_mm_cmpge_pd()" to reduce unnecessary operations - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); uint64_t d[2]; d[0] = a0 >= b0 ? ~UINT64_C(0) : UINT64_C(0); - d[1] = a1.u64; + d[1] = a1; return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif @@ -3371,13 +3371,13 @@ FORCE_INLINE __m128d _mm_cmpgt_sd(__m128d a, __m128d b) return _mm_move_sd(a, _mm_cmpgt_pd(a, b)); #else // expand "_mm_cmpge_pd()" to reduce unnecessary operations - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); uint64_t d[2]; d[0] = a0 > b0 ? 
~UINT64_C(0) : UINT64_C(0); - d[1] = a1.u64; + d[1] = a1; return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif @@ -3416,11 +3416,11 @@ FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b) // expand "_mm_cmpge_pd()" to reduce unnecessary operations - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); uint64_t d[2]; d[0] = a0 <= b0 ? ~UINT64_C(0) : UINT64_C(0); - d[1] = a1.u64; + d[1] = a1; return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif @@ -3486,13 +3486,13 @@ FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b) #if defined(__aarch64__) || defined(_M_ARM64) return _mm_move_sd(a, _mm_cmplt_pd(a, b)); #else - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); uint64_t d[2]; d[0] = a0 < b0 ? ~UINT64_C(0) : UINT64_C(0); - d[1] = a1.u64; + d[1] = a1; return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif @@ -3682,13 +3682,13 @@ FORCE_INLINE __m128d _mm_cmpord_sd(__m128d a, __m128d b) #if defined(__aarch64__) || defined(_M_ARM64) return _mm_move_sd(a, _mm_cmpord_pd(a, b)); #else - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); uint64_t d[2]; d[0] = (a0 == a0 && b0 == b0) ? 
~UINT64_C(0) : UINT64_C(0); - d[1] = a1.u64; + d[1] = a1; return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif @@ -3729,13 +3729,13 @@ FORCE_INLINE __m128d _mm_cmpunord_sd(__m128d a, __m128d b) #if defined(__aarch64__) || defined(_M_ARM64) return _mm_move_sd(a, _mm_cmpunord_pd(a, b)); #else - double a0, a1, b0; + double a0, b0; a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0)); - a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1)); + uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1); b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0)); uint64_t d[2]; d[0] = (a0 == a0 && b0 == b0) ? UINT64_C(0) : ~UINT64_C(0); - d[1] = a1.u64; + d[1] = a1; return vreinterpretq_m128d_u64(vld1q_u64(d)); #endif