Skip to content

Commit

Permalink
wip
Browse files: browse the repository at this point in the history
  • Loading branch information
howjmay committed Jul 9, 2024
1 parent ad3e318 commit bb5cbf0
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions sse2neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -3019,9 +3019,9 @@ FORCE_INLINE __m128d _mm_add_sd(__m128d a, __m128d b)
#if defined(__aarch64__) || defined(_M_ARM64)
return _mm_move_sd(a, _mm_add_pd(a, b));
#else
double a0, a1, b0;
double a0, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
double c[2];
c[0] = a0 + b0;
Expand Down Expand Up @@ -3301,13 +3301,13 @@ FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b)
return _mm_move_sd(a, _mm_cmpge_pd(a, b));
#else
// expand "_mm_cmpge_pd()" to reduce unnecessary operations
double a0, a1, b0;
double a0, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1);
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
uint64_t d[2];
d[0] = a0 >= b0 ? ~UINT64_C(0) : UINT64_C(0);
d[1] = a1.u64;
d[1] = a1;

return vreinterpretq_m128d_u64(vld1q_u64(d));
#endif
Expand Down Expand Up @@ -3371,13 +3371,13 @@ FORCE_INLINE __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
return _mm_move_sd(a, _mm_cmpgt_pd(a, b));
#else
// expand "_mm_cmpgt_pd()" to reduce unnecessary operations
double a0, a1, b0;
double a0, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1);
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
uint64_t d[2];
d[0] = a0 > b0 ? ~UINT64_C(0) : UINT64_C(0);
d[1] = a1.u64;
d[1] = a1;

return vreinterpretq_m128d_u64(vld1q_u64(d));
#endif
Expand Down Expand Up @@ -3416,11 +3416,11 @@ FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b)
// expand "_mm_cmple_pd()" to reduce unnecessary operations
double a0, a1, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1);
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
uint64_t d[2];
d[0] = a0 <= b0 ? ~UINT64_C(0) : UINT64_C(0);
d[1] = a1.u64;
d[1] = a1;

return vreinterpretq_m128d_u64(vld1q_u64(d));
#endif
Expand Down Expand Up @@ -3486,13 +3486,13 @@ FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b)
#if defined(__aarch64__) || defined(_M_ARM64)
return _mm_move_sd(a, _mm_cmplt_pd(a, b));
#else
double a0, a1, b0;
double a0, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1);
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
uint64_t d[2];
d[0] = a0 < b0 ? ~UINT64_C(0) : UINT64_C(0);
d[1] = a1.u64;
d[1] = a1;

return vreinterpretq_m128d_u64(vld1q_u64(d));
#endif
Expand Down Expand Up @@ -3682,13 +3682,13 @@ FORCE_INLINE __m128d _mm_cmpord_sd(__m128d a, __m128d b)
#if defined(__aarch64__) || defined(_M_ARM64)
return _mm_move_sd(a, _mm_cmpord_pd(a, b));
#else
double a0, a1, b0;
double a0, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1);
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
uint64_t d[2];
d[0] = (a0 == a0 && b0 == b0) ? ~UINT64_C(0) : UINT64_C(0);
d[1] = a1.u64;
d[1] = a1;

return vreinterpretq_m128d_u64(vld1q_u64(d));
#endif
Expand Down Expand Up @@ -3729,13 +3729,13 @@ FORCE_INLINE __m128d _mm_cmpunord_sd(__m128d a, __m128d b)
#if defined(__aarch64__) || defined(_M_ARM64)
return _mm_move_sd(a, _mm_cmpunord_pd(a, b));
#else
double a0, a1, b0;
double a0, b0;
a0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0));
a1 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1));
uint64_t a1 = vgetq_lane_u64(vreinterpretq_u64_m128d(a), 1);
b0 = recast_f64(vgetq_lane_u64(vreinterpretq_u64_m128d(b), 0));
uint64_t d[2];
d[0] = (a0 == a0 && b0 == b0) ? UINT64_C(0) : ~UINT64_C(0);
d[1] = a1.u64;
d[1] = a1;

return vreinterpretq_m128d_u64(vld1q_u64(d));
#endif
Expand Down

0 comments on commit bb5cbf0

Please sign in to comment.