diff --git a/crates/std_float/src/libm32.rs b/crates/std_float/src/libm32.rs index 21ba223ed05..c5d195089e4 100644 --- a/crates/std_float/src/libm32.rs +++ b/crates/std_float/src/libm32.rs @@ -16,16 +16,19 @@ where fn asin(self) -> Self { let PI_BY_2 = Self::splat(1.57079632679489661923); let arg = self; - let LIM: Self = Self::splat(0.70710678118654752440); - let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2); - let s: Self = - ((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0)); - let x: Self = + let LIM = Self::splat(0.70710678118654752440); + let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2); + let s = ((arg).lanes_lt(Self::splat(0.0))).select(-Self::splat(1.0), Self::splat(1.0)); + let x = ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt()); - let y: Self = (Self::splat(0.11644821f32)) - .mul_add(x * x, Self::splat(0.04343228f32)) - .mul_add(x * x, Self::splat(0.17078044f32)) - .mul_add(x * x, Self::splat(0.99991643f32)) + let y = (Self::splat(0.12778643f32)) + .mul_add(x * x, -Self::splat(0.12145509f32)) + .mul_add(x * x, Self::splat(0.09684546f32)) + .mul_add(x * x, Self::splat(0.009571692f32)) + .mul_add(x * x, Self::splat(0.047712374f32)) + .mul_add(x * x, Self::splat(0.07478066f32)) + .mul_add(x * x, Self::splat(0.1666726f32)) + .mul_add(x * x, Self::splat(1f32)) * x; ((arg * arg).lanes_lt(LIM * LIM)).select(y, c - y * s) } @@ -34,20 +37,19 @@ where let PI_BY_2 = Self::splat(1.57079632679489661923); let PI = Self::splat(3.14159265358979323846); let arg = self; - let LIM: Self = Self::splat(0.9); - let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0)); - let s: Self = - ((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0)); - let x: Self = + let LIM = Self::splat(0.70710678118654752440); + let c = ((arg).lanes_lt(Self::splat(0.0))).select(PI, Self::splat(0.0)); + let s = 
((arg).lanes_lt(Self::splat(0.0))).select(Self::splat(1.0), -Self::splat(1.0)); + let x = ((arg * arg).lanes_lt(LIM * LIM)).select(arg, (Self::splat(1.0) - arg * arg).sqrt()); - let y: Self = (Self::splat(1.3740137f32)) - .mul_add(x * x, -Self::splat(3.1993167f32)) - .mul_add(x * x, Self::splat(3.103398f32)) - .mul_add(x * x, -Self::splat(1.4533828f32)) - .mul_add(x * x, Self::splat(0.41395915f32)) - .mul_add(x * x, Self::splat(0.03113007f32)) - .mul_add(x * x, Self::splat(0.16861732f32)) - .mul_add(x * x, Self::splat(0.99998593f32)) + let y = (Self::splat(0.12778643f32)) + .mul_add(x * x, -Self::splat(0.12145509f32)) + .mul_add(x * x, Self::splat(0.09684546f32)) + .mul_add(x * x, Self::splat(0.009571692f32)) + .mul_add(x * x, Self::splat(0.047712374f32)) + .mul_add(x * x, Self::splat(0.07478066f32)) + .mul_add(x * x, Self::splat(0.1666726f32)) + .mul_add(x * x, Self::splat(1f32)) * x; ((arg * arg).lanes_lt(LIM * LIM)).select(PI_BY_2 - y, c - y * s) } @@ -55,10 +57,10 @@ where fn atan(self) -> Self { let PI_BY_2 = Self::splat(1.57079632679489661923); let arg = self; - let LIM: Self = Self::splat(1.0); - let c: Self = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2); - let x: Self = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip()); - let y: Self = (-Self::splat(0.0039602574f32)) + let LIM = Self::splat(1.0); + let c = ((arg).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2); + let x = ((arg.abs()).lanes_lt(LIM)).select(arg, arg.recip()); + let y = (-Self::splat(0.0039602574f32)) .mul_add(x * x, Self::splat(0.021659138f32)) .mul_add(x * x, -Self::splat(0.05587457f32)) .mul_add(x * x, Self::splat(0.09664151f32)) @@ -74,16 +76,16 @@ where let PI_BY_2 = Self::splat(1.57079632679489661923); let PI = Self::splat(3.14159265358979323846); let y = self; - let offset180: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI); - let x1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-x, x); - let y1: Self = ((x).lanes_lt(Self::splat(0.0))).select(-y, y); - 
let offset1: Self = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0)); - let offset90: Self = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2); - let x2: Self = ((y1.abs()).lanes_gt(x1)).select(y1, x1); - let y2: Self = ((y1.abs()).lanes_gt(x1)).select(-x1, y1); - let offset2: Self = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1); - let x3: Self = y2 / x2; - let y3: Self = (-Self::splat(0.0039602574f32)) + let offset180 = ((y).lanes_lt(Self::splat(0.0))).select(-PI, PI); + let x1 = ((x).lanes_lt(Self::splat(0.0))).select(-x, x); + let y1 = ((x).lanes_lt(Self::splat(0.0))).select(-y, y); + let offset1 = ((x).lanes_lt(Self::splat(0.0))).select(offset180, Self::splat(0.0)); + let offset90 = ((y).lanes_lt(Self::splat(0.0))).select(-PI_BY_2, PI_BY_2); + let x2 = ((y1.abs()).lanes_gt(x1)).select(y1, x1); + let y2 = ((y1.abs()).lanes_gt(x1)).select(-x1, y1); + let offset2 = ((y1.abs()).lanes_gt(x1)).select(offset1 + offset90, offset1); + let x3 = y2 / x2; + let y3 = (-Self::splat(0.0039602574f32)) .mul_add(x3 * x3, Self::splat(0.021659138f32)) .mul_add(x3 * x3, -Self::splat(0.05587457f32)) .mul_add(x3 * x3, Self::splat(0.09664151f32)) @@ -96,12 +98,12 @@ where } #[inline] fn exp2(self) -> Self { + let EXP2_SCALE = Self::splat(8388608.0f32); + let EXP2_ONE = Self::splat(1065353216.0f32); let arg = self; - let r: Self = arg.round(); - let mul: Self = Self::from_bits(unsafe { - (r.mul_add(Self::splat(8388608.0f32), Self::splat(1065353216.0f32))).to_uint_unchecked() - }); - let x: Self = arg - r; + let r = arg.round(); + let mul = Self::from_bits((r.mul_add(EXP2_SCALE, EXP2_ONE)).cast::<u32>()); + let x = arg - r; (Self::splat(0.000015310081f32)) .mul_add(x, Self::splat(0.0001547802f32)) .mul_add(x, Self::splat(0.0013333454f32)) @@ -119,11 +121,39 @@ where (arg * LOG2_E).exp2() } #[inline] + fn log2(self) -> Self { + let ONE_BITS = Self::UintType::splat(0x3f800000_u32); + let ONE_MASK = Self::UintType::splat(0x007fffff_u32); + let 
LOG2_OFFSET = Self::IntType::splat(127_i32); + let LOG2_SHIFT = Self::IntType::splat(23_i32); + let arg = self; + let arg_bits = arg.to_bits(); + let exponent = (arg_bits.cast::<i32>() >> LOG2_SHIFT) - LOG2_OFFSET; + let x = Self::from_bits((arg_bits & ONE_MASK) | ONE_BITS) - Self::splat(1.5); + let y = (Self::splat(0.00033940058f32)) + .mul_add(x, -Self::splat(0.0005435155f32)) + .mul_add(x, Self::splat(0.00051382656f32)) + .mul_add(x, -Self::splat(0.0008369385f32)) + .mul_add(x, Self::splat(0.0015296092f32)) + .mul_add(x, -Self::splat(0.0025230509f32)) + .mul_add(x, Self::splat(0.0041680275f32)) + .mul_add(x, -Self::splat(0.007033716f32)) + .mul_add(x, Self::splat(0.012062632f32)) + .mul_add(x, -Self::splat(0.021109587f32)) + .mul_add(x, Self::splat(0.037996903f32)) + .mul_add(x, -Self::splat(0.071244195f32)) + .mul_add(x, Self::splat(0.1424884f32)) + .mul_add(x, -Self::splat(0.3205989f32)) + .mul_add(x, Self::splat(0.9617967f32)) + .mul_add(x, Self::splat(0.5849625f32)); + y + (exponent.cast::<f32>()) + } + #[inline] fn sin(self) -> Self { let RECIP_2PI = Self::splat(0.15915494309189533577); let arg = self; - let scaled: Self = arg * RECIP_2PI; - let x: Self = scaled - scaled.round(); + let scaled = arg * RECIP_2PI; + let x = scaled - scaled.round(); (-Self::splat(12.26886f32)) .mul_add(x * x, Self::splat(41.21624f32)) .mul_add(x * x, -Self::splat(76.58672f32)) @@ -136,8 +166,8 @@ where fn cos(self) -> Self { let RECIP_2PI = Self::splat(0.15915494309189533577); let arg = self; - let scaled: Self = arg * RECIP_2PI; - let x: Self = scaled - scaled.round(); + let scaled = arg * RECIP_2PI; + let x = scaled - scaled.round(); (Self::splat(6.5286584f32)) .mul_add(x * x, -Self::splat(25.973276f32)) .mul_add(x * x, Self::splat(60.17118f32)) @@ -150,10 +180,10 @@ where fn tan(self) -> Self { let RECIP_PI = Self::splat(0.31830988618379067154); let arg = self; - let scaled: Self = arg * RECIP_PI; - let x: Self = scaled - scaled.round(); - let recip: Self = Self::splat(1.0) / (x * x - 
Self::splat(0.25)); - let y: Self = (Self::splat(0.014397301f32)) + let scaled = arg * RECIP_PI; + let x = scaled - scaled.round(); + let recip = Self::splat(1.0) / (x * x - Self::splat(0.25)); + let y = (Self::splat(0.014397301f32)) .mul_add(x * x, Self::splat(0.021017345f32)) .mul_add(x * x, Self::splat(0.05285888f32)) .mul_add(x * x, Self::splat(0.13475448f32))