Added a formatter for generic radix floats.

- Added more tests for binary floats. - Added support for mandatory signs in integer writers. - Added support for generic radix floats. - Added extensive unittests for generic radix floats. - Implemented the write float ToLexical API. - Added the WriteFloat trait. Disabled some QuickCheck tests due to the following bug: BurntSushi/quickcheck#295
Alexhuszagh · Aug 9, 2021 · f9cf171 · f9cf171
1 parent 369eed2
commit f9cf171
Show file tree

Hide file tree

Showing 26 changed files with 1,451 additions and 114 deletions.
diff --git a/lexical-asm/Cargo.toml b/lexical-asm/Cargo.toml
@@ -56,6 +56,7 @@ format = [
     "lexical-util/format",
     "lexical-parse-integer/format",
     "lexical-parse-float/format",
+    "lexical-write-integer/format",
     "lexical-write-float/format"
 ]
 compact = [

diff --git a/lexical-benchmark/write-integer/Cargo.toml b/lexical-benchmark/write-integer/Cargo.toml
@@ -28,6 +28,7 @@ default = ["std"]
 std = ["lexical-util/std", "lexical-write-integer/std"]
 radix = ["lexical-util/radix", "lexical-write-integer/radix"]
 power-of-two = ["lexical-util/power-of-two", "lexical-write-integer/power-of-two"]
+format = ["lexical-util/format", "lexical-write-integer/format"]
 
 [[bench]]
 name = "json"

diff --git a/lexical-core/Cargo.toml b/lexical-core/Cargo.toml
@@ -76,6 +76,7 @@ radix = [
 format = [
     "lexical-parse-integer/format",
     "lexical-parse-float/format",
+    "lexical-write-integer/format",
     "lexical-write-float/format"
 ]
 # Reduce code size at the cost of performance.

diff --git a/lexical-size/Cargo.toml.in b/lexical-size/Cargo.toml.in
@@ -58,6 +58,7 @@ format = [
     "lexical-util/format",
     "lexical-parse-integer/format",
     "lexical-parse-float/format",
+    "lexical-write-integer/format",
     "lexical-write-float/format"
 ]
 compact = [

diff --git a/lexical-util/src/algorithm.rs b/lexical-util/src/algorithm.rs
@@ -30,6 +30,13 @@ pub fn rtrim_char_count(slc: &[u8], c: u8) -> usize {
     slc.iter().rev().take_while(|&&si| si == c).count()
 }
 
+/// Count the number of leading characters equal to a given value.
+#[inline]
+#[cfg(feature = "write")]
+pub fn ltrim_char_count(slc: &[u8], c: u8) -> usize {
+    slc.iter().take_while(|&&si| si == c).count()
+}
+
 /// Trim character from the end (right-side) of a slice.
 #[inline]
 #[cfg(feature = "write")]

diff --git a/lexical-util/src/num.rs b/lexical-util/src/num.rs
@@ -491,6 +491,8 @@ pub trait Float: Number + ops::Neg<Output = Self> {
     // Re-export the to and from bits methods.
     fn to_bits(self) -> Self::Unsigned;
     fn from_bits(u: Self::Unsigned) -> Self;
+    fn ln(self) -> Self;
+    fn floor(self) -> Self;
     fn is_sign_positive(self) -> bool;
     fn is_sign_negative(self) -> bool;
 
@@ -744,6 +746,16 @@ impl Float for f32 {
         f32::from_bits(u)
     }
 
+    #[inline]
+    fn ln(self) -> f32 {
+        f32::ln(self)
+    }
+
+    #[inline]
+    fn floor(self) -> f32 {
+        f32::floor(self)
+    }
+
     #[inline]
     fn is_sign_positive(self) -> bool {
         f32::is_sign_positive(self)
@@ -782,6 +794,16 @@ impl Float for f64 {
         f64::from_bits(u)
     }
 
+    #[inline]
+    fn ln(self) -> f64 {
+        f64::ln(self)
+    }
+
+    #[inline]
+    fn floor(self) -> f64 {
+        f64::floor(self)
+    }
+
     #[inline]
     fn is_sign_positive(self) -> bool {
         f64::is_sign_positive(self)

diff --git a/lexical-util/tests/algorithm_tests.rs b/lexical-util/tests/algorithm_tests.rs
@@ -11,6 +11,24 @@ fn copy_to_dest_test() {
     assert_eq!(&dst[..5], src);
 }
 
+#[test]
+#[cfg(feature = "write")]
+fn ltrim_char_test() {
+    let w = "0001";
+    let x = "1010";
+    let y = "1.00";
+    let z = "1e05";
+
+    assert_eq!(algorithm::ltrim_char_count(w.as_bytes(), b'0'), 3);
+    assert_eq!(algorithm::ltrim_char_count(x.as_bytes(), b'0'), 0);
+    assert_eq!(algorithm::ltrim_char_count(x.as_bytes(), b'1'), 1);
+    assert_eq!(algorithm::ltrim_char_count(y.as_bytes(), b'0'), 0);
+    assert_eq!(algorithm::ltrim_char_count(y.as_bytes(), b'1'), 1);
+    assert_eq!(algorithm::ltrim_char_count(z.as_bytes(), b'0'), 0);
+    assert_eq!(algorithm::ltrim_char_count(z.as_bytes(), b'1'), 1);
+    assert_eq!(algorithm::ltrim_char_count(z.as_bytes(), b'5'), 0);
+}
+
 #[test]
 #[cfg(feature = "write")]
 fn rtrim_char_test() {

diff --git a/lexical-util/tests/num_tests.rs b/lexical-util/tests/num_tests.rs
@@ -197,6 +197,8 @@ fn check_float<T: num::Float>(mut x: T) {
     assert_eq!(T::from_bits(x.to_bits()), x);
     let _ = x.is_sign_positive();
     let _ = x.is_sign_negative();
+    let _ = x.ln();
+    let _ = x.floor();
 
     // Check properties
     let _ = x.to_bits() & T::SIGN_MASK;

diff --git a/lexical-write-float/Cargo.toml b/lexical-write-float/Cargo.toml
@@ -31,6 +31,7 @@ features = []
 static_assertions = "1"
 
 [dev-dependencies]
+approx = "0.5.0"
 quickcheck = "1.0.3"
 proptest = "0.10.1"
 fraction = "0.8.0"

diff --git a/lexical-write-float/src/algorithm.rs b/lexical-write-float/src/algorithm.rs
@@ -1 +1,23 @@
+//! Implementation of the Dragonbox algorithm.
+
+#![cfg(not(feature = "compact"))]
 #![doc(hidden)]
+
+use crate::options::Options;
+use lexical_util::num::Float;
+
+// TODO(ahuszagh) Implement...
+
+/// Optimized float-to-string algorithm for decimal strings.
+/// # Safety
+///
+/// Safe as long as the float isn't special (NaN or Infinity), and `bytes`
+/// is large enough to hold the significant digits.
+#[allow(unused)] // TODO(ahuszagh) Remove...
+pub unsafe fn write_float<F: Float, const FORMAT: u128>(
+    float: F,
+    bytes: &mut [u8],
+    options: &Options,
+) -> usize {
+    todo!();
+}
diff --git a/lexical-write-float/src/api.rs b/lexical-write-float/src/api.rs
@@ -0,0 +1,144 @@
+//! Implements the algorithm in terms of the lexical API.
+
+#![doc(hidden)]
+
+use crate::options::Options;
+use crate::write::WriteFloat;
+use lexical_util::format::{NumberFormat, STANDARD};
+use lexical_util::{to_lexical, to_lexical_with_options};
+
+/// Check if a buffer is sufficiently large.
+fn check_buffer<const FORMAT: u128>(len: usize, options: &Options) -> bool {
+    let format = NumberFormat::<{ FORMAT }> {};
+
+    // At least 2 for the decimal point and sign.
+    let mut count: usize = 2;
+
+    // First need to calculate maximum number of digits from leading or
+    // trailing zeros, IE, the exponent break.
+    if !format.no_exponent_notation() {
+        let min_exp = options.negative_exponent_break().map_or(-5, |x| x.get());
+        let max_exp = options.positive_exponent_break().map_or(9, |x| x.get());
+        let exp = min_exp.abs().max(max_exp) as usize;
+        if cfg!(feature = "power-of-two") && exp < 13 {
+            // 11 for the exponent digits in binary, 1 for the sign, 1 for the symbol
+            count += 13;
+        } else if exp < 5 {
+            // 3 for the exponent digits in decimal, 1 for the sign, 1 for the symbol
+            count += 5;
+        } else {
+            // More leading or trailing zeros than the exponent digits.
+            count += exp;
+        }
+    } else if cfg!(feature = "power-of-two") {
+        // Min is 2^-1075.
+        count += 1075;
+    } else {
+        // Min is 10^-324.
+        count += 324;
+    }
+
+    // Now add the number of significant digits.
+    let radix = format.radix();
+    let formatted_digits = if radix == 10 {
+        // Really should be 18, but add some extra to be cautious.
+        28
+    } else {
+        //  BINARY:
+        //      53 significant mantissa bits for binary, add a few extra.
+        //  RADIX:
+        //      Our limit is `delta`. The maximum relative delta is 2.22e-16,
+        //      around 1. If we have values below 1, our delta is smaller, but
+        //      the max fraction is also a lot smaller. Above, and our fraction
+        //      must be < 1.0, so our delta is less significant. Therefore,
+        //      if our fraction is just less than 1, for a float near 2.0,
+        //      we can do at **maximum** 33 digits (for base 3). Let's just
+        //      assume it's a lot higher, and go with 64.
+        64
+    };
+    let digits = if let Some(max_digits) = options.max_significant_digits() {
+        formatted_digits.min(max_digits.get())
+    } else {
+        formatted_digits
+    };
+    let digits = if let Some(min_digits) = options.min_significant_digits() {
+        digits.max(min_digits.get())
+    } else {
+        formatted_digits
+    };
+    count += digits;
+
+    len > count
+}
+
+// API
+
+const DEFAULT_OPTIONS: Options = Options::new();
+
+// Implement ToLexical for numeric type.
+macro_rules! float_to_lexical {
+    ($($t:tt $(, #[$meta:meta])? ; )*) => ($(
+        impl ToLexical for $t {
+            $(#[$meta:meta])?
+            unsafe fn to_lexical_unchecked<'a>(self, bytes: &'a mut [u8])
+                -> &'a mut [u8]
+            {
+                debug_assert!(check_buffer::<{ STANDARD }>(bytes.len(), &DEFAULT_OPTIONS));
+                // SAFETY: safe if `check_buffer::<STANDARD>(bytes.len(), &options)`.
+                unsafe {
+                    let len = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS);
+                    &mut index_unchecked_mut!(bytes[..len])
+                }
+            }
+
+            $(#[$meta:meta])?
+            fn to_lexical<'a>(self, bytes: &'a mut [u8])
+                -> &'a mut [u8]
+            {
+                assert!(check_buffer::<{ STANDARD }>(bytes.len(), &DEFAULT_OPTIONS));
+                // SAFETY: safe since `check_buffer::<STANDARD>(bytes.len(), &options)`.
+                unsafe { self.to_lexical_unchecked(bytes) }
+            }
+        }
+
+        impl ToLexicalWithOptions for $t {
+            type Options = Options;
+
+            $(#[$meta:meta])?
+            unsafe fn to_lexical_with_options_unchecked<'a, const FORMAT: u128>(
+                self,
+                bytes: &'a mut [u8],
+                options: &Self::Options,
+            ) -> &'a mut [u8]
+            {
+                assert!(NumberFormat::<{ FORMAT }> {}.is_valid());
+                debug_assert!(check_buffer::<{ FORMAT }>(bytes.len(), &options));
+                // SAFETY: safe if `check_buffer::<FORMAT>(bytes.len(), &options)`.
+                unsafe {
+                    let len = self.write_float::<{ FORMAT }>(bytes, &options);
+                    &mut index_unchecked_mut!(bytes[..len])
+                }
+            }
+
+            $(#[$meta:meta])?
+            fn to_lexical_with_options<'a, const FORMAT: u128>(
+                self,
+                bytes: &'a mut [u8],
+                options: &Self::Options,
+            ) -> &'a mut [u8]
+            {
+                assert!(NumberFormat::<{ FORMAT }> {}.is_valid());
+                assert!(check_buffer::<{ FORMAT }>(bytes.len(), &options));
+                // SAFETY: safe since `check_buffer::<FORMAT>(bytes.len(), &options)`.
+                unsafe { self.to_lexical_with_options_unchecked::<FORMAT>(bytes, options) }
+            }
+        }
+    )*)
+}
+
+to_lexical! {}
+to_lexical_with_options! {}
+float_to_lexical! {
+    f32 ;
+    f64 ;
+}
diff --git a/lexical-write-float/src/binary.rs b/lexical-write-float/src/binary.rs
@@ -601,7 +601,7 @@ where
     let zero_digits = fast_ceildiv(zero_bits, bits_per_digit) as usize;
 
     // Write our 0 digits.
-    // SAFETY: must be safe since since `bytes.len() < BUFFER_SIZE - 2`.
+    // SAFETY: safe if `bytes.len() > BUFFER_SIZE - 2`.
     unsafe {
         index_unchecked_mut!(bytes[0]) = b'0';
         index_unchecked_mut!(bytes[1]) = decimal_point;
@@ -691,7 +691,7 @@ where
     let mut cursor: usize;
     if leading_digits >= count {
         // We have more leading digits than digits we wrote: can write
-        // any additional digits, and then just write the remaining ones.
+        // any additional digits, and then just write the remaining zeros.
         // SAFETY: safe if the buffer is large enough to hold the significant digits.
         unsafe {
             let digits = &mut index_unchecked_mut!(bytes[count..leading_digits]);
@@ -747,9 +747,8 @@ where
 /// Optimized float-to-string algorithm for power of 2 radixes.
 ///
 /// This assumes the float is:
-///     1). Non-zero
-///     2). Non-special (NaN or Infinite).
-///     3). Non-negative.
+///     1). Non-special (NaN or Infinite).
+///     2). Non-negative.
 ///
 /// # Safety
 ///
@@ -776,6 +775,7 @@ where
     let format = NumberFormat::<{ FORMAT }> {};
     assert!(format.is_valid());
     debug_assert!(!float.is_special());
+    debug_assert!(float >= F::ZERO);
 
     // Quickly calculate the number of bits we would have written.
     // This simulates writing the digits, so we can calculate the

diff --git a/lexical-write-float/src/compact.rs b/lexical-write-float/src/compact.rs
@@ -35,9 +35,9 @@ use lexical_write_integer::write::WriteInteger;
 /// with Integers", by Florian Loitsch, available online at:
 /// <https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf>.
 ///
-/// # Preconditions
-///
-/// `float` must not be special (NaN or Infinity).
+/// This assumes the float is:
+///     1). Non-special (NaN or Infinite).
+///     2). Non-negative.
 ///
 /// # Safety
 ///
@@ -55,6 +55,7 @@ pub unsafe fn write_float<F: Float, const FORMAT: u128>(
     let format = NumberFormat::<{ FORMAT }> {};
     assert!(format.is_valid());
     debug_assert!(!float.is_special());
+    debug_assert!(float >= F::ZERO);
 
     // Write our mantissa digits to a temporary buffer.
     let digits: mem::MaybeUninit<[u8; 32]> = mem::MaybeUninit::uninit();

diff --git a/lexical-write-float/src/hex.rs b/lexical-write-float/src/hex.rs
@@ -1,2 +1,23 @@
+//! Optimized float serializer for hexadecimal floats.
+
 #![cfg(feature = "power-of-two")]
 #![doc(hidden)]
+
+use crate::options::Options;
+use lexical_util::num::Float;
+
+// TODO(ahuszagh) Implement...
+
+/// Optimized float-to-string algorithm for decimal strings.
+/// # Safety
+///
+/// Safe as long as the float isn't special (NaN or Infinity), and `bytes`
+/// is large enough to hold the significant digits.
+#[allow(unused)] // TODO(ahuszagh) Remove...
+pub unsafe fn write_float<F: Float, const FORMAT: u128>(
+    float: F,
+    bytes: &mut [u8],
+    options: &Options,
+) -> usize {
+    todo!();
+}
diff --git a/lexical-write-float/src/lib.rs b/lexical-write-float/src/lib.rs
@@ -67,8 +67,11 @@ pub mod hex;
 pub mod options;
 pub mod radix;
 
+mod api;
+mod write;
+
 // Re-exports
-//pub use self::api::{ToLexical, ToLexicalWithOptions};
+pub use self::api::{ToLexical, ToLexicalWithOptions};
 pub use self::options::{Options, OptionsBuilder};
 pub use lexical_util::constants::{FormattedSize, BUFFER_SIZE};
 pub use lexical_util::format::{NumberFormatBuilder, STANDARD};