Skip to content
This repository has been archived by the owner on Sep 1, 2021. It is now read-only.

Commit

Permalink
Added a formatter for generic radix floats.
Browse files Browse the repository at this point in the history
- Added more tests for binary floats.
- Added support for mandatory signs in integer writers.
- Added support for generic radix floats.
- Added extensive unit tests for generic radix floats.
- Implemented the write float ToLexical API.
- Added the WriteFloat trait.

Disabled some QuickCheck tests due to the following bug:
BurntSushi/quickcheck#295
  • Loading branch information
Alexhuszagh committed Aug 9, 2021
1 parent 369eed2 commit f9cf171
Show file tree
Hide file tree
Showing 26 changed files with 1,451 additions and 114 deletions.
1 change: 1 addition & 0 deletions lexical-asm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ format = [
"lexical-util/format",
"lexical-parse-integer/format",
"lexical-parse-float/format",
"lexical-write-integer/format",
"lexical-write-float/format"
]
compact = [
Expand Down
1 change: 1 addition & 0 deletions lexical-benchmark/write-integer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ default = ["std"]
std = ["lexical-util/std", "lexical-write-integer/std"]
radix = ["lexical-util/radix", "lexical-write-integer/radix"]
power-of-two = ["lexical-util/power-of-two", "lexical-write-integer/power-of-two"]
format = ["lexical-util/format", "lexical-write-integer/format"]

[[bench]]
name = "json"
Expand Down
1 change: 1 addition & 0 deletions lexical-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ radix = [
format = [
"lexical-parse-integer/format",
"lexical-parse-float/format",
"lexical-write-integer/format",
"lexical-write-float/format"
]
# Reduce code size at the cost of performance.
Expand Down
1 change: 1 addition & 0 deletions lexical-size/Cargo.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ format = [
"lexical-util/format",
"lexical-parse-integer/format",
"lexical-parse-float/format",
"lexical-write-integer/format",
"lexical-write-float/format"
]
compact = [
Expand Down
7 changes: 7 additions & 0 deletions lexical-util/src/algorithm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ pub fn rtrim_char_count(slc: &[u8], c: u8) -> usize {
slc.iter().rev().take_while(|&&si| si == c).count()
}

/// Count the number of leading characters equal to a given value.
#[inline]
#[cfg(feature = "write")]
pub fn ltrim_char_count(slc: &[u8], c: u8) -> usize {
    // The index of the first byte that differs from `c` is exactly the
    // length of the leading run of `c`. If no byte differs, the run spans
    // the whole slice.
    slc.iter().position(|&byte| byte != c).unwrap_or(slc.len())
}

/// Trim character from the end (right-side) of a slice.
#[inline]
#[cfg(feature = "write")]
Expand Down
22 changes: 22 additions & 0 deletions lexical-util/src/num.rs
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ pub trait Float: Number + ops::Neg<Output = Self> {
// Re-export the to and from bits methods.
fn to_bits(self) -> Self::Unsigned;
fn from_bits(u: Self::Unsigned) -> Self;
fn ln(self) -> Self;
fn floor(self) -> Self;
fn is_sign_positive(self) -> bool;
fn is_sign_negative(self) -> bool;

Expand Down Expand Up @@ -744,6 +746,16 @@ impl Float for f32 {
f32::from_bits(u)
}

/// Natural logarithm of `self`, forwarded to the inherent `f32::ln`.
#[inline]
fn ln(self) -> f32 {
f32::ln(self)
}

/// Largest integral value not greater than `self`, forwarded to the
/// inherent `f32::floor`.
#[inline]
fn floor(self) -> f32 {
f32::floor(self)
}

#[inline]
fn is_sign_positive(self) -> bool {
f32::is_sign_positive(self)
Expand Down Expand Up @@ -782,6 +794,16 @@ impl Float for f64 {
f64::from_bits(u)
}

/// Natural logarithm of `self`, forwarded to the inherent `f64::ln`.
#[inline]
fn ln(self) -> f64 {
f64::ln(self)
}

/// Largest integral value not greater than `self`, forwarded to the
/// inherent `f64::floor`.
#[inline]
fn floor(self) -> f64 {
f64::floor(self)
}

#[inline]
fn is_sign_positive(self) -> bool {
f64::is_sign_positive(self)
Expand Down
18 changes: 18 additions & 0 deletions lexical-util/tests/algorithm_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@ fn copy_to_dest_test() {
assert_eq!(&dst[..5], src);
}

#[test]
#[cfg(feature = "write")]
fn ltrim_char_test() {
    // Each case is (input, byte to count, expected length of the leading run).
    let cases: [(&str, u8, usize); 8] = [
        ("0001", b'0', 3),
        ("1010", b'0', 0),
        ("1010", b'1', 1),
        ("1.00", b'0', 0),
        ("1.00", b'1', 1),
        ("1e05", b'0', 0),
        ("1e05", b'1', 1),
        ("1e05", b'5', 0),
    ];

    for &(input, needle, expected) in cases.iter() {
        assert_eq!(algorithm::ltrim_char_count(input.as_bytes(), needle), expected);
    }
}

#[test]
#[cfg(feature = "write")]
fn rtrim_char_test() {
Expand Down
2 changes: 2 additions & 0 deletions lexical-util/tests/num_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ fn check_float<T: num::Float>(mut x: T) {
assert_eq!(T::from_bits(x.to_bits()), x);
let _ = x.is_sign_positive();
let _ = x.is_sign_negative();
let _ = x.ln();
let _ = x.floor();

// Check properties
let _ = x.to_bits() & T::SIGN_MASK;
Expand Down
1 change: 1 addition & 0 deletions lexical-write-float/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ features = []
static_assertions = "1"

[dev-dependencies]
approx = "0.5.0"
quickcheck = "1.0.3"
proptest = "0.10.1"
fraction = "0.8.0"
Expand Down
22 changes: 22 additions & 0 deletions lexical-write-float/src/algorithm.rs
Original file line number Diff line number Diff line change
@@ -1 +1,23 @@
//! Implementation of the Dragonbox algorithm.

#![cfg(not(feature = "compact"))]
#![doc(hidden)]

use crate::options::Options;
use lexical_util::num::Float;

// TODO(ahuszagh) Implement...

/// Optimized float-to-string algorithm for decimal strings.
///
/// Currently a stub: the body is `todo!()`, so any call panics.
///
/// # Safety
///
/// Safe as long as the float isn't special (NaN or Infinity), and `bytes`
/// is large enough to hold the significant digits.
#[allow(unused)] // TODO(ahuszagh) Remove...
pub unsafe fn write_float<F: Float, const FORMAT: u128>(
float: F,
bytes: &mut [u8],
options: &Options,
) -> usize {
todo!();
}
144 changes: 144 additions & 0 deletions lexical-write-float/src/api.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
//! Implements the algorithm in terms of the lexical API.

#![doc(hidden)]

use crate::options::Options;
use crate::write::WriteFloat;
use lexical_util::format::{NumberFormat, STANDARD};
use lexical_util::{to_lexical, to_lexical_with_options};

/// Check if a buffer is sufficiently large.
///
/// Builds an estimate of the bytes needed from three parts (sign and decimal
/// point, exponent or zero padding, significant digits) and returns `true`
/// only when `len` is strictly greater than that estimate.
fn check_buffer<const FORMAT: u128>(len: usize, options: &Options) -> bool {
    // Always reserved: one byte for the decimal point, one for the sign.
    const SIGN_AND_POINT: usize = 2;

    let format = NumberFormat::<{ FORMAT }> {};
    let power_of_two = cfg!(feature = "power-of-two");

    // Room for either the exponent or the leading/trailing zeros that are
    // written in its place, whichever can be longer.
    let padding: usize = if format.no_exponent_notation() {
        // Without exponent notation every zero must be written out.
        if power_of_two {
            // Min is 2^-1075.
            1075
        } else {
            // Min is 10^-324.
            324
        }
    } else {
        // The exponent breaks bound how many zeros are written before the
        // writer switches to exponent notation.
        let min_exp = options.negative_exponent_break().map_or(-5, |x| x.get());
        let max_exp = options.positive_exponent_break().map_or(9, |x| x.get());
        let exp = min_exp.abs().max(max_exp) as usize;
        if power_of_two && exp < 13 {
            // 11 exponent digits in binary, 1 for the sign, 1 for the symbol.
            13
        } else if exp < 5 {
            // 3 exponent digits in decimal, 1 for the sign, 1 for the symbol.
            5
        } else {
            // More leading or trailing zeros than exponent digits.
            exp
        }
    };

    // Number of significant digits before any user-requested limits.
    let natural_digits: usize = if format.radix() == 10 {
        // Really should be 18, but add some extra to be cautious.
        28
    } else {
        // BINARY: 53 significant mantissa bits, plus a few extra.
        // RADIX: the limit is `delta`, whose maximum relative value is
        // 2.22e-16 (around 1). For a float near 2.0 with a fraction just
        // below 1, at most 33 digits can be produced (base 3). Assume it
        // is a lot higher and go with 64.
        64
    };
    let capped = match options.max_significant_digits() {
        Some(max_digits) => natural_digits.min(max_digits.get()),
        None => natural_digits,
    };
    let digits = match options.min_significant_digits() {
        Some(min_digits) => capped.max(min_digits.get()),
        // NOTE(review): with no minimum set this uses the uncapped count and
        // so ignores any maximum. That only makes the requirement larger
        // (conservative) — confirm whether it is intentional.
        None => natural_digits,
    };

    len > SIGN_AND_POINT + padding + digits
}

// API

const DEFAULT_OPTIONS: Options = Options::new();

// Implement ToLexical and ToLexicalWithOptions for float types.
//
// Each entry is a type, optionally followed by `, #[attr]`, and terminated
// by `;`. When an attribute is given it is applied to every generated method.
macro_rules! float_to_lexical {
    ($($t:tt $(, #[$meta:meta])? ; )*) => ($(
        impl ToLexical for $t {
            // The fragment specifier (`:meta`) belongs only in the matcher;
            // the transcriber must use the bare metavariable.
            $(#[$meta])?
            unsafe fn to_lexical_unchecked<'a>(self, bytes: &'a mut [u8])
                -> &'a mut [u8]
            {
                debug_assert!(check_buffer::<{ STANDARD }>(bytes.len(), &DEFAULT_OPTIONS));
                // SAFETY: safe if `check_buffer::<STANDARD>(bytes.len(), &options)`.
                unsafe {
                    let len = self.write_float::<{ STANDARD }>(bytes, &DEFAULT_OPTIONS);
                    &mut index_unchecked_mut!(bytes[..len])
                }
            }

            $(#[$meta])?
            fn to_lexical<'a>(self, bytes: &'a mut [u8])
                -> &'a mut [u8]
            {
                // Panic rather than write out of bounds on a short buffer.
                assert!(check_buffer::<{ STANDARD }>(bytes.len(), &DEFAULT_OPTIONS));
                // SAFETY: safe since `check_buffer::<STANDARD>(bytes.len(), &options)`.
                unsafe { self.to_lexical_unchecked(bytes) }
            }
        }

        impl ToLexicalWithOptions for $t {
            type Options = Options;

            $(#[$meta])?
            unsafe fn to_lexical_with_options_unchecked<'a, const FORMAT: u128>(
                self,
                bytes: &'a mut [u8],
                options: &Self::Options,
            ) -> &'a mut [u8]
            {
                // The format is always validated, even in the unchecked path.
                assert!(NumberFormat::<{ FORMAT }> {}.is_valid());
                // `options` is already a reference: pass it through as-is.
                debug_assert!(check_buffer::<{ FORMAT }>(bytes.len(), options));
                // SAFETY: safe if `check_buffer::<FORMAT>(bytes.len(), &options)`.
                unsafe {
                    let len = self.write_float::<{ FORMAT }>(bytes, options);
                    &mut index_unchecked_mut!(bytes[..len])
                }
            }

            $(#[$meta])?
            fn to_lexical_with_options<'a, const FORMAT: u128>(
                self,
                bytes: &'a mut [u8],
                options: &Self::Options,
            ) -> &'a mut [u8]
            {
                assert!(NumberFormat::<{ FORMAT }> {}.is_valid());
                // Panic rather than write out of bounds on a short buffer.
                assert!(check_buffer::<{ FORMAT }>(bytes.len(), options));
                // SAFETY: safe since `check_buffer::<FORMAT>(bytes.len(), &options)`.
                unsafe { self.to_lexical_with_options_unchecked::<FORMAT>(bytes, options) }
            }
        }
    )*)
}

// NOTE(review): `to_lexical!` and `to_lexical_with_options!` are imported from
// `lexical_util`; presumably they declare the `ToLexical` and
// `ToLexicalWithOptions` traits implemented below — confirm.
to_lexical! {}
to_lexical_with_options! {}
// Generate the trait implementations for both float types, with no extra
// attribute on the generated methods.
float_to_lexical! {
f32 ;
f64 ;
}
10 changes: 5 additions & 5 deletions lexical-write-float/src/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ where
let zero_digits = fast_ceildiv(zero_bits, bits_per_digit) as usize;

// Write our 0 digits.
// SAFETY: must be safe since since `bytes.len() < BUFFER_SIZE - 2`.
// SAFETY: safe if `bytes.len() > BUFFER_SIZE - 2`.
unsafe {
index_unchecked_mut!(bytes[0]) = b'0';
index_unchecked_mut!(bytes[1]) = decimal_point;
Expand Down Expand Up @@ -691,7 +691,7 @@ where
let mut cursor: usize;
if leading_digits >= count {
// We have more leading digits than digits we wrote: can write
// any additional digits, and then just write the remaining ones.
// any additional digits, and then just write the remaining zeros.
// SAFETY: safe if the buffer is large enough to hold the significant digits.
unsafe {
let digits = &mut index_unchecked_mut!(bytes[count..leading_digits]);
Expand Down Expand Up @@ -747,9 +747,8 @@ where
/// Optimized float-to-string algorithm for power of 2 radixes.
///
/// This assumes the float is:
/// 1). Non-zero
/// 2). Non-special (NaN or Infinite).
/// 3). Non-negative.
/// 1). Non-special (NaN or Infinite).
/// 2). Non-negative.
///
/// # Safety
///
Expand All @@ -776,6 +775,7 @@ where
let format = NumberFormat::<{ FORMAT }> {};
assert!(format.is_valid());
debug_assert!(!float.is_special());
debug_assert!(float >= F::ZERO);

// Quickly calculate the number of bits we would have written.
// This simulates writing the digits, so we can calculate the
Expand Down
7 changes: 4 additions & 3 deletions lexical-write-float/src/compact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ use lexical_write_integer::write::WriteInteger;
/// with Integers", by Florian Loitsch, available online at:
/// <https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf>.
///
/// # Preconditions
///
/// `float` must not be special (NaN or Infinity).
/// This assumes the float is:
/// 1). Non-special (NaN or Infinite).
/// 2). Non-negative.
///
/// # Safety
///
Expand All @@ -55,6 +55,7 @@ pub unsafe fn write_float<F: Float, const FORMAT: u128>(
let format = NumberFormat::<{ FORMAT }> {};
assert!(format.is_valid());
debug_assert!(!float.is_special());
debug_assert!(float >= F::ZERO);

// Write our mantissa digits to a temporary buffer.
let digits: mem::MaybeUninit<[u8; 32]> = mem::MaybeUninit::uninit();
Expand Down
21 changes: 21 additions & 0 deletions lexical-write-float/src/hex.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,23 @@
//! Optimized float serializer for hexadecimal floats.

#![cfg(feature = "power-of-two")]
#![doc(hidden)]

use crate::options::Options;
use lexical_util::num::Float;

// TODO(ahuszagh) Implement...

/// Optimized float-to-string algorithm for hexadecimal strings.
///
/// Currently a stub: the body is `todo!()`, so any call panics.
///
/// # Safety
///
/// Safe as long as the float isn't special (NaN or Infinity), and `bytes`
/// is large enough to hold the significant digits.
#[allow(unused)] // TODO(ahuszagh) Remove...
pub unsafe fn write_float<F: Float, const FORMAT: u128>(
float: F,
bytes: &mut [u8],
options: &Options,
) -> usize {
todo!();
}
5 changes: 4 additions & 1 deletion lexical-write-float/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,11 @@ pub mod hex;
pub mod options;
pub mod radix;

mod api;
mod write;

// Re-exports
//pub use self::api::{ToLexical, ToLexicalWithOptions};
pub use self::api::{ToLexical, ToLexicalWithOptions};
pub use self::options::{Options, OptionsBuilder};
pub use lexical_util::constants::{FormattedSize, BUFFER_SIZE};
pub use lexical_util::format::{NumberFormatBuilder, STANDARD};
Loading

0 comments on commit f9cf171

Please sign in to comment.