Skip to content

Commit

Permalink
Fixes #1384, by ensuring specials are properly parsed.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexhuszagh committed Sep 10, 2021
1 parent 615e6a4 commit c734f41
Show file tree
Hide file tree
Showing 3 changed files with 348 additions and 59 deletions.
190 changes: 160 additions & 30 deletions src/number/complete.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Parsers recognizing numbers, complete input version
use core::{f32, f64};
use crate::branch::alt;
use crate::bytes::complete::tag;
use crate::character::complete::{char, digit1, sign};
Expand Down Expand Up @@ -1426,11 +1427,13 @@ where
)(input)
}

///
/// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data
///
/// *Complete version*: Can parse until the end of input.
///
pub fn recognize_float_parts<T, E: ParseError<T>>(input: T) -> IResult<T, (bool, T, T, i32), E>
pub fn recognize_float_parts<T, E: ParseError<T>>(input: T) -> IResult<T, (T, T, i32), E>
where
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
T: Clone + Offset,
Expand All @@ -1441,8 +1444,7 @@ where
T: for<'a> Compare<&'a [u8]>,
T: AsBytes,
{
let (i, sign) = sign(input.clone())?;

let i = input.clone();
//let (i, zeroes) = take_while(|c: <T as InputTakeAtPosition>::Item| c.as_char() == '0')(i)?;
let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) {
Some(index) => i.take_split(index),
Expand Down Expand Up @@ -1514,7 +1516,79 @@ where
(i2, 0)
};

Ok((i, (sign, integer, fraction, exp)))
Ok((i, (integer, fraction, exp)))
}

// Expands to the body of a finite-float parser producing a `$t`.
//
// `$input` names the parser input and `$t` is the float type to build. The
// expansion uses `?`, so it must sit inside a function returning an `IResult`.
macro_rules! float_finite {
  ($input:ident, $t:ty) => {{
    // Split the text into its integer digits, fraction digits and exponent.
    let (rest, (int_digits, frac_digits, exp)) = recognize_float_parts($input)?;

    // Hand the raw digit bytes to `minimal_lexical` for the actual conversion.
    let int_bytes = int_digits.as_bytes().iter();
    let frac_bytes = frac_digits.as_bytes().iter();
    let value: $t = minimal_lexical::parse_float(int_bytes, frac_bytes, exp);

    Ok((rest, value))
  }};
}

// Expands to the body of a parser for special float spellings, producing a `$t`.
//
// Recognizes `nan`, `inf` and `infinity` (letters compared through
// `case_insensitive_cmp`) at the start of `$input`. Anything else makes the
// enclosing function return an `ErrorKind::Float` error carrying the input.
macro_rules! float_nonfinite {
  ($input:ident, $t:ident) => {{
    let bytes = $input.as_bytes();
    let len = bytes.len();

    // Each keyword is only tried when enough bytes are available. `infinity`
    // goes before `inf` so that the longer spelling is consumed in full.
    let parsed = if len >= 8 && crate::number::case_insensitive_cmp(bytes, b"infinity") {
      Some(($t::INFINITY, 8))
    } else if len >= 3 && crate::number::case_insensitive_cmp(bytes, b"inf") {
      Some(($t::INFINITY, 3))
    } else if len >= 3 && crate::number::case_insensitive_cmp(bytes, b"nan") {
      Some(($t::NAN, 3))
    } else {
      None
    };

    match parsed {
      // Drop the matched keyword and hand back whatever follows it.
      Some((value, consumed)) => Ok(($input.slice(consumed..), value)),
      None => return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))),
    }
  }};
}

/// Recognizes a finite floating point number in text format and returns a f32.
///
/// *Complete version*: Can parse until the end of input.
///
/// This only handles finite (non-special) floats. It is a thin wrapper around
/// the `float_finite!` macro, instantiated for `f32`. Special spellings are
/// handled separately by [`float_nonfinite`].
pub fn float_finite<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E>
where
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
T: Clone + Offset,
T: InputIter + InputLength + InputTake,
<T as InputIter>::Item: AsChar + Copy,
<T as InputIter>::IterElem: Clone,
T: InputTakeAtPosition,
<T as InputTakeAtPosition>::Item: AsChar,
T: AsBytes,
T: for<'a> Compare<&'a [u8]>,
{
// Expands to the shared finite-float parsing body with `f32` as the target type.
float_finite!(input, f32)
}

/// Recognizes a special (non-finite) floating point value in text format and
/// returns a f32.
///
/// This only handles non-finite (special) values. It is a thin wrapper around
/// the `float_nonfinite!` macro, instantiated for `f32`. That macro matches
/// `nan`, `inf` or `infinity` at the start of the input, comparing letters with
/// `case_insensitive_cmp`, and fails with `ErrorKind::Float` for anything else.
/// Finite numbers are handled by [`float_finite`].
pub fn float_nonfinite<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E>
where
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
T: Clone + Offset,
T: InputIter + InputLength + InputTake,
<T as InputIter>::Item: AsChar + Copy,
<T as InputIter>::IterElem: Clone,
T: InputTakeAtPosition,
<T as InputTakeAtPosition>::Item: AsChar,
T: AsBytes,
T: for<'a> Compare<&'a [u8]>,
{
// Expands to the shared special-value parsing body with `f32` as the target type.
float_nonfinite!(input, f32)
}

/// Recognizes floating point number in text format and returns a f32.
Expand Down Expand Up @@ -1546,30 +1620,62 @@ where
T: AsBytes,
T: for<'a> Compare<&'a [u8]>,
{
let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?;
let (input, sign) = sign(input.clone())?;
let res = float_finite::<T, E>(input.clone());
let (i, mut float) = res.or(float_nonfinite::<T, E>(input))?;

let mut float: f32 = minimal_lexical::parse_float(
integer.as_bytes().iter(),
fraction.as_bytes().iter(),
exponent,
);
if !sign {
float = -float;
}

Ok((i, float))
}

/// Recognizes a finite floating point number in text format and returns a f64.
///
/// *Complete version*: Can parse until the end of input.
///
/// This only handles finite (non-special) floats. It is a thin wrapper around
/// the `float_finite!` macro, instantiated for `f64`. Special spellings are
/// handled separately by [`double_nonfinite`].
pub fn double_finite<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E>
where
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
T: Clone + Offset,
T: InputIter + InputLength + InputTake,
<T as InputIter>::Item: AsChar + Copy,
<T as InputIter>::IterElem: Clone,
T: InputTakeAtPosition,
<T as InputTakeAtPosition>::Item: AsChar,
T: AsBytes,
T: for<'a> Compare<&'a [u8]>,
{
// Expands to the shared finite-float parsing body with `f64` as the target type.
float_finite!(input, f64)
}

/// Recognizes a special (non-finite) floating point value in text format and
/// returns a f64.
///
/// This only handles non-finite (special) values. It is a thin wrapper around
/// the `float_nonfinite!` macro, instantiated for `f64`. That macro matches
/// `nan`, `inf` or `infinity` at the start of the input, comparing letters with
/// `case_insensitive_cmp`, and fails with `ErrorKind::Float` for anything else.
/// Finite numbers are handled by [`double_finite`].
pub fn double_nonfinite<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E>
where
T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>,
T: Clone + Offset,
T: InputIter + InputLength + InputTake,
<T as InputIter>::Item: AsChar + Copy,
<T as InputIter>::IterElem: Clone,
T: InputTakeAtPosition,
<T as InputTakeAtPosition>::Item: AsChar,
T: AsBytes,
T: for<'a> Compare<&'a [u8]>,
{
// Expands to the shared special-value parsing body with `f64` as the target type.
float_nonfinite!(input, f64)
}

/// Recognizes floating point number in text format and returns a f64.
///
/// *Complete version*: Can parse until the end of input.
/// ```rust
/// # use nom::{Err, error::ErrorKind, Needed};
/// # use nom::Needed::Size;
/// use nom::number::complete::float;
/// use nom::number::complete::double;
///
/// let parser = |s| {
/// float(s)
/// double(s)
/// };
///
/// assert_eq!(parser("11e-1"), Ok(("", 1.1)));
Expand All @@ -1589,18 +1695,14 @@ where
T: AsBytes,
T: for<'a> Compare<&'a [u8]>,
{
let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?;
let (input, sign) = sign(input.clone())?;
let res = double_finite::<T, E>(input.clone());
let (i, mut double) = res.or(double_nonfinite::<T, E>(input))?;

let mut float: f64 = minimal_lexical::parse_float(
integer.as_bytes().iter(),
fraction.as_bytes().iter(),
exponent,
);
if !sign {
float = -float;
double = -double;
}

Ok((i, float))
Ok((i, double))
}

#[cfg(test)]
Expand All @@ -1618,6 +1720,23 @@ mod tests {
};
);

// Asserts that two float parser results are equal, treating NaN as equal to NaN.
//
// `assert_eq!` alone cannot be used because NaN != NaN. When the expected
// result is `Ok` with a NaN value, the actual result must also be `Ok` with a
// NaN value AND the same remaining input; every other case is compared with
// plain `assert_eq!`.
macro_rules! assert_float_eq {
  ($left: expr, $right: expr) => {
    let left: $crate::IResult<_, _, (_, ErrorKind)> = $left;
    let right: $crate::IResult<_, _, (_, ErrorKind)> = $right;
    match (left, right) {
      (Ok((left_rest, left_float)), Ok((right_rest, right_float))) if right_float.is_nan() => {
        // Check the unconsumed input too, so a parser that yields NaN but
        // stops at the wrong position is still caught.
        assert_eq!(left_rest, right_rest);
        assert!(left_float.is_nan());
      }
      // Non-NaN expectations, and any mismatch in Ok/Err shape, are handled
      // (and reported) by the regular equality assertion.
      (left, right) => assert_eq!(left, right),
    }
  };
}

#[test]
fn i8_tests() {
assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0)));
Expand Down Expand Up @@ -1942,6 +2061,8 @@ mod tests {
"12.34",
"-1.234E-12",
"-1.234e-12",
"NaN",
"inf",
];

for test in test_cases.drain(..) {
Expand All @@ -1951,15 +2072,24 @@ mod tests {
println!("now parsing: {} -> {}", test, expected32);

let larger = format!("{}", test);
assert_parse!(recognize_float(&larger[..]), Ok(("", test)));
if expected32.is_finite() {
assert_parse!(recognize_float(&larger[..]), Ok(("", test)));
}

assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32)));
assert_parse!(float(&larger[..]), Ok(("", expected32)));
assert_float_eq!(float(larger.as_bytes()), Ok((&b""[..], expected32)));
assert_float_eq!(float(&larger[..]), Ok(("", expected32)));

assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64)));
assert_parse!(double(&larger[..]), Ok(("", expected64)));
assert_float_eq!(double(larger.as_bytes()), Ok((&b""[..], expected64)));
assert_float_eq!(double(&larger[..]), Ok(("", expected64)));
}

// b"infinity" and case-insensitive floats don't work until recent
// rustc versions, so just test they work here.
assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN)));
assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY)));
assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN)));
assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY)));

let remaining_exponent = "-1.234E-";
assert_parse!(
recognize_float(remaining_exponent),
Expand Down Expand Up @@ -2051,8 +2181,8 @@ mod tests {
}

fn parse_f64(i: &str) -> IResult<&str, f64, ()> {
match recognize_float(i) {
Err(e) => Err(e),
match recognize_float::<_, ()>(i) {
Err(_) => Err(Err::Error(())),
Ok((i, s)) => {
if s.is_empty() {
return Err(Err::Error(()));
Expand Down
10 changes: 10 additions & 0 deletions src/number/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,13 @@ pub enum Endianness {
/// Will match the host's endianness
Native,
}

/// Case-insensitive prefix comparison of ASCII text.
///
/// Returns `true` when `x` starts with `y`, ignoring ASCII letter case. Bytes
/// of `x` past `y.len()` are not inspected, so callers can pass the whole
/// remaining input. Returns `false` when `x` is shorter than `y`.
#[inline]
fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool {
  // `get` yields `None` when `x` has fewer bytes than `y`, so a truncated
  // (or empty) input can never be mistaken for a match.
  x.get(..y.len())
    .map_or(false, |prefix| prefix.eq_ignore_ascii_case(y))
}
Loading

0 comments on commit c734f41

Please sign in to comment.