From c734f41955bb651c3de582bac7c2dfefd6288776 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Fri, 10 Sep 2021 18:10:11 -0500 Subject: [PATCH] Fixes #1384, by ensuring specials are properly parsed. --- src/number/complete.rs | 190 ++++++++++++++++++++++++++++++------ src/number/mod.rs | 10 ++ src/number/streaming.rs | 207 ++++++++++++++++++++++++++++++++++------ 3 files changed, 348 insertions(+), 59 deletions(-) diff --git a/src/number/complete.rs b/src/number/complete.rs index c5553a40c..eb8b3c3ea 100644 --- a/src/number/complete.rs +++ b/src/number/complete.rs @@ -1,5 +1,6 @@ //! Parsers recognizing numbers, complete input version +use core::{f32, f64}; use crate::branch::alt; use crate::bytes::complete::tag; use crate::character::complete::{char, digit1, sign}; @@ -1426,11 +1427,13 @@ where )(input) } +/// + /// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data /// /// *Complete version*: Can parse until the end of input. /// -pub fn recognize_float_parts>(input: T) -> IResult +pub fn recognize_float_parts>(input: T) -> IResult where T: Slice> + Slice> + Slice>, T: Clone + Offset, @@ -1441,8 +1444,7 @@ where T: for<'a> Compare<&'a [u8]>, T: AsBytes, { - let (i, sign) = sign(input.clone())?; - + let i = input.clone(); //let (i, zeroes) = take_while(|c: ::Item| c.as_char() == '0')(i)?; let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) { Some(index) => i.take_split(index), @@ -1514,7 +1516,79 @@ where (i2, 0) }; - Ok((i, (sign, integer, fraction, exp))) + Ok((i, (integer, fraction, exp))) +} + +macro_rules! float_finite { + ($input:ident, $t:ty) => {{ + let (i, (integer, fraction, exponent)) = recognize_float_parts($input)?; + + let float: $t = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + + Ok((i, float)) + }}; +} + +macro_rules! 
float_nonfinite { + ($input:ident, $t:ident) => {{ + let b = $input.as_bytes(); + let (float, count) = if b.len() >= 3 { + if crate::number::case_insensitive_cmp(b, b"nan") { + ($t::NAN, 3) + } else if b.len() >= 8 && crate::number::case_insensitive_cmp(b, b"infinity") { + ($t::INFINITY, 8) + } else if crate::number::case_insensitive_cmp(b, b"inf") { + ($t::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + } + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + }; + + Ok(($input.slice(count..), float)) + }}; +} + +/// Recognizes floating point number in text format and returns a f32. +/// +/// *Complete version*: Can parse until the end of input. This only handles +/// finite (non-special) floats. +/// +pub fn float_finite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f32) +} + +/// Recognizes floating point number in text format and returns a f32. +/// This only handles non-finite (special) values. +pub fn float_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f32) } /// Recognizes floating point number in text format and returns a f32. 
@@ -1546,30 +1620,62 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + let (input, sign) = sign(input.clone())?; + let res = float_finite::(input.clone()); + let (i, mut float) = res.or(float_nonfinite::(input))?; - let mut float: f32 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); if !sign { float = -float; } - Ok((i, float)) } -/// Recognizes floating point number in text format and returns a f32. +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Complete version*: Can parse until the end of input. This only handles +/// finite (non-special) floats. +pub fn double_finite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f64) +} + +/// Recognizes floating point number in text format and returns a f64. +/// This only handles non-finite (special) values. +pub fn double_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f64) +} + +/// Recognizes floating point number in text format and returns a f64. /// /// *Complete version*: Can parse until the end of input. 
/// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; -/// use nom::number::complete::float; +/// use nom::number::complete::double; /// /// let parser = |s| { -/// float(s) +/// double(s) /// }; /// /// assert_eq!(parser("11e-1"), Ok(("", 1.1))); @@ -1589,18 +1695,14 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + let (input, sign) = sign(input.clone())?; + let res = double_finite::(input.clone()); + let (i, mut double) = res.or(double_nonfinite::(input))?; - let mut float: f64 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); if !sign { - float = -float; + double = -double; } - - Ok((i, float)) + Ok((i, double)) } #[cfg(test)] @@ -1618,6 +1720,23 @@ mod tests { }; ); + // Need more complex logic, since NaN != NaN. + macro_rules! assert_float_eq { + ($left: expr, $right: expr) => { + let left: $crate::IResult<_, _, (_, ErrorKind)> = $left; + let right: $crate::IResult<_, _, (_, ErrorKind)> = $right; + if let Ok((_, float)) = right { + if float.is_nan() { + assert!(left.unwrap().1.is_nan()); + } else { + assert_eq!(left, right); + } + }else { + assert_eq!(left, right); + } + }; + } + #[test] fn i8_tests() { assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0))); @@ -1942,6 +2061,8 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "NaN", + "inf", ]; for test in test_cases.drain(..) 
{ @@ -1951,15 +2072,24 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); let larger = format!("{}", test); - assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + if expected32.is_finite() { + assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + } - assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32))); - assert_parse!(float(&larger[..]), Ok(("", expected32))); + assert_float_eq!(float(larger.as_bytes()), Ok((&b""[..], expected32))); + assert_float_eq!(float(&larger[..]), Ok(("", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64))); - assert_parse!(double(&larger[..]), Ok(("", expected64))); + assert_float_eq!(double(larger.as_bytes()), Ok((&b""[..], expected64))); + assert_float_eq!(double(&larger[..]), Ok(("", expected64))); } + // b"infinity" and case-insensitive floats don't work until recent + // rustc versions, so just test they work here. + assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN))); + assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY))); + assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN))); + assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY))); + let remaining_exponent = "-1.234E-"; assert_parse!( recognize_float(remaining_exponent), @@ -2051,8 +2181,8 @@ mod tests { } fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - match recognize_float(i) { - Err(e) => Err(e), + match recognize_float::<_, ()>(i) { + Err(_) => Err(Err::Error(())), Ok((i, s)) => { if s.is_empty() { return Err(Err::Error(())); diff --git a/src/number/mod.rs b/src/number/mod.rs index 58c3d51b0..509d50ffd 100644 --- a/src/number/mod.rs +++ b/src/number/mod.rs @@ -13,3 +13,13 @@ pub enum Endianness { /// Will match the host's endianness Native, } + +/// Case-insensitive comparison of digits. Only works if `y` is only ASCII letters. 
+#[inline] +fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool { + let d = (x.iter().zip(y.iter())).fold(0, |d, (xi, yi)| d | xi ^ yi); + // This uses the trick that 'a' - 'A' == 0x20, and this is true + // for all characters, so as long as `yi` is a valid ASCII letter, + // `xi ^ yi` can only be 0 or 0x20. + d == 0 || d == 0x20 +} diff --git a/src/number/streaming.rs b/src/number/streaming.rs index 3ca445fa8..a76cfe287 100644 --- a/src/number/streaming.rs +++ b/src/number/streaming.rs @@ -1,5 +1,6 @@ //! Parsers recognizing numbers, streaming version +use core::{f32, f64}; use crate::branch::alt; use crate::bytes::streaming::tag; use crate::character::streaming::{char, digit1, sign}; @@ -1399,7 +1400,7 @@ where /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. /// -pub fn recognize_float_parts>(input: T) -> IResult +pub fn recognize_float_parts>(input: T) -> IResult where T: Slice> + Slice>, T: Clone + Offset, @@ -1410,8 +1411,7 @@ where T: for<'a> Compare<&'a [u8]>, T: AsBytes, { - let (i, sign) = sign(input.clone())?; - + let i = input.clone(); //let (i, zeroes) = take_while(|c: ::Item| c.as_char() == '0')(i)?; let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) { Some(index) => i.take_split(index), @@ -1487,20 +1487,106 @@ where (i2, 0) }; - Ok((i, (sign, integer, fraction, exp))) + Ok((i, (integer, fraction, exp))) +} + +macro_rules! float_finite { + ($input:ident, $t:ident) => {{ + let (i, (integer, fraction, exponent)) = recognize_float_parts($input)?; + + let float: $t = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + + Ok((i, float)) + }}; +} + +macro_rules! 
float_nonfinite { + ($input:ident, $t:ident) => {{ + let b = $input.as_bytes(); + let (float, count) = if b.len() >= 3 { + if crate::number::case_insensitive_cmp(b, b"nan") { + ($t::NAN, 3) + } else if b.len() >= 8 && crate::number::case_insensitive_cmp(b, b"infinity") { + ($t::INFINITY, 8) + } else if crate::number::case_insensitive_cmp(b, b"inf") { + ($t::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + } + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + }; + + Ok(($input.slice(count..), float)) + }}; } /// Recognizes floating point number in text format and returns a f32. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// This only handles finite (non-special) floats. /// /// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; -/// use nom::number::complete::float; +/// use nom::number::complete::float_finite; /// /// let parser = |s| { -/// float(s) +/// float_finite(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn float_finite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f32) +} + +/// Recognizes floating point number in text format and returns a f32. +/// This only handles non-finite (special) values. 
+pub fn float_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f32) +} + +/// Recognizes floating point number in text format and returns a f32. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::float; +/// +/// let parser = |s| { +/// float(s) +/// }; /// /// assert_eq!(parser("11e-1"), Ok(("", 1.1))); @@ -1520,21 +1606,56 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + let (input, sign) = sign(input)?; + let (i, mut float) = match float_finite::(input.clone()) { + Ok((i, float)) => Ok((i, float)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => float_nonfinite::(input), + }?; - let mut float: f32 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); if !sign { float = -float; } - Ok((i, float)) } -/// Recognizes floating point number in text format and returns a f32. +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// This only handles finite (non-special) floats. 
+pub fn double_finite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f64) +} + +/// Recognizes floating point number in text format and returns a f64. +/// This only handles non-finite (special) values. +pub fn double_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f64) +} + +/// Recognizes floating point number in text format and returns a f64. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. /// @@ -1564,18 +1685,17 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + let (input, sign) = sign(input)?; + let (i, mut double) = match double_finite::(input.clone()) { + Ok((i, double)) => Ok((i, double)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => double_nonfinite::(input), + }?; - let mut float: f64 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); if !sign { - float = -float; + double = -double; } - - Ok((i, float)) + Ok((i, double)) } #[cfg(test)] @@ -1593,6 +1713,23 @@ mod tests { }; ); + // Need more complex logic, since NaN != NaN. + macro_rules! 
assert_float_eq { + ($left: expr, $right: expr) => { + let left: $crate::IResult<_, _, (_, ErrorKind)> = $left; + let right: $crate::IResult<_, _, (_, ErrorKind)> = $right; + if let Ok((_, float)) = right { + if float.is_nan() { + assert!(left.unwrap().1.is_nan()); + } else { + assert_eq!(left, right); + } + }else { + assert_eq!(left, right); + } + }; + } + #[test] fn i8_tests() { assert_parse!(be_i8(&[0x00][..]), Ok((&b""[..], 0))); @@ -2023,6 +2160,8 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "NaN", + "inf", ]; for test in test_cases.drain(..) { @@ -2032,15 +2171,24 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); let larger = format!("{};", test); - assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + if expected32.is_finite() { + assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + } - assert_parse!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); - assert_parse!(float(&larger[..]), Ok((";", expected32))); + assert_float_eq!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); + assert_float_eq!(float(&larger[..]), Ok((";", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); - assert_parse!(double(&larger[..]), Ok((";", expected64))); + assert_float_eq!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); + assert_float_eq!(double(&larger[..]), Ok((";", expected64))); } + // b"infinity" and case-insensitive floats don't work until recent + // rustc versions, so just test they work here. 
+ assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN))); + assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY))); + assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN))); + assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY))); + let remaining_exponent = "-1.234E-"; assert_parse!( recognize_float(remaining_exponent), @@ -2132,7 +2280,8 @@ mod tests { } fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - match recognize_float(i) { + match recognize_float::<_, ()>(i) { + Err(Err::Failure(_)) => Err(Err::Error(())), Err(e) => Err(e), Ok((i, s)) => { if s.is_empty() {