From 70a0b408384be789e32b671ef5e2d5cd0ad9aea8 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Fri, 10 Sep 2021 18:59:57 -0500 Subject: [PATCH] Fixes #1384, by ensuring specials are properly parsed. --- src/number/complete.rs | 191 +++++++++++++++++++++++++++++++++------- src/number/mod.rs | 10 +++ src/number/streaming.rs | 187 +++++++++++++++++++++++++++++++++------ 3 files changed, 326 insertions(+), 62 deletions(-) diff --git a/src/number/complete.rs b/src/number/complete.rs index c5553a40c..dcaf77215 100644 --- a/src/number/complete.rs +++ b/src/number/complete.rs @@ -1,5 +1,6 @@ //! Parsers recognizing numbers, complete input version +use core::{f32, f64}; use crate::branch::alt; use crate::bytes::complete::tag; use crate::character::complete::{char, digit1, sign}; @@ -1426,6 +1427,8 @@ where )(input) } +/// + /// Recognizes a floating point number in text format and returns the integer, fraction and exponent parts of the input data /// /// *Complete version*: Can parse until the end of input. @@ -1442,7 +1445,6 @@ where T: AsBytes, { let (i, sign) = sign(input.clone())?; - //let (i, zeroes) = take_while(|c: ::Item| c.as_char() == '0')(i)?; let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) { Some(index) => i.take_split(index), @@ -1517,6 +1519,85 @@ where Ok((i, (sign, integer, fraction, exp))) } +macro_rules! float_finite { + ($input:ident, $t:ty) => {{ + let (i, (sign, integer, fraction, exponent)) = recognize_float_parts($input)?; + + let mut float: $t = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + if !sign { + float = -float; + } + + Ok((i, float)) + }}; +} + +macro_rules! 
float_nonfinite { + ($input:ident, $t:ident) => {{ + let (input, sign) = sign($input.clone())?; + let b = input.as_bytes(); + let (mut float, count) = if b.len() >= 3 { + if crate::number::case_insensitive_cmp(b, b"nan") { + ($t::NAN, 3) + } else if b.len() >= 8 && crate::number::case_insensitive_cmp(b, b"infinity") { + ($t::INFINITY, 8) + } else if crate::number::case_insensitive_cmp(b, b"inf") { + ($t::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + } + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + }; + if !sign { + float = -float; + } + + Ok((input.slice(count..), float)) + }}; +} + +/// Recognizes floating point number in text format and returns a f32. +/// +/// *Complete version*: Can parse until the end of input. This only handles +/// finite (non-special) floats. +/// +fn float_finite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f32) +} + +/// Recognizes floating point number in text format and returns a f32. +/// This only handles non-finite (special) values. +fn float_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f32) +} + /// Recognizes floating point number in text format and returns a f32. /// /// *Complete version*: Can parse until the end of input. 
@@ -1546,30 +1627,56 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + let res = float_finite::(input.clone()); + res.or_else(|_| float_nonfinite::(input)) +} - let mut float: f32 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); - if !sign { - float = -float; - } +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Complete version*: Can parse until the end of input. This only handles +/// finite (non-special floats). +fn double_finite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f64) +} - Ok((i, float)) +/// Recognizes floating point number in text format and returns a f64. +/// This only handles non-finite (special) values. +fn double_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake, + ::Item: AsChar + Copy, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f64) } -/// Recognizes floating point number in text format and returns a f32. +/// Recognizes floating point number in text format and returns a f64. /// /// *Complete version*: Can parse until the end of input. 
/// ```rust /// # use nom::{Err, error::ErrorKind, Needed}; /// # use nom::Needed::Size; -/// use nom::number::complete::float; +/// use nom::number::complete::double; /// /// let parser = |s| { -/// float(s) +/// double(s) /// }; /// /// assert_eq!(parser("11e-1"), Ok(("", 1.1))); @@ -1589,18 +1696,8 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; - - let mut float: f64 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); - if !sign { - float = -float; - } - - Ok((i, float)) + let res = double_finite::(input.clone()); + res.or_else(|_| double_nonfinite::(input)) } #[cfg(test)] @@ -1618,6 +1715,23 @@ mod tests { }; ); + // Need more complex logic, since NaN != NaN. + macro_rules! assert_float_eq { + ($left: expr, $right: expr) => { + let left: $crate::IResult<_, _, (_, ErrorKind)> = $left; + let right: $crate::IResult<_, _, (_, ErrorKind)> = $right; + if let Ok((_, float)) = right { + if float.is_nan() { + assert!(left.unwrap().1.is_nan()); + } else { + assert_eq!(left, right); + } + }else { + assert_eq!(left, right); + } + }; + } + #[test] fn i8_tests() { assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0))); @@ -1942,6 +2056,8 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "NaN", + "inf", ]; for test in test_cases.drain(..) 
{ @@ -1951,15 +2067,24 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); let larger = format!("{}", test); - assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + if expected32.is_finite() { + assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + } - assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32))); - assert_parse!(float(&larger[..]), Ok(("", expected32))); + assert_float_eq!(float(larger.as_bytes()), Ok((&b""[..], expected32))); + assert_float_eq!(float(&larger[..]), Ok(("", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64))); - assert_parse!(double(&larger[..]), Ok(("", expected64))); + assert_float_eq!(double(larger.as_bytes()), Ok((&b""[..], expected64))); + assert_float_eq!(double(&larger[..]), Ok(("", expected64))); } + // b"infinity" and case-insensitive floats don't work until recent + // rustc versions, so just test they work here. + assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN))); + assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY))); + assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN))); + assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY))); + let remaining_exponent = "-1.234E-"; assert_parse!( recognize_float(remaining_exponent), @@ -2051,8 +2176,8 @@ mod tests { } fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - match recognize_float(i) { - Err(e) => Err(e), + match recognize_float::<_, ()>(i) { + Err(_) => Err(Err::Error(())), Ok((i, s)) => { if s.is_empty() { return Err(Err::Error(())); diff --git a/src/number/mod.rs b/src/number/mod.rs index 58c3d51b0..509d50ffd 100644 --- a/src/number/mod.rs +++ b/src/number/mod.rs @@ -13,3 +13,13 @@ pub enum Endianness { /// Will match the host's endianness Native, } + +/// Case-insensitive comparison of the bytes of `x` and `y` over their common length. Only works if `y` is only ASCII letters. 
+#[inline] +fn case_insensitive_cmp(x: &[u8], y: &[u8]) -> bool { + let d = (x.iter().zip(y.iter())).fold(0, |d, (xi, yi)| d | xi ^ yi); + // ASCII upper- and lowercase letters differ only in bit 0x20 (e.g. + // 'a' ^ 'A' == 0x20), so as long as every `yi` is an ASCII letter, a + // case-insensitive match means every `xi ^ yi` is either 0 or 0x20. + d == 0 || d == 0x20 +} diff --git a/src/number/streaming.rs b/src/number/streaming.rs index 3ca445fa8..5c252b66c 100644 --- a/src/number/streaming.rs +++ b/src/number/streaming.rs @@ -1,5 +1,6 @@ //! Parsers recognizing numbers, streaming version +use core::{f32, f64}; use crate::branch::alt; use crate::bytes::streaming::tag; use crate::character::streaming::{char, digit1, sign}; @@ -1411,7 +1412,6 @@ where T: AsBytes, { let (i, sign) = sign(input.clone())?; - //let (i, zeroes) = take_while(|c: ::Item| c.as_char() == '0')(i)?; let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0' as u8) { Some(index) => i.take_split(index), @@ -1490,6 +1490,84 @@ where Ok((i, (sign, integer, fraction, exp))) } +macro_rules! float_finite { + ($input:ident, $t:ident) => {{ + let (i, (sign, integer, fraction, exponent)) = recognize_float_parts($input)?; + + let mut float: $t = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + if !sign { + float = -float; + } + + Ok((i, float)) + }}; +} + +macro_rules! 
float_nonfinite { + ($input:ident, $t:ident) => {{ + let (input, sign) = sign($input.clone())?; + let b = input.as_bytes(); + let (mut float, count) = if b.len() >= 3 { + if crate::number::case_insensitive_cmp(b, b"nan") { + ($t::NAN, 3) + } else if b.len() >= 8 && crate::number::case_insensitive_cmp(b, b"infinity") { + ($t::INFINITY, 8) + } else if crate::number::case_insensitive_cmp(b, b"inf") { + ($t::INFINITY, 3) + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + } + } else { + return Err(Err::Error(E::from_error_kind($input, ErrorKind::Float))); + }; + if !sign { + float = -float; + } + + Ok((input.slice(count..), float)) + }}; +} + +/// Recognizes floating point number in text format and returns a f32. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// This only handles finite (non-special floats). +fn float_finite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f32) +} + +/// Recognizes floating point number in text format and returns a f32. +/// This only handles non-finite (special) values. +fn float_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f32) +} + /// Recognizes floating point number in text format and returns a f32. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. 
@@ -1520,21 +1598,50 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; - - let mut float: f32 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); - if !sign { - float = -float; + match float_finite::(input.clone()) { + Ok((i, float)) => Ok((i, float)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => float_nonfinite::(input), } +} - Ok((i, float)) +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// This only handles finite (non-special floats). +fn double_finite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_finite!(input, f64) } -/// Recognizes floating point number in text format and returns a f32. +/// Recognizes floating point number in text format and returns a f64. +/// This only handles non-finite (special) values. +fn double_nonfinite>(input: T) -> IResult +where + T: Slice> + Slice>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo, + ::Item: AsChar, + ::IterElem: Clone, + T: InputTakeAtPosition, + ::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + float_nonfinite!(input, f64) +} + +/// Recognizes floating point number in text format and returns a f64. /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. 
/// @@ -1564,18 +1671,11 @@ where T: AsBytes, T: for<'a> Compare<&'a [u8]>, { - let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; - - let mut float: f64 = minimal_lexical::parse_float( - integer.as_bytes().iter(), - fraction.as_bytes().iter(), - exponent, - ); - if !sign { - float = -float; + match double_finite::(input.clone()) { + Ok((i, double)) => Ok((i, double)), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + _ => double_nonfinite::(input), } - - Ok((i, float)) } #[cfg(test)] @@ -1593,6 +1693,23 @@ mod tests { }; ); + // Need more complex logic, since NaN != NaN. + macro_rules! assert_float_eq { + ($left: expr, $right: expr) => { + let left: $crate::IResult<_, _, (_, ErrorKind)> = $left; + let right: $crate::IResult<_, _, (_, ErrorKind)> = $right; + if let Ok((_, float)) = right { + if float.is_nan() { + assert!(left.unwrap().1.is_nan()); + } else { + assert_eq!(left, right); + } + }else { + assert_eq!(left, right); + } + }; + } + #[test] fn i8_tests() { assert_parse!(be_i8(&[0x00][..]), Ok((&b""[..], 0))); @@ -2023,6 +2140,8 @@ mod tests { "12.34", "-1.234E-12", "-1.234e-12", + "NaN", + "inf", ]; for test in test_cases.drain(..) 
{ @@ -2032,15 +2151,24 @@ mod tests { println!("now parsing: {} -> {}", test, expected32); let larger = format!("{};", test); - assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + if expected32.is_finite() { + assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + } - assert_parse!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); - assert_parse!(float(&larger[..]), Ok((";", expected32))); + assert_float_eq!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); + assert_float_eq!(float(&larger[..]), Ok((";", expected32))); - assert_parse!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); - assert_parse!(double(&larger[..]), Ok((";", expected64))); + assert_float_eq!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); + assert_float_eq!(double(&larger[..]), Ok((";", expected64))); } + // b"infinity" and case-insensitive floats don't work until recent + // rustc versions, so just test they work here. + assert_float_eq!(float("nan".as_bytes()), Ok((&b""[..], f32::NAN))); + assert_float_eq!(float("infinity".as_bytes()), Ok((&b""[..], f32::INFINITY))); + assert_float_eq!(double("nan".as_bytes()), Ok((&b""[..], f64::NAN))); + assert_float_eq!(double("infinity".as_bytes()), Ok((&b""[..], f64::INFINITY))); + let remaining_exponent = "-1.234E-"; assert_parse!( recognize_float(remaining_exponent), @@ -2132,7 +2260,8 @@ mod tests { } fn parse_f64(i: &str) -> IResult<&str, f64, ()> { - match recognize_float(i) { + match recognize_float::<_, ()>(i) { + Err(Err::Failure(_)) => Err(Err::Error(())), Err(e) => Err(e), Ok((i, s)) => { if s.is_empty() {