From 769a3d7312c08464b2f05738d0c9d8ce96e23bfa Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 29 Jul 2024 13:17:07 -0500 Subject: [PATCH] refactor(parser): Clarify results in profiler Be removing the extra closure, it somehow made things easier to read (despite using closures in so many other places). This is also closer to where I want the parser to be long term. --- crates/toml_edit/src/parser/array.rs | 64 ++++++--------- crates/toml_edit/src/parser/document.rs | 2 +- crates/toml_edit/src/parser/inline_table.rs | 88 ++++++++++----------- crates/toml_edit/src/parser/mod.rs | 57 ++++++++----- crates/toml_edit/src/parser/value.rs | 18 ++--- 5 files changed, 113 insertions(+), 116 deletions(-) diff --git a/crates/toml_edit/src/parser/array.rs b/crates/toml_edit/src/parser/array.rs index 561c4235..2f1649fb 100644 --- a/crates/toml_edit/src/parser/array.rs +++ b/crates/toml_edit/src/parser/array.rs @@ -13,17 +13,18 @@ use crate::parser::prelude::*; // ;; Array // array = array-open array-values array-close -pub(crate) fn array<'i>(check: RecursionCheck) -> impl Parser, Array, ContextError> { +pub(crate) fn array<'i>(input: &mut Input<'i>) -> PResult { trace("array", move |input: &mut Input<'i>| { delimited( ARRAY_OPEN, - cut_err(array_values(check)), + cut_err(array_values), cut_err(ARRAY_CLOSE) .context(StrContext::Label("array")) .context(StrContext::Expected(StrContextValue::CharLiteral(']'))), ) .parse_next(input) }) + .parse_next(input) } // note: we're omitting ws and newlines here, because @@ -38,46 +39,33 @@ const ARRAY_SEP: u8 = b','; // note: this rule is modified // array-values = [ ( array-value array-sep array-values ) / // array-value / ws-comment-newline ] -pub(crate) fn array_values<'i>( - check: RecursionCheck, -) -> impl Parser, Array, ContextError> { - move |input: &mut Input<'i>| { - let check = check.recursing(input)?; - ( - opt(( - separated(1.., array_value(check), ARRAY_SEP), - opt(ARRAY_SEP), - ) - .map(|(v, trailing): (Vec, Option)| { +pub(crate) fn array_values(input: &mut Input<'_>) -> PResult { + ( + opt( + (separated(1.., array_value, ARRAY_SEP), opt(ARRAY_SEP)).map( + |(v, trailing): (Vec, Option)| { ( Array::with_vec(v.into_iter().map(Item::Value).collect()), trailing.is_some(), ) - })), - ws_comment_newline.span(), - ) - .try_map::<_, _, std::str::Utf8Error>(|(array, trailing)| { - let (mut array, comma) = array.unwrap_or_default(); - array.set_trailing_comma(comma); - array.set_trailing(RawString::with_span(trailing)); - Ok(array) - }) - .parse_next(input) - } + }, + ), + ), + ws_comment_newline.span(), + ) + .try_map::<_, _, std::str::Utf8Error>(|(array, trailing)| { + let (mut array, comma) = array.unwrap_or_default(); + array.set_trailing_comma(comma); + array.set_trailing(RawString::with_span(trailing)); + Ok(array) + }) + .parse_next(input) } -pub(crate) fn array_value<'i>( - check: RecursionCheck, -) -> impl Parser, Value, ContextError> { - move |input: &mut Input<'i>| { - ( - ws_comment_newline.span(), - value(check), - ws_comment_newline.span(), - ) - .map(|(ws1, v, ws2)| v.decorated(RawString::with_span(ws1), RawString::with_span(ws2))) - .parse_next(input) - } +pub(crate) fn array_value(input: &mut Input<'_>) -> PResult { + (ws_comment_newline.span(), value, ws_comment_newline.span()) + .map(|(ws1, v, ws2)| v.decorated(RawString::with_span(ws1), RawString::with_span(ws2))) + .parse_next(input) } #[cfg(test)] @@ -125,7 +113,7 @@ mod test { ]; for input in inputs { dbg!(input); - let mut parsed = array(Default::default()).parse(new_input(input)); + let mut parsed = array.parse(new_input(input)); if let Ok(parsed) = &mut parsed { parsed.despan(input); } @@ -138,7 +126,7 @@ mod test { let invalid_inputs = [r#"["#, r#"[,]"#, r#"[,2]"#, r#"[1e165,,]"#]; for input in invalid_inputs { dbg!(input); - let mut parsed = array(Default::default()).parse(new_input(input)); + let mut parsed = array.parse(new_input(input)); if let Ok(parsed) = &mut parsed { parsed.despan(input); } diff --git a/crates/toml_edit/src/parser/document.rs b/crates/toml_edit/src/parser/document.rs index 92a49fce..bff40bf6 100644 --- a/crates/toml_edit/src/parser/document.rs +++ b/crates/toml_edit/src/parser/document.rs @@ -109,7 +109,7 @@ pub(crate) fn parse_keyval(input: &mut Input<'_>) -> PResult<(Vec, TableKey .context(StrContext::Expected(StrContextValue::CharLiteral('='))), ( ws.span(), - value(RecursionCheck::default()), + value, line_trailing .context(StrContext::Expected(StrContextValue::CharLiteral('\n'))) .context(StrContext::Expected(StrContextValue::CharLiteral('#'))), diff --git a/crates/toml_edit/src/parser/inline_table.rs b/crates/toml_edit/src/parser/inline_table.rs index ac6ad76b..bba5100d 100644 --- a/crates/toml_edit/src/parser/inline_table.rs +++ b/crates/toml_edit/src/parser/inline_table.rs @@ -18,19 +18,18 @@ use indexmap::map::Entry; // ;; Inline Table // inline-table = inline-table-open inline-table-keyvals inline-table-close -pub(crate) fn inline_table<'i>( - check: RecursionCheck, -) -> impl Parser, InlineTable, ContextError> { +pub(crate) fn inline_table<'i>(input: &mut Input<'i>) -> PResult { trace("inline-table", move |input: &mut Input<'i>| { delimited( INLINE_TABLE_OPEN, - cut_err(inline_table_keyvals(check).try_map(|(kv, p)| table_from_pairs(kv, p))), + cut_err(inline_table_keyvals.try_map(|(kv, p)| table_from_pairs(kv, p))), cut_err(INLINE_TABLE_CLOSE) .context(StrContext::Label("inline table")) .context(StrContext::Expected(StrContextValue::CharLiteral('}'))), ) .parse_next(input) }) + .parse_next(input) } fn table_from_pairs( @@ -118,50 +117,43 @@ pub(crate) const KEYVAL_SEP: u8 = b'='; // ( key keyval-sep val inline-table-sep inline-table-keyvals-non-empty ) / // ( key keyval-sep val ) -fn inline_table_keyvals<'i>( - check: RecursionCheck, -) -> impl Parser, (Vec<(Vec, TableKeyValue)>, RawString), ContextError> { - move |input: &mut Input<'i>| { - let check = check.recursing(input)?; - ( - separated(0.., keyval(check), INLINE_TABLE_SEP), - ws.span().map(RawString::with_span), - ) - .parse_next(input) - } +fn inline_table_keyvals( + input: &mut Input<'_>, +) -> PResult<(Vec<(Vec, TableKeyValue)>, RawString)> { + ( + separated(0.., keyval, INLINE_TABLE_SEP), + ws.span().map(RawString::with_span), + ) + .parse_next(input) } -fn keyval<'i>( - check: RecursionCheck, -) -> impl Parser, (Vec, TableKeyValue), ContextError> { - move |input: &mut Input<'i>| { - ( - key, - cut_err(( - one_of(KEYVAL_SEP) - .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) - .context(StrContext::Expected(StrContextValue::CharLiteral('='))), - (ws.span(), value(check), ws.span()), - )), - ) - .map(|(key, (_, v))| { - let mut path = key; - let key = path.pop().expect("grammar ensures at least 1"); - - let (pre, v, suf) = v; - let pre = RawString::with_span(pre); - let suf = RawString::with_span(suf); - let v = v.decorated(pre, suf); - ( - path, - TableKeyValue { - key, - value: Item::Value(v), - }, - ) - }) - .parse_next(input) - } +fn keyval(input: &mut Input<'_>) -> PResult<(Vec, TableKeyValue)> { + ( + key, + cut_err(( + one_of(KEYVAL_SEP) + .context(StrContext::Expected(StrContextValue::CharLiteral('.'))) + .context(StrContext::Expected(StrContextValue::CharLiteral('='))), + (ws.span(), value, ws.span()), + )), + ) + .map(|(key, (_, v))| { + let mut path = key; + let key = path.pop().expect("grammar ensures at least 1"); + + let (pre, v, suf) = v; + let pre = RawString::with_span(pre); + let suf = RawString::with_span(suf); + let v = v.decorated(pre, suf); + ( + path, + TableKeyValue { + key, + value: Item::Value(v), + }, + ) + }) + .parse_next(input) } #[cfg(test)] @@ -181,7 +173,7 @@ mod test { ]; for input in inputs { dbg!(input); - let mut parsed = inline_table(Default::default()).parse(new_input(input)); + let mut parsed = inline_table.parse(new_input(input)); if let Ok(parsed) = &mut parsed { parsed.despan(input); } @@ -194,7 +186,7 @@ mod test { let invalid_inputs = [r#"{a = 1e165"#, r#"{ hello = "world", a = 2, hello = 1}"#]; for input in invalid_inputs { dbg!(input); - let mut parsed = inline_table(Default::default()).parse(new_input(input)); + let mut parsed = inline_table.parse(new_input(input)); if let Ok(parsed) = &mut parsed { parsed.despan(input); } diff --git a/crates/toml_edit/src/parser/mod.rs b/crates/toml_edit/src/parser/mod.rs index b2370657..1d5dbcf1 100644 --- a/crates/toml_edit/src/parser/mod.rs +++ b/crates/toml_edit/src/parser/mod.rs @@ -23,7 +23,7 @@ pub(crate) fn parse_document>(raw: S) -> Result Result { use prelude::*; let b = new_input(raw); - let result = key::simple_key.parse(b); + let result = key::simple_key.parse(b.clone()); match result { Ok((raw, key)) => { Ok(crate::Key::new(key).with_repr_unchecked(crate::Repr::new_unchecked(raw))) @@ -49,7 +49,7 @@ pub(crate) fn parse_key_path(raw: &str) -> Result, TomlError> { use prelude::*; let b = new_input(raw); - let result = key::key.parse(b); + let result = key::key.parse(b.clone()); match result { Ok(mut keys) => { for key in &mut keys { @@ -65,7 +65,7 @@ pub(crate) fn parse_value(raw: &str) -> Result { use prelude::*; let b = new_input(raw); - let parsed = value::value(RecursionCheck::default()).parse(b); + let parsed = value::value.parse(b.clone()); match parsed { Ok(mut value) => { // Only take the repr and not decor, as its probably not intended @@ -86,13 +86,16 @@ pub(crate) mod prelude { pub(crate) use winnow::PResult; pub(crate) use winnow::Parser; - pub(crate) type Input<'b> = winnow::Located<&'b winnow::BStr>; + pub(crate) type Input<'b> = winnow::Stateful, RecursionCheck>; pub(crate) fn new_input(s: &str) -> Input<'_> { - winnow::Located::new(winnow::BStr::new(s)) + winnow::Stateful { + input: winnow::Located::new(winnow::BStr::new(s)), + state: Default::default(), + } } - #[derive(Copy, Clone, Debug, Default)] + #[derive(Clone, Debug, Default, PartialEq, Eq)] pub(crate) struct RecursionCheck { #[cfg(not(feature = "unbounded"))] current: usize, @@ -111,24 +114,40 @@ pub(crate) mod prelude { Ok(()) } - #[allow(unused_mut)] - pub(crate) fn recursing( - mut self, - _input: &mut Input<'_>, - ) -> Result> { + fn enter(&mut self) -> Result<(), super::error::CustomError> { #[cfg(not(feature = "unbounded"))] { self.current += 1; if LIMIT <= self.current { - return Err(winnow::error::ErrMode::from_external_error( - _input, - winnow::error::ErrorKind::Eof, - super::error::CustomError::RecursionLimitExceeded, - ) - .cut()); + return Err(super::error::CustomError::RecursionLimitExceeded); } } - Ok(self) + Ok(()) + } + + fn exit(&mut self) { + #[cfg(not(feature = "unbounded"))] + { + self.current -= 1; + } + } + } + + pub(crate) fn check_recursion<'b, O>( + mut parser: impl Parser, O, ContextError>, + ) -> impl Parser, O, ContextError> { + move |input: &mut Input<'b>| { + input.state.enter().map_err(|err| { + winnow::error::ErrMode::from_external_error( + input, + winnow::error::ErrorKind::Eof, + err, + ) + .cut() + })?; + let result = parser.parse_next(input); + input.state.exit(); + result } } } diff --git a/crates/toml_edit/src/parser/value.rs b/crates/toml_edit/src/parser/value.rs index 56d8e44b..93f2eef4 100644 --- a/crates/toml_edit/src/parser/value.rs +++ b/crates/toml_edit/src/parser/value.rs @@ -14,17 +14,16 @@ use crate::RawString; use crate::Value; // val = string / boolean / array / inline-table / date-time / float / integer -pub(crate) fn value<'i>(check: RecursionCheck) -> impl Parser, Value, ContextError> { - move |input: &mut Input<'i>| { - dispatch!{peek(any); +pub(crate) fn value(input: &mut Input<'_>) -> PResult { + dispatch! {peek(any); crate::parser::strings::QUOTATION_MARK | crate::parser::strings::APOSTROPHE => string.map(|s| { Value::String(Formatted::new( s.into_owned() )) }), - crate::parser::array::ARRAY_OPEN => array(check).map(Value::Array), - crate::parser::inline_table::INLINE_TABLE_OPEN => inline_table(check).map(Value::InlineTable), + crate::parser::array::ARRAY_OPEN => check_recursion(array).map(Value::Array), + crate::parser::inline_table::INLINE_TABLE_OPEN => check_recursion(inline_table).map(Value::InlineTable), // Date/number starts b'+' | b'-' | b'0'..=b'9' => { // Uncommon enough not to be worth optimizing at this time @@ -80,10 +79,9 @@ pub(crate) fn value<'i>(check: RecursionCheck) -> impl Parser, Value, .context(StrContext::Expected(StrContextValue::CharLiteral('\''))) }, } - .with_span() - .map(|(value, span)| apply_raw(value, span)) - .parse_next(input) - } + .with_span() + .map(|(value, span)| apply_raw(value, span)) + .parse_next(input) } fn apply_raw(mut val: Value, span: std::ops::Range) -> Value { @@ -146,7 +144,7 @@ trimmed in raw strings. ]; for input in inputs { dbg!(input); - let mut parsed = value(Default::default()).parse(new_input(input)); + let mut parsed = value.parse(new_input(input)); if let Ok(parsed) = &mut parsed { parsed.despan(input); }