Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Perf] Use heuristics to avoid allocations in Sanitizer::str_till_eol #2517

Closed
wants to merge 5 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 47 additions & 6 deletions console/network/environment/src/helpers/sanitizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,36 @@ impl Sanitizer {
///
/// Discard any leading newline.
fn str_till_eol(string: &str) -> ParserResult<&str> {
// Parses `string` up to the end of the current line and yields the text of that line as the
// output; the rest of the input is returned as the remainder.
//
// NOTE(review): this span is a diff rendering. The six lines of the `map(...)(string)`
// expression directly below appear to be the pre-change implementation (the page reports
// 6 deletions), and everything from the "heuristic approach" comment onwards is the
// proposed replacement. Both are kept verbatim here.
//
// Combinator version: repeatedly accept either the two-character sequence backslash+newline
// (`tag("\\\n")`) or a single character accepted by `Sanitizer::parse_safe_char`, stopping
// at `Self::eol`. The closure then drops one trailing `\n` byte from the recognized slice,
// if there is one.
map(
recognize(Self::till(alt((value((), tag("\\\n")), value((), Sanitizer::parse_safe_char))), Self::eol)),
|i| {
if i.as_bytes().last() == Some(&b'\n') { &i[0..i.len() - 1] } else { i }
},
)(string)
// A heuristic approach is applied here in order to avoid costly parsing operations in the
// most common scenarios: non-parsing methods are used to verify if the string has multiple
// lines and if there are any unsafe characters.
//
// `split_once('\n')` cuts at the FIRST newline only: `before` is the text preceding it and
// `after` is everything following it (the newline itself is in neither).
if let Some((before, after)) = string.split_once('\n') {
// A backslash immediately before the newline corresponds to the backslash+newline sequence
// that the combinator version accepts via `tag("\\\n")`, so the line continues.
let is_multiline = before.ends_with('\\');

if !is_multiline {
// Cheap per-character scan using `is_char_supported` (defined outside this view).
let contains_unsafe_chars = !before.chars().all(is_char_supported);

if !contains_unsafe_chars {
// Fast path: no parser is run. Remainder = text after the newline, output = the line
// without its newline.
Ok((after, before))
} else {
// `eoi` is used here instead of `eol`, since the earlier call to `split_once`
// already removed the newline
//
// NOTE(review): the parser is applied to `before` only, so any remainder it returns on
// success would be a slice of `before` and `after` would be lost. Presumably this call
// is expected to always fail because an unsupported character is present — confirm
// that `parse_safe_char` rejects exactly what `is_char_supported` rejects.
recognize(Self::till(value((), Sanitizer::parse_safe_char), Self::eoi))(before)
}
} else {
// Continued line: fall back to the full combinator parser over the whole input, with the
// same trailing-`\n` stripping as the combinator version at the top of this function.
map(
recognize(Self::till(
alt((value((), tag("\\\n")), value((), Sanitizer::parse_safe_char))),
Self::eol,
)),
|i| {
if i.as_bytes().last() == Some(&b'\n') { &i[0..i.len() - 1] } else { i }
},
)(string)
}
} else {
// No newline anywhere in `string`: succeeds with an empty output and hands the entire
// input back as the remainder, without inspecting any character.
//
// NOTE(review): the combinator version stops at `Self::eol`; if `eol` also accepts end of
// input (its definition is not visible here), that version would consume and validate the
// whole string instead, so the two would disagree on a final line with no trailing newline
// — TODO confirm.
Ok((string, ""))
}
}

/// Parse a string until `*/` is encountered.
Expand Down Expand Up @@ -256,6 +280,23 @@ mod tests {
("hello world", "// hel\u{4141}lo\n"),
Sanitizer::parse_comments("// hel\u{4141}lo\nhello world").unwrap()
);
assert_eq!(
("hello world", "/* multi\n line comment\n*/\n"),
Sanitizer::parse_comments("/* multi\n line comment\n*/\nhello world").unwrap()
);
assert_eq!(
("hello world", "// multiple\n// line\n// comments\n"),
Sanitizer::parse_comments("// multiple\n// line\n// comments\nhello world").unwrap()
);
assert_eq!(
("hello world", "/* multi\n line comment\n*/\n/* and\n another\n one\n*/\n"),
Sanitizer::parse_comments("/* multi\n line comment\n*/\n/* and\n another\n one\n*/\nhello world")
.unwrap()
);
assert_eq!(
("hello world", "/* multi\n line comment\n*/\n// two single\n// line comments\n/* and\n another\n multi-liner\n*/\n"),
Sanitizer::parse_comments("/* multi\n line comment\n*/\n// two single\n// line comments\n/* and\n another\n multi-liner\n*/\nhello world").unwrap()
);
assert!(Sanitizer::parse_comments("// hel\x08lo\nhello world").is_err());
assert!(Sanitizer::parse_comments("// hel\u{2066}lo\nhello world").is_err());
assert!(Sanitizer::parse_comments("/* hel\x7flo */\nhello world").is_err());
Expand Down