diff --git a/checker/src/features/regexp.rs b/checker/src/features/regexp.rs index 297d0334..9da6f685 100644 --- a/checker/src/features/regexp.rs +++ b/checker/src/features/regexp.rs @@ -21,36 +21,31 @@ pub struct RegExp { } impl RegExp { - pub fn new(pattern: &str, flag_options: Option<&str>) -> Result { - let source = if let Some(flag_options) = flag_options { - format!("/{pattern}/{flag_options}") - } else { - format!("/{pattern}/") - }; - + pub fn new(pattern: &str, flag_options: &str) -> Result { + let source = format!("/{pattern}/{flag_options}"); let mut flags = Flags::default(); let mut flags_unsupported = false; - if let Some(flag_options) = flag_options { - for flag in flag_options.chars() { - #[allow(clippy::match_same_arms)] - match flag { - 'd' => flags_unsupported = true, // indices for substring matches are not supported - 'g' => flags_unsupported = true, // stateful regex is not supported - 'i' => flags.icase = true, - 'm' => flags.multiline = true, - 's' => flags.dot_all = true, - 'u' => flags.unicode = true, - 'v' => flags.unicode_sets = true, - 'y' => flags_unsupported = true, // sticky search is not supported - _ => panic!("Unknown flag: {flag:?}"), - } + for flag in flag_options.chars() { + #[allow(clippy::match_same_arms)] + match flag { + 'd' => flags_unsupported = true, // indices for substring matches are not supported + 'g' => flags_unsupported = true, // stateful regex is not supported + 'i' => flags.icase = true, + 'm' => flags.multiline = true, + 's' => flags.dot_all = true, + 'u' => flags.unicode = true, + 'v' => flags.unicode_sets = true, + 'y' => flags_unsupported = true, // sticky search is not supported + // Should be caught by parser errors + _ => panic!("Unknown flag: {flag:?}"), } } let compiled_regex = { let mut ire = backends::try_parse(pattern.chars().map(u32::from), flags) .map_err(|err| err.text)?; + if !flags.no_opt { backends::optimize(&mut ire); } @@ -316,10 +311,7 @@ impl BinarySerializable for RegExp { fn 
deserialize>(iter: &mut I, source_id: SourceId) -> Self { let source = String::deserialize(iter, source_id); - let (pattern, flags) = source[1..].rsplit_once('/').unwrap(); - let flags = if flags.is_empty() { None } else { Some(flags) }; - Self::new(pattern, flags).unwrap() } } diff --git a/checker/src/types/store.rs b/checker/src/types/store.rs index 263ced34..c2636fbf 100644 --- a/checker/src/types/store.rs +++ b/checker/src/types/store.rs @@ -497,12 +497,11 @@ impl TypeStore { pub fn new_regexp( &mut self, pattern: &str, - flags: &Option, + flags: &str, _position: &Span, ) -> Result { - let regexp = RegExp::new(pattern, flags.as_ref().map(String::as_str))?; + let regexp = RegExp::new(pattern, flags)?; let ty = Type::SpecialObject(SpecialObject::RegularExpression(regexp)); - Ok(self.register_type(ty)) } diff --git a/parser/src/expressions/mod.rs b/parser/src/expressions/mod.rs index cf1b4170..fdc87145 100644 --- a/parser/src/expressions/mod.rs +++ b/parser/src/expressions/mod.rs @@ -63,7 +63,8 @@ pub enum Expression { BooleanLiteral(bool, Span), RegexLiteral { pattern: String, - flags: Option, + /// Can be `""` + flags: String, position: Span, }, ArrayLiteral(Vec, Span), @@ -283,10 +284,10 @@ impl Expression { } } else if reader.starts_with('/') { let (pattern, flags) = reader.parse_regex_literal()?; - let position = start.with_length(2 + pattern.len() + flags.map_or(0, str::len)); + let position = start.with_length(2 + pattern.len() + flags.len()); Expression::RegexLiteral { pattern: pattern.to_owned(), - flags: flags.map(ToOwned::to_owned), + flags: flags.to_owned(), position, } } else if reader.is_operator_advance("[") { @@ -1093,9 +1094,7 @@ impl Expression { buf.push('/'); buf.push_str(pattern); buf.push('/'); - if let Some(flags) = flags { - buf.push_str(flags); - } + buf.push_str(flags); } Self::BinaryOperation { lhs, operator, rhs, .. 
} => { lhs.to_string_using_precedence( diff --git a/parser/src/lexer.rs b/parser/src/lexer.rs index f27d58d7..cef5fd57 100644 --- a/parser/src/lexer.rs +++ b/parser/src/lexer.rs @@ -727,7 +727,8 @@ impl<'a> Lexer<'a> { Ok((number, length)) } - pub fn parse_regex_literal(&mut self) -> Result<(&'a str, Option<&'a str>), ParseError> { + /// Returns content and flags. Flags can be empty; any flag outside `dgimsuvy` is rejected with `ParseErrors::InvalidRegexFlag` + pub fn parse_regex_literal(&mut self) -> Result<(&'a str, &'a str), ParseError> { let mut escaped = false; let mut after_last_slash = false; let mut in_set = false; @@ -776,15 +777,22 @@ impl<'a> Lexer<'a> { } let regex = &current[1..regex_content]; + self.head += 2 + regex.len() as u32; let regex_end = regex_content + '/'.len_utf8(); + let first_non_char = chars .find_map(|(idx, chr)| (!chr.is_alphabetic()).then_some(idx)) .unwrap_or(current.len()); - let regex_flag = &current[regex_end..first_non_char]; - - self.head += (2 + regex.len() + regex_flag.len()) as u32; - Ok((regex, (!regex_flag.is_empty()).then_some(regex_flag))) + let regex_flags = &current[regex_end..first_non_char]; + + let invalid_flag = regex_flags.chars().any(|chr| !matches!(chr, 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y')); + if invalid_flag { + Err(ParseError::new(ParseErrors::InvalidRegexFlag, self.get_start().with_length(regex_flags.len()))) + } else { + self.head += regex_flags.len() as u32; + Ok((regex, regex_flags)) + } } pub fn parse_comment_literal(&mut self, is_multiline: bool) -> Result<&str, ParseError> {