diff --git a/lrlex/src/lib/ctbuilder.rs b/lrlex/src/lib/ctbuilder.rs
index db716e638..2281994cd 100644
--- a/lrlex/src/lib/ctbuilder.rs
+++ b/lrlex/src/lib/ctbuilder.rs
@@ -25,7 +25,8 @@ use regex::Regex;
 use serde::Serialize;
 
 use crate::{
-    DefaultLexerTypes, LRNonStreamingLexerDef, LexerDef, RegexOptions, DEFAULT_REGEX_OPTIONS,
+    DefaultLexerTypes, LRNonStreamingLexerDef, LexBuildError, LexerDef, RegexOptions,
+    DEFAULT_REGEX_OPTIONS,
 };
 
 const RUST_FILE_EXT: &str = "rs";
@@ -79,6 +80,33 @@ pub enum RustEdition {
     Rust2021,
 }
 
+pub trait LexErrorHandler<LexerTypesT>
+where
+    LexerTypesT: LexerTypes,
+    usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
+{
+    /// Will be called with the path to the `.l` file
+    /// before `fn on_*` or `fn missing_*`.
+    fn lexer_path(&mut self, filename: &Path);
+    /// Will be called with the `.l` file sources as `src`
+    /// before any call to `fn on_*`.
+    fn lexer_src(&mut self, src: &str);
+    /// Lends `self` a slice containing `LexBuildError`s.
+    fn on_lex_build_error(&mut self, errors: &[LexBuildError]);
+    /// Lends `self` a set of `String`s denoting tokens
+    /// present in the parser but missing from the lexer.
+    fn missing_in_lexer(&mut self, missing: &HashSet<String>);
+    /// Lends `self` a set of `String`s denoting tokens
+    /// present in the lexer but missing from the parser.
+    fn missing_in_parser(&mut self, missing: &HashSet<String>);
+    /// This function must return an `Err` variant if any of the following are true:
+    ///
+    /// - `missing_in_lexer` has been called.
+    /// - `missing_in_parser` has been called.
+    /// - `on_lex_build_error` has been called.
+    fn results(&self) -> Result<(), Box<dyn Error>>;
+}
+
 /// A `CTLexerBuilder` allows one to specify the criteria for building a statically generated
 /// lexer.
 pub struct CTLexerBuilder<'a, LexerTypesT: LexerTypes = DefaultLexerTypes>
@@ -97,6 +125,7 @@ where
     allow_missing_terms_in_lexer: bool,
     allow_missing_tokens_in_parser: bool,
     regex_options: RegexOptions,
+    error_handler: Option<&'a mut dyn LexErrorHandler<LexerTypesT>>,
 }
 
 impl<'a> CTLexerBuilder<'a, DefaultLexerTypes> {
@@ -141,9 +170,18 @@ where
             allow_missing_terms_in_lexer: false,
             allow_missing_tokens_in_parser: true,
             regex_options: DEFAULT_REGEX_OPTIONS,
+            error_handler: None,
         }
     }
 
+    pub fn error_handler(
+        mut self,
+        error_handler: &'a mut dyn LexErrorHandler<LexerTypesT>,
+    ) -> Self {
+        self.error_handler = Some(error_handler);
+        self
+    }
+
     /// An optional convenience function to make it easier to create an (lrlex) lexer and (lrpar)
     /// parser in one shot. The closure passed to this function will be called during
     /// [CTLexerBuilder::build]: it will be passed an lrpar `CTParserBuilder` instance upon which
@@ -327,29 +365,45 @@ where
         }
         let lex_src = read_to_string(lexerp)?;
-        let line_cache = NewlineCache::from_str(&lex_src).unwrap();
+        if let Some(error_handler) = self.error_handler.as_mut() {
+            error_handler.lexer_path(lexerp.as_path());
+            error_handler.lexer_src(lex_src.as_str());
+        }
         let mut lexerdef: Box<dyn LexerDef<LexerTypesT>> = match self.lexerkind {
-            LexerKind::LRNonStreamingLexer => Box::new(
-                LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
+            LexerKind::LRNonStreamingLexer => {
+                let lexerdef = LRNonStreamingLexerDef::<LexerTypesT>::new_with_options(
                     &lex_src,
                     self.regex_options.clone(),
-                )
-                .map_err(|errs| {
-                    errs.iter()
-                        .map(|e| {
-                            if let Some((line, column)) = line_cache.byte_to_line_num_and_col_num(
-                                &lex_src,
-                                e.spans().first().unwrap().start(),
-                            ) {
-                                format!("{} at line {line} column {column}", e)
-                            } else {
-                                format!("{}", e)
-                            }
-                        })
-                        .collect::<Vec<_>>()
-                        .join("\n")
-                })?,
-            ),
+                );
+
+                Box::new(if let Some(error_handler) = self.error_handler.as_mut() {
+                    lexerdef.map_err(|errs| {
+                        error_handler.on_lex_build_error(errs.as_slice());
+                        error_handler
+                            .results()
+                            .expect_err("Expected an error from error_handler.")
+                    })?
+                } else {
+                    let line_cache = NewlineCache::from_str(&lex_src).unwrap();
+                    lexerdef.map_err(|errs| {
+                        errs.iter()
+                            .map(|e| {
+                                if let Some((line, column)) = line_cache
+                                    .byte_to_line_num_and_col_num(
+                                        &lex_src,
+                                        e.spans().first().unwrap().start(),
+                                    )
+                                {
+                                    format!("{} at line {line} column {column}", e)
+                                } else {
+                                    format!("{}", e)
+                                }
+                            })
+                            .collect::<Vec<_>>()
+                            .join("\n")
+                    })?
+                })
+            }
         };
         let (missing_from_lexer, missing_from_parser) = match self.rule_ids_map {
             Some(ref rim) => {
@@ -370,25 +424,39 @@ where
         let mut has_unallowed_missing = false;
         if !self.allow_missing_terms_in_lexer {
             if let Some(ref mfl) = missing_from_lexer {
-                eprintln!("Error: the following tokens are used in the grammar but are not defined in the lexer:");
-                for n in mfl {
-                    eprintln!("    {}", n);
+                if let Some(error_handler) = self.error_handler.as_deref_mut() {
+                    error_handler.missing_in_lexer(mfl);
+                } else {
+                    eprintln!("Error: the following tokens are used in the grammar but are not defined in the lexer:");
+                    for n in mfl {
+                        eprintln!("    {}", n);
+                    }
                 }
                 has_unallowed_missing = true;
             }
         }
         if !self.allow_missing_tokens_in_parser {
             if let Some(ref mfp) = missing_from_parser {
-                eprintln!("Error: the following tokens are defined in the lexer but not used in the grammar:");
-                for n in mfp {
-                    eprintln!("    {}", n);
+                if let Some(error_handler) = self.error_handler.as_deref_mut() {
+                    error_handler.missing_in_parser(mfp);
+                } else {
+                    eprintln!("Error: the following tokens are defined in the lexer but not used in the grammar:");
+                    for n in mfp {
+                        eprintln!("    {}", n);
+                    }
                 }
                 has_unallowed_missing = true;
             }
         }
         if has_unallowed_missing {
             fs::remove_file(outp).ok();
-            panic!();
+            if let Some(error_handler) = self.error_handler.as_deref_mut() {
+                return Err(error_handler
+                    .results()
+                    .expect_err("Expected missing tokens in lexer or parser"));
+            } else {
+                panic!();
+            }
         }
 
         let mod_name = match self.mod_name {
diff --git a/lrlex/src/lib/mod.rs b/lrlex/src/lib/mod.rs
index 190facaa3..26ae0fac0 100644
--- a/lrlex/src/lib/mod.rs
+++ b/lrlex/src/lib/mod.rs
@@ -21,7 +21,9 @@ mod lexer;
 mod parser;
 
 pub use crate::{
-    ctbuilder::{ct_token_map, CTLexer, CTLexerBuilder, LexerKind, RustEdition, Visibility},
+    ctbuilder::{
+        ct_token_map, CTLexer, CTLexerBuilder, LexErrorHandler, LexerKind, RustEdition, Visibility,
+    },
     defaults::{DefaultLexeme, DefaultLexerTypes},
     lexer::{
         LRNonStreamingLexer, LRNonStreamingLexerDef, LexerDef, RegexOptions, Rule,
diff --git a/lrpar/src/lib/ctbuilder.rs b/lrpar/src/lib/ctbuilder.rs
index 410c76d3e..6b2eae6e3 100644
--- a/lrpar/src/lib/ctbuilder.rs
+++ b/lrpar/src/lib/ctbuilder.rs
@@ -3,6 +3,7 @@
 use std::{
     any::type_name,
     borrow::Cow,
+    cell::RefCell,
     collections::{HashMap, HashSet},
     convert::AsRef,
     env::{current_dir, var},
@@ -13,13 +14,17 @@ use std::{
     io::{self, Write},
     marker::PhantomData,
     path::{Path, PathBuf},
+    rc::Rc,
     sync::Mutex,
 };
 
 use bincode::{deserialize, serialize_into};
 use cfgrammar::{
     newlinecache::NewlineCache,
-    yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind},
+    yacc::{
+        ast::{ASTWithValidityInfo, GrammarAST},
+        YaccGrammar, YaccGrammarError, YaccGrammarWarning, YaccKind, YaccOriginalActionKind,
+    },
     RIdx, Spanned, Symbol,
 };
 use filetime::FileTime;
@@ -146,6 +151,41 @@ impl Visibility {
     }
 }
 
+pub trait GrammarErrorHandler<LexerTypesT>
+where
+    LexerTypesT: LexerTypes,
+    usize: num_traits::AsPrimitive<LexerTypesT::StorageT>,
+{
+    /// Will be called with the `.y` file sources
+    /// as `src`, before any call to `fn on_*`.
+    fn grammar_src(&mut self, src: &str);
+    /// Will be called with the path to the `.y`
+    /// file, before any call to `fn on_*`.
+    fn grammar_path(&mut self, path: &Path);
+    /// Will be called with a flag denoting whether warnings
+    /// should be treated as errors, before any call to `fn on_*`.
+    fn warnings_are_errors(&mut self, flag: bool);
+    /// Lends `self` a slice containing warnings.
+    fn on_grammar_warning(&mut self, ws: &[YaccGrammarWarning]);
+    /// Lends `self` a slice containing errors.
+    fn on_grammar_error(&mut self, errs: &[YaccGrammarError]);
+    /// Lends `self` the conflicts, and the values necessary to obtain their spans.
+    fn on_unexpected_conflicts(
+        &mut self,
+        ast: &GrammarAST,
+        grm: &YaccGrammar<LexerTypesT::StorageT>,
+        sgraph: &StateGraph<LexerTypesT::StorageT>,
+        stable: &StateTable<LexerTypesT::StorageT>,
+        conflicts: &Conflicts<LexerTypesT::StorageT>,
+    );
+    /// This function must return an `Err` if any of the following are true:
+    ///
+    /// - `on_grammar_error` has been called.
+    /// - `on_unexpected_conflicts` has been called.
+    /// - `on_grammar_warning` was called and `warnings_are_errors` is true.
+    fn results(&self) -> Result<(), Box<dyn Error>>;
+}
+
 /// A `CTParserBuilder` allows one to specify the criteria for building a statically generated
 /// parser.
 pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
@@ -166,6 +206,7 @@ where
     show_warnings: bool,
     visibility: Visibility,
     rust_edition: RustEdition,
+    error_handler: Option<Rc<RefCell<dyn GrammarErrorHandler<LexerTypesT>>>>,
     phantom: PhantomData<LexerTypesT>,
 }
 
@@ -209,6 +250,7 @@ where
             show_warnings: true,
             visibility: Visibility::Private,
             rust_edition: RustEdition::Rust2021,
+            error_handler: None,
             phantom: PhantomData,
         }
     }
@@ -262,6 +304,14 @@ where
         Ok(self.output_path(outp))
     }
 
+    pub fn error_handler(
+        mut self,
+        error_handler: Rc<RefCell<dyn GrammarErrorHandler<LexerTypesT>>>,
+    ) -> Self {
+        self.error_handler = Some(error_handler);
+        self
+    }
+
     /// Set the input grammar path to `inp`. If specified, you must also call
     /// [CTParserBuilder::output_path]. In general it is easier to use
     /// [CTParserBuilder::grammar_in_src_dir].
@@ -429,55 +479,85 @@ where
         };
 
         let res = YaccGrammar::<LexerTypesT::StorageT>::new_from_ast_with_validity_info(yk, &ast_validation);
+        if let Some(error_handler) = self.error_handler.as_ref() {
+            let mut error_handler = error_handler.borrow_mut();
+            error_handler.grammar_path(grmp);
+            error_handler.grammar_src(inc.as_str());
+            error_handler.warnings_are_errors(self.warnings_are_errors);
+        }
         let grm = match res {
             Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
-                let mut line_cache = NewlineCache::new();
-                line_cache.feed(&inc);
-                return Err(ErrorString(if warnings.len() > 1 {
-                    // Indent under the "Error:" prefix.
-                    format!(
-                        "\n\t{}",
-                        warnings
-                            .iter()
-                            .map(|w| spanned_fmt(w, &inc, &line_cache))
-                            .collect::<Vec<_>>()
-                            .join("\n\t")
-                    )
+                if let Some(error_handler) = self.error_handler {
+                    let mut error_handler = error_handler.borrow_mut();
+                    error_handler.on_grammar_warning(warnings.as_slice());
+                    return Err(error_handler
+                        .results()
+                        .expect_err("Expected error from error handler"));
                 } else {
-                    spanned_fmt(warnings.first().unwrap(), &inc, &line_cache)
-                }))?;
+                    let mut line_cache = NewlineCache::new();
+                    line_cache.feed(&inc);
+                    return Err(ErrorString(if warnings.len() > 1 {
+                        // Indent under the "Error:" prefix.
+                        format!(
+                            "\n\t{}",
+                            warnings
+                                .iter()
+                                .map(|w| spanned_fmt(w, &inc, &line_cache))
+                                .collect::<Vec<_>>()
+                                .join("\n\t")
+                        )
+                    } else {
+                        spanned_fmt(warnings.first().unwrap(), &inc, &line_cache)
+                    }))?;
+                }
             }
             Ok(grm) => {
                 if !warnings.is_empty() {
-                    let mut line_cache = NewlineCache::new();
-                    line_cache.feed(&inc);
-                    for w in warnings {
-                        // Assume if this variable is set we are running under cargo.
-                        if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
-                            println!("cargo:warning={}", spanned_fmt(&w, &inc, &line_cache));
-                        } else if self.show_warnings {
-                            eprintln!("{}", spanned_fmt(&w, &inc, &line_cache));
+                    if let Some(error_handler) = self.error_handler.as_ref() {
+                        let mut error_handler = error_handler.borrow_mut();
+                        error_handler.on_grammar_warning(warnings.as_slice());
+                    } else {
+                        let mut line_cache = NewlineCache::new();
+                        line_cache.feed(&inc);
+                        for w in warnings {
+                            // Assume if this variable is set we are running under cargo.
+                            if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
+                                println!("cargo:warning={}", spanned_fmt(&w, &inc, &line_cache));
+                            } else if self.show_warnings {
+                                eprintln!("{}", spanned_fmt(&w, &inc, &line_cache));
+                            }
                         }
                     }
                 }
                 grm
             }
             Err(errs) => {
-                let mut line_cache = NewlineCache::new();
-                line_cache.feed(&inc);
-                return Err(ErrorString(if errs.len() + warnings.len() > 1 {
-                    // Indent under the "Error:" prefix.
-                    format!(
-                        "\n\t{}",
-                        errs.iter()
-                            .map(|e| spanned_fmt(e, &inc, &line_cache))
-                            .chain(warnings.iter().map(|w| spanned_fmt(w, &inc, &line_cache)))
-                            .collect::<Vec<_>>()
-                            .join("\n\t")
-                    )
+                if let Some(error_handler) = self.error_handler.as_ref() {
+                    let mut error_handler = error_handler.borrow_mut();
+                    if !warnings.is_empty() {
+                        error_handler.on_grammar_warning(warnings.as_slice())
+                    }
+                    error_handler.on_grammar_error(errs.as_slice());
+                    return Err(error_handler
+                        .results()
+                        .expect_err("Expected an error from error_handler."));
                 } else {
-                    spanned_fmt(errs.first().unwrap(), &inc, &line_cache)
-                }))?;
+                    let mut line_cache = NewlineCache::new();
+                    line_cache.feed(&inc);
+                    return Err(ErrorString(if errs.len() + warnings.len() > 1 {
+                        // Indent under the "Error:" prefix.
+                        format!(
+                            "\n\t{}",
+                            errs.iter()
+                                .map(|e| spanned_fmt(e, &inc, &line_cache))
+                                .chain(warnings.iter().map(|w| spanned_fmt(w, &inc, &line_cache)))
+                                .collect::<Vec<_>>()
+                                .join("\n\t")
+                        )
+                    } else {
+                        spanned_fmt(errs.first().unwrap(), &inc, &line_cache)
+                    }))?;
+                }
             }
         };
@@ -529,7 +609,23 @@ where
                     (Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
                     (None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
                     (None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
-                    _ => return Err(Box::new(CTConflictsError { stable })),
+                    _ => {
+                        if let Some(error_handler) = self.error_handler.as_ref() {
+                            let mut error_handler = error_handler.borrow_mut();
+                            error_handler.on_unexpected_conflicts(
+                                ast_validation.ast(),
+                                &grm,
+                                &sgraph,
+                                &stable,
+                                c,
+                            );
+                            return Err(error_handler
+                                .results()
+                                .expect_err("Expected conflict error from error handler"));
+                        } else {
+                            return Err(Box::new(CTConflictsError { stable }));
+                        }
+                    }
                 }
             }
         }
@@ -661,6 +757,7 @@ where
             show_warnings: self.show_warnings,
             visibility: self.visibility.clone(),
             rust_edition: self.rust_edition,
+            error_handler: None,
             phantom: PhantomData,
         };
         Ok(cl.build()?.rule_ids)
diff --git a/lrpar/src/lib/mod.rs b/lrpar/src/lib/mod.rs
index 45af9eed3..3f47ce737 100644
--- a/lrpar/src/lib/mod.rs
+++ b/lrpar/src/lib/mod.rs
@@ -204,7 +204,7 @@ pub mod parser;
 mod test_utils;
 
 pub use crate::{
-    ctbuilder::{CTParser, CTParserBuilder, RustEdition, Visibility},
+    ctbuilder::{CTParser, CTParserBuilder, GrammarErrorHandler, RustEdition, Visibility},
     lex_api::{LexError, Lexeme, Lexer, LexerTypes, NonStreamingLexer},
     parser::{LexParseError, Node, ParseError, ParseRepair, RTParserBuilder, RecoveryKind},
 };
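
For reference, below is a minimal build.rs sketch showing how the two new hooks might be driven together. It is illustrative only: it assumes the trait shapes as reconstructed above (both traits generic over a `LexerTypesT: LexerTypes` parameter), the `error_handler` setters added by this diff, and an lrtable build-dependency for the conflict types; `LexReporter`, `GrammarReporter`, `calc.l` and `calc.y` are made-up names, not part of the patch.

// Hypothetical build.rs sketch: the trait shapes and `error_handler` setters follow the
// reconstruction above and may differ from the final API; all type/file names are illustrative.
use std::{cell::RefCell, collections::HashSet, error::Error, path::Path, rc::Rc};

use cfgrammar::yacc::{
    ast::GrammarAST, YaccGrammar, YaccGrammarError, YaccGrammarWarning, YaccKind,
};
use lrlex::{CTLexerBuilder, DefaultLexerTypes, LexBuildError, LexErrorHandler};
use lrpar::GrammarErrorHandler;
use lrtable::{Conflicts, StateGraph, StateTable};

// Collects lexer-side problems; `results` turns them into a single build error.
#[derive(Default)]
struct LexReporter {
    problems: Vec<String>,
}

impl LexErrorHandler<DefaultLexerTypes> for LexReporter {
    fn lexer_path(&mut self, _filename: &Path) {}
    fn lexer_src(&mut self, _src: &str) {}
    fn on_lex_build_error(&mut self, errors: &[LexBuildError]) {
        self.problems.extend(errors.iter().map(|e| e.to_string()));
    }
    fn missing_in_lexer(&mut self, missing: &HashSet<String>) {
        self.problems.push(format!("missing from lexer: {:?}", missing));
    }
    fn missing_in_parser(&mut self, missing: &HashSet<String>) {
        self.problems.push(format!("missing from parser: {:?}", missing));
    }
    fn results(&self) -> Result<(), Box<dyn Error>> {
        if self.problems.is_empty() {
            Ok(())
        } else {
            Err(self.problems.join("\n").into())
        }
    }
}

// Collects grammar-side diagnostics; errors out on grammar errors, unexpected conflicts,
// or warnings when warnings are treated as errors, as the trait contract requires.
#[derive(Default)]
struct GrammarReporter {
    warnings_as_errors: bool,
    warnings: Vec<String>,
    errors: Vec<String>,
}

impl GrammarErrorHandler<DefaultLexerTypes> for GrammarReporter {
    fn grammar_src(&mut self, _src: &str) {}
    fn grammar_path(&mut self, _path: &Path) {}
    fn warnings_are_errors(&mut self, flag: bool) {
        self.warnings_as_errors = flag;
    }
    fn on_grammar_warning(&mut self, ws: &[YaccGrammarWarning]) {
        self.warnings.extend(ws.iter().map(|w| w.to_string()));
    }
    fn on_grammar_error(&mut self, errs: &[YaccGrammarError]) {
        self.errors.extend(errs.iter().map(|e| e.to_string()));
    }
    fn on_unexpected_conflicts(
        &mut self,
        _ast: &GrammarAST,
        _grm: &YaccGrammar<u32>,
        _sgraph: &StateGraph<u32>,
        _stable: &StateTable<u32>,
        conflicts: &Conflicts<u32>,
    ) {
        self.errors.push(format!("unexpected conflicts: {} shift/reduce, {} reduce/reduce",
            conflicts.sr_len(), conflicts.rr_len()));
    }
    fn results(&self) -> Result<(), Box<dyn Error>> {
        let mut msgs = self.errors.clone();
        if self.warnings_as_errors {
            msgs.extend(self.warnings.iter().cloned());
        }
        if msgs.is_empty() {
            Ok(())
        } else {
            Err(msgs.join("\n").into())
        }
    }
}

fn main() -> Result<(), Box<dyn Error>> {
    let mut lex_reporter = LexReporter::default();
    let grm_reporter: Rc<RefCell<dyn GrammarErrorHandler<DefaultLexerTypes>>> =
        Rc::new(RefCell::new(GrammarReporter::default()));
    CTLexerBuilder::new()
        .error_handler(&mut lex_reporter)
        .lrpar_config(move |ctp| {
            ctp.yacckind(YaccKind::Grmtools)
                .error_handler(Rc::clone(&grm_reporter))
                .grammar_in_src_dir("calc.y")
                .unwrap()
        })
        .lexer_in_src_dir("calc.l")?
        .build()?;
    Ok(())
}

Note the asymmetry the patch introduces: `CTLexerBuilder` borrows its handler as `&'a mut dyn LexErrorHandler<_>` for the builder's lifetime, whereas `CTParserBuilder` takes an `Rc<RefCell<dyn GrammarErrorHandler<_>>>` so the handler can be moved into, and cloned from, the `lrpar_config` closure.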