From 56d1765e96338d6e4f7032fcf984dcb85164ef6a Mon Sep 17 00:00:00 2001 From: lmittmann <3458786+lmittmann@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:46:46 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Enforce=20spacing=20between=20id?= =?UTF-8?q?entifiers=20&=20other=20in=20lexer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * make sure tokens end with a space or punct * cleanup * cleanup2 --------- Co-authored-by: lmittmann --- crates/ast/src/lexer.rs | 44 ++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/crates/ast/src/lexer.rs b/crates/ast/src/lexer.rs index 7befa40..af7341c 100644 --- a/crates/ast/src/lexer.rs +++ b/crates/ast/src/lexer.rs @@ -31,7 +31,7 @@ pub enum Token<'src> { Error(char), } -impl<'src> fmt::Display for Token<'src> { +impl fmt::Display for Token<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Token::Comment(s) @@ -47,25 +47,41 @@ impl<'src> fmt::Display for Token<'src> { fn lexer<'src>( ) -> impl Parser<'src, &'src str, Vec>>, extra::Err>> { + let validate_end = any() + .or_not() + .rewind() + .validate(|c: Option, e, emitter| { + if let Some(c) = c { + if !(c.is_whitespace() || "(){[]<>:=,/}".contains(c)) { + emitter.emit(Rich::custom(e.span(), "invalid token")); + } + } + }); let keyword = just("#") - .ignore_then(keyword("define").or(keyword("include"))) + .ignore_then(choice((keyword("define"), keyword("include")))) + .then_ignore(validate_end) .map(Token::Keyword); - let ident = text::ident().map(Token::Ident); + let ident = text::ident().then_ignore(validate_end).map(Token::Ident); let punct = one_of("(){}[]<>:=,").map(Token::Punct); let hex = just("0x") .ignore_then(text::digits(16)) .to_slice() + .then_ignore(validate_end) .map(Token::Hex); let bin = just("0b") .ignore_then(text::digits(2)) + .then_ignore(validate_end) .to_slice() .map(Token::Bin); - let dec = text::digits(10).to_slice().map(Token::Dec); + let dec = text::digits(10) + .then_ignore(validate_end) + .to_slice() + .map(Token::Dec); let token = choice((keyword, ident, punct, hex, bin, dec)); @@ -102,11 +118,11 @@ mod tests { }; } - // macro_rules! assert_err { - // ($input:expr, $expected:expr) => { - // assert_eq!(lexer().parse($input).into_result(), Err(vec![$expected]),); - // }; - // } + macro_rules! assert_err { + ($input:expr, $expected:expr) => { + assert_eq!(lexer().parse($input).into_result(), Err(vec![$expected]),); + }; + } #[test] fn lex_keyword() { @@ -125,10 +141,10 @@ mod tests { (Token::Ident("foo"), SimpleSpan::new(0, 3)), (Token::Ident("bar"), SimpleSpan::new(4, 7)) ); - // assert_err!( - // "foo#define", - // Rich::custom(SimpleSpan::new(0, 10), "invalid token") - // ); + assert_err!( + "foo#define", + Rich::custom(SimpleSpan::new(3, 3), "invalid token") + ); } #[test] @@ -155,7 +171,7 @@ mod tests { fn lex_hex() { assert_ok!("0x0", (Token::Hex("0x0"), SimpleSpan::new(0, 3))); assert_ok!("0x123", (Token::Hex("0x123"), SimpleSpan::new(0, 5))); - // assert_err!("0x", SimpleSpan::new(2, 2)); + assert_err!("0x0x", Rich::custom(SimpleSpan::new(3, 3), "invalid token")); } #[test]