Skip to content

Commit

Permalink
🐛 Enforce spacing between identifiers & other tokens in lexer
Browse files Browse the repository at this point in the history
* make sure tokens end with a space or punct

* cleanup

* cleanup2

---------

Co-authored-by: lmittmann <[email protected]>
  • Loading branch information
lmittmann and lmittmann authored Oct 17, 2024
1 parent 12edecc commit 56d1765
Showing 1 changed file with 30 additions and 14 deletions.
44 changes: 30 additions & 14 deletions crates/ast/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub enum Token<'src> {
Error(char),
}

impl<'src> fmt::Display for Token<'src> {
impl fmt::Display for Token<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Token::Comment(s)
Expand All @@ -47,25 +47,41 @@ impl<'src> fmt::Display for Token<'src> {

fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<Spanned<Token<'src>>>, extra::Err<Rich<'src, char>>> {
let validate_end = any()
.or_not()
.rewind()
.validate(|c: Option<char>, e, emitter| {
if let Some(c) = c {
if !(c.is_whitespace() || "(){[]<>:=,/}".contains(c)) {
emitter.emit(Rich::custom(e.span(), "invalid token"));
}
}
});
let keyword = just("#")
.ignore_then(keyword("define").or(keyword("include")))
.ignore_then(choice((keyword("define"), keyword("include"))))
.then_ignore(validate_end)
.map(Token::Keyword);

let ident = text::ident().map(Token::Ident);
let ident = text::ident().then_ignore(validate_end).map(Token::Ident);

let punct = one_of("(){}[]<>:=,").map(Token::Punct);

let hex = just("0x")
.ignore_then(text::digits(16))
.to_slice()
.then_ignore(validate_end)
.map(Token::Hex);

let bin = just("0b")
.ignore_then(text::digits(2))
.then_ignore(validate_end)
.to_slice()
.map(Token::Bin);

let dec = text::digits(10).to_slice().map(Token::Dec);
let dec = text::digits(10)
.then_ignore(validate_end)
.to_slice()
.map(Token::Dec);

let token = choice((keyword, ident, punct, hex, bin, dec));

Expand Down Expand Up @@ -102,11 +118,11 @@ mod tests {
};
}

// macro_rules! assert_err {
// ($input:expr, $expected:expr) => {
// assert_eq!(lexer().parse($input).into_result(), Err(vec![$expected]),);
// };
// }
// Asserts that lexing `$input` fails, producing exactly the single
// diagnostic `$expected` (and no tokens).
macro_rules! assert_err {
($input:expr, $expected:expr) => {
assert_eq!(lexer().parse($input).into_result(), Err(vec![$expected]),);
};
}

#[test]
fn lex_keyword() {
Expand All @@ -125,10 +141,10 @@ mod tests {
(Token::Ident("foo"), SimpleSpan::new(0, 3)),
(Token::Ident("bar"), SimpleSpan::new(4, 7))
);
// assert_err!(
// "foo#define",
// Rich::custom(SimpleSpan::new(0, 10), "invalid token")
// );
assert_err!(
"foo#define",
Rich::custom(SimpleSpan::new(3, 3), "invalid token")
);
}

#[test]
Expand All @@ -155,7 +171,7 @@ mod tests {
// Hex literals ("0x…") lex to a single Token::Hex spanning the whole
// literal, and a hex token not followed by whitespace/punctuation is
// rejected by the end-of-token validation.
fn lex_hex() {
assert_ok!("0x0", (Token::Hex("0x0"), SimpleSpan::new(0, 3)));
assert_ok!("0x123", (Token::Hex("0x123"), SimpleSpan::new(0, 5)));
// assert_err!("0x", SimpleSpan::new(2, 2));
// "0x0x": the second "0x" directly abuts the first token, so the lexer
// emits "invalid token" at the zero-width span where the run-on starts.
assert_err!("0x0x", Rich::custom(SimpleSpan::new(3, 3), "invalid token"));
}

#[test]
Expand Down

0 comments on commit 56d1765

Please sign in to comment.