Skip to content

Commit

Permalink
reimplemented parser with parser combinator
Browse files Browse the repository at this point in the history
  • Loading branch information
glyh committed Sep 16, 2024
1 parent 6fcec1e commit d1cf63f
Show file tree
Hide file tree
Showing 14 changed files with 783 additions and 1,339 deletions.
2 changes: 1 addition & 1 deletion src/bin/main.mbt
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ fn CompileStatus::step(self : CompileStatus) -> Bool {
match self.curr_stage {
Parse => {
let source_code = self.source_code.unwrap()
let parsed = @util.die("TODO: parse")
let parsed = @parser.parse_program(source_code)
self.ast = Some(parsed)
}
Typecheck => {
Expand Down
89 changes: 7 additions & 82 deletions src/lex/lex.mbt
Original file line number Diff line number Diff line change
@@ -1,72 +1,5 @@
// An O(N) lexer
// TODO: check for out-of-range int and float literals when parsing them
/// A position in the source text being lexed.
///
/// `new_loc` starts at row 1, column 1, index 0, and `advance` moves the
/// position forward one character at a time.
pub struct Loc {
// Line number; starts at 1 and grows by one on each '\n'.
row : Int
// Column within the current line; starts at 1 and resets to 1 after '\n'.
col : Int
// Number of characters advanced past so far; starts at 0.
index : Int
} derive(Show)

/// Payload describing why lexing failed; carried by the `LexError` error type.
pub enum LexError_ {
// Raised by the lexer when it is in its `AfterLess` state and the next
// character does not complete the `LE` token.
ExpectLE
// Raised by the lexer when a character is not found in its symbol table;
// carries the offending character.
UnexpectedChar(Char)
} derive(Show)

/// Error type raised by the lexer (`raise LexError(...)`), wrapping a
/// `LexError_` payload.
pub type! LexError LexError_

/// Builds the location of the very first character of an input:
/// row 1, column 1, index 0.
pub fn new_loc() -> Loc {
  let origin : Loc = { index: 0, row: 1, col: 1 }
  origin
}

/// Returns the location reached after consuming the character `c`.
///
/// A newline moves to column 1 of the next row (via `advance_line`); any
/// other character moves one column to the right. `index` grows by one in
/// both cases.
fn advance(self : Loc, c : Char) -> Loc {
  // Common case first: an ordinary character stays on the same row.
  if c != '\n' {
    return { ..self, col: self.col + 1, index: self.index + 1 }
  }
  self.advance_line()
}

/// Returns the location at the start of the next row: `row` and `index`
/// each grow by one and `col` is reset to 1.
fn advance_line(self : Loc) -> Loc {
  let next_row = self.row + 1
  let next_index = self.index + 1
  { row: next_row, col: 1, index: next_index }
}

/// A lexical token produced by the lexer.
///
/// Derives `Show` and `Eq`, so tokens can be printed and compared.
/// Keyword variants are looked up through the `reserved_keywords` table;
/// the spellings of the punctuation/operator variants are defined by the
/// lexer's symbol table and DFA, most of which is not shown next to this type.
pub enum Token {
// Identifier, carrying its name (e.g. `ID("make_adder")`).
ID(String)
COLON
ASSIGN
SEMICOL
// Keyword `fn`.
FN
// Keyword `main`.
MAIN
// Keyword `init`.
INIT
LPAREN
RPAREN
// Emitted when '>' follows the lexer's `AfterMinus` state (presumably `->`).
ARROW
COMMA
LBRACE
RBRACE
EQ
// Emitted from the lexer's `AfterLess` state (presumably `<=`).
LE
ADD
SUB
MUL
DIV
// Keyword `if`.
IF
// Keyword `else`.
ELSE
LBKT
RBKT
// Boolean literal; the keyword `true` maps to `BOOL(true)`.
BOOL(Bool)
DOT
// Keyword `Array`.
ARRAY
// Keyword `make`.
MAKE
// Type-name keywords. `Double` maps to `DOUBLE_T`; the spellings of the
// others are presumably `Unit`, `Bool` and `Int` — confirm against the
// keyword table.
UNIT_T
BOOL_T
DOUBLE_T
// Integer literal with its value (e.g. `I32(2147483647)`).
I32(Int)
// Floating-point literal with its value (e.g. `F64(1.0007)`).
F64(Double)
// Keyword `not`.
NOT
INT_T
// Keyword `let`.
LET
} derive(Show, Eq)

let reserved_keywords : @immut/hashmap.T[String, Token] = @immut/hashmap.of(
[
("true", BOOL(true)),
Expand All @@ -77,13 +10,10 @@ let reserved_keywords : @immut/hashmap.T[String, Token] = @immut/hashmap.of(
("Double", DOUBLE_T),
("Array", ARRAY),
("not", NOT),
("make", MAKE),
("if", IF),
("else", ELSE),
("fn", FN),
("let", LET),
("main", MAIN),
("init", INIT),
],
)

Expand Down Expand Up @@ -117,8 +47,8 @@ enum LexState { // DFA State
AfterFloatingDot
}

pub fn lex_string(input : String) -> Array[(Token, Loc, Loc)]!LexError {
let result : Array[(Token, Loc, Loc)] = Array::new()
pub fn lex_string(input : String) -> Array[Lexeme]!LexError {
let result : Array[Lexeme] = Array::new()
// append a newline manually so that the last token is always emitted
let chars = (input + "\n").iter().collect()
let index_ub = chars.length()
Expand Down Expand Up @@ -165,7 +95,7 @@ pub fn lex_string(input : String) -> Array[(Token, Loc, Loc)]!LexError {
} else {
match symbols.find(char) {
Some(tok) => result.push((tok, loc, loc))
None => raise LexError(UnexpectedChar(char))
None => raise LexError(UnexpectedChar(char, loc))
}
}
}
Expand Down Expand Up @@ -237,7 +167,7 @@ pub fn lex_string(input : String) -> Array[(Token, Loc, Loc)]!LexError {
mode = Default
result.push((LE, last_loc, loc))
}
(AfterLess, _) => raise LexError(ExpectLE)
(AfterLess, c) => raise LexError(UnexpectedChar(c, loc))
(AfterMinus, '>') => {
mode = Default
result.push((ARROW, last_loc, loc))
Expand All @@ -250,15 +180,10 @@ pub fn lex_string(input : String) -> Array[(Token, Loc, Loc)]!LexError {
}
loc = loc.advance(char)
}
result.push((EOF, loc, loc))
result
}

/// Projects the first component out of a triple, discarding the other two.
fn fst[U, V, W](pair : (U, V, W)) -> U {
  let (head, _, _) = pair
  head
}

test "simple lexing" {
inspect!(
lex_string!(
Expand All @@ -275,9 +200,9 @@ test "simple lexing" {
#| 1.0007 9.
#| 2147483647
,
).map(fst),
).map(fn { _1 => _1.0 }),
content=
#|[FN, ID("make_adder"), LPAREN, ID("x"), COLON, INT_T, RPAREN, ARROW, LPAREN, INT_T, RPAREN, ARROW, INT_T, LBRACE, FN, ID("adder"), LPAREN, ID("y"), COLON, INT_T, RPAREN, ARROW, INT_T, LBRACE, ID("x"), ADD, ID("y"), RBRACE, SEMICOL, ID("adder"), RBRACE, SEMICOL, FN, MAIN, LBRACE, ID("print_int"), LPAREN, LPAREN, ID("make_adder"), LPAREN, I32(3), RPAREN, RPAREN, LPAREN, I32(7), RPAREN, RPAREN, RBRACE, SEMICOL, F64(1.0007), F64(9), I32(2147483647)]
#|[FN, ID("make_adder"), LPAREN, ID("x"), COLON, INT_T, RPAREN, ARROW, LPAREN, INT_T, RPAREN, ARROW, INT_T, LBRACE, FN, ID("adder"), LPAREN, ID("y"), COLON, INT_T, RPAREN, ARROW, INT_T, LBRACE, ID("x"), ADD, ID("y"), RBRACE, SEMICOL, ID("adder"), RBRACE, SEMICOL, FN, ID("main"), LBRACE, ID("print_int"), LPAREN, LPAREN, ID("make_adder"), LPAREN, I32(3), RPAREN, RPAREN, LPAREN, I32(7), RPAREN, RPAREN, RBRACE, SEMICOL, F64(1.0007), F64(9), I32(2147483647), EOF]
,
)
}
65 changes: 65 additions & 0 deletions src/lex/lexeme.mbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/// A position in the source text being lexed.
///
/// `new_loc` starts at row 1, column 1, index 0, and `advance` moves the
/// position forward one character at a time.
pub struct Loc {
// Line number; starts at 1 and grows by one on each '\n'.
row : Int
// Column within the current line; starts at 1 and resets to 1 after '\n'.
col : Int
// Number of characters advanced past so far; starts at 0.
index : Int
} derive(Show)

/// Payload describing why lexing failed; carried by the `LexError` error type.
pub enum LexError_ {
// A character the lexer could not accept, together with the location at
// which it was encountered.
UnexpectedChar(Char, Loc)
} derive(Show)

/// Error type raised by the lexer (`raise LexError(...)`), wrapping a
/// `LexError_` payload.
pub type! LexError LexError_

/// Builds the location of the very first character of an input:
/// row 1, column 1, index 0.
fn new_loc() -> Loc {
  let origin : Loc = { index: 0, row: 1, col: 1 }
  origin
}

/// Returns the location reached after consuming the character `c`.
///
/// A newline moves to column 1 of the next row (via `advance_line`); any
/// other character moves one column to the right. `index` grows by one in
/// both cases.
fn advance(self : Loc, c : Char) -> Loc {
  // Common case first: an ordinary character stays on the same row.
  if c != '\n' {
    return { ..self, col: self.col + 1, index: self.index + 1 }
  }
  self.advance_line()
}

/// Returns the location at the start of the next row: `row` and `index`
/// each grow by one and `col` is reset to 1.
fn advance_line(self : Loc) -> Loc {
  let next_row = self.row + 1
  let next_index = self.index + 1
  { row: next_row, col: 1, index: next_index }
}

/// A lexical token produced by the lexer.
///
/// Derives `Show` and `Eq`, so tokens can be printed and compared.
/// Keyword variants are looked up through the lexer's reserved-keyword
/// table; the spellings of the punctuation/operator variants are defined by
/// the lexer's symbol table and DFA, which are not shown next to this type.
pub enum Token {
// Identifier, carrying its name (e.g. `ID("make_adder")`, `ID("main")`).
ID(String)
COLON
ASSIGN
SEMICOL
// Keyword `fn`.
FN
LPAREN
RPAREN
// Emitted when '>' follows the lexer's `AfterMinus` state (presumably `->`).
ARROW
COMMA
LBRACE
RBRACE
EQ
// Emitted from the lexer's `AfterLess` state (presumably `<=`).
LE
ADD
SUB
MUL
DIV
// Keyword `if`.
IF
// Keyword `else`.
ELSE
LBKT
RBKT
// Boolean literal; the keyword `true` maps to `BOOL(true)`.
BOOL(Bool)
DOT
// Keyword `Array`.
ARRAY
// Type-name keywords. `Double` maps to `DOUBLE_T`; the spellings of the
// others are presumably `Unit`, `Bool` and `Int` — confirm against the
// keyword table.
UNIT_T
BOOL_T
DOUBLE_T
// Integer literal with its value (e.g. `I32(2147483647)`).
I32(Int)
// Floating-point literal with its value (e.g. `F64(1.0007)`).
F64(Double)
// Keyword `not`.
NOT
INT_T
// Keyword `let`.
LET
// End-of-input marker; the lexer pushes it once after the last real token.
EOF
} derive(Show, Eq)

/// A token together with two source locations.
///
/// The lexer pushes `(tok, loc, loc)` for single-character tokens and
/// `(tok, last_loc, loc)` for multi-character ones, so the two locations
/// presumably mark where the token starts and where it ends — confirm
/// against `lex_string`.
typealias Lexeme = (Token, Loc, Loc)
4 changes: 0 additions & 4 deletions src/parser/README.md

This file was deleted.

11 changes: 11 additions & 0 deletions src/parser/aliases.mbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Short aliases for the `@types` types used in this package, each paired
// with the `Parser` instantiation over that type.

/// Alias for `@types.Syntax`.
typealias S = @types.Syntax

/// A parser over `S` (`@types.Syntax`).
typealias PS = Parser[S]

/// Alias for `@types.Type`.
typealias T = @types.Type

/// A parser over `T` (`@types.Type`).
typealias PT = Parser[T]

/// Alias for `@types.Op` (the name `B` presumably stands for "binary
/// operator" — confirm against `@types`).
typealias B = @types.Op

/// A parser over `B` (`@types.Op`).
typealias PB = Parser[B]
87 changes: 0 additions & 87 deletions src/parser/cst.mbt

This file was deleted.

26 changes: 0 additions & 26 deletions src/parser/entry.mbt

This file was deleted.

Loading

0 comments on commit d1cf63f

Please sign in to comment.