From 60a2b3fec6f94a6adbf0d4e778780fae7e669414 Mon Sep 17 00:00:00 2001 From: jzbor Date: Sat, 30 Nov 2024 20:13:56 +0100 Subject: [PATCH] Implement parser in pest --- Cargo.lock | 141 ++++++++++++--- Cargo.toml | 4 +- examples/capav.lsh | 6 +- examples/fib.lsh | 10 +- examples/sort.lsh | 12 +- src/error.rs | 36 ++-- src/grammar.pest | 28 +++ src/interpreter.rs | 10 +- src/macro.rs | 2 + src/parsing.rs | 442 ++++++++++++--------------------------------- src/typing.rs | 23 ++- 11 files changed, 319 insertions(+), 395 deletions(-) create mode 100644 src/grammar.pest diff --git a/Cargo.lock b/Cargo.lock index 53dd24b..9c8a5f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,10 +69,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] -name = "bytecount" -version = "0.6.7" +name = "block-buffer" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] [[package]] name = "cfg-if" @@ -147,6 +150,35 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -205,6 +237,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -235,8 +277,8 @@ dependencies = [ "clap", "colored", "humantime", - "nom", - "nom_locate", + "pest", + "pest_derive", "rustyline", ] @@ -248,9 +290,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.161" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" [[package]] name = "libredox" @@ -281,12 +323,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "nibble_vec" version = "0.1.0" @@ -309,24 +345,54 @@ dependencies = [ ] [[package]] -name = "nom" -version = "7.1.3" +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "pest" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", - "minimal-lexical", + "thiserror", + "ucd-trie", ] [[package]] -name = "nom_locate" -version = "4.2.0" +name = "pest_derive" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e3c83c053b0713da60c5b8de47fe8e494fe3ece5267b2f23090a07a053ba8f3" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" dependencies = [ - "bytecount", - "memchr", - "nom", + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" +dependencies = [ + "once_cell", + "pest", + "sha2", ] [[package]] @@ -419,6 +485,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -468,6 +545,18 @@ dependencies = [ "syn", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -492,6 +581,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index a759c82..5d5237e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,11 +17,11 @@ path = "src/main.rs" [dependencies] rustyline = { version = "10.0.0", optional = true } -nom = { version = "7", default-features = false, features = ["alloc"] } -nom_locate = { version = "4.0.0", default-features = false, features = ["alloc"] } clap = { version = "4.3.11", features = ["derive"], optional = true } colored = { version = "2.0.4", optional = true } humantime = { version = "2.1.0", optional = true } +pest = { version = "2.7.14", features = [], default-features = false } +pest_derive = { version = "2.7.14", features = [], default-features = false } [features] default = ["std"] diff --git a/examples/capav.lsh b/examples/capav.lsh index f35be8d..77eb3a4 100644 --- a/examples/capav.lsh +++ b/examples/capav.lsh @@ -1,9 +1,9 @@ term1 := (\x y . y x) (y y) z; term2 := (\x y . y x) (a a) z; -@echo "This works as expected:" +@echo "This works as expected:"; !debug (!vnormalize term2); -@echo "" -@echo "But here we see the necessity of capture avoidance:" +@echo ""; +@echo "But here we see the necessity of capture avoidance:"; !debug (!vnormalize term1); diff --git a/examples/fib.lsh b/examples/fib.lsh index 99ed5c9..9fd9781 100644 --- a/examples/fib.lsh +++ b/examples/fib.lsh @@ -1,6 +1,6 @@ -@usestd -@set strategy normal -@set numerals true +@usestd; +@set strategy normal; +@set numerals true; iszero := λn.n (λx. FALSE) TRUE; @@ -13,6 +13,6 @@ fib' := \f x. IFTHENELSE (ADD (f f (PRED x)) (f f (PRED (PRED x))))); fib := fib' fib'; -@echo "" -@echo "6. Fibonacci number" +@echo ""; +@echo "6. Fibonacci number"; listunsorted := !debug (!cnorm (fib $6)); diff --git a/examples/sort.lsh b/examples/sort.lsh index ccfb072..e41a544 100644 --- a/examples/sort.lsh +++ b/examples/sort.lsh @@ -1,5 +1,5 @@ -@set strategy normal -@usestd +@set strategy normal; +@usestd; one := !n (SUCC NIL); @@ -34,11 +34,11 @@ sort' := \f l. IFTHENELSE sort := sort' sort'; -@echo "" -@echo "Unsorted list:" +@echo ""; +@echo "Unsorted list:"; listunsorted := !debug (!norm (CONS three (CONS one (CONS four ((CONS two NIL)))))); -@echo "" -@echo "After sorting:" +@echo ""; +@echo "After sorting:"; !debug (!time (!cnorm (sort listunsorted))); diff --git a/src/error.rs b/src/error.rs index 6f1017f..4fbf51d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -3,9 +3,10 @@ extern crate alloc; use alloc::borrow::ToOwned; use alloc::format; use alloc::string::{String, ToString}; +use pest::RuleType; use core::fmt::Display; -use crate::{parsing, r#macro::Macro}; +use crate::r#macro::Macro; pub type LashResult = Result; @@ -25,6 +26,7 @@ pub enum LashErrorType { SetKeyError, SetValueError, SyntaxError, + UnknownMacroError, #[cfg(not(feature = "std"))] NotFoundError, #[cfg(not(feature = "std"))] @@ -66,6 +68,13 @@ impl LashError { } } + pub fn new_syntax_error(err: pest::error::Error) -> Self { + LashError { + error_type: LashErrorType::SyntaxError, + message: format!("\n{}", err), + } + } + pub fn new_set_key_error(key: &str) -> Self { LashError { error_type: LashErrorType::SetKeyError, @@ -80,6 +89,13 @@ impl LashError { } } + pub fn new_unknown_macro_error(name: &str) -> Self { + LashError { + error_type: LashErrorType::UnknownMacroError, + message: format!("unknown macro '{}'", name), + } + } + #[cfg(not(feature = "std"))] pub fn new_not_supported_error(message: String) -> Self { LashError { @@ -108,9 +124,10 @@ impl Display for LashError { FileError => "File Error", FormatError => "Format Error", MacroArgError => "Macro Argument Error", + SyntaxError => "Syntax Error", SetKeyError => "Set Key Error", SetValueError => "Set Value Error", - SyntaxError => "Syntax Error", + UnknownMacroError => "Unknown Macro Error", #[cfg(not(feature = "std"))] NotFoundError => "Not Found", #[cfg(not(feature = "std"))] @@ -120,21 +137,6 @@ impl Display for LashError { } } -impl From>> for LashError { - fn from(value: nom::Err>) -> Self { - use nom::Err::*; - let message = match value { - Incomplete(_) => "incomplete data".to_owned(), - Error(e) => format!("{}", e), - Failure(e) => format!("{}", e), - }; - LashError { - error_type: LashErrorType::SyntaxError, - message, - } - } -} - impl From for LashError { fn from(value: core::fmt::Error) -> Self { LashError { diff --git a/src/grammar.pest b/src/grammar.pest new file mode 100644 index 0000000..96b6e02 --- /dev/null +++ b/src/grammar.pest @@ -0,0 +1,28 @@ +statements = { SOI ~ ((directive | assignment | lambda ) ~ ";")+ ~ EOI } +statement = { SOI ~ (directive | assignment | lambda) ~ EOI } +directive = ${ "@" ~ (directive_set | directive_echo | directive_include | directive_usestd)} +assignment = { variable ~ ":=" ~ lambda } + +lambda = { macro | abstraction | application } +abstraction = { lambda_sign ~ variable+ ~ "." ~ lambda } +application = { group+ } +group = { variable | church | "(" ~ lambda ~ ")" } +macro = { "!" ~ macro_name ~ group+} + +lambda_sign = _{ "\\" | "λ" } +church = @{ "$" ~ ASCII_ALPHANUMERIC+ } +variable = @{ (ASCII_ALPHANUMERIC | "'")+ } + +directive_set = { "set" ~ WHITESPACE ~ key ~ WHITESPACE ~ value } +directive_echo = { "echo" ~ WHITESPACE ~ "\"" ~ quoted_string ~ "\"" } +directive_include = { "include" ~ WHITESPACE ~ "\"" ~ quoted_string ~ "\"" } +directive_usestd = { "usestd" } +key = @{ ASCII_ALPHA_LOWER+ } +value = @{ ASCII_ALPHA_LOWER+ } +quoted_string = @{ (!"\"" ~ ASCII)* } +macro_name = @{ ASCII_ALPHA_LOWER* } + + + + +WHITESPACE = _{ " " | "\t" | "\r" | "\n" } diff --git a/src/interpreter.rs b/src/interpreter.rs index fc77d53..fe0b237 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -59,9 +59,7 @@ impl Interpreter { pub fn interpret_contents(&mut self, content: &str) -> LashResult<()> { use parsing::Statement::*; - let (rest, statements) = parsing::match_statements(parsing::Span::new(content))?; - let (rest, _) = parsing::finish(rest)?; - assert!(rest.is_empty(), "{:?}", rest); + let statements = parsing::parse_statements(content)?; for statement in statements { match statement { @@ -69,7 +67,6 @@ impl Interpreter { let term = self.process_lambda_term(term)?; self.named_terms.insert(name.clone(), Rc::new(NamedTerm::new(name, term))); }, - Comment => {}, Lambda(term) => { self.process_lambda_term(term)?; }, Directive(directive) => self.apply_directive(directive)?, } @@ -87,9 +84,7 @@ impl Interpreter { pub fn interpret_line(&mut self, line: &str) -> LashResult { use parsing::Statement::*; - let (rest, statement) = parsing::match_statement(parsing::Span::new(line), false)?; - let (rest, _) = parsing::finish(rest)?; - assert!(rest.is_empty(), "{:?}", rest); + let statement = parsing::parse_statement(line)?; match statement.clone() { Assignment(name, term) => { @@ -97,7 +92,6 @@ impl Interpreter { self.named_terms.insert(name.clone(), Rc::new(NamedTerm::new(name.clone(), term.clone()))); Ok(Assignment(name, term)) }, - Comment => Ok(Comment), Lambda(term) => Ok(Lambda(self.process_lambda_term(term)?)), Directive(directive) => { self.apply_directive(directive)?; Ok(statement) }, } diff --git a/src/macro.rs b/src/macro.rs index 44d8297..d16d2b1 100644 --- a/src/macro.rs +++ b/src/macro.rs @@ -71,6 +71,8 @@ impl Macro { pub fn apply(self, interpreter: &mut Interpreter, terms: Vec, duration: Duration) -> LashResult { use Macro::*; + // println!("Apply {} to {}", self, terms.iter().map(|t| t.to_string()).collect::>().join(" ")); + // println!("Apply {} to {:#?}", self, terms); if terms.len() != self.nargs() { return Err(LashError::new_macro_arg_error(self, self.nargs(), terms.len())); diff --git a/src/parsing.rs b/src/parsing.rs index ff9eab1..e971293 100644 --- a/src/parsing.rs +++ b/src/parsing.rs @@ -1,100 +1,35 @@ extern crate alloc; -use alloc::borrow::ToOwned; -use alloc::collections::VecDeque; -use alloc::format; use alloc::string::{String, ToString}; use alloc::vec::Vec; -use core::cmp::Ordering; -use core::str; -use core::str::FromStr; use core::fmt::Display; +use core::str::FromStr; -use nom::{ - branch::*, - bytes::complete::*, - character::*, - character::complete::*, - combinator::*, - multi::*, -}; -use nom_locate::LocatedSpan; - -use crate::{lambda::*, r#macro::Macro, interpreter::InterpreterDirective}; +use pest::{iterators::Pair, Parser}; +use pest_derive::Parser; +use crate::error::{LashError, LashResult}; +use crate::interpreter::InterpreterDirective; +use crate::lambda::LambdaTree; +use crate::r#macro::Macro; -pub type Span<'a> = LocatedSpan<&'a str>; -pub type IResult<'a, O> = nom::IResult, O, ParseError<'a>>; - -#[derive(Debug, PartialEq)] -pub struct ParseError<'a> { - span: Span<'a>, - message: String, -} +#[derive(Parser)] +#[grammar = "grammar.pest"] +pub struct LambdaParser; #[derive(Debug, Clone)] pub enum Statement { Assignment(String, LambdaTree), - Comment, Directive(InterpreterDirective), Lambda(LambdaTree), } -impl<'a> ParseError<'a> { - pub fn new(message: String, span: Span<'a>) -> Self { - Self { span, message } - } - - pub fn line(&self) -> u32 { - return self.span().location_line(); - } - - pub fn offset(&self) -> usize { - return self.span().location_offset(); - } - - pub fn span(&self) -> &Span { - &self.span - } -} - - -impl<'a> nom::error::ParseError> for ParseError<'a> { - fn from_error_kind(input: Span<'a>, kind: nom::error::ErrorKind) -> Self { - Self::new(kind.description().to_owned(), input) - } - - fn append(_input: Span<'a>, _kind: nom::error::ErrorKind, other: Self) -> Self { - // TODO: build stack trace - other - } - - fn from_char(input: Span<'a>, c: char) -> Self { - Self::new(format!("expected character '{}'", c), input) - } - - fn or(self, other: Self) -> Self { - match self.line().cmp(&other.line()) { - Ordering::Equal => if self.offset() > other.offset() { self } else { other }, - Ordering::Greater => self, - Ordering::Less => other, - } - } -} - -impl<'a> Display for ParseError<'a> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, "{} (line {})" , self.message, self.line()) - } -} - impl Display for Statement { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use Statement::*; match self { Assignment(name, term) => write!(f, "{} := {}", name, term), - Comment => write!(f, ""), Lambda(term) => term.fmt(f), Directive(directive) => directive.fmt(f), } @@ -102,257 +37,120 @@ impl Display for Statement { } -pub fn finish(s: Span) -> IResult<()> { - let (rem_err, _) = multispace0(s)?; - let (rem_ok, rem) = rest(rem_err)?; - - if !rem_err.is_empty() { - Err(nom::Err::Error(ParseError::new(format!("unable to parse remainder '{}'", rem), rem_err))) - } else { - Ok((rem_ok, ())) - } -} - -fn match_abstraction(s: Span) -> IResult { - let (rest, _) = match_lambda_sign(s)?; - let (rest, _) = multispace0(rest)?; - let (rest, mut variables) = map(match_variable_list, VecDeque::from)(rest)?; - let (rest, _) = multispace0(rest)?; - let (rest, _) = with_err(char('.')(rest), rest, - "expected '.' after abstraction variables".to_owned())?; - let (rest, _) = multispace0(rest)?; - let (rest, inner) = with_err(match_lambda(rest), rest, - "invalid or missing inner term on abstraction".to_owned())?; - - let mut current_abstraction = LambdaTree::new_abstraction(variables.pop_back().unwrap().to_owned(), inner); - while let Some(variable) = variables.pop_back() { - current_abstraction = LambdaTree::new_abstraction(variable.to_owned(), current_abstraction); - } - - Ok((rest, current_abstraction)) -} - -fn match_application(s: Span) -> IResult { - let (rest, terms) = separated_list1(multispace1, match_group)(s)?; - let node = vec_to_application(terms); - Ok((rest, node)) -} - -fn match_church(s: Span) -> IResult { - let (rest, _) = with_err(char('$')(s), s, - "expected '$' as prefix for church numerals".to_owned())?; - let (rest, digits) = with_err(recognize(digit1)(rest), rest, - "church numeral without denominator".to_owned())?; - let denominator = str::parse(&digits) - .map_err(|_| nom::Err::Error(ParseError::new(format!("illegal church denominator '{digits}'"), s)))?; - Ok((rest, LambdaTree::new_church_num(denominator))) -} - -fn match_comment(s: Span) -> IResult<()> { - let (rest, _) = multispace0(s)?; - let (rest, _) = char('#')(rest)?; - let (rest, _) = take_till(|c| is_newline(c as u8))(rest)?; - - Ok((rest, ())) -} - -fn match_directive(s: Span) -> IResult { - let (rest, _) = multispace0(s)?; - let (rest, _) = char('@')(rest)?; - let (rest, directive) = alt((match_directive_set, - match_directive_echo, - match_directive_include, - match_directive_usestd))(rest)?; - - Ok((rest, directive)) -} - -fn match_directive_echo(s: Span) -> IResult { - let (rest, _) = tag("echo")(s)?; - let (rest, _) = space1(rest)?; - let (rest, _) = char('\"')(rest)?; - let (rest, msg) = take_until("\"")(rest)?; // TODO find more robust matcher - let (rest, _) = char('\"')(rest)?; - - Ok((rest, InterpreterDirective::Echo(msg.to_string()))) -} - -fn match_directive_set(s: Span) -> IResult { - let (rest, _) = tag("set")(s)?; - let (rest, _) = space1(rest)?; - let (rest, key) = alphanumeric1(rest)?; - let (rest, _) = space1(rest)?; - let (rest, value) = alphanumeric1(rest)?; - - Ok((rest, InterpreterDirective::Set(key.to_string(), value.to_string()))) -} - -fn match_directive_include(s: Span) -> IResult { - let (rest, _) = tag("include")(s)?; - let (rest, _) = space1(rest)?; - let (rest, _) = char('\"')(rest)?; - let (rest, file_path) = take_until("\"")(rest)?; // TODO find more robust matcher - let (rest, _) = char('\"')(rest)?; - - Ok((rest, InterpreterDirective::Include(file_path.to_string()))) -} - -fn match_directive_usestd(s: Span) -> IResult { - let (rest, _) = tag("usestd")(s)?; - Ok((rest, InterpreterDirective::UseStd)) -} - -fn match_assignment(s: Span) -> IResult<(String, LambdaTree)> { - let (rest, name) = match_variable_name(s)?; - let (rest, _) = multispace0(rest)?; - let (rest, _) = tag(":=")(rest)?; - let (rest, _) = multispace0(rest)?; - let (rest, lambda) = match_lambda(rest)?; - Ok((rest, (name.to_owned(), lambda))) -} - -fn match_bracketed(s: Span) -> IResult { - let (rest, _) = char('(')(s)?; - let (rest, _) = multispace0(rest)?; - let (rest, lambda) = match_lambda(rest)?; - let (rest, _) = multispace0(rest)?; - let (rest, _) = char(')')(rest)?; - - Ok((rest, lambda)) -} - -fn match_group(s: Span) -> IResult { - alt((match_variable, match_church, match_bracketed,))(s) -} - -pub fn match_lambda(s: Span) -> IResult { - alt((match_macro, match_abstraction, match_application))(s) -} - -fn match_lambda_sign(s: Span) -> IResult { - recognize(alt((char('\\'), char('λ'))))(s) -} - -fn match_macro(s: Span) -> IResult { - let (rest, _) = char('!')(s)?; - let (rest, macro_name) = alphanumeric1(rest)?; - - let m = match Macro::from_str(¯o_name) { - Ok(m) => m, - Err(_) => return Err(nom::Err::Error(ParseError::new(format!("unknown macro '{}'", macro_name), rest))), - }; - - let (rest, args) = opt(match_macro_args)(rest)?; - let lambdas = args.unwrap_or_default(); - - Ok((rest, LambdaTree::new_macro(m, lambdas))) -} - -fn match_macro_args(s: Span) -> IResult> { - let (rest, _) = multispace1(s)?; - let (rest, lambdas) = separated_list1(multispace1, match_group)(rest)?; - let (rest, _) = multispace0(rest)?; - Ok((rest, lambdas)) -} - -pub fn match_statement(s: Span, with_semicolon: bool) -> IResult { - alt((match_short_statement, |x| match_long_statement(x, with_semicolon)))(s) -} - -pub fn match_short_statement(s: Span) -> IResult { - let (rest, _) = space0(s)?; - let (rest, statement) = alt((|x| match_comment(x).map(|(r, _)| (r, Statement::Comment)), - |x| match_directive(x).map(|(r, l)| (r, Statement::Directive(l))), - ))(rest)?; - let (rest, _) = space0(rest)?; - let (rest, _) = opt(char(';'))(rest)?; - let (rest, _) = multispace0(rest)?; - Ok((rest, statement)) -} - -pub fn match_long_statement(s: Span, with_semicolon: bool) -> IResult { - let (rest, _) = multispace0(s)?; - let (rest, statement) = alt((|x| match_assignment(x).map(|(r, (n, l))| (r, Statement::Assignment(n, l))), - |x| match_lambda(x).map(|(r, l)| (r, Statement::Lambda(l))), - ))(rest)?; - let (rest, _) = multispace0(rest)?; - let (rest, _) = if with_semicolon { - (char(';')(rest)?.0, 0) - } else { - (opt(char(';'))(rest)?.0, 0) - }; - let (rest, _) = multispace0(rest)?; - Ok((rest, statement)) -} - - -pub fn match_statements(s: Span) -> IResult> { - let mut rest = s; +fn parse_lambda(pair: Pair) -> LashResult { + use Rule::*; + match pair.as_rule() { + lambda => parse_lambda(pair.into_inner().next().unwrap()), + abstraction => { + let mut variables = Vec::new(); + let mut child = None; + for sub_pair in pair.into_inner() { + if sub_pair.as_rule() == variable { + variables.push(sub_pair.as_span().as_str().to_string()); + } else { + child = Some(parse_lambda(sub_pair)?); + break + } + } + + let mut current = child.unwrap(); + for var in variables.into_iter().rev() { + current = LambdaTree::new_abstraction(var, current); + } + Ok(current) + }, + application => { + let mut children: Vec<_> = pair.into_inner() + .map(|ip| parse_lambda(ip)) + .collect(); + if children.len() == 1 { + Ok(children.pop().unwrap()?) + } else { + let mut iter = children.into_iter(); + let mut current = iter.next().unwrap()?; + for child in iter { + current = LambdaTree::new_application(current, child?); + } + Ok(current) + } + }, + group => parse_lambda(pair.into_inner().next().unwrap()), + variable => Ok(LambdaTree::new_variable(pair.as_span().as_str().to_string())), + church => Ok(LambdaTree::new_church_num(pair.as_span().as_str()[1..].parse::().unwrap())), + r#macro => { + let mut inner = pair.into_inner(); + let name = inner.next().unwrap().as_span().as_str().to_string(); + let mut children = Vec::new(); + for child in inner.map(|ip| parse_lambda(ip)) { + children.push(child?); + } + let m = Macro::from_str(&name) + .map_err(|_| LashError::new_unknown_macro_error(&name))?; + Ok(LambdaTree::new_macro(m, children)) + }, + _ => panic!("Unexpected token '{:?}'", pair.as_rule()), + } +} + +fn parse_single_statement(pair: Pair) -> LashResult { + use Rule::*; + match pair.as_rule() { + statement => parse_single_statement(pair.into_inner().next().unwrap()), + assignment => { + let mut inner = pair.into_inner(); + let name = inner.next().unwrap().as_str(); + let term = parse_lambda(inner.next().unwrap())?; + Ok(Statement::Assignment(name.to_string(), term)) + }, + lambda => Ok(Statement::Lambda(parse_lambda(pair.into_inner().next().unwrap())?)), + directive => { + let mut inner = pair.into_inner(); + let dir_pair = inner.next().unwrap(); + match dir_pair.as_rule() { + directive_usestd => Ok(Statement::Directive(InterpreterDirective::UseStd)), + directive_set => { + let mut inner = dir_pair.into_inner(); + let k = inner.next().unwrap().as_span().as_str().to_string(); + let v = inner.next().unwrap().as_span().as_str().to_string(); + Ok(Statement::Directive(InterpreterDirective::Set(k, v))) + }, + directive_echo => { + let mut inner = dir_pair.into_inner(); + let msg = inner.next().unwrap().as_span().as_str().to_string(); + Ok(Statement::Directive(InterpreterDirective::Echo(msg))) + } + directive_include => { + let mut inner = dir_pair.into_inner(); + let path = inner.next().unwrap().as_span().as_str().to_string(); + Ok(Statement::Directive(InterpreterDirective::Include(path))) + } + _ => unreachable!(), + } + }, + _ => panic!("Unexpected token '{:?}'", pair.as_rule()), + } +} + +fn parse_multi_statement(pair: Pair) -> LashResult> { let mut statements = Vec::new(); - - loop { - let (r, statement) = match_statement(rest, true)?; - rest = r; - statements.push(statement); - if rest.is_empty() { - break; - } - } - - if eof::<&str, ()>(*rest).is_ok() { - Ok((rest, statements)) - } else { - return Err(nom::Err::Error(ParseError::new("expected end of file".to_owned(), rest))); + for result in pair.into_inner() + .filter(|ip| ip.as_rule() != Rule::EOI) + .map(|ip| parse_single_statement(ip)) { + statements.push(result?); } + Ok(statements) } -pub fn match_variable(s: Span) -> IResult { - let (rest, name) = match_variable_name(s)?; - Ok((rest, LambdaTree::new_variable(name.to_owned()))) -} - -fn match_variable_list(s: Span<'_>) -> IResult> { - let mut rest = s; - let mut variables = Vec::new(); - - if let Ok((r, name)) = match_variable_name(rest) { - variables.push(name); - rest = r - } else { - // @TODO - return Err(nom::Err::Error( - ParseError::new("variables missing for lambda abstraction".to_owned(), rest))); - } - - loop { - (rest, _) = multispace0(rest)?; - if let Ok((r, name)) = match_variable_name(rest) { - variables.push(name); - rest = r; - } else { - break; - } - } - - Ok((rest, variables)) -} -pub fn match_variable_name(s: Span<'_>) -> IResult<&str> { - let (rest, name) = take_while1(|x| is_alphanumeric(x as u8) || x == '-' || x == '_' || x == '\'')(s)?; - Ok((rest, *name)) -} - -fn vec_to_application(mut terms: Vec) -> LambdaTree { - if terms.is_empty() { - panic!("Invalid number of input terms for application"); - } else if terms.len() == 1 { - return terms.pop().unwrap(); - } else { - let right = terms.pop().unwrap(); - let left = vec_to_application(terms); - return LambdaTree::new_application(left, right); - } +pub fn parse_statement(input: &str) -> LashResult { + let parsed = LambdaParser::parse(Rule::statement, input) + .map_err(|e| LashError::new_syntax_error(e))? + .next().unwrap(); + parse_single_statement(parsed) } -pub fn with_err<'a, O>(result: IResult<'a, O>, s: Span<'a>, msg: String) -> IResult<'a, O> { - result.map_err(|_| nom::Err::Error(ParseError::new(msg, s))) +pub fn parse_statements(input: &str) -> LashResult> { + let parsed = LambdaParser::parse(Rule::statements, input) + .map_err(|e| LashError::new_syntax_error(e))? + .next().unwrap(); + parse_multi_statement(parsed) } diff --git a/src/typing.rs b/src/typing.rs index 6e172db..61e2417 100644 --- a/src/typing.rs +++ b/src/typing.rs @@ -1,8 +1,13 @@ -use std::cell::RefCell; -use std::cmp; -use std::collections::BTreeMap; -use std::fmt::Display; -use std::rc::Rc; +extern crate alloc; + +use alloc::boxed::Box; +use alloc::collections::BTreeMap; +use alloc::format; +use alloc::rc::Rc; +use alloc::string::{String, ToString}; +use core::cell::RefCell; +use core::cmp; +use core::fmt::Display; use crate::lambda::{LambdaNode, LambdaTree}; @@ -65,7 +70,7 @@ impl TypeMachine { _ => level, }; // *tv2.borrow_mut() = Unbound(name.to_owned(), min_level); - unsafe { *tv2.as_ptr() = Unbound(name.to_owned(), min_level); } + unsafe { *tv2.as_ptr() = Unbound(name.to_string(), min_level); } Ok(()) } else if let Link(ty) = &*tv2.borrow() { Self::occurs(tv, ty) @@ -151,7 +156,7 @@ impl TypeMachine { Abstraction(x, e) => { let ty_x = self.newvar(); let mut nextenv = env.clone(); - nextenv.insert(x.to_owned(), ty_x.clone()); + nextenv.insert(x.to_string(), ty_x.clone()); let ty_e = self.infer(&nextenv, e.clone())?; Ok(TypeArrow(Box::new(ty_x), Box::new(ty_e))) }, @@ -174,7 +179,7 @@ pub fn infer(lambda: LambdaTree) -> Result { } impl Display for Type { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use Type::*; match self { TypeVar(var) => write!(f, "{}", var.borrow()), @@ -184,7 +189,7 @@ impl Display for Type { } impl Display for TypeVariable { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { use TypeVariable::*; match self { Unbound(u, _) => write!(f, "{}", u),