From a0c0e48c23fbbdad421c3941ce54a30edb2e4e2e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Fri, 1 Sep 2023 20:11:13 +0900 Subject: [PATCH 01/25] Reworking SExParser in order to get out the information needed for syntax highlighting Adding syntax highlighting and bracket-blink to repl --- lib/src/metta/text.rs | 289 +++++++++++++++++++++++--------- repl/Cargo.toml | 5 +- repl/src/interactive_helper.rs | 293 +++++++++++++++++++++++++++++++-- repl/src/main.rs | 7 +- 4 files changed, 498 insertions(+), 96 deletions(-) diff --git a/lib/src/metta/text.rs b/lib/src/metta/text.rs index 5c42b603e..b8913d1a5 100644 --- a/lib/src/metta/text.rs +++ b/lib/src/metta/text.rs @@ -2,7 +2,8 @@ use crate::*; -use std::str::Chars; +use core::ops::Range; +use std::str::CharIndices; use std::iter::Peekable; use regex::Regex; use std::rc::Rc; @@ -61,53 +62,157 @@ impl Tokenizer { } +/// The meaning of a parsed token, generated from a substring in the input text +#[derive(Clone, Debug)] +pub enum ParseTokenType { + /// Comment line. All text between a non-escaped ';' and a newline + Comment, + /// Variable. A symbol immediately preceded by a '$' sigil + Variable, + /// String Literal. All text between non-escaped '"' (double quote) characters + StringLiteral, + /// Special Token. A token matched by a regex registered with the [Tokenizer] + //TODO: Currently this `Special` token is never generated. When I split the atom generation from the parsing, I will + // roll the Tokenizer check into `next_token_with_visitor`, and eliminate `parse_atom_with_visitor` + Special, + /// Symbol Token. Any other whitespace-delimited token that isn't a [Variable](ParseTokenType::Variable), + /// [StringLiteral](ParseTokenType::ParseTokenType), or [Special](ParseTokenType::Special) + MiscSymbol, + /// Open Parenthesis. A non-escaped '(' character indicating the beginning of an expression + OpenParen, + /// Close Parenthesis. A non-escaped ')' character indicating the end of an expression + CloseParen, + /// Whitespace. One or more whitespace chars + Whitespace, + /// Expression. All input text composing an Expression, from the opening '(' to the close + Expression, + /// Atom. A Symbol Atom or Grounded Atom + //TODO, check since I'm not sure about this one. Maybe I'll want different intermediate tokens + // when I generate atoms from parse tokens + Atom, + /// Unparsed Leftover Text. Text remaining after the parser has encountered an error + LeftoverText, +} + +#[derive(Clone, Debug)] +pub struct ParseToken<'a> { + pub token_type: ParseTokenType, + pub src_range: Range, + pub substr: &'a str, +} + pub struct SExprParser<'a> { - it: Peekable>, + text: &'a str, + it: Peekable>, } impl<'a> SExprParser<'a> { pub fn new(text: &'a str) -> Self { - Self{ it: text.chars().peekable() } + Self{ text, it: text.char_indices().peekable() } } + //TODO: Consider reorganizing this function as a visitor pub fn parse(&mut self, tokenizer: &Tokenizer) -> Result, String> { - while let Some(c) = self.it.peek() { + self.parse_with_visitor(tokenizer, |_tok| ()) + } + + pub fn parse_with_visitor(&mut self, tokenizer: &Tokenizer, mut callback: C) -> Result, String> + where C: FnMut(ParseToken) + { + self.parse_with_visitor_internal(tokenizer, &mut callback) + } + + fn parse_with_visitor_internal(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result, String> + where C: FnMut(ParseToken) + { + while let Some((idx, c)) = self.it.peek().cloned() { match c { ';' => { + let start_idx = idx; self.skip_line(); + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::Comment, range)); }, _ if c.is_whitespace() => { + let range = idx..idx+1; + callback(self.new_parse_token(ParseTokenType::Whitespace, range)); self.it.next(); }, '$' => { - self.it.next(); - let token = next_var(&mut self.it)?; + let token = self.next_var_with_visitor(callback)?; return Ok(Some(Atom::var(token))); }, '(' => { + let range = idx..idx+1; + callback(self.new_parse_token(ParseTokenType::OpenParen, range)); + + self.it.next(); + let start_idx = idx; + let expr = self.parse_expr_with_visitor(tokenizer, callback)?; + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::Expression, range)); + return Ok(Some(expr)); + }, + ')' => { + let range = idx..idx+1; + callback(self.new_parse_token(ParseTokenType::CloseParen, range)); self.it.next(); - return self.parse_expr(tokenizer).map(Some); + + self.parse_leftovers_with_visitor(callback); + return Err("Unexpected right bracket".to_string()) }, - ')' => return Err("Unexpected right bracket".to_string()), _ => { - return Ok(Some(self.parse_atom(tokenizer)?)); + let start_idx = idx; + let atom = self.parse_atom_with_visitor(tokenizer, callback)?; + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::Atom, range)); + return Ok(Some(atom)); }, } } Ok(None) } + ///WARNING: may be (often is) == to text.len(), and thus can't be used as an index to read a char + fn cur_idx(&mut self) -> usize { + if let Some((idx, _)) = self.it.peek() { + *idx + } else { + self.text.len() + } + } + + fn new_parse_token(&self, token_type: ParseTokenType, src_range: Range) -> ParseToken { + ParseToken { + token_type, + src_range: src_range.clone(), + substr: &self.text[src_range], + } + } + fn skip_line(&mut self) -> () { - while let Some(n) = self.it.peek() { - match n { + while let Some((_idx, c)) = self.it.peek() { + match c { '\n' => break, _ => { self.it.next(); } } } } - fn parse_atom(&mut self, tokenizer: &Tokenizer) -> Result { - let token = next_token(&mut self.it)?; + fn parse_leftovers_with_visitor(&mut self, callback: &mut C) + where C: FnMut(ParseToken) + { + if let Some((start_idx, _c)) = self.it.peek().cloned() { + let (last, _c) = self.it.clone().last().unwrap(); + let range = start_idx..last+1; + callback(self.new_parse_token(ParseTokenType::LeftoverText, range)); + } + } + + fn parse_atom_with_visitor(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result + where C: FnMut(ParseToken) + { + let token = self.next_token_with_visitor(callback)?; let constr = tokenizer.find_token(token.as_str()); if let Some(constr) = constr { return Ok(constr(token.as_str())); @@ -116,21 +221,32 @@ impl<'a> SExprParser<'a> { } } - fn parse_expr(&mut self, tokenizer: &Tokenizer) -> Result { + fn parse_expr_with_visitor(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result + where C: FnMut(ParseToken) + { let mut children: Vec = Vec::new(); - while let Some(c) = self.it.peek() { + while let Some((idx, c)) = self.it.peek().cloned() { match c { ';' => { + let start_idx = idx; self.skip_line(); + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::Comment, range)); + }, + _ if c.is_whitespace() => { + let range = idx..idx+1; + callback(self.new_parse_token(ParseTokenType::Whitespace, range)); + self.it.next(); }, - _ if c.is_whitespace() => { self.it.next(); }, ')' => { + let range = idx..idx+1; + callback(self.new_parse_token(ParseTokenType::CloseParen, range)); self.it.next(); let expr = Atom::expr(children); return Ok(expr); }, _ => { - if let Ok(Some(child)) = self.parse(tokenizer) { + if let Ok(Some(child)) = self.parse_with_visitor_internal(tokenizer, callback) { children.push(child); } else { return Err("Unexpected end of expression member".to_string()); @@ -141,67 +257,91 @@ impl<'a> SExprParser<'a> { Err("Unexpected end of expression".to_string()) } -} - -fn next_token(it: &mut Peekable>) -> Result { - match it.peek() { - Some('"') => next_string(it), - _ => Ok(next_word(it)?), + fn next_token_with_visitor(&mut self, callback: &mut C) -> Result + where C: FnMut(ParseToken) + { + match self.it.peek().cloned() { + Some((idx, '"')) => { + let start_idx = idx; + let str_token = self.next_string()?; + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::StringLiteral, range)); + Ok(str_token) + }, + Some((idx, _)) => { + let start_idx = idx; + let tok = self.next_word()?; + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::MiscSymbol, range)); + Ok(tok) + }, + None => Ok(String::new()) + } } -} - -fn next_string(it: &mut Peekable>) -> Result { - let mut token = String::new(); + fn next_string(&mut self) -> Result { + let mut token = String::new(); - if it.next() != Some('"') { - return Err("Double quote expected".to_string()); - } else { - token.push('"'); - } - while let Some(c) = it.next() { - if c == '"' { + if let Some((_idx, '"')) = self.it.next() { token.push('"'); - break; + } else { + return Err("Double quote expected".to_string()); } - let c = if c == '\\' { - match it.next() { - Some(c) => c, - None => return Err("Escaping sequence is not finished".to_string()), + while let Some((_idx, c)) = self.it.next() { + if c == '"' { + token.push('"'); + break; } - } else { - c - }; - token.push(c); + let c = if c == '\\' { + match self.it.next() { + Some((_idx, c)) => c, + None => return Err("Escaping sequence is not finished".to_string()), + } + } else { + c + }; + token.push(c); + } + Ok(token) } - Ok(token) -} -fn next_word(it: &mut Peekable>) -> Result { - let mut token = String::new(); - while let Some(&c) = it.peek() { - if c.is_whitespace() || c == '(' || c == ')' { - break; + fn next_word(&mut self) -> Result { + let mut token = String::new(); + while let Some((_idx, c)) = self.it.peek() { + if c.is_whitespace() || *c == '(' || *c == ')' { + break; + } + token.push(*c); + self.it.next(); } - token.push(c); - it.next(); + Ok(token) } - Ok(token) -} -fn next_var(it: &mut Peekable>) -> Result { - let mut token = String::new(); - while let Some(&c) = it.peek() { - if c.is_whitespace() || c == '(' || c == ')' { - break; - } - if c == '#' { - return Err("'#' char is reserved for internal usage".to_string()); + fn next_var_with_visitor(&mut self, callback: &mut C) -> Result + where C: FnMut(ParseToken) + { + let (start_idx, _c) = self.it.peek().cloned().unwrap(); + let mut tmp_it = self.it.clone(); + tmp_it.next(); + + let mut token = String::new(); + while let Some((_idx, c)) = tmp_it.peek() { + if c.is_whitespace() || *c == '(' || *c == ')' { + break; + } + if *c == '#' { + self.parse_leftovers_with_visitor(callback); + return Err("'#' char is reserved for internal usage".to_string()); + } + token.push(*c); + tmp_it.next(); } - token.push(c); - it.next(); + self.it = tmp_it; + let range = start_idx..self.cur_idx(); + callback(self.new_parse_token(ParseTokenType::Variable, range)); + Ok(token) } - Ok(token) + } #[cfg(test)] @@ -261,24 +401,19 @@ mod tests { #[test] fn test_next_token() { - let mut it = "n)".chars().peekable(); + let mut parser = SExprParser::new("n)"); - assert_eq!("n".to_string(), next_token(&mut it).unwrap()); - assert_eq!(Some(')'), it.next()); + assert_eq!("n".to_string(), parser.next_token_with_visitor(&mut |_tok| ()).unwrap()); + assert_eq!(Some((1, ')')), parser.it.next()); } #[test] fn test_next_string_errors() { - let mut token = String::new(); - token.push('a'); - let mut it = token.chars().peekable(); - assert_eq!(Err(String::from("Double quote expected")), next_string(&mut it)); + let mut parser = SExprParser::new("a"); + assert_eq!(Err(String::from("Double quote expected")), parser.next_string()); - let mut token = String::new(); - token.push('"'); - token.push('\\'); - let mut it = token.chars().peekable(); - assert_eq!(Err(String::from("Escaping sequence is not finished")), next_string(&mut it)); + let mut parser = SExprParser::new("\"\\"); + assert_eq!(Err(String::from("Escaping sequence is not finished")), parser.next_string()); } #[test] diff --git a/repl/Cargo.toml b/repl/Cargo.toml index 7e4d79e90..cd3f6fc97 100644 --- a/repl/Cargo.toml +++ b/repl/Cargo.toml @@ -7,7 +7,10 @@ description = "A shell to execute MeTTa" [dependencies] anyhow = { version = "1.0.75", features = ["std"] } hyperon = { path = "../lib/" } -rustyline = { version = "12.0.0", features = ["derive"] } +# rustyline = { version = "12.0.0", features = ["derive"] } +# rustyline = {git = "https://github.com/luketpeterson/rustyline", version = "12.0.0", features = ["derive"] } +# TODO: I hope these changes stabilize inside rustyline before we need to publish Hyperon +rustyline = {git = "https://github.com/gwenn/rustyline.git", branch="no_highlight_char_on_final_refresh", version = "12.0.0", features = ["derive"] } clap = { version = "4.4.0", features = ["derive"] } directories = "5.0.1" pyo3 = { version = "0.19.2", features = ["auto-initialize"], optional = true } diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index e9c4523af..82ee51713 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -1,24 +1,39 @@ use std::borrow::Cow::{self, Borrowed, Owned}; +use std::cell::RefCell; use rustyline::completion::FilenameCompleter; -use rustyline::highlight::{Highlighter, MatchingBracketHighlighter}; +use rustyline::highlight::Highlighter; use rustyline::hint::HistoryHinter; -use rustyline::validate::MatchingBracketValidator; -use rustyline::{Completer, Helper, Hinter, Validator}; +use rustyline::validate::{Validator, ValidationContext, ValidationResult}; +use rustyline::error::ReadlineError; +use rustyline::{Completer, Helper, Hinter}; -#[derive(Helper, Completer, Hinter, Validator)] +use hyperon::metta::text::{SExprParser, ParseTokenType}; + +use crate::metta_shim::MettaShim; + +#[derive(Helper, Completer, Hinter)] pub struct ReplHelper { + pub metta: RefCell, #[rustyline(Completer)] completer: FilenameCompleter, - highlighter: MatchingBracketHighlighter, - #[rustyline(Validator)] - validator: MatchingBracketValidator, #[rustyline(Hinter)] hinter: HistoryHinter, pub colored_prompt: String, + cursor_bracket: std::cell::Cell>, // If the cursor is over or near a bracket to match } +//TODO: this information needs to come from the config.metta. +// This is just a stop-gap to make sure parsing & rendering is correct +const BRACKET_COLORS: &[&str] = &["94", "93", "95", "96"]; +const COMMENT_COLOR: &str = "32"; +const VARIABLE_COLOR: &str = "33"; +const MISC_SYMBOL_COLOR: &str = "34"; +const STRING_LITERAL_COLOR: &str = "31"; +const SPECIAL_SYMBOL_COLOR: &str = "36"; +const ERROR_COLOR: &str = "91"; + impl Highlighter for ReplHelper { fn highlight_prompt<'b, 's: 'b, 'p: 'b>( &'s self, @@ -33,26 +48,274 @@ impl Highlighter for ReplHelper { } fn highlight_hint<'h>(&self, hint: &'h str) -> Cow<'h, str> { + //Render the hints in a lighter font Owned("\x1b[2m".to_owned() + hint + "\x1b[m") } - fn highlight<'l>(&self, line: &'l str, pos: usize) -> Cow<'l, str> { - self.highlighter.highlight(line, pos) + fn highlight<'l>(&self, line: &'l str, _pos: usize) -> Cow<'l, str> { + + //See if we need to highlight the bracket matching the cursor position + //BUG: this could possibly get tripped up by parenthesis inside comments and string literals + let mut blink_char = None; + if let Some((bracket, pos)) = self.cursor_bracket.get() { + blink_char = find_matching_bracket(line, pos, bracket); + } + + //Iterate over the tokens generated by the parser, coloring them appropriately + let mut colored_line = String::with_capacity(line.len() * 2); + let mut bracket_depth = 0; + self.metta.borrow_mut().inside_env(|metta| { + let mut parser = SExprParser::new(line); + loop { + let result = parser.parse_with_visitor(&metta.metta.tokenizer().borrow(), |token| { + let mut style_sequence = vec![]; + + //Set up the style for the token + match token.token_type { + ParseTokenType::Comment => { + style_sequence.push(COMMENT_COLOR); + }, + ParseTokenType::Variable => { + style_sequence.push(VARIABLE_COLOR); + }, + ParseTokenType::MiscSymbol => { + style_sequence.push(MISC_SYMBOL_COLOR); + }, + ParseTokenType::StringLiteral => { + style_sequence.push(STRING_LITERAL_COLOR); + }, + ParseTokenType::Special => { + //TODO: We'll want to use the type system to assign styling here + style_sequence.push(SPECIAL_SYMBOL_COLOR); + }, + ParseTokenType::OpenParen => { + style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); + bracket_depth += 1; + }, + ParseTokenType::CloseParen => { + if bracket_depth > 0 { + bracket_depth -= 1; + style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); + } else { + style_sequence.push(ERROR_COLOR); + } + }, + ParseTokenType::LeftoverText => { + style_sequence.push(ERROR_COLOR); + } + _ => { } // We don't do anything with the compound tokens, e.g. Expression & Atom + } + + //See if we need to render this token with the "bracket blink" + if let Some((_matching_char, blink_idx)) = &blink_char { + if token.src_range.contains(blink_idx) { + style_sequence.push("1;7"); + } + } + + //Render the token to the buffer if it's one of the ones we choose to render + if token_type_should_render(token.token_type) { + + //Push the styles to the buffer + let style_count = style_sequence.len(); + if style_count > 0 { + colored_line.push_str("\x1b["); + for (style_idx, style) in style_sequence.into_iter().enumerate() { + colored_line.push_str(style); + if style_idx < style_count-1 { + colored_line.push(';'); + } + } + colored_line.push('m'); + } + + //Push the token itself to the buffer + colored_line.push_str(&line[token.src_range]); + + //And push an undo sequence, if the token was stylized + if style_count > 0 { + colored_line.push_str("\x1b[0m"); + } + } + }); + + match result { + Ok(Some(_atom)) => (), + Ok(None) => break, + Err(_err) => break, + } + } + }); + + Owned(colored_line) } - fn highlight_char(&self, line: &str, pos: usize) -> bool { - self.highlighter.highlight_char(line, pos) + fn highlight_char(&self, line: &str, pos: usize, final_render: bool) -> bool { + if final_render { + self.cursor_bracket.set(None); + } else { + self.cursor_bracket.set(check_bracket(line, pos)); + } + true + } +} + +impl Validator for ReplHelper { + fn validate(&self, ctx: &mut ValidationContext) -> Result { + + let mut validation_result = ValidationResult::Incomplete; + self.metta.borrow_mut().inside_env(|metta| { + let mut parser = SExprParser::new(ctx.input()); + loop { + let result = parser.parse(&metta.metta.tokenizer().borrow()); + + match result { + Ok(Some(_atom)) => (), + Ok(None) => { + validation_result = ValidationResult::Valid(None); + break + }, + Err(err) => { + validation_result = ValidationResult::Invalid(Some( + format!(" - \x1b[0;{}m{}\x1b[0m", ERROR_COLOR, err) + )); + break; + } + } + } + }); + Ok(validation_result) + } + +} + +fn token_type_should_render(token_type: ParseTokenType) -> bool { + match token_type { + ParseTokenType::Comment | + ParseTokenType::Variable | + ParseTokenType::OpenParen | + ParseTokenType::CloseParen | + ParseTokenType::StringLiteral | + ParseTokenType::Special | + ParseTokenType::MiscSymbol | + ParseTokenType::Whitespace | + ParseTokenType::LeftoverText => true, + + ParseTokenType::Atom | + ParseTokenType::Expression => false, } } impl ReplHelper { - pub fn new() -> Self { + pub fn new(metta: MettaShim) -> Self { Self { + metta: RefCell::new(metta), completer: FilenameCompleter::new(), - highlighter: MatchingBracketHighlighter::new(), hinter: HistoryHinter {}, colored_prompt: "".to_owned(), - validator: MatchingBracketValidator::new(), + cursor_bracket: std::cell::Cell::new(None), + } + } +} + +//*-=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*/ +// LICENSE. The below functions are based on a functions with the same names in the highlight.rs +// file of the rustyline crate source, version 12.0.0. +// Incorporated here under the terms of the MIT license. +//*-=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*=-*/ + +fn find_matching_bracket(line: &str, pos: usize, bracket: u8) -> Option<(u8, usize)> { + let matching = matching_bracket(bracket); + let mut idx; + let mut unmatched = 1; + if is_open_bracket(bracket) { + // forward search + idx = pos + 1; + let bytes = &line.as_bytes()[idx..]; + for b in bytes { + if *b == matching { + unmatched -= 1; + if unmatched == 0 { + debug_assert_eq!(matching, line.as_bytes()[idx]); + return Some((matching, idx)); + } + } else if *b == bracket { + unmatched += 1; + } + idx += 1; + } + debug_assert_eq!(idx, line.len()); + } else { + // backward search + idx = pos; + let bytes = &line.as_bytes()[..idx]; + for b in bytes.iter().rev() { + if *b == matching { + unmatched -= 1; + if unmatched == 0 { + debug_assert_eq!(matching, line.as_bytes()[idx - 1]); + return Some((matching, idx - 1)); + } + } else if *b == bracket { + unmatched += 1; + } + idx -= 1; + } + debug_assert_eq!(idx, 0); + } + None +} + +// check under or before the cursor +fn check_bracket(line: &str, pos: usize) -> Option<(u8, usize)> { + if line.is_empty() { + return None; + } + let mut pos = pos; + if pos >= line.len() { + pos = line.len() - 1; // before cursor + let b = line.as_bytes()[pos]; // previous byte + if is_close_bracket(b) { + Some((b, pos)) + } else { + None + } + } else { + let mut under_cursor = true; + loop { + let b = line.as_bytes()[pos]; + if is_close_bracket(b) { + return if pos == 0 { None } else { Some((b, pos)) }; + } else if is_open_bracket(b) { + return if pos + 1 == line.len() { + None + } else { + Some((b, pos)) + }; + } else if under_cursor && pos > 0 { + under_cursor = false; + pos -= 1; // or before cursor + } else { + return None; + } } } -} \ No newline at end of file +} + +const fn matching_bracket(bracket: u8) -> u8 { + match bracket { + b'{' => b'}', + b'}' => b'{', + b'[' => b']', + b']' => b'[', + b'(' => b')', + b')' => b'(', + b => b, + } +} +const fn is_open_bracket(bracket: u8) -> bool { + matches!(bracket, b'{' | b'[' | b'(') +} +const fn is_close_bracket(bracket: u8) -> bool { + matches!(bracket, b'}' | b']' | b')') +} diff --git a/repl/src/main.rs b/repl/src/main.rs index 8696bbe5e..b33e269cc 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -75,13 +75,13 @@ fn main() -> Result<()> { } else { //Otherwise enter interactive mode - start_interactive_mode(repl_params, &mut metta).map_err(|err| err.into()) + start_interactive_mode(repl_params, metta).map_err(|err| err.into()) } } // To debug rustyline: // RUST_LOG=rustyline=debug cargo run --example example 2> debug.log -fn start_interactive_mode(repl_params: Shared, metta: &mut MettaShim) -> rustyline::Result<()> { +fn start_interactive_mode(repl_params: Shared, metta: MettaShim) -> rustyline::Result<()> { //Init RustyLine let config = Config::builder() @@ -89,7 +89,7 @@ fn start_interactive_mode(repl_params: Shared, metta: &mut MettaShim .completion_type(CompletionType::List) .edit_mode(EditMode::Emacs) .build(); - let helper = ReplHelper::new(); + let helper = ReplHelper::new(metta); let mut rl = Editor::with_config(config)?; rl.set_helper(Some(helper)); rl.bind_sequence(KeyEvent::alt('n'), Cmd::HistorySearchForward); @@ -109,6 +109,7 @@ fn start_interactive_mode(repl_params: Shared, metta: &mut MettaShim Ok(line) => { rl.add_history_entry(line.as_str())?; + let mut metta = rl.helper().unwrap().metta.borrow_mut(); metta.exec(line.as_str()); metta.inside_env(|metta| { for result in metta.result.iter() { From fdd30b4df29d17cb5797a6b33569d54613ed8e23 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 2 Sep 2023 11:26:57 +0900 Subject: [PATCH 02/25] Renaming "ParseToken" to "SyntaxNode" --- lib/src/metta/text.rs | 60 +++++++++++++++--------------- repl/src/interactive_helper.rs | 68 +++++++++++++++++----------------- 2 files changed, 63 insertions(+), 65 deletions(-) diff --git a/lib/src/metta/text.rs b/lib/src/metta/text.rs index b8913d1a5..4d23f08f2 100644 --- a/lib/src/metta/text.rs +++ b/lib/src/metta/text.rs @@ -62,9 +62,9 @@ impl Tokenizer { } -/// The meaning of a parsed token, generated from a substring in the input text +/// The meaning of a parsed syntactic element, generated from a substring in the input text #[derive(Clone, Debug)] -pub enum ParseTokenType { +pub enum SyntaxNodeType { /// Comment line. All text between a non-escaped ';' and a newline Comment, /// Variable. A symbol immediately preceded by a '$' sigil @@ -75,8 +75,8 @@ pub enum ParseTokenType { //TODO: Currently this `Special` token is never generated. When I split the atom generation from the parsing, I will // roll the Tokenizer check into `next_token_with_visitor`, and eliminate `parse_atom_with_visitor` Special, - /// Symbol Token. Any other whitespace-delimited token that isn't a [Variable](ParseTokenType::Variable), - /// [StringLiteral](ParseTokenType::ParseTokenType), or [Special](ParseTokenType::Special) + /// Symbol Token. Any other whitespace-delimited token that isn't a [Variable](SyntaxNodeType::Variable), + /// [StringLiteral](SyntaxNodeType::StringLiteral), or [Special](SyntaxNodeType::Special) MiscSymbol, /// Open Parenthesis. A non-escaped '(' character indicating the beginning of an expression OpenParen, @@ -95,10 +95,9 @@ pub enum ParseTokenType { } #[derive(Clone, Debug)] -pub struct ParseToken<'a> { - pub token_type: ParseTokenType, +pub struct SyntaxNode { + pub node_type: SyntaxNodeType, pub src_range: Range, - pub substr: &'a str, } pub struct SExprParser<'a> { @@ -117,13 +116,13 @@ impl<'a> SExprParser<'a> { } pub fn parse_with_visitor(&mut self, tokenizer: &Tokenizer, mut callback: C) -> Result, String> - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { self.parse_with_visitor_internal(tokenizer, &mut callback) } fn parse_with_visitor_internal(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result, String> - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { while let Some((idx, c)) = self.it.peek().cloned() { match c { @@ -131,11 +130,11 @@ impl<'a> SExprParser<'a> { let start_idx = idx; self.skip_line(); let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::Comment, range)); + callback(self.new_syntax_node(SyntaxNodeType::Comment, range)); }, _ if c.is_whitespace() => { let range = idx..idx+1; - callback(self.new_parse_token(ParseTokenType::Whitespace, range)); + callback(self.new_syntax_node(SyntaxNodeType::Whitespace, range)); self.it.next(); }, '$' => { @@ -144,18 +143,18 @@ impl<'a> SExprParser<'a> { }, '(' => { let range = idx..idx+1; - callback(self.new_parse_token(ParseTokenType::OpenParen, range)); + callback(self.new_syntax_node(SyntaxNodeType::OpenParen, range)); self.it.next(); let start_idx = idx; let expr = self.parse_expr_with_visitor(tokenizer, callback)?; let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::Expression, range)); + callback(self.new_syntax_node(SyntaxNodeType::Expression, range)); return Ok(Some(expr)); }, ')' => { let range = idx..idx+1; - callback(self.new_parse_token(ParseTokenType::CloseParen, range)); + callback(self.new_syntax_node(SyntaxNodeType::CloseParen, range)); self.it.next(); self.parse_leftovers_with_visitor(callback); @@ -165,7 +164,7 @@ impl<'a> SExprParser<'a> { let start_idx = idx; let atom = self.parse_atom_with_visitor(tokenizer, callback)?; let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::Atom, range)); + callback(self.new_syntax_node(SyntaxNodeType::Atom, range)); return Ok(Some(atom)); }, } @@ -182,11 +181,10 @@ impl<'a> SExprParser<'a> { } } - fn new_parse_token(&self, token_type: ParseTokenType, src_range: Range) -> ParseToken { - ParseToken { - token_type, + fn new_syntax_node(&self, node_type: SyntaxNodeType, src_range: Range) -> SyntaxNode { + SyntaxNode { + node_type, src_range: src_range.clone(), - substr: &self.text[src_range], } } @@ -200,17 +198,17 @@ impl<'a> SExprParser<'a> { } fn parse_leftovers_with_visitor(&mut self, callback: &mut C) - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { if let Some((start_idx, _c)) = self.it.peek().cloned() { let (last, _c) = self.it.clone().last().unwrap(); let range = start_idx..last+1; - callback(self.new_parse_token(ParseTokenType::LeftoverText, range)); + callback(self.new_syntax_node(SyntaxNodeType::LeftoverText, range)); } } fn parse_atom_with_visitor(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { let token = self.next_token_with_visitor(callback)?; let constr = tokenizer.find_token(token.as_str()); @@ -222,7 +220,7 @@ impl<'a> SExprParser<'a> { } fn parse_expr_with_visitor(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { let mut children: Vec = Vec::new(); while let Some((idx, c)) = self.it.peek().cloned() { @@ -231,16 +229,16 @@ impl<'a> SExprParser<'a> { let start_idx = idx; self.skip_line(); let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::Comment, range)); + callback(self.new_syntax_node(SyntaxNodeType::Comment, range)); }, _ if c.is_whitespace() => { let range = idx..idx+1; - callback(self.new_parse_token(ParseTokenType::Whitespace, range)); + callback(self.new_syntax_node(SyntaxNodeType::Whitespace, range)); self.it.next(); }, ')' => { let range = idx..idx+1; - callback(self.new_parse_token(ParseTokenType::CloseParen, range)); + callback(self.new_syntax_node(SyntaxNodeType::CloseParen, range)); self.it.next(); let expr = Atom::expr(children); return Ok(expr); @@ -258,21 +256,21 @@ impl<'a> SExprParser<'a> { } fn next_token_with_visitor(&mut self, callback: &mut C) -> Result - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { match self.it.peek().cloned() { Some((idx, '"')) => { let start_idx = idx; let str_token = self.next_string()?; let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::StringLiteral, range)); + callback(self.new_syntax_node(SyntaxNodeType::StringLiteral, range)); Ok(str_token) }, Some((idx, _)) => { let start_idx = idx; let tok = self.next_word()?; let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::MiscSymbol, range)); + callback(self.new_syntax_node(SyntaxNodeType::MiscSymbol, range)); Ok(tok) }, None => Ok(String::new()) @@ -318,7 +316,7 @@ impl<'a> SExprParser<'a> { } fn next_var_with_visitor(&mut self, callback: &mut C) -> Result - where C: FnMut(ParseToken) + where C: FnMut(SyntaxNode) { let (start_idx, _c) = self.it.peek().cloned().unwrap(); let mut tmp_it = self.it.clone(); @@ -338,7 +336,7 @@ impl<'a> SExprParser<'a> { } self.it = tmp_it; let range = start_idx..self.cur_idx(); - callback(self.new_parse_token(ParseTokenType::Variable, range)); + callback(self.new_syntax_node(SyntaxNodeType::Variable, range)); Ok(token) } diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 82ee51713..8cbe0bcc9 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -9,7 +9,7 @@ use rustyline::validate::{Validator, ValidationContext, ValidationResult}; use rustyline::error::ReadlineError; use rustyline::{Completer, Helper, Hinter}; -use hyperon::metta::text::{SExprParser, ParseTokenType}; +use hyperon::metta::text::{SExprParser, SyntaxNodeType}; use crate::metta_shim::MettaShim; @@ -61,38 +61,38 @@ impl Highlighter for ReplHelper { blink_char = find_matching_bracket(line, pos, bracket); } - //Iterate over the tokens generated by the parser, coloring them appropriately + //Iterate over the syntax nodes generated by the parser, coloring them appropriately let mut colored_line = String::with_capacity(line.len() * 2); let mut bracket_depth = 0; self.metta.borrow_mut().inside_env(|metta| { let mut parser = SExprParser::new(line); loop { - let result = parser.parse_with_visitor(&metta.metta.tokenizer().borrow(), |token| { + let result = parser.parse_with_visitor(&metta.metta.tokenizer().borrow(), |node| { let mut style_sequence = vec![]; - //Set up the style for the token - match token.token_type { - ParseTokenType::Comment => { + //Set up the style for the node + match node.node_type { + SyntaxNodeType::Comment => { style_sequence.push(COMMENT_COLOR); }, - ParseTokenType::Variable => { + SyntaxNodeType::Variable => { style_sequence.push(VARIABLE_COLOR); }, - ParseTokenType::MiscSymbol => { + SyntaxNodeType::MiscSymbol => { style_sequence.push(MISC_SYMBOL_COLOR); }, - ParseTokenType::StringLiteral => { + SyntaxNodeType::StringLiteral => { style_sequence.push(STRING_LITERAL_COLOR); }, - ParseTokenType::Special => { + SyntaxNodeType::Special => { //TODO: We'll want to use the type system to assign styling here style_sequence.push(SPECIAL_SYMBOL_COLOR); }, - ParseTokenType::OpenParen => { + SyntaxNodeType::OpenParen => { style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); bracket_depth += 1; }, - ParseTokenType::CloseParen => { + SyntaxNodeType::CloseParen => { if bracket_depth > 0 { bracket_depth -= 1; style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); @@ -100,21 +100,21 @@ impl Highlighter for ReplHelper { style_sequence.push(ERROR_COLOR); } }, - ParseTokenType::LeftoverText => { + SyntaxNodeType::LeftoverText => { style_sequence.push(ERROR_COLOR); } - _ => { } // We don't do anything with the compound tokens, e.g. Expression & Atom + _ => { } // We don't do anything with the compound nodes, e.g. Expression & Atom } - //See if we need to render this token with the "bracket blink" + //See if we need to render this node with the "bracket blink" if let Some((_matching_char, blink_idx)) = &blink_char { - if token.src_range.contains(blink_idx) { + if node.src_range.contains(blink_idx) { style_sequence.push("1;7"); } } - //Render the token to the buffer if it's one of the ones we choose to render - if token_type_should_render(token.token_type) { + //Render the node to the buffer if it's one of the ones we choose to render + if node_type_should_render(node.node_type) { //Push the styles to the buffer let style_count = style_sequence.len(); @@ -129,10 +129,10 @@ impl Highlighter for ReplHelper { colored_line.push('m'); } - //Push the token itself to the buffer - colored_line.push_str(&line[token.src_range]); + //Push the node itself to the buffer + colored_line.push_str(&line[node.src_range]); - //And push an undo sequence, if the token was stylized + //And push an undo sequence, if the node was stylized if style_count > 0 { colored_line.push_str("\x1b[0m"); } @@ -189,20 +189,20 @@ impl Validator for ReplHelper { } -fn token_type_should_render(token_type: ParseTokenType) -> bool { - match token_type { - ParseTokenType::Comment | - ParseTokenType::Variable | - ParseTokenType::OpenParen | - ParseTokenType::CloseParen | - ParseTokenType::StringLiteral | - ParseTokenType::Special | - ParseTokenType::MiscSymbol | - ParseTokenType::Whitespace | - ParseTokenType::LeftoverText => true, +fn node_type_should_render(node_type: SyntaxNodeType) -> bool { + match node_type { + SyntaxNodeType::Comment | + SyntaxNodeType::Variable | + SyntaxNodeType::OpenParen | + SyntaxNodeType::CloseParen | + SyntaxNodeType::StringLiteral | + SyntaxNodeType::Special | + SyntaxNodeType::MiscSymbol | + SyntaxNodeType::Whitespace | + SyntaxNodeType::LeftoverText => true, - ParseTokenType::Atom | - ParseTokenType::Expression => false, + SyntaxNodeType::Atom | + SyntaxNodeType::Expression => false, } } From f8c01e46b828235b3e9cf21a1a7d1ccd7d54ea60 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 2 Sep 2023 18:30:36 +0900 Subject: [PATCH 03/25] Redoing parser again, this time going to a syntax tree containing atoms. Long term I think this is a better foundation for MeTTa tooling --- lib/src/metta/text.rs | 389 +++++++++++++++++++++------------ repl/src/interactive_helper.rs | 146 +++++-------- 2 files changed, 314 insertions(+), 221 deletions(-) diff --git a/lib/src/metta/text.rs b/lib/src/metta/text.rs index 4d23f08f2..7dba3f8a9 100644 --- a/lib/src/metta/text.rs +++ b/lib/src/metta/text.rs @@ -1,5 +1,7 @@ //! MeTTa parser implementation. +use core::convert::TryFrom; + use crate::*; use core::ops::Range; @@ -68,36 +70,121 @@ pub enum SyntaxNodeType { /// Comment line. All text between a non-escaped ';' and a newline Comment, /// Variable. A symbol immediately preceded by a '$' sigil - Variable, + VariableToken, /// String Literal. All text between non-escaped '"' (double quote) characters - StringLiteral, - /// Special Token. A token matched by a regex registered with the [Tokenizer] - //TODO: Currently this `Special` token is never generated. When I split the atom generation from the parsing, I will - // roll the Tokenizer check into `next_token_with_visitor`, and eliminate `parse_atom_with_visitor` - Special, - /// Symbol Token. Any other whitespace-delimited token that isn't a [Variable](SyntaxNodeType::Variable), - /// [StringLiteral](SyntaxNodeType::StringLiteral), or [Special](SyntaxNodeType::Special) - MiscSymbol, + StringToken, + /// Word Token. Any other whitespace-delimited token that isn't a [Variable](SyntaxNodeType::VariableToken), + /// or [StringToken](SyntaxNodeType::StringToken) + WordToken, /// Open Parenthesis. A non-escaped '(' character indicating the beginning of an expression OpenParen, /// Close Parenthesis. A non-escaped ')' character indicating the end of an expression CloseParen, /// Whitespace. One or more whitespace chars Whitespace, + /// Symbol Atom. A Symbol Atom + SymbolAtom, + /// Variable Atom. A [VariableAtom] constructed from a [VariableToken](SyntaxNodeType::VariableToken) + VariableAtom, /// Expression. All input text composing an Expression, from the opening '(' to the close - Expression, - /// Atom. A Symbol Atom or Grounded Atom - //TODO, check since I'm not sure about this one. Maybe I'll want different intermediate tokens - // when I generate atoms from parse tokens - Atom, - /// Unparsed Leftover Text. Text remaining after the parser has encountered an error + ExpressionAtom, + /// Special Atom. A token matched by a regex registered with the [Tokenizer]. Might be a Grounded + /// Atom, but might also be another type of Atom + SpecialAtom, + /// Unparsed Leftover Text. Unparsed text remaining after the parser has encountered an error LeftoverText, + /// Syntax Nodes that cannot be combined into a coherent atom due to a parse error, even if some + /// of the individual nodes are valid + ErrorGroup, } #[derive(Clone, Debug)] pub struct SyntaxNode { pub node_type: SyntaxNodeType, pub src_range: Range, + pub atom: Option, + pub sub_nodes: Vec, + pub message: Option, + pub is_complete: bool, +} + +impl SyntaxNode { + fn new(node_type: SyntaxNodeType, src_range: Range, atom: Option, sub_nodes: Vec) -> SyntaxNode { + Self { + node_type, + src_range, + atom, + sub_nodes, + message: None, + is_complete: true + } + } + + fn incomplete_with_message(node_type: SyntaxNodeType, src_range: Range, sub_nodes: Vec, message: String) -> SyntaxNode { + Self { + node_type, + src_range, + atom: None, + sub_nodes, + message: Some(message), + is_complete: false + } + } + + /// Creates a new error group. Gets the error message associated with the last node + fn new_error_group(src_range: Range, sub_nodes: Vec) -> SyntaxNode { + let message = sub_nodes[sub_nodes.len()-1].message.clone(); + Self { + node_type: SyntaxNodeType::ErrorGroup, + src_range, + atom: None, + sub_nodes, + message, + is_complete: false + } + } + + /// Visits all the syntactic nodes (vs. semantic) in a parsed syntax tree + /// + /// This method is useful to render syntax styling. + /// + /// TODO: Inthe future, We'll want to be able to use the type system to assign styling, + /// which is going to mean looking at Atoms, and not the tokens they were built from + pub fn visit_syntactic(&self, mut callback: C) + where C: FnMut(&SyntaxNode) + { + self.visit_depth_first(|node| { + match node.node_type { + SyntaxNodeType::Comment | + SyntaxNodeType::VariableToken | + SyntaxNodeType::StringToken | + SyntaxNodeType::WordToken | + SyntaxNodeType::OpenParen | + SyntaxNodeType::CloseParen | + SyntaxNodeType::Whitespace | + SyntaxNodeType::LeftoverText => { + callback(node); + } + _ => {} + } + }) + } + + /// Visits all the nodes in a parsed syntax tree in a depth-first order + pub fn visit_depth_first(&self, mut callback: C) + where C: FnMut(&SyntaxNode) + { + self.visit_depth_first_internal(&mut callback); + } + + fn visit_depth_first_internal(&self, callback: &mut C) + where C: FnMut(&SyntaxNode) + { + for sub_node in self.sub_nodes.iter() { + sub_node.visit_depth_first_internal(callback); + } + callback(self); + } } pub struct SExprParser<'a> { @@ -110,66 +197,67 @@ impl<'a> SExprParser<'a> { Self{ text, it: text.char_indices().peekable() } } - //TODO: Consider reorganizing this function as a visitor pub fn parse(&mut self, tokenizer: &Tokenizer) -> Result, String> { - self.parse_with_visitor(tokenizer, |_tok| ()) - } + loop { + match self.parse_to_syntax_tree(tokenizer) { + Some(node) => { + //If we have an incomplete node, it's an error + if !node.is_complete { + return Err(node.message.unwrap()) + } - pub fn parse_with_visitor(&mut self, tokenizer: &Tokenizer, mut callback: C) -> Result, String> - where C: FnMut(SyntaxNode) - { - self.parse_with_visitor_internal(tokenizer, &mut callback) + //We are only interested in nodes that represent atoms + match node.node_type { + SyntaxNodeType::SymbolAtom | + SyntaxNodeType::VariableAtom | + SyntaxNodeType::ExpressionAtom | + SyntaxNodeType::SpecialAtom => { + return Ok(node.atom) + }, + _ => () + } + }, + None => { + return Ok(None); + }, + } + } } - fn parse_with_visitor_internal(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result, String> - where C: FnMut(SyntaxNode) - { - while let Some((idx, c)) = self.it.peek().cloned() { + pub fn parse_to_syntax_tree(&mut self, tokenizer: &Tokenizer) -> Option { + if let Some((idx, c)) = self.it.peek().cloned() { match c { ';' => { - let start_idx = idx; - self.skip_line(); - let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::Comment, range)); + let comment_node = self.parse_comment().unwrap(); + return Some(comment_node); }, _ if c.is_whitespace() => { - let range = idx..idx+1; - callback(self.new_syntax_node(SyntaxNodeType::Whitespace, range)); + let whispace_node = SyntaxNode::new(SyntaxNodeType::Whitespace, idx..idx+1, None, vec![]); self.it.next(); + return Some(whispace_node); }, '$' => { - let token = self.next_var_with_visitor(callback)?; - return Ok(Some(Atom::var(token))); + let var_node = self.parse_variable(); + return Some(var_node); }, '(' => { - let range = idx..idx+1; - callback(self.new_syntax_node(SyntaxNodeType::OpenParen, range)); - - self.it.next(); - let start_idx = idx; - let expr = self.parse_expr_with_visitor(tokenizer, callback)?; - let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::Expression, range)); - return Ok(Some(expr)); + let expr_node = self.parse_expr(tokenizer); + return Some(expr_node); }, ')' => { - let range = idx..idx+1; - callback(self.new_syntax_node(SyntaxNodeType::CloseParen, range)); + let close_paren_node = SyntaxNode::new(SyntaxNodeType::CloseParen, idx..idx+1, None, vec![]); self.it.next(); - - self.parse_leftovers_with_visitor(callback); - return Err("Unexpected right bracket".to_string()) + let leftover_text_node = self.parse_leftovers("Unexpected right bracket".to_string()); + let error_group_node = SyntaxNode::new_error_group(idx..self.cur_idx(), vec![close_paren_node, leftover_text_node]); + return Some(error_group_node); }, _ => { - let start_idx = idx; - let atom = self.parse_atom_with_visitor(tokenizer, callback)?; - let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::Atom, range)); - return Ok(Some(atom)); + let atom_node = self.parse_atom(tokenizer); + return atom_node; }, } } - Ok(None) + None } ///WARNING: may be (often is) == to text.len(), and thus can't be used as an index to read a char @@ -181,130 +269,152 @@ impl<'a> SExprParser<'a> { } } - fn new_syntax_node(&self, node_type: SyntaxNodeType, src_range: Range) -> SyntaxNode { - SyntaxNode { - node_type, - src_range: src_range.clone(), + /// Parse to the next `\n` newline + fn parse_comment(&mut self) -> Option { + if let Some((start_idx, _c)) = self.it.peek().cloned() { + while let Some((_idx, c)) = self.it.peek() { + match c { + '\n' => break, + _ => { self.it.next(); } + } + } + let range = start_idx..self.cur_idx(); + Some(SyntaxNode::new(SyntaxNodeType::Comment, range, None, vec![])) + } else { + None } } - fn skip_line(&mut self) -> () { - while let Some((_idx, c)) = self.it.peek() { - match c { - '\n' => break, - _ => { self.it.next(); } + fn parse_leftovers(&mut self, message: String) -> SyntaxNode { + let start_idx = self.cur_idx(); + while let Some(_) = self.it.next() {} + let range = start_idx..self.cur_idx(); + SyntaxNode::incomplete_with_message(SyntaxNodeType::LeftoverText, range, vec![], message) + } + + fn parse_atom(&mut self, tokenizer: &Tokenizer) -> Option { + if let Some(token_node) = self.parse_token() { + if token_node.is_complete { + let token_text = <&SymbolAtom>::try_from(token_node.atom.as_ref().unwrap()).unwrap().name(); + let constr = tokenizer.find_token(token_text); + if let Some(constr) = constr { + let new_atom = constr(token_text); + let special_atom_node = SyntaxNode::new(SyntaxNodeType::SpecialAtom, token_node.src_range.clone(), Some(new_atom), vec![token_node]); + return Some(special_atom_node); + } else { + let symbol_atom_node = SyntaxNode::new(SyntaxNodeType::SymbolAtom, token_node.src_range.clone(), token_node.atom.clone(), vec![token_node]); + return Some(symbol_atom_node); + } + } else { + Some(token_node) } + } else { + None } } - fn parse_leftovers_with_visitor(&mut self, callback: &mut C) - where C: FnMut(SyntaxNode) - { - if let Some((start_idx, _c)) = self.it.peek().cloned() { - let (last, _c) = self.it.clone().last().unwrap(); - let range = start_idx..last+1; - callback(self.new_syntax_node(SyntaxNodeType::LeftoverText, range)); - } - } + fn parse_expr(&mut self, tokenizer: &Tokenizer) -> SyntaxNode { + let start_idx = self.cur_idx(); + let mut child_nodes: Vec = Vec::new(); - fn parse_atom_with_visitor(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result - where C: FnMut(SyntaxNode) - { - let token = self.next_token_with_visitor(callback)?; - let constr = tokenizer.find_token(token.as_str()); - if let Some(constr) = constr { - return Ok(constr(token.as_str())); - } else { - return Ok(Atom::sym(token)); - } - } + let open_paren_node = SyntaxNode::new(SyntaxNodeType::OpenParen, start_idx..start_idx+1, None, vec![]); + child_nodes.push(open_paren_node); + self.it.next(); - fn parse_expr_with_visitor(&mut self, tokenizer: &Tokenizer, callback: &mut C) -> Result - where C: FnMut(SyntaxNode) - { - let mut children: Vec = Vec::new(); while let Some((idx, c)) = self.it.peek().cloned() { match c { ';' => { - let start_idx = idx; - self.skip_line(); - let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::Comment, range)); + let comment_node = self.parse_comment().unwrap(); + child_nodes.push(comment_node); }, _ if c.is_whitespace() => { - let range = idx..idx+1; - callback(self.new_syntax_node(SyntaxNodeType::Whitespace, range)); + let whitespace_node = SyntaxNode::new(SyntaxNodeType::Whitespace, idx..idx+1, None, vec![]); + child_nodes.push(whitespace_node); self.it.next(); }, ')' => { - let range = idx..idx+1; - callback(self.new_syntax_node(SyntaxNodeType::CloseParen, range)); + let close_paren_node = SyntaxNode::new(SyntaxNodeType::CloseParen, idx..idx+1, None, vec![]); + child_nodes.push(close_paren_node); self.it.next(); - let expr = Atom::expr(children); - return Ok(expr); + + let expr_children: Vec = child_nodes.iter().filter_map(|node| node.atom.clone()).collect(); + let new_expr_atom = Atom::expr(expr_children); + let expr_node = SyntaxNode::new(SyntaxNodeType::ExpressionAtom, start_idx..self.cur_idx(), Some(new_expr_atom), child_nodes); + return expr_node; }, _ => { - if let Ok(Some(child)) = self.parse_with_visitor_internal(tokenizer, callback) { - children.push(child); + if let Some(parsed_node) = self.parse_to_syntax_tree(tokenizer) { + let is_err = !parsed_node.is_complete; + child_nodes.push(parsed_node); + + //If we hit an error parsing a child, then bubble it up + if is_err { + let error_group_node = SyntaxNode::new_error_group(start_idx..self.cur_idx(), child_nodes); + return error_group_node; + } } else { - return Err("Unexpected end of expression member".to_string()); + let leftover_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::ErrorGroup, start_idx..self.cur_idx(), child_nodes, "Unexpected end of expression member".to_string()); + return leftover_node; } }, } } - Err("Unexpected end of expression".to_string()) + let leftover_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::ErrorGroup, start_idx..self.cur_idx(), child_nodes, "Unexpected end of expression".to_string()); + leftover_node } - fn next_token_with_visitor(&mut self, callback: &mut C) -> Result - where C: FnMut(SyntaxNode) - { + fn parse_token(&mut self) -> Option { match self.it.peek().cloned() { - Some((idx, '"')) => { - let start_idx = idx; - let str_token = self.next_string()?; - let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::StringLiteral, range)); - Ok(str_token) + Some((_idx, '"')) => { + let string_node = self.parse_string(); + Some(string_node) }, - Some((idx, _)) => { - let start_idx = idx; - let tok = self.next_word()?; - let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::MiscSymbol, range)); - Ok(tok) + Some((_idx, _)) => { + let word_node = self.parse_word(); + Some(word_node) }, - None => Ok(String::new()) + None => None } } - fn next_string(&mut self) -> Result { + fn parse_string(&mut self) -> SyntaxNode { let mut token = String::new(); + let start_idx = self.cur_idx(); if let Some((_idx, '"')) = self.it.next() { token.push('"'); } else { - return Err("Double quote expected".to_string()); + let leftover_text_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::LeftoverText, start_idx..self.cur_idx(), vec![], "Double quote expected".to_string()); + return leftover_text_node; } while let Some((_idx, c)) = self.it.next() { if c == '"' { token.push('"'); - break; + let string_symbol_atom = Atom::sym(token); + let string_node = SyntaxNode::new(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), Some(string_symbol_atom), vec![]); + return string_node; } let c = if c == '\\' { match self.it.next() { Some((_idx, c)) => c, - None => return Err("Escaping sequence is not finished".to_string()), + None => { + let leftover_text_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), vec![], "Escaping sequence is not finished".to_string()); + return leftover_text_node; + }, } } else { c }; token.push(c); } - Ok(token) + let unclosed_string_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), vec![], "Unclosed String Literal".to_string()); + unclosed_string_node } - fn next_word(&mut self) -> Result { + fn parse_word(&mut self) -> SyntaxNode { let mut token = String::new(); + let start_idx = self.cur_idx(); + while let Some((_idx, c)) = self.it.peek() { if c.is_whitespace() || *c == '(' || *c == ')' { break; @@ -312,12 +422,13 @@ impl<'a> SExprParser<'a> { token.push(*c); self.it.next(); } - Ok(token) + + let word_symbol_atom = Atom::sym(token); + let word_node = SyntaxNode::new(SyntaxNodeType::WordToken, start_idx..self.cur_idx(), Some(word_symbol_atom), vec![]); + word_node } - fn next_var_with_visitor(&mut self, callback: &mut C) -> Result - where C: FnMut(SyntaxNode) - { + fn parse_variable(&mut self) -> SyntaxNode { let (start_idx, _c) = self.it.peek().cloned().unwrap(); let mut tmp_it = self.it.clone(); tmp_it.next(); @@ -328,16 +439,18 @@ impl<'a> SExprParser<'a> { break; } if *c == '#' { - self.parse_leftovers_with_visitor(callback); - return Err("'#' char is reserved for internal usage".to_string()); + let leftover_node = self.parse_leftovers("'#' char is reserved for internal usage".to_string()); + return leftover_node; } token.push(*c); tmp_it.next(); } self.it = tmp_it; let range = start_idx..self.cur_idx(); - callback(self.new_syntax_node(SyntaxNodeType::Variable, range)); - Ok(token) + let var_token_node = SyntaxNode::new(SyntaxNodeType::VariableToken, range.clone(), None, vec![]); + let new_var_atom = Atom::var(token); + let variable_atom_node = SyntaxNode::new(SyntaxNodeType::VariableAtom, range, Some(new_var_atom), vec![var_token_node]); + variable_atom_node } } @@ -399,19 +512,25 @@ mod tests { #[test] fn test_next_token() { - let mut parser = SExprParser::new("n)"); + let text = "n)"; + let mut parser = SExprParser::new(text); - assert_eq!("n".to_string(), parser.next_token_with_visitor(&mut |_tok| ()).unwrap()); + let node = parser.parse_token().unwrap(); + assert_eq!("n".to_string(), text[node.src_range]); assert_eq!(Some((1, ')')), parser.it.next()); } #[test] fn test_next_string_errors() { let mut parser = SExprParser::new("a"); - assert_eq!(Err(String::from("Double quote expected")), parser.next_string()); + let node = parser.parse_string(); + assert!(!node.is_complete); + assert_eq!("Double quote expected", node.message.unwrap()); let mut parser = SExprParser::new("\"\\"); - assert_eq!(Err(String::from("Escaping sequence is not finished")), parser.next_string()); + let node = parser.parse_string(); + assert!(!node.is_complete); + assert_eq!("Escaping sequence is not finished", node.message.unwrap()); } #[test] diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 8cbe0bcc9..36dc7a5fd 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -31,7 +31,7 @@ const COMMENT_COLOR: &str = "32"; const VARIABLE_COLOR: &str = "33"; const MISC_SYMBOL_COLOR: &str = "34"; const STRING_LITERAL_COLOR: &str = "31"; -const SPECIAL_SYMBOL_COLOR: &str = "36"; +// const SPECIAL_SYMBOL_COLOR: &str = "36"; const ERROR_COLOR: &str = "91"; impl Highlighter for ReplHelper { @@ -67,82 +67,73 @@ impl Highlighter for ReplHelper { self.metta.borrow_mut().inside_env(|metta| { let mut parser = SExprParser::new(line); loop { - let result = parser.parse_with_visitor(&metta.metta.tokenizer().borrow(), |node| { - let mut style_sequence = vec![]; + match parser.parse_to_syntax_tree(&metta.metta.tokenizer().borrow()) { + Some(root_node) => { + root_node.visit_syntactic(|node| { + let mut style_sequence = vec![]; - //Set up the style for the node - match node.node_type { - SyntaxNodeType::Comment => { - style_sequence.push(COMMENT_COLOR); - }, - SyntaxNodeType::Variable => { - style_sequence.push(VARIABLE_COLOR); - }, - SyntaxNodeType::MiscSymbol => { - style_sequence.push(MISC_SYMBOL_COLOR); - }, - SyntaxNodeType::StringLiteral => { - style_sequence.push(STRING_LITERAL_COLOR); - }, - SyntaxNodeType::Special => { - //TODO: We'll want to use the type system to assign styling here - style_sequence.push(SPECIAL_SYMBOL_COLOR); - }, - SyntaxNodeType::OpenParen => { - style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); - bracket_depth += 1; - }, - SyntaxNodeType::CloseParen => { - if bracket_depth > 0 { - bracket_depth -= 1; - style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); - } else { - style_sequence.push(ERROR_COLOR); + //Set up the style for the node + match node.node_type { + SyntaxNodeType::Comment => { + style_sequence.push(COMMENT_COLOR); + }, + SyntaxNodeType::VariableToken => { + style_sequence.push(VARIABLE_COLOR); + }, + SyntaxNodeType::StringToken => { + style_sequence.push(STRING_LITERAL_COLOR); + }, + SyntaxNodeType::WordToken => { + style_sequence.push(MISC_SYMBOL_COLOR); + }, + SyntaxNodeType::OpenParen => { + style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); + bracket_depth += 1; + }, + SyntaxNodeType::CloseParen => { + if bracket_depth > 0 { + bracket_depth -= 1; + style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); + } else { + style_sequence.push(ERROR_COLOR); + } + }, + SyntaxNodeType::LeftoverText => { + style_sequence.push(ERROR_COLOR); + } + _ => { } } - }, - SyntaxNodeType::LeftoverText => { - style_sequence.push(ERROR_COLOR); - } - _ => { } // We don't do anything with the compound nodes, e.g. Expression & Atom - } - - //See if we need to render this node with the "bracket blink" - if let Some((_matching_char, blink_idx)) = &blink_char { - if node.src_range.contains(blink_idx) { - style_sequence.push("1;7"); - } - } - //Render the node to the buffer if it's one of the ones we choose to render - if node_type_should_render(node.node_type) { - - //Push the styles to the buffer - let style_count = style_sequence.len(); - if style_count > 0 { - colored_line.push_str("\x1b["); - for (style_idx, style) in style_sequence.into_iter().enumerate() { - colored_line.push_str(style); - if style_idx < style_count-1 { - colored_line.push(';'); + //See if we need to render this node with the "bracket blink" + if let Some((_matching_char, blink_idx)) = &blink_char { + if node.src_range.contains(blink_idx) { + style_sequence.push("1;7"); } } - colored_line.push('m'); - } - //Push the node itself to the buffer - colored_line.push_str(&line[node.src_range]); + //Push the styles to the buffer + let style_count = style_sequence.len(); + if style_count > 0 { + colored_line.push_str("\x1b["); + for (style_idx, style) in style_sequence.into_iter().enumerate() { + colored_line.push_str(style); + if style_idx < style_count-1 { + colored_line.push(';'); + } + } + colored_line.push('m'); + } - //And push an undo sequence, if the node was stylized - if style_count > 0 { - colored_line.push_str("\x1b[0m"); - } - } - }); + //Push the node itself to the buffer + colored_line.push_str(&line[node.src_range.clone()]); - match result { - Ok(Some(_atom)) => (), - Ok(None) => break, - Err(_err) => break, + //And push an undo sequence, if the node was stylized + if style_count > 0 { + colored_line.push_str("\x1b[0m"); + } + }); + }, + None => break, } } }); @@ -189,23 +180,6 @@ impl Validator for ReplHelper { } -fn node_type_should_render(node_type: SyntaxNodeType) -> bool { - match node_type { - SyntaxNodeType::Comment | - SyntaxNodeType::Variable | - SyntaxNodeType::OpenParen | - SyntaxNodeType::CloseParen | - SyntaxNodeType::StringLiteral | - SyntaxNodeType::Special | - SyntaxNodeType::MiscSymbol | - SyntaxNodeType::Whitespace | - SyntaxNodeType::LeftoverText => true, - - SyntaxNodeType::Atom | - SyntaxNodeType::Expression => false, - } -} - impl ReplHelper { pub fn new(metta: MettaShim) -> Self { Self { From 675496ca060dd5bf122beca12bf13587176238fc Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 2 Sep 2023 20:21:48 +0900 Subject: [PATCH 04/25] Yay. Our fix has been merged into rustyline main. Still not published though --- repl/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/repl/Cargo.toml b/repl/Cargo.toml index cd3f6fc97..59c70b0e5 100644 --- a/repl/Cargo.toml +++ b/repl/Cargo.toml @@ -9,8 +9,8 @@ anyhow = { version = "1.0.75", features = ["std"] } hyperon = { path = "../lib/" } # rustyline = { version = "12.0.0", features = ["derive"] } # rustyline = {git = "https://github.com/luketpeterson/rustyline", version = "12.0.0", features = ["derive"] } -# TODO: I hope these changes stabilize inside rustyline before we need to publish Hyperon -rustyline = {git = "https://github.com/gwenn/rustyline.git", branch="no_highlight_char_on_final_refresh", version = "12.0.0", features = ["derive"] } +# TODO: Yay, our fix landed in main. Still needs to publish however. One step closer +rustyline = {git = "https://github.com/kkawakam/rustyline", version = "12.0.0", features = ["derive"] } clap = { version = "4.4.0", features = ["derive"] } directories = "5.0.1" pyo3 = { version = "0.19.2", features = ["auto-initialize"], optional = true } From a84b1e37647b8ef50c02f35ca8e2e5889a2799b1 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sat, 2 Sep 2023 20:44:10 +0900 Subject: [PATCH 05/25] Creating a default metta.config file in the config dir, if none exists --- repl/src/config.default.metta | 12 ++++++++++++ repl/src/config_params.rs | 23 +++++++++++++++++++---- repl/src/main.rs | 5 +++++ 3 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 repl/src/config.default.metta diff --git a/repl/src/config.default.metta b/repl/src/config.default.metta new file mode 100644 index 000000000..35b1f1b0f --- /dev/null +++ b/repl/src/config.default.metta @@ -0,0 +1,12 @@ + +; TODO Let the "includePaths" be modifiable, but I want better string manipulation atoms + +; TODO: somebody with better design sense should tweak these, and also provide dark-mode setings +; ANSI escape codes to configure the syntax highlighter +(= BracketColors (94 93 95 96)) +(= CommentColor 32) +(= VariableColor 33) +(= SymbolColor 34) +(= StringColor 31) +(= ErrorColor 91) + diff --git a/repl/src/config_params.rs b/repl/src/config_params.rs index 171e3df17..b4021ce90 100644 --- a/repl/src/config_params.rs +++ b/repl/src/config_params.rs @@ -1,5 +1,9 @@ use std::path::{Path, PathBuf}; +use std::io::Write; +use std::fs; + +const DEFAULT_CONFIG_METTA: &[u8] = include_bytes!("config.default.metta"); #[derive(Default, Debug)] pub struct ReplParams { @@ -55,12 +59,23 @@ impl ReplParams { /// /// The metta_working_dir is always returned first pub fn modules_search_paths<'a>(&'a self) -> impl Iterator + 'a { - - //TODO: This is here to temporarily squish a warning. - let _ = self.config_dir; - [self.metta_working_dir.clone()].into_iter().chain( self.include_paths.iter().cloned()) } + /// A path to the config.metta file that's run to configure the repl environment + pub fn config_metta_path(&self) -> PathBuf { + let config_path = self.config_dir.join("config.metta"); + + //Create the default config.metta file, if none exists + if !config_path.exists() { + let mut file = fs::OpenOptions::new() + .create(true) + .write(true) + .open(&config_path).unwrap(); + file.write_all(&DEFAULT_CONFIG_METTA).unwrap(); + } + config_path + } + } diff --git a/repl/src/main.rs b/repl/src/main.rs index b33e269cc..d4bda075e 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -53,8 +53,13 @@ fn main() -> Result<()> { }; let repl_params = Shared::new(repl_params); + //Create our MeTTa runtime environment let mut metta = MettaShim::new(repl_params.clone()); + //Run the config.metta file + let config_metta_path = repl_params.borrow().config_metta_path(); + metta.load_metta_module(config_metta_path); + //If we have .metta files to run, then run them if let Some(metta_file) = primary_metta_file { From 44113d12a1db20608870da5a679566ca7781c64b Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 3 Sep 2023 00:46:20 +0900 Subject: [PATCH 06/25] Rounding out metta-based highlighting config --- lib/src/metta/runner/mod.rs | 2 +- repl/src/config.default.metta | 15 ++++---- repl/src/config_params.rs | 30 ++++++++++------ repl/src/interactive_helper.rs | 63 ++++++++++++++++++++++------------ repl/src/main.rs | 4 --- repl/src/metta_shim.rs | 53 ++++++++++++++++++++++++++-- 6 files changed, 122 insertions(+), 45 deletions(-) diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index 96d8b80ed..21a438770 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -124,7 +124,7 @@ impl Metta { &self.0.search_paths } - pub(crate) fn modules(&self) -> &Shared> { + pub fn modules(&self) -> &Shared> { &self.0.modules } diff --git a/repl/src/config.default.metta b/repl/src/config.default.metta index 35b1f1b0f..2914cd817 100644 --- a/repl/src/config.default.metta +++ b/repl/src/config.default.metta @@ -3,10 +3,13 @@ ; TODO: somebody with better design sense should tweak these, and also provide dark-mode setings ; ANSI escape codes to configure the syntax highlighter -(= BracketColors (94 93 95 96)) -(= CommentColor 32) -(= VariableColor 33) -(= SymbolColor 34) -(= StringColor 31) -(= ErrorColor 91) +(= BracketStyles (94 93 95 96)) +(= CommentStyle 32) +(= VariableStyle 33) +(= SymbolStyle 34) +(= StringStyle 31) +(= ErrorStyle 91) +; (= BracketMatchEnabled True) ;TODO, enable this +; (= BracketMatchStyle 1;7) ; TODO BUG, this be being matched as a float and failing to parse + diff --git a/repl/src/config_params.rs b/repl/src/config_params.rs index b4021ce90..ef724e848 100644 --- a/repl/src/config_params.rs +++ b/repl/src/config_params.rs @@ -10,6 +10,9 @@ pub struct ReplParams { /// Path to the config dir for the whole repl, in an OS-specific location config_dir: PathBuf, + /// Path to the config.metta file, used to configure the repl + config_metta_path: PathBuf, + /// Path to the dir containing the script being run, or the cwd the repl was invoked from in interactive mode metta_working_dir: PathBuf, @@ -37,6 +40,16 @@ impl ReplParams { } }; + //Create the default config.metta file, if none exists + let config_metta_path = config_dir.join("config.metta"); + if !config_metta_path.exists() { + let mut file = fs::OpenOptions::new() + .create(true) + .write(true) + .open(&config_metta_path).unwrap(); + file.write_all(&DEFAULT_CONFIG_METTA).unwrap(); + } + //Create the modules dir inside the config dir, if it doesn't already exist let modules_dir = config_dir.join("modules"); std::fs::create_dir_all(&modules_dir).unwrap(); @@ -49,6 +62,7 @@ impl ReplParams { Self { config_dir: config_dir.into(), + config_metta_path, metta_working_dir, include_paths, history_file: Some(config_dir.join("history.txt")), @@ -64,18 +78,12 @@ impl ReplParams { } /// A path to the config.metta file that's run to configure the repl environment - pub fn config_metta_path(&self) -> PathBuf { - let config_path = self.config_dir.join("config.metta"); + pub fn config_metta_path(&self) -> &PathBuf { - //Create the default config.metta file, if none exists - if !config_path.exists() { - let mut file = fs::OpenOptions::new() - .create(true) - .write(true) - .open(&config_path).unwrap(); - file.write_all(&DEFAULT_CONFIG_METTA).unwrap(); - } - config_path + //TODO: Temporary access to avoid warning. Delete soon + let _ = self.config_dir; + + &self.config_metta_path } } diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 36dc7a5fd..19b248c08 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -22,17 +22,20 @@ pub struct ReplHelper { hinter: HistoryHinter, pub colored_prompt: String, cursor_bracket: std::cell::Cell>, // If the cursor is over or near a bracket to match + style: StyleSettings, } -//TODO: this information needs to come from the config.metta. -// This is just a stop-gap to make sure parsing & rendering is correct -const BRACKET_COLORS: &[&str] = &["94", "93", "95", "96"]; -const COMMENT_COLOR: &str = "32"; -const VARIABLE_COLOR: &str = "33"; -const MISC_SYMBOL_COLOR: &str = "34"; -const STRING_LITERAL_COLOR: &str = "31"; -// const SPECIAL_SYMBOL_COLOR: &str = "36"; -const ERROR_COLOR: &str = "91"; +#[derive(Default)] +struct StyleSettings { + bracket_styles: Vec, + comment_style: String, + variable_style: String, + symbol_style: String, + string_style: String, + error_style: String, + bracket_match_style: String, + // bracket_match_enabled: bool, //TODO +} impl Highlighter for ReplHelper { fn highlight_prompt<'b, 's: 'b, 'p: 'b>( @@ -70,36 +73,36 @@ impl Highlighter for ReplHelper { match parser.parse_to_syntax_tree(&metta.metta.tokenizer().borrow()) { Some(root_node) => { root_node.visit_syntactic(|node| { - let mut style_sequence = vec![]; + let mut style_sequence: Vec<&str> = vec![]; //Set up the style for the node match node.node_type { SyntaxNodeType::Comment => { - style_sequence.push(COMMENT_COLOR); + style_sequence.push(&self.style.comment_style); }, SyntaxNodeType::VariableToken => { - style_sequence.push(VARIABLE_COLOR); + style_sequence.push(&self.style.variable_style); }, SyntaxNodeType::StringToken => { - style_sequence.push(STRING_LITERAL_COLOR); + style_sequence.push(&self.style.string_style); }, SyntaxNodeType::WordToken => { - style_sequence.push(MISC_SYMBOL_COLOR); + style_sequence.push(&self.style.symbol_style); }, SyntaxNodeType::OpenParen => { - style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); + style_sequence.push(&self.style.bracket_styles[bracket_depth%self.style.bracket_styles.len()]); bracket_depth += 1; }, SyntaxNodeType::CloseParen => { if bracket_depth > 0 { bracket_depth -= 1; - style_sequence.push(BRACKET_COLORS[bracket_depth%BRACKET_COLORS.len()]); + style_sequence.push(&self.style.bracket_styles[bracket_depth%self.style.bracket_styles.len()]); } else { - style_sequence.push(ERROR_COLOR); + style_sequence.push(&self.style.error_style); } }, SyntaxNodeType::LeftoverText => { - style_sequence.push(ERROR_COLOR); + style_sequence.push(&self.style.error_style); } _ => { } } @@ -107,7 +110,7 @@ impl Highlighter for ReplHelper { //See if we need to render this node with the "bracket blink" if let Some((_matching_char, blink_idx)) = &blink_char { if node.src_range.contains(blink_idx) { - style_sequence.push("1;7"); + style_sequence.push(&self.style.bracket_match_style); } } @@ -168,7 +171,7 @@ impl Validator for ReplHelper { }, Err(err) => { validation_result = ValidationResult::Invalid(Some( - format!(" - \x1b[0;{}m{}\x1b[0m", ERROR_COLOR, err) + format!(" - \x1b[0;{}m{}\x1b[0m", self.style.error_style, err) )); break; } @@ -181,13 +184,31 @@ impl Validator for ReplHelper { } impl ReplHelper { - pub fn new(metta: MettaShim) -> Self { + pub fn new(mut metta: MettaShim) -> Self { + + let style = StyleSettings::new(&mut metta); + Self { metta: RefCell::new(metta), completer: FilenameCompleter::new(), hinter: HistoryHinter {}, colored_prompt: "".to_owned(), cursor_bracket: std::cell::Cell::new(None), + style, + } + } +} + +impl StyleSettings { + pub fn new(metta_shim: &mut MettaShim) -> Self { + Self { + bracket_styles: metta_shim.get_config_expr_vec("BracketStyles").unwrap_or(vec!["94".to_string(), "93".to_string(), "95".to_string(), "96".to_string()]), + comment_style: metta_shim.get_config_string("CommentStyle").unwrap_or("32".to_string()), + variable_style: metta_shim.get_config_string("VariableStyle").unwrap_or("33".to_string()), + symbol_style: metta_shim.get_config_string("SymbolStyle").unwrap_or("34".to_string()), + string_style: metta_shim.get_config_string("StringStyle").unwrap_or("31".to_string()), + error_style: metta_shim.get_config_string("ErrorStyle").unwrap_or("91".to_string()), + bracket_match_style: metta_shim.get_config_string("BracketMatchStyle").unwrap_or("1;7".to_string()), } } } diff --git a/repl/src/main.rs b/repl/src/main.rs index d4bda075e..6e5fb5421 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -56,10 +56,6 @@ fn main() -> Result<()> { //Create our MeTTa runtime environment let mut metta = MettaShim::new(repl_params.clone()); - //Run the config.metta file - let config_metta_path = repl_params.borrow().config_metta_path(); - metta.load_metta_module(config_metta_path); - //If we have .metta files to run, then run them if let Some(metta_file) = primary_metta_file { diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index f8ea38c16..dde7aa0a2 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -2,8 +2,9 @@ use std::fmt::Display; use std::path::PathBuf; +use hyperon::{sym, expr, ExpressionAtom}; use hyperon::Atom; -use hyperon::atom::{Grounded, ExecError, match_by_equality}; +use hyperon::atom::{Grounded, VariableAtom, ExecError, match_by_equality}; use hyperon::matcher::MatchResultIter; use hyperon::space::*; use hyperon::space::grounding::GroundingSpace; @@ -25,6 +26,7 @@ use crate::ReplParams; pub struct MettaShim { pub metta: Metta, pub result: Vec>, + repl_params: Shared, } #[macro_export] @@ -64,7 +66,8 @@ impl MettaShim { let tokenizer = Shared::new(Tokenizer::new()); let mut new_shim = Self { metta: Metta::from_space(space, tokenizer, repl_params.borrow().modules_search_paths().collect()), - result: vec![] + result: vec![], + repl_params: repl_params.clone(), }; //Load the hyperonpy Python stdlib, if the repl includes Python support @@ -86,6 +89,11 @@ impl MettaShim { #[cfg(not(feature = "python"))] new_shim.metta.tokenizer().borrow_mut().register_token_with_regex_str("extend-py!", move |_| { Atom::gnd(ImportPyErr) }); + //Run the config.metta file + let repl_params = repl_params.borrow(); + let config_metta_path = repl_params.config_metta_path(); + new_shim.load_metta_module(config_metta_path.clone()); + new_shim } @@ -107,6 +115,47 @@ impl MettaShim { func(self) }} } + + pub fn get_config_atom(&mut self, config_name: &str) -> Option { + let mut result = None; + metta_shim_env!{{ + let repl_params = self.repl_params.borrow(); + let config_metta_path = repl_params.config_metta_path(); + let metta_modules = self.metta.modules().borrow(); + let config_space = metta_modules.get(config_metta_path).unwrap(); + let bindings_set = config_space.query(&Atom::expr(vec![sym!("="), Atom::sym(config_name.to_string()), expr!(val)])); + if let Some(bindings) = bindings_set.into_iter().next() { + result = bindings.resolve(&VariableAtom::new("val")); + } + }} + result + } + + pub fn get_config_string(&mut self, config_name: &str) -> Option { + let atom = self.get_config_atom(config_name)?; + + #[allow(unused_assignments)] + let mut result = None; + metta_shim_env!{{ + result = Some(atom.to_string()); + }} + result + } + + pub fn get_config_expr_vec(&mut self, config_name: &str) -> Option> { + let atom = self.get_config_atom(config_name)?; + let mut result = None; + metta_shim_env!{{ + if let Ok(expr) = ExpressionAtom::try_from(atom) { + result = Some(expr.into_children() + .into_iter() + .map(|atom| atom.to_string()) + .collect()) + } + }} + result + } + } #[derive(Clone, PartialEq, Debug)] From 276d6ad8323533998e11955a546c69cd0bffb0f6 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 3 Sep 2023 11:48:57 +0900 Subject: [PATCH 07/25] Bringing more settings under config.metta control --- repl/src/config.default.metta | 23 ++++++++++++----------- repl/src/main.rs | 15 ++++++++++++--- repl/src/metta_shim.rs | 22 ++++++++++++++++++++-- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/repl/src/config.default.metta b/repl/src/config.default.metta index 2914cd817..ec129c8c9 100644 --- a/repl/src/config.default.metta +++ b/repl/src/config.default.metta @@ -1,15 +1,16 @@ -; TODO Let the "includePaths" be modifiable, but I want better string manipulation atoms +; TODO: Let the "includePaths" be modifiable, but I want better string manipulation atoms + +(= DefaultPrompt "> ") +; (= StyledPrompt "\x1b[1;32m> \x1b[0m") ; TODO: currently the MeTTa string parser doesn't resolve escape chars, although perhaps it should ; TODO: somebody with better design sense should tweak these, and also provide dark-mode setings ; ANSI escape codes to configure the syntax highlighter -(= BracketStyles (94 93 95 96)) -(= CommentStyle 32) -(= VariableStyle 33) -(= SymbolStyle 34) -(= StringStyle 31) -(= ErrorStyle 91) -; (= BracketMatchEnabled True) ;TODO, enable this -; (= BracketMatchStyle 1;7) ; TODO BUG, this be being matched as a float and failing to parse - - +(= BracketStyles ("94" "93" "95" "96")) +(= CommentStyle "32") +(= VariableStyle "33") +(= SymbolStyle "34") +(= StringStyle "31") +(= ErrorStyle "91") +(= BracketMatchStyle "1;7") +; (= BracketMatchEnabled True) ;TODO: enable this when I have a reliable value interchange path built. Another use for https://github.com/trueagi-io/hyperon-experimental/issues/351 diff --git a/repl/src/main.rs b/repl/src/main.rs index 6e5fb5421..0270a2597 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -103,9 +103,18 @@ fn start_interactive_mode(repl_params: Shared, metta: MettaShim) -> //The Interpreter Loop loop { - let p = format!("> "); - rl.helper_mut().expect("No helper").colored_prompt = format!("\x1b[1;32m{p}\x1b[0m"); - let readline = rl.readline(&p); + + //Set the prompt based on resolving a MeTTa variable + let prompt = { + let helper = rl.helper_mut().unwrap(); + let mut metta = helper.metta.borrow_mut(); + let prompt = metta.get_config_string("DefaultPrompt").unwrap_or("> ".to_string()); + let styled_prompt = metta.get_config_string("StyledPrompt").unwrap_or(format!("\x1b[1;32m{prompt}\x1b[0m")); + helper.colored_prompt = styled_prompt; + prompt + }; + + let readline = rl.readline(&prompt); match readline { Ok(line) => { rl.add_history_entry(line.as_str())?; diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index dde7aa0a2..8f33e2789 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -137,11 +137,29 @@ impl MettaShim { #[allow(unused_assignments)] let mut result = None; metta_shim_env!{{ - result = Some(atom.to_string()); + result = Some(Self::strip_quotes(atom.to_string())); }} result } + /// A utility function to return the part of a string in between starting and ending quotes + // TODO: Roll this into a stdlib grounded string module, maybe as a test case for + // https://github.com/trueagi-io/hyperon-experimental/issues/351 + fn strip_quotes(the_string: String) -> String { + if let Some(first) = the_string.chars().next() { + if first == '"' { + if let Some(last) = the_string.chars().last() { + if last == '"' { + if the_string.len() > 1 { + return String::from_utf8(the_string.as_bytes()[1..the_string.len()-1].to_vec()).unwrap(); + } + } + } + } + } + the_string + } + pub fn get_config_expr_vec(&mut self, config_name: &str) -> Option> { let atom = self.get_config_atom(config_name)?; let mut result = None; @@ -149,7 +167,7 @@ impl MettaShim { if let Ok(expr) = ExpressionAtom::try_from(atom) { result = Some(expr.into_children() .into_iter() - .map(|atom| atom.to_string()) + .map(|atom| Self::strip_quotes(atom.to_string())) .collect()) } }} From a4236e6741536d079e096e97e069e1233e40dc21 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 3 Sep 2023 13:03:43 +0900 Subject: [PATCH 08/25] Moving non-python error code behind a compile-time switch --- repl/src/metta_shim.rs | 46 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 8f33e2789..8f1a90952 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -1,14 +1,11 @@ -use std::fmt::Display; use std::path::PathBuf; use hyperon::{sym, expr, ExpressionAtom}; use hyperon::Atom; -use hyperon::atom::{Grounded, VariableAtom, ExecError, match_by_equality}; -use hyperon::matcher::MatchResultIter; +use hyperon::atom::VariableAtom; use hyperon::space::*; use hyperon::space::grounding::GroundingSpace; -use hyperon::metta::*; use hyperon::metta::runner::Metta; use hyperon::metta::runner::stdlib::register_rust_tokens; use hyperon::metta::text::Tokenizer; @@ -87,7 +84,7 @@ impl MettaShim { //extend-py! should throw an error if we don't #[cfg(not(feature = "python"))] - new_shim.metta.tokenizer().borrow_mut().register_token_with_regex_str("extend-py!", move |_| { Atom::gnd(ImportPyErr) }); + new_shim.metta.tokenizer().borrow_mut().register_token_with_regex_str("extend-py!", move |_| { Atom::gnd(py_mod_err::ImportPyErr) }); //Run the config.metta file let repl_params = repl_params.borrow(); @@ -176,26 +173,35 @@ impl MettaShim { } -#[derive(Clone, PartialEq, Debug)] -pub struct ImportPyErr; +#[cfg(not(feature = "python"))] +mod py_mod_err { + use std::fmt::Display; + use hyperon::Atom; + use hyperon::atom::{Grounded, ExecError, match_by_equality}; + use hyperon::matcher::MatchResultIter; + use hyperon::metta::*; -impl Display for ImportPyErr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "extend-py!") - } -} + #[derive(Clone, PartialEq, Debug)] + pub struct ImportPyErr; -impl Grounded for ImportPyErr { - fn type_(&self) -> Atom { - Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, ATOM_TYPE_UNDEFINED]) + impl Display for ImportPyErr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "extend-py!") + } } - fn execute(&self, _args: &[Atom]) -> Result, ExecError> { - Err(ExecError::from("extend-py! not available in metta repl without Python support")) - } + impl Grounded for ImportPyErr { + fn type_(&self) -> Atom { + Atom::expr([ARROW_SYMBOL, ATOM_TYPE_SYMBOL, ATOM_TYPE_UNDEFINED]) + } - fn match_(&self, other: &Atom) -> MatchResultIter { - match_by_equality(self, other) + fn execute(&self, _args: &[Atom]) -> Result, ExecError> { + Err(ExecError::from("extend-py! not available in metta repl without Python support")) + } + + fn match_(&self, other: &Atom) -> MatchResultIter { + match_by_equality(self, other) + } } } From 38eba91def91daf910db809c926471e555a0cff1 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 3 Sep 2023 14:07:24 +0900 Subject: [PATCH 09/25] Merging interruptibility into repl, in both interactive and non-interactive modes --- lib/src/metta/runner/mod.rs | 89 ++++++++++++++++++++----------------- repl/Cargo.toml | 1 + repl/src/main.rs | 17 +++++++ repl/src/metta_shim.rs | 24 +++++++++- 4 files changed, 88 insertions(+), 43 deletions(-) diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index 21a438770..a56826e14 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -36,9 +36,11 @@ pub struct MettaContents { search_paths: Vec, } -enum Mode { +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum MettaRunnerMode { ADD, INTERPRET, + TERMINATE, } impl Metta { @@ -142,55 +144,60 @@ impl Metta { } pub fn run(&self, parser: &mut SExprParser) -> Result>, String> { - let mut mode = Mode::ADD; + let mut mode = MettaRunnerMode::ADD; let mut results: Vec> = Vec::new(); - loop { - let atom = parser.parse(&self.0.tokenizer.borrow())?; - - if let Some(atom) = atom { - if atom == EXEC_SYMBOL { - mode = Mode::INTERPRET; - continue; - } - match mode { - Mode::ADD => { - if let Err(atom) = self.add_atom(atom) { - results.push(vec![atom]); - break; - } - }, - Mode::INTERPRET => { - match self.evaluate_atom(atom) { - Err(msg) => return Err(msg), - Ok(result) => { - fn is_error(atom: &Atom) -> bool { - match atom { - Atom::Expression(expr) => { - expr.children().len() > 0 && expr.children()[0] == ERROR_SYMBOL - }, - _ => false, - } - } - let error = result.iter().any(|atom| is_error(atom)); - results.push(result); - if error { - break; + while mode != MettaRunnerMode::TERMINATE { + mode = self.run_step(parser, mode, &mut results)?; + } + Ok(results) + } + + pub fn run_step(&self, parser: &mut SExprParser, mode: MettaRunnerMode, intermediate_results: &mut Vec>) -> Result { + + let atom = parser.parse(&self.0.tokenizer.borrow())?; + + if let Some(atom) = atom { + if atom == EXEC_SYMBOL { + return Ok(MettaRunnerMode::INTERPRET); + } + match mode { + MettaRunnerMode::ADD => { + if let Err(atom) = self.add_atom(atom) { + intermediate_results.push(vec![atom]); + return Ok(MettaRunnerMode::TERMINATE); + } + }, + MettaRunnerMode::INTERPRET => { + match self.evaluate_atom(atom) { + Err(msg) => return Err(msg), + Ok(result) => { + fn is_error(atom: &Atom) -> bool { + match atom { + Atom::Expression(expr) => { + expr.children().len() > 0 && expr.children()[0] == ERROR_SYMBOL + }, + _ => false, } } + let error = result.iter().any(|atom| is_error(atom)); + intermediate_results.push(result); + if error { + return Ok(MettaRunnerMode::TERMINATE); + } } - }, - } - mode = Mode::ADD; - } else { - break; + } + }, + MettaRunnerMode::TERMINATE => { + return Ok(MettaRunnerMode::TERMINATE); + }, } + Ok(MettaRunnerMode::ADD) + } else { + Ok(MettaRunnerMode::TERMINATE) } - Ok(results) } - - pub fn evaluate_atom(&self, atom: Atom) -> Result, String> { match self.type_check(atom) { Err(atom) => Ok(vec![atom]), diff --git a/repl/Cargo.toml b/repl/Cargo.toml index 59c70b0e5..237907892 100644 --- a/repl/Cargo.toml +++ b/repl/Cargo.toml @@ -13,6 +13,7 @@ hyperon = { path = "../lib/" } rustyline = {git = "https://github.com/kkawakam/rustyline", version = "12.0.0", features = ["derive"] } clap = { version = "4.4.0", features = ["derive"] } directories = "5.0.1" +signal-hook = "0.3.17" pyo3 = { version = "0.19.2", features = ["auto-initialize"], optional = true } [[bin]] diff --git a/repl/src/main.rs b/repl/src/main.rs index 0270a2597..659565ece 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -1,5 +1,7 @@ use std::path::PathBuf; +use std::thread; +use std::sync::Mutex; use rustyline::error::ReadlineError; use rustyline::{Cmd, CompletionType, Config, EditMode, Editor, KeyEvent}; @@ -7,6 +9,7 @@ use rustyline::{Cmd, CompletionType, Config, EditMode, Editor, KeyEvent}; use anyhow::Result; use clap::Parser; use directories::ProjectDirs; +use signal_hook::{consts::SIGINT, iterator::Signals}; use hyperon::common::shared::Shared; @@ -19,6 +22,8 @@ use config_params::*; mod interactive_helper; use interactive_helper::*; +static SIGNAL_STATE: Mutex = Mutex::new(false); + #[derive(Parser)] #[command(version, about)] struct CliArgs { @@ -56,9 +61,21 @@ fn main() -> Result<()> { //Create our MeTTa runtime environment let mut metta = MettaShim::new(repl_params.clone()); + //Spawn a signal handler background thread, to deal with passing interrupts to the execution loop + let mut signals = Signals::new(&[SIGINT])?; + thread::spawn(move || { + for _sig in signals.forever() { + //Assume SIGINT, since that's the only registered handler + println!("Interrupt Received, Stopping MeTTa Operation..."); + *SIGNAL_STATE.lock().unwrap() = true; + } + }); + //If we have .metta files to run, then run them if let Some(metta_file) = primary_metta_file { + //All non-primary .metta files run without printing output + //TODO: Currently the interrupt handler does not break these for import_file in other_metta_files { metta.load_metta_module(import_file.clone()); } diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 8f1a90952..1e0b87421 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -6,13 +6,14 @@ use hyperon::Atom; use hyperon::atom::VariableAtom; use hyperon::space::*; use hyperon::space::grounding::GroundingSpace; -use hyperon::metta::runner::Metta; +use hyperon::metta::runner::{Metta, MettaRunnerMode}; use hyperon::metta::runner::stdlib::register_rust_tokens; use hyperon::metta::text::Tokenizer; use hyperon::metta::text::SExprParser; use hyperon::common::shared::Shared; use crate::ReplParams; +use crate::SIGNAL_STATE; /// MettaShim is responsible for **ALL** calls between the repl and MeTTa, and is in charge of keeping /// Python happy (and perhaps other languages in the future). @@ -103,7 +104,26 @@ impl MettaShim { pub fn exec(&mut self, line: &str) { metta_shim_env!{{ let mut parser = SExprParser::new(line); - self.result = self.metta.run(&mut parser).unwrap(); + let mut runner_mode = MettaRunnerMode::ADD; + self.result = Vec::new(); + + while runner_mode != MettaRunnerMode::TERMINATE { + //If we received an interrupt, then clear it and break the loop + if *SIGNAL_STATE.lock().unwrap() { + *SIGNAL_STATE.lock().unwrap() = false; + break; + } + + //Run the next step + match self.metta.run_step(&mut parser, runner_mode, &mut self.result) { + Ok(mode) => { + runner_mode = mode; + }, + Err(err) => { + panic!("Unhandled MeTTa error: {}", err); + } + } + } }} } From e42eabfd7bb75a079c9afc0846599ec25de1d9d5 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 3 Sep 2023 20:39:34 +0900 Subject: [PATCH 10/25] Providing a path to break out of the interpreter by mashing cntl-c --- repl/src/main.rs | 14 +++++++++++--- repl/src/metta_shim.rs | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/repl/src/main.rs b/repl/src/main.rs index 659565ece..6b8415f3b 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -1,6 +1,7 @@ use std::path::PathBuf; use std::thread; +use std::process::exit; use std::sync::Mutex; use rustyline::error::ReadlineError; @@ -22,7 +23,7 @@ use config_params::*; mod interactive_helper; use interactive_helper::*; -static SIGNAL_STATE: Mutex = Mutex::new(false); +static SIGNAL_STATE: Mutex = Mutex::new(0); #[derive(Parser)] #[command(version, about)] @@ -66,8 +67,15 @@ fn main() -> Result<()> { thread::spawn(move || { for _sig in signals.forever() { //Assume SIGINT, since that's the only registered handler - println!("Interrupt Received, Stopping MeTTa Operation..."); - *SIGNAL_STATE.lock().unwrap() = true; + match *SIGNAL_STATE.lock().unwrap() { + 0 => println!("Interrupt received, stopping MeTTa. Please wait..."), + 1 => println!("Stopping in progress..."), + _ => { + println!("Ok, I get it! Yeesh!"); + exit(-1); + }, + } + *SIGNAL_STATE.lock().unwrap() += 1; } }); diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 1e0b87421..8d08f2452 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -109,8 +109,8 @@ impl MettaShim { while runner_mode != MettaRunnerMode::TERMINATE { //If we received an interrupt, then clear it and break the loop - if *SIGNAL_STATE.lock().unwrap() { - *SIGNAL_STATE.lock().unwrap() = false; + if *SIGNAL_STATE.lock().unwrap() > 0 { + *SIGNAL_STATE.lock().unwrap() = 0; break; } From 9f6d2d909655b859f3aa29b26d387f1ab96a84f5 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Sun, 3 Sep 2023 21:09:51 +0900 Subject: [PATCH 11/25] Plugging a theoretical logical race where an interrupt issued just as a MeTTa operation was finishing could cause the next operation to be skipped --- repl/src/main.rs | 10 ++++++---- repl/src/metta_shim.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/repl/src/main.rs b/repl/src/main.rs index 6b8415f3b..3ac43d5da 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -67,15 +67,17 @@ fn main() -> Result<()> { thread::spawn(move || { for _sig in signals.forever() { //Assume SIGINT, since that's the only registered handler - match *SIGNAL_STATE.lock().unwrap() { - 0 => println!("Interrupt received, stopping MeTTa. Please wait..."), - 1 => println!("Stopping in progress..."), + let mut signal_state = SIGNAL_STATE.lock().unwrap(); + match *signal_state { + 0 => println!("Interrupt received, stopping MeTTa..."), + 1 => println!("Stopping in progress. Please wait..."), _ => { println!("Ok, I get it! Yeesh!"); exit(-1); }, } - *SIGNAL_STATE.lock().unwrap() += 1; + *signal_state += 1; + drop(signal_state); } }); diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 8d08f2452..b38453d57 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -107,12 +107,17 @@ impl MettaShim { let mut runner_mode = MettaRunnerMode::ADD; self.result = Vec::new(); + //We don't want any leftover interrupts to break us this time + *SIGNAL_STATE.lock().unwrap() = 0; + while runner_mode != MettaRunnerMode::TERMINATE { //If we received an interrupt, then clear it and break the loop - if *SIGNAL_STATE.lock().unwrap() > 0 { - *SIGNAL_STATE.lock().unwrap() = 0; + let mut signal_state = SIGNAL_STATE.lock().unwrap(); + if *signal_state > 0 { + *signal_state = 0; break; } + drop(signal_state); //Run the next step match self.metta.run_step(&mut parser, runner_mode, &mut self.result) { From 324a072f8366d32551879191ee7ef52fb6427d55 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 4 Sep 2023 16:07:03 +0900 Subject: [PATCH 12/25] Back-porting changes to interpreter interface from interpreter2 to old interpreter Getting one step closer to an interruptible interpreter loop --- c/src/metta.rs | 26 ++++++-------- lib/src/metta/interpreter.rs | 47 ++++++++++++++++++++---- lib/src/metta/interpreter2.rs | 5 +-- lib/src/metta/runner/mod.rs | 63 ++++++++++++++++++++++++--------- lib/src/metta/runner/stdlib2.rs | 3 +- repl/src/metta_shim.rs | 18 ++++------ 6 files changed, 107 insertions(+), 55 deletions(-) diff --git a/c/src/metta.rs b/c/src/metta.rs index 77f590b58..b26066460 100644 --- a/c/src/metta.rs +++ b/c/src/metta.rs @@ -1,10 +1,8 @@ use hyperon::common::shared::Shared; use hyperon::space::DynSpace; -use hyperon::Atom; use hyperon::metta::text::*; use hyperon::metta::interpreter; -use hyperon::metta::interpreter::InterpretedAtom; -use hyperon::common::plan::StepResult; +use hyperon::metta::interpreter::InterpreterState; use hyperon::metta::runner::Metta; use hyperon::rust_type_atom; @@ -345,19 +343,19 @@ pub struct step_result_t { result: *mut RustStepResult, } -struct RustStepResult(StepResult<'static, Vec, (Atom, Atom)>); +struct RustStepResult(InterpreterState<'static, DynSpace>); -impl From, (Atom, Atom)>> for step_result_t { - fn from(result: StepResult<'static, Vec, (Atom, Atom)>) -> Self { - Self{ result: Box::into_raw(Box::new(RustStepResult(result))) } +impl From> for step_result_t { + fn from(state: InterpreterState<'static, DynSpace>) -> Self { + Self{ result: Box::into_raw(Box::new(RustStepResult(state))) } } } impl step_result_t { - fn into_inner(self) -> StepResult<'static, Vec, (Atom, Atom)> { + fn into_inner(self) -> InterpreterState<'static, DynSpace> { unsafe{ Box::from_raw(self.result).0 } } - fn borrow(&self) -> &StepResult<'static, Vec, (Atom, Atom)> { + fn borrow(&self) -> &InterpreterState<'static, DynSpace> { &unsafe{ &*(&*self).result }.0 } } @@ -427,13 +425,9 @@ pub extern "C" fn step_has_next(step: *const step_result_t) -> bool { pub extern "C" fn step_get_result(step: step_result_t, callback: c_atom_vec_callback_t, context: *mut c_void) { let step = step.into_inner(); - match step { - StepResult::Return(mut res) => { - let res = res.drain(0..).map(|res| res.into_tuple().0).collect(); - return_atoms(&res, callback, context); - }, - StepResult::Error(_) => return_atoms(&vec![], callback, context), - _ => panic!("Not expected step result: {:?}", step), + match step.into_result() { + Ok(res) => return_atoms(&res, callback, context), + Err(_) => return_atoms(&vec![], callback, context), } } diff --git a/lib/src/metta/interpreter.rs b/lib/src/metta/interpreter.rs index f0ab18ffe..b817084df 100644 --- a/lib/src/metta/interpreter.rs +++ b/lib/src/metta/interpreter.rs @@ -79,6 +79,34 @@ use std::ops::Deref; use std::rc::Rc; use std::fmt::{Debug, Display, Formatter}; +/// Wrapper, So the old interpreter can present the same public interface as the new intperpreter +pub struct InterpreterState<'a, T: SpaceRef<'a>> { + step_result: StepResult<'a, Results, InterpreterError>, + phantom: core::marker::PhantomData +} + +impl<'a, T: SpaceRef<'a>> InterpreterState<'a, T> { + pub fn has_next(&self) -> bool { + self.step_result.has_next() + } + pub fn into_result(self) -> Result, String> { + match self.step_result { + StepResult::Return(mut res) => { + let res = res.drain(0..).map(|res| res.into_tuple().0).collect(); + Ok(res) + }, + StepResult::Error(_) => Ok(vec![]), + StepResult::Execute(_) => Err("Evaluation is not finished".into()) + } + } +} + +impl<'a, T: SpaceRef<'a>> Debug for InterpreterState<'a, T> { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + Debug::fmt(&self.step_result, f) + } +} + /// Result of atom interpretation plus variable bindings found #[derive(Clone, PartialEq)] pub struct InterpretedAtom(Atom, Bindings); @@ -132,7 +160,12 @@ type NoInputPlan<'a> = Box + 'a>; /// # Arguments /// * `space` - atomspace to query for interpretation /// * `expr` - atom to interpret -pub fn interpret_init<'a, T: Space + 'a>(space: T, expr: &Atom) -> StepResult<'a, Results, InterpreterError> { +pub fn interpret_init<'a, T: Space + 'a>(space: T, expr: &Atom) -> InterpreterState<'a, T> { + let step_result = interpret_init_internal(space, expr); + InterpreterState { step_result: step_result, phantom: <_>::default() } +} + +fn interpret_init_internal<'a, T: Space + 'a>(space: T, expr: &Atom) -> StepResult<'a, Results, InterpreterError> { let context = InterpreterContextRef::new(space); interpret_as_type_plan(context, InterpretedAtom(expr.clone(), Bindings::new()), @@ -145,10 +178,10 @@ pub fn interpret_init<'a, T: Space + 'a>(space: T, expr: &Atom) -> StepResult<'a /// /// # Arguments /// * `step` - [StepResult::Execute] result from the previous step. -pub fn interpret_step<'a>(step: StepResult<'a, Results, InterpreterError>) -> StepResult<'a, Results, InterpreterError> { +pub fn interpret_step<'a, T: Space + 'a>(step: InterpreterState<'a, T>) -> InterpreterState<'a, T> { log::debug!("current plan:\n{:?}", step); - match step { - StepResult::Execute(plan) => plan.step(()), + match step.step_result { + StepResult::Execute(plan) => InterpreterState { step_result: plan.step(()), phantom: <_>::default() }, StepResult::Return(_) => panic!("Plan execution is finished already"), StepResult::Error(_) => panic!("Plan execution is finished with error"), } @@ -162,10 +195,10 @@ pub fn interpret_step<'a>(step: StepResult<'a, Results, InterpreterError>) -> St /// * `expr` - atom to interpret pub fn interpret(space: T, expr: &Atom) -> Result, String> { let mut step = interpret_init(space, expr); - while step.has_next() { + while step.step_result.has_next() { step = interpret_step(step); } - match step { + match step.step_result { StepResult::Return(mut result) => Ok(result.drain(0..) .map(|InterpretedAtom(atom, _)| atom).collect()), // TODO: return (Error atom err) expression @@ -226,7 +259,7 @@ impl SpaceObserver for InterpreterCache { use std::marker::PhantomData; -trait SpaceRef<'a> : Space + 'a {} +pub trait SpaceRef<'a> : Space + 'a {} impl<'a, T: Space + 'a> SpaceRef<'a> for T {} struct InterpreterContext<'a, T: SpaceRef<'a>> { diff --git a/lib/src/metta/interpreter2.rs b/lib/src/metta/interpreter2.rs index 4fb75dab3..0815c014e 100644 --- a/lib/src/metta/interpreter2.rs +++ b/lib/src/metta/interpreter2.rs @@ -91,11 +91,11 @@ fn atom_into_array(atom: Atom) -> Option<[Atom; N]> { impl<'a, T: SpaceRef<'a>> InterpreterState<'a, T> { - fn has_next(&self) -> bool { + pub fn has_next(&self) -> bool { !self.plan.is_empty() } - fn into_result(self) -> Result, String> { + pub fn into_result(self) -> Result, String> { if self.has_next() { Err("Evaluation is not finished".into()) } else { @@ -142,6 +142,7 @@ pub fn interpret_init<'a, T: Space + 'a>(space: T, expr: &Atom) -> InterpreterSt } } +//TODO: These docs are out of date for the new interpreter /// Perform next step of the interpretation plan and return the result. Panics /// when [StepResult::Return] or [StepResult::Error] are passed as input. /// See [crate::metta::interpreter] for algorithm explanation. diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index a56826e14..1f95a9a18 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -37,12 +37,18 @@ pub struct MettaContents { } #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum MettaRunnerMode { +enum MettaRunnerMode { ADD, INTERPRET, TERMINATE, } +#[derive(Clone, Debug)] +pub struct RunnerState { + mode: MettaRunnerMode, + results: Vec>, +} + impl Metta { pub fn new(space: DynSpace, tokenizer: Shared) -> Self { Metta::from_space(space, tokenizer, vec![PathBuf::from(".")]) @@ -144,28 +150,33 @@ impl Metta { } pub fn run(&self, parser: &mut SExprParser) -> Result>, String> { - let mut mode = MettaRunnerMode::ADD; - let mut results: Vec> = Vec::new(); + let mut state = self.start_run(); - while mode != MettaRunnerMode::TERMINATE { - mode = self.run_step(parser, mode, &mut results)?; + while !state.is_complete() { + self.run_step(parser, &mut state)?; } - Ok(results) + Ok(state.into_results()) + } + + pub fn start_run(&self) -> RunnerState { + RunnerState::new() } - pub fn run_step(&self, parser: &mut SExprParser, mode: MettaRunnerMode, intermediate_results: &mut Vec>) -> Result { + pub fn run_step(&self, parser: &mut SExprParser, state: &mut RunnerState) -> Result<(), String> { let atom = parser.parse(&self.0.tokenizer.borrow())?; if let Some(atom) = atom { if atom == EXEC_SYMBOL { - return Ok(MettaRunnerMode::INTERPRET); + state.mode = MettaRunnerMode::INTERPRET; + return Ok(()); } - match mode { + match state.mode { MettaRunnerMode::ADD => { if let Err(atom) = self.add_atom(atom) { - intermediate_results.push(vec![atom]); - return Ok(MettaRunnerMode::TERMINATE); + state.results.push(vec![atom]); + state.mode = MettaRunnerMode::TERMINATE; + return Ok(()); } }, MettaRunnerMode::INTERPRET => { @@ -181,21 +192,23 @@ impl Metta { } } let error = result.iter().any(|atom| is_error(atom)); - intermediate_results.push(result); + state.results.push(result); if error { - return Ok(MettaRunnerMode::TERMINATE); + state.mode = MettaRunnerMode::TERMINATE; + return Ok(()); } } } }, MettaRunnerMode::TERMINATE => { - return Ok(MettaRunnerMode::TERMINATE); + return Ok(()); }, } - Ok(MettaRunnerMode::ADD) + state.mode = MettaRunnerMode::ADD; } else { - Ok(MettaRunnerMode::TERMINATE) + state.mode = MettaRunnerMode::TERMINATE; } + Ok(()) } pub fn evaluate_atom(&self, atom: Atom) -> Result, String> { @@ -222,6 +235,24 @@ impl Metta { } +impl RunnerState { + fn new() -> Self { + Self { + mode: MettaRunnerMode::ADD, + results: vec![], + } + } + pub fn is_complete(&self) -> bool { + self.mode == MettaRunnerMode::TERMINATE + } + pub fn intermediate_results(&self) -> &Vec> { + &self.results + } + pub fn into_results(self) -> Vec> { + self.results + } +} + pub fn new_metta_rust() -> Metta { let metta = Metta::new(DynSpace::new(GroundingSpace::new()), Shared::new(Tokenizer::new())); diff --git a/lib/src/metta/runner/stdlib2.rs b/lib/src/metta/runner/stdlib2.rs index 0fecd4b4b..fc21494d5 100644 --- a/lib/src/metta/runner/stdlib2.rs +++ b/lib/src/metta/runner/stdlib2.rs @@ -7,7 +7,6 @@ use crate::metta::runner::Metta; use crate::metta::types::{get_atom_types, get_meta_type}; use std::fmt::Display; -use std::path::PathBuf; use regex::Regex; use super::arithmetics::*; @@ -115,7 +114,7 @@ pub fn register_common_tokens(metta: &Metta) { tref.register_token(regex(r"if-equal"), move |_| { is_equivalent.clone() }); } -pub fn register_runner_tokens(metta: &Metta, _cwd: PathBuf) { +pub fn register_runner_tokens(metta: &Metta) { let _space = metta.space(); let tokenizer = metta.tokenizer(); diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index b38453d57..f69196eb3 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -6,7 +6,7 @@ use hyperon::Atom; use hyperon::atom::VariableAtom; use hyperon::space::*; use hyperon::space::grounding::GroundingSpace; -use hyperon::metta::runner::{Metta, MettaRunnerMode}; +use hyperon::metta::runner::Metta; use hyperon::metta::runner::stdlib::register_rust_tokens; use hyperon::metta::text::Tokenizer; use hyperon::metta::text::SExprParser; @@ -104,13 +104,12 @@ impl MettaShim { pub fn exec(&mut self, line: &str) { metta_shim_env!{{ let mut parser = SExprParser::new(line); - let mut runner_mode = MettaRunnerMode::ADD; - self.result = Vec::new(); + let mut runner_state = self.metta.start_run(); //We don't want any leftover interrupts to break us this time *SIGNAL_STATE.lock().unwrap() = 0; - while runner_mode != MettaRunnerMode::TERMINATE { + while !runner_state.is_complete() { //If we received an interrupt, then clear it and break the loop let mut signal_state = SIGNAL_STATE.lock().unwrap(); if *signal_state > 0 { @@ -120,14 +119,9 @@ impl MettaShim { drop(signal_state); //Run the next step - match self.metta.run_step(&mut parser, runner_mode, &mut self.result) { - Ok(mode) => { - runner_mode = mode; - }, - Err(err) => { - panic!("Unhandled MeTTa error: {}", err); - } - } + self.metta.run_step(&mut parser, &mut runner_state) + .unwrap_or_else(|err| panic!("Unhandled MeTTa error: {}", err)); + self.result = runner_state.intermediate_results().clone(); } }} } From 6cece33c468cf56d182a2f186ad2eac79788236d Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Mon, 4 Sep 2023 19:21:02 +0900 Subject: [PATCH 13/25] Reworking runner so interpreter loop is interruptible --- lib/src/metta/interpreter.rs | 11 +++- lib/src/metta/interpreter2.rs | 10 +++ lib/src/metta/runner/mod.rs | 114 +++++++++++++++++++++------------- 3 files changed, 90 insertions(+), 45 deletions(-) diff --git a/lib/src/metta/interpreter.rs b/lib/src/metta/interpreter.rs index b817084df..f26b893ad 100644 --- a/lib/src/metta/interpreter.rs +++ b/lib/src/metta/interpreter.rs @@ -86,6 +86,15 @@ pub struct InterpreterState<'a, T: SpaceRef<'a>> { } impl<'a, T: SpaceRef<'a>> InterpreterState<'a, T> { + + /// INTERNAL USE ONLY. Create an InterpreterState that is ready to yield results + pub(crate) fn new_finished(_space: T, results: Vec) -> Self { + Self { + step_result: StepResult::Return(results.into_iter().map(|atom| InterpretedAtom(atom, Bindings::new())).collect()), + phantom: <_>::default(), + } + } + pub fn has_next(&self) -> bool { self.step_result.has_next() } @@ -95,7 +104,7 @@ impl<'a, T: SpaceRef<'a>> InterpreterState<'a, T> { let res = res.drain(0..).map(|res| res.into_tuple().0).collect(); Ok(res) }, - StepResult::Error(_) => Ok(vec![]), + StepResult::Error((atom, err)) => Ok(vec![Atom::expr([ERROR_SYMBOL, atom, err])]), StepResult::Execute(_) => Err("Evaluation is not finished".into()) } } diff --git a/lib/src/metta/interpreter2.rs b/lib/src/metta/interpreter2.rs index 0815c014e..8545cc1ce 100644 --- a/lib/src/metta/interpreter2.rs +++ b/lib/src/metta/interpreter2.rs @@ -91,6 +91,16 @@ fn atom_into_array(atom: Atom) -> Option<[Atom; N]> { impl<'a, T: SpaceRef<'a>> InterpreterState<'a, T> { + /// INTERNAL USE ONLY. Create an InterpreterState that is ready to yield results + #[allow(dead_code)] //TODO: only silence the warning until interpreter2 replaces interpreter + pub(crate) fn new_finished(space: T, results: Vec) -> Self { + Self { + plan: vec![], + finished: results, + context: InterpreterContextRef::new(space), + } + } + pub fn has_next(&self) -> bool { !self.plan.is_empty() } diff --git a/lib/src/metta/runner/mod.rs b/lib/src/metta/runner/mod.rs index 1f95a9a18..7a1ecf1dd 100644 --- a/lib/src/metta/runner/mod.rs +++ b/lib/src/metta/runner/mod.rs @@ -11,13 +11,13 @@ use std::path::PathBuf; use std::collections::HashMap; pub mod stdlib; -use super::interpreter::interpret; +use super::interpreter::{interpret, interpret_init, interpret_step, InterpreterState}; use stdlib::*; // Uncomment three lines below and comment three lines above to // switch to the minimal MeTTa version //pub mod stdlib2; -//use super::interpreter2::interpret; +//use super::interpreter2::{interpret, interpret_init, interpret_step, InterpreterState}; //use stdlib2::*; mod arithmetics; @@ -36,16 +36,16 @@ pub struct MettaContents { search_paths: Vec, } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] enum MettaRunnerMode { ADD, INTERPRET, TERMINATE, } -#[derive(Clone, Debug)] -pub struct RunnerState { +pub struct RunnerState<'a> { mode: MettaRunnerMode, + interpreter_state: Option>, results: Vec>, } @@ -164,50 +164,75 @@ impl Metta { pub fn run_step(&self, parser: &mut SExprParser, state: &mut RunnerState) -> Result<(), String> { - let atom = parser.parse(&self.0.tokenizer.borrow())?; - - if let Some(atom) = atom { - if atom == EXEC_SYMBOL { - state.mode = MettaRunnerMode::INTERPRET; - return Ok(()); - } - match state.mode { - MettaRunnerMode::ADD => { - if let Err(atom) = self.add_atom(atom) { - state.results.push(vec![atom]); - state.mode = MettaRunnerMode::TERMINATE; - return Ok(()); - } - }, - MettaRunnerMode::INTERPRET => { - match self.evaluate_atom(atom) { - Err(msg) => return Err(msg), - Ok(result) => { - fn is_error(atom: &Atom) -> bool { - match atom { - Atom::Expression(expr) => { - expr.children().len() > 0 && expr.children()[0] == ERROR_SYMBOL - }, - _ => false, - } - } - let error = result.iter().any(|atom| is_error(atom)); - state.results.push(result); - if error { - state.mode = MettaRunnerMode::TERMINATE; - return Ok(()); + // If we're in the middle of interpreting an atom... + if let Some(interpreter_state) = core::mem::replace(&mut state.interpreter_state, None) { + + if interpreter_state.has_next() { + + //Take a step with the interpreter, and put it back for next time + state.interpreter_state = Some(interpret_step(interpreter_state)) + } else { + + //This interpreter is finished, process the results + match interpreter_state.into_result() { + Err(msg) => return Err(msg), + Ok(result) => { + fn is_error(atom: &Atom) -> bool { + match atom { + Atom::Expression(expr) => { + expr.children().len() > 0 && expr.children()[0] == ERROR_SYMBOL + }, + _ => false, } } + + let error = result.iter().any(|atom| is_error(atom)); + state.results.push(result); + if error { + state.mode = MettaRunnerMode::TERMINATE; + return Ok(()); + } } - }, - MettaRunnerMode::TERMINATE => { + } + } + + } else { + + // We'll parse the next atom, and start a new intperpreter + if let Some(atom) = parser.parse(&self.0.tokenizer.borrow())? { + if atom == EXEC_SYMBOL { + state.mode = MettaRunnerMode::INTERPRET; return Ok(()); - }, + } + match state.mode { + MettaRunnerMode::ADD => { + if let Err(atom) = self.add_atom(atom) { + state.results.push(vec![atom]); + state.mode = MettaRunnerMode::TERMINATE; + return Ok(()); + } + }, + MettaRunnerMode::INTERPRET => { + + state.interpreter_state = Some(match self.type_check(atom) { + Err(atom) => { + InterpreterState::new_finished(self.space().clone(), vec![atom]) + }, + Ok(atom) => { + interpret_init(self.space().clone(), &atom) + }, + }); + }, + MettaRunnerMode::TERMINATE => { + return Ok(()); + }, + } + state.mode = MettaRunnerMode::ADD; + } else { + state.mode = MettaRunnerMode::TERMINATE; } - state.mode = MettaRunnerMode::ADD; - } else { - state.mode = MettaRunnerMode::TERMINATE; } + Ok(()) } @@ -235,10 +260,11 @@ impl Metta { } -impl RunnerState { +impl<'a> RunnerState<'a> { fn new() -> Self { Self { mode: MettaRunnerMode::ADD, + interpreter_state: None, results: vec![], } } From 2febe66146b5bc6d20897d8469f4cd662be9610a Mon Sep 17 00:00:00 2001 From: luketpeterson <36806965+luketpeterson@users.noreply.github.com> Date: Tue, 5 Sep 2023 10:33:35 +0900 Subject: [PATCH 14/25] Update lib/src/metta/text.rs Typo Co-authored-by: Vitaly Bogdanov --- lib/src/metta/text.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/metta/text.rs b/lib/src/metta/text.rs index 7dba3f8a9..7aeee506f 100644 --- a/lib/src/metta/text.rs +++ b/lib/src/metta/text.rs @@ -148,7 +148,7 @@ impl SyntaxNode { /// /// This method is useful to render syntax styling. /// - /// TODO: Inthe future, We'll want to be able to use the type system to assign styling, + /// TODO: In the future, We'll want to be able to use the type system to assign styling, /// which is going to mean looking at Atoms, and not the tokens they were built from pub fn visit_syntactic(&self, mut callback: C) where C: FnMut(&SyntaxNode) From faffafbadb5fb92238101064bafd3ab20728a812 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 13:42:24 +0900 Subject: [PATCH 15/25] Reworking parser to separate lexical from semantic tokens --- lib/src/metta/text.rs | 216 ++++++++++++++++----------------- repl/src/interactive_helper.rs | 14 ++- 2 files changed, 114 insertions(+), 116 deletions(-) diff --git a/lib/src/metta/text.rs b/lib/src/metta/text.rs index 7aeee506f..e2c14a191 100644 --- a/lib/src/metta/text.rs +++ b/lib/src/metta/text.rs @@ -1,7 +1,5 @@ //! MeTTa parser implementation. -use core::convert::TryFrom; - use crate::*; use core::ops::Range; @@ -83,91 +81,123 @@ pub enum SyntaxNodeType { /// Whitespace. One or more whitespace chars Whitespace, /// Symbol Atom. A Symbol Atom - SymbolAtom, - /// Variable Atom. A [VariableAtom] constructed from a [VariableToken](SyntaxNodeType::VariableToken) - VariableAtom, - /// Expression. All input text composing an Expression, from the opening '(' to the close - ExpressionAtom, - /// Special Atom. A token matched by a regex registered with the [Tokenizer]. Might be a Grounded - /// Atom, but might also be another type of Atom - SpecialAtom, - /// Unparsed Leftover Text. Unparsed text remaining after the parser has encountered an error LeftoverText, + /// A Group of [SyntaxNode]s between an [OpenParen](SyntaxNodeType::OpenParen) and a matching + /// [CloseParen](SyntaxNodeType::CloseParen) + ExpressionGroup, /// Syntax Nodes that cannot be combined into a coherent atom due to a parse error, even if some /// of the individual nodes are valid ErrorGroup, } +impl SyntaxNodeType { + /// Returns `true` is the SyntaxNodeType is a leaf (incapable of hosting sub-nodes). Returns `false` + /// for "group" node tyes. + pub fn is_leaf(&self) -> bool { + match self { + Self::ExpressionGroup | + Self::ErrorGroup => false, + _ => true + } + } +} + #[derive(Clone, Debug)] pub struct SyntaxNode { pub node_type: SyntaxNodeType, pub src_range: Range, - pub atom: Option, pub sub_nodes: Vec, + pub parsed_text: Option, pub message: Option, pub is_complete: bool, } impl SyntaxNode { - fn new(node_type: SyntaxNodeType, src_range: Range, atom: Option, sub_nodes: Vec) -> SyntaxNode { + fn new(node_type: SyntaxNodeType, src_range: Range, sub_nodes: Vec) -> SyntaxNode { Self { node_type, src_range, - atom, + parsed_text: None, sub_nodes, message: None, is_complete: true } } + fn new_token_node(node_type: SyntaxNodeType, src_range: Range, parsed_text: String) -> SyntaxNode { + let mut node = SyntaxNode::new(node_type, src_range, vec![]); + node.parsed_text = Some(parsed_text); + node + } + fn incomplete_with_message(node_type: SyntaxNodeType, src_range: Range, sub_nodes: Vec, message: String) -> SyntaxNode { - Self { - node_type, - src_range, - atom: None, - sub_nodes, - message: Some(message), - is_complete: false - } + let mut node = SyntaxNode::new(node_type, src_range, sub_nodes); + node.message = Some(message); + node.is_complete = false; + node } /// Creates a new error group. Gets the error message associated with the last node fn new_error_group(src_range: Range, sub_nodes: Vec) -> SyntaxNode { let message = sub_nodes[sub_nodes.len()-1].message.clone(); - Self { - node_type: SyntaxNodeType::ErrorGroup, - src_range, - atom: None, - sub_nodes, - message, - is_complete: false - } + let mut node = SyntaxNode::new(SyntaxNodeType::ErrorGroup, src_range, sub_nodes); + node.message = message; + node.is_complete = false; + node } - /// Visits all the syntactic nodes (vs. semantic) in a parsed syntax tree - /// - /// This method is useful to render syntax styling. - /// - /// TODO: In the future, We'll want to be able to use the type system to assign styling, - /// which is going to mean looking at Atoms, and not the tokens they were built from - pub fn visit_syntactic(&self, mut callback: C) - where C: FnMut(&SyntaxNode) - { - self.visit_depth_first(|node| { - match node.node_type { - SyntaxNodeType::Comment | - SyntaxNodeType::VariableToken | - SyntaxNodeType::StringToken | - SyntaxNodeType::WordToken | - SyntaxNodeType::OpenParen | - SyntaxNodeType::CloseParen | - SyntaxNodeType::Whitespace | - SyntaxNodeType::LeftoverText => { - callback(node); + /// Transforms a root SyntaxNode into an [Atom] + pub fn as_atom(&self, tokenizer: &Tokenizer) -> Result, String> { + + //If we have an incomplete node, it's an error + if !self.is_complete { + return Err(self.message.clone().unwrap()) + } + + match self.node_type { + SyntaxNodeType::Comment | + SyntaxNodeType::Whitespace => Ok(None), + SyntaxNodeType::OpenParen | + SyntaxNodeType::CloseParen => Ok(None), + SyntaxNodeType::VariableToken => { + let token_text = self.parsed_text.as_ref().unwrap(); + let new_var_atom = Atom::var(token_text); + Ok(Some(new_var_atom)) + }, + SyntaxNodeType::StringToken | + SyntaxNodeType::WordToken => { + let token_text = self.parsed_text.as_ref().unwrap(); + let constr = tokenizer.find_token(token_text); + if let Some(constr) = constr { + let new_atom = constr(token_text); + Ok(Some(new_atom)) + } else { + let new_atom = Atom::sym(token_text); + Ok(Some(new_atom)) } - _ => {} - } - }) + }, + SyntaxNodeType::ExpressionGroup => { + let mut err_encountered = Ok(()); + let expr_children: Vec = self.sub_nodes.iter().filter_map(|node| { + match node.as_atom(tokenizer) { + Err(err) => { + err_encountered = Err(err); + None + }, + Ok(atom) => atom + } + }).collect(); + match err_encountered { + Ok(_) => { + let new_expr_atom = Atom::expr(expr_children); + Ok(Some(new_expr_atom)) + }, + Err(err) => Err(err) + } + }, + SyntaxNodeType::LeftoverText | + SyntaxNodeType::ErrorGroup => {unreachable!()} + } } /// Visits all the nodes in a parsed syntax tree in a depth-first order @@ -199,22 +229,10 @@ impl<'a> SExprParser<'a> { pub fn parse(&mut self, tokenizer: &Tokenizer) -> Result, String> { loop { - match self.parse_to_syntax_tree(tokenizer) { + match self.parse_to_syntax_tree() { Some(node) => { - //If we have an incomplete node, it's an error - if !node.is_complete { - return Err(node.message.unwrap()) - } - - //We are only interested in nodes that represent atoms - match node.node_type { - SyntaxNodeType::SymbolAtom | - SyntaxNodeType::VariableAtom | - SyntaxNodeType::ExpressionAtom | - SyntaxNodeType::SpecialAtom => { - return Ok(node.atom) - }, - _ => () + if let Some(atom) = node.as_atom(tokenizer)? { + return Ok(Some(atom)) } }, None => { @@ -224,7 +242,7 @@ impl<'a> SExprParser<'a> { } } - pub fn parse_to_syntax_tree(&mut self, tokenizer: &Tokenizer) -> Option { + pub fn parse_to_syntax_tree(&mut self) -> Option { if let Some((idx, c)) = self.it.peek().cloned() { match c { ';' => { @@ -232,7 +250,7 @@ impl<'a> SExprParser<'a> { return Some(comment_node); }, _ if c.is_whitespace() => { - let whispace_node = SyntaxNode::new(SyntaxNodeType::Whitespace, idx..idx+1, None, vec![]); + let whispace_node = SyntaxNode::new(SyntaxNodeType::Whitespace, idx..idx+1, vec![]); self.it.next(); return Some(whispace_node); }, @@ -241,19 +259,19 @@ impl<'a> SExprParser<'a> { return Some(var_node); }, '(' => { - let expr_node = self.parse_expr(tokenizer); + let expr_node = self.parse_expr(); return Some(expr_node); }, ')' => { - let close_paren_node = SyntaxNode::new(SyntaxNodeType::CloseParen, idx..idx+1, None, vec![]); + let close_paren_node = SyntaxNode::new(SyntaxNodeType::CloseParen, idx..idx+1, vec![]); self.it.next(); let leftover_text_node = self.parse_leftovers("Unexpected right bracket".to_string()); let error_group_node = SyntaxNode::new_error_group(idx..self.cur_idx(), vec![close_paren_node, leftover_text_node]); return Some(error_group_node); }, _ => { - let atom_node = self.parse_atom(tokenizer); - return atom_node; + let token_node = self.parse_token(); + return token_node; }, } } @@ -279,7 +297,7 @@ impl<'a> SExprParser<'a> { } } let range = start_idx..self.cur_idx(); - Some(SyntaxNode::new(SyntaxNodeType::Comment, range, None, vec![])) + Some(SyntaxNode::new(SyntaxNodeType::Comment, range, vec![])) } else { None } @@ -292,32 +310,11 @@ impl<'a> SExprParser<'a> { SyntaxNode::incomplete_with_message(SyntaxNodeType::LeftoverText, range, vec![], message) } - fn parse_atom(&mut self, tokenizer: &Tokenizer) -> Option { - if let Some(token_node) = self.parse_token() { - if token_node.is_complete { - let token_text = <&SymbolAtom>::try_from(token_node.atom.as_ref().unwrap()).unwrap().name(); - let constr = tokenizer.find_token(token_text); - if let Some(constr) = constr { - let new_atom = constr(token_text); - let special_atom_node = SyntaxNode::new(SyntaxNodeType::SpecialAtom, token_node.src_range.clone(), Some(new_atom), vec![token_node]); - return Some(special_atom_node); - } else { - let symbol_atom_node = SyntaxNode::new(SyntaxNodeType::SymbolAtom, token_node.src_range.clone(), token_node.atom.clone(), vec![token_node]); - return Some(symbol_atom_node); - } - } else { - Some(token_node) - } - } else { - None - } - } - - fn parse_expr(&mut self, tokenizer: &Tokenizer) -> SyntaxNode { + fn parse_expr(&mut self) -> SyntaxNode { let start_idx = self.cur_idx(); let mut child_nodes: Vec = Vec::new(); - let open_paren_node = SyntaxNode::new(SyntaxNodeType::OpenParen, start_idx..start_idx+1, None, vec![]); + let open_paren_node = SyntaxNode::new(SyntaxNodeType::OpenParen, start_idx..start_idx+1, vec![]); child_nodes.push(open_paren_node); self.it.next(); @@ -328,22 +325,20 @@ impl<'a> SExprParser<'a> { child_nodes.push(comment_node); }, _ if c.is_whitespace() => { - let whitespace_node = SyntaxNode::new(SyntaxNodeType::Whitespace, idx..idx+1, None, vec![]); + let whitespace_node = SyntaxNode::new(SyntaxNodeType::Whitespace, idx..idx+1, vec![]); child_nodes.push(whitespace_node); self.it.next(); }, ')' => { - let close_paren_node = SyntaxNode::new(SyntaxNodeType::CloseParen, idx..idx+1, None, vec![]); + let close_paren_node = SyntaxNode::new(SyntaxNodeType::CloseParen, idx..idx+1, vec![]); child_nodes.push(close_paren_node); self.it.next(); - let expr_children: Vec = child_nodes.iter().filter_map(|node| node.atom.clone()).collect(); - let new_expr_atom = Atom::expr(expr_children); - let expr_node = SyntaxNode::new(SyntaxNodeType::ExpressionAtom, start_idx..self.cur_idx(), Some(new_expr_atom), child_nodes); + let expr_node = SyntaxNode::new(SyntaxNodeType::ExpressionGroup, start_idx..self.cur_idx(), child_nodes); return expr_node; }, _ => { - if let Some(parsed_node) = self.parse_to_syntax_tree(tokenizer) { + if let Some(parsed_node) = self.parse_to_syntax_tree() { let is_err = !parsed_node.is_complete; child_nodes.push(parsed_node); @@ -390,8 +385,7 @@ impl<'a> SExprParser<'a> { while let Some((_idx, c)) = self.it.next() { if c == '"' { token.push('"'); - let string_symbol_atom = Atom::sym(token); - let string_node = SyntaxNode::new(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), Some(string_symbol_atom), vec![]); + let string_node = SyntaxNode::new_token_node(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), token); return string_node; } let c = if c == '\\' { @@ -423,8 +417,7 @@ impl<'a> SExprParser<'a> { self.it.next(); } - let word_symbol_atom = Atom::sym(token); - let word_node = SyntaxNode::new(SyntaxNodeType::WordToken, start_idx..self.cur_idx(), Some(word_symbol_atom), vec![]); + let word_node = SyntaxNode::new_token_node(SyntaxNodeType::WordToken, start_idx..self.cur_idx(), token); word_node } @@ -446,11 +439,8 @@ impl<'a> SExprParser<'a> { tmp_it.next(); } self.it = tmp_it; - let range = start_idx..self.cur_idx(); - let var_token_node = SyntaxNode::new(SyntaxNodeType::VariableToken, range.clone(), None, vec![]); - let new_var_atom = Atom::var(token); - let variable_atom_node = SyntaxNode::new(SyntaxNodeType::VariableAtom, range, Some(new_var_atom), vec![var_token_node]); - variable_atom_node + let var_token_node = SyntaxNode::new_token_node(SyntaxNodeType::VariableToken, start_idx..self.cur_idx(), token); + var_token_node } } diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 19b248c08..207f9ba65 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -67,14 +67,22 @@ impl Highlighter for ReplHelper { //Iterate over the syntax nodes generated by the parser, coloring them appropriately let mut colored_line = String::with_capacity(line.len() * 2); let mut bracket_depth = 0; - self.metta.borrow_mut().inside_env(|metta| { + self.metta.borrow_mut().inside_env(|_metta| { let mut parser = SExprParser::new(line); loop { - match parser.parse_to_syntax_tree(&metta.metta.tokenizer().borrow()) { + match parser.parse_to_syntax_tree() { Some(root_node) => { - root_node.visit_syntactic(|node| { + root_node.visit_depth_first(|node| { + // We will only render the leaf nodes in the syntax tree + if !node.node_type.is_leaf() { + return; + } + let mut style_sequence: Vec<&str> = vec![]; + // TODO: In the future, We'll want to be able to use the type system to assign styling, + // which is going to mean looking at Atoms, and not the tokens they were built from + //Set up the style for the node match node.node_type { SyntaxNodeType::Comment => { From 2cac97ad83e8411495e63ba866f5a35ea3021d27 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 14:19:24 +0900 Subject: [PATCH 16/25] Renaming SIGNAL_STATE to SIGINT_RECEIVED_COUNT --- repl/src/main.rs | 4 ++-- repl/src/metta_shim.rs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/repl/src/main.rs b/repl/src/main.rs index 3ac43d5da..995a16ec5 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -23,7 +23,7 @@ use config_params::*; mod interactive_helper; use interactive_helper::*; -static SIGNAL_STATE: Mutex = Mutex::new(0); +static SIGINT_RECEIVED_COUNT: Mutex = Mutex::new(0); #[derive(Parser)] #[command(version, about)] @@ -67,7 +67,7 @@ fn main() -> Result<()> { thread::spawn(move || { for _sig in signals.forever() { //Assume SIGINT, since that's the only registered handler - let mut signal_state = SIGNAL_STATE.lock().unwrap(); + let mut signal_state = SIGINT_RECEIVED_COUNT.lock().unwrap(); match *signal_state { 0 => println!("Interrupt received, stopping MeTTa..."), 1 => println!("Stopping in progress. Please wait..."), diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index f69196eb3..6c69be276 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -13,7 +13,7 @@ use hyperon::metta::text::SExprParser; use hyperon::common::shared::Shared; use crate::ReplParams; -use crate::SIGNAL_STATE; +use crate::SIGINT_RECEIVED_COUNT; /// MettaShim is responsible for **ALL** calls between the repl and MeTTa, and is in charge of keeping /// Python happy (and perhaps other languages in the future). @@ -107,11 +107,11 @@ impl MettaShim { let mut runner_state = self.metta.start_run(); //We don't want any leftover interrupts to break us this time - *SIGNAL_STATE.lock().unwrap() = 0; + *SIGINT_RECEIVED_COUNT.lock().unwrap() = 0; while !runner_state.is_complete() { //If we received an interrupt, then clear it and break the loop - let mut signal_state = SIGNAL_STATE.lock().unwrap(); + let mut signal_state = SIGINT_RECEIVED_COUNT.lock().unwrap(); if *signal_state > 0 { *signal_state = 0; break; From 6ed1b807d9ed75c25a3e46eb5db5977c04dcfe15 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 15:37:49 +0900 Subject: [PATCH 17/25] Adding hyperonpy version check logic, but currently disabled on account of incompatible version formats --- repl/Cargo.toml | 3 ++- repl/src/metta_shim.rs | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/repl/Cargo.toml b/repl/Cargo.toml index 237907892..60087eaf4 100644 --- a/repl/Cargo.toml +++ b/repl/Cargo.toml @@ -15,6 +15,7 @@ clap = { version = "4.4.0", features = ["derive"] } directories = "5.0.1" signal-hook = "0.3.17" pyo3 = { version = "0.19.2", features = ["auto-initialize"], optional = true } +semver = { version = "1.0.18", optional = true } [[bin]] name = "metta" @@ -22,4 +23,4 @@ path = "src/main.rs" [features] # default = ["python"] -python = ["pyo3"] +python = ["pyo3", "semver"] diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 6c69be276..c1a43d5b5 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -68,9 +68,16 @@ impl MettaShim { repl_params: repl_params.clone(), }; - //Load the hyperonpy Python stdlib, if the repl includes Python support + //Init HyperonPy if the repl includes Python support #[cfg(feature = "python")] - py_mod_loading::load_python_module(&new_shim.metta, "hyperon.stdlib").unwrap(); + { + //Confirm the hyperonpy version is compatible + //TODO, re-enable this check when hyperonpy version is semver-compatible + // py_mod_loading::confirm_hyperonpy_version(">=0.1.0, <0.2.0").unwrap(); + + //Load the hyperonpy Python stdlib + py_mod_loading::load_python_module(&new_shim.metta, "hyperon.stdlib").unwrap(); + } //Load the Rust stdlib register_rust_tokens(&new_shim.metta); @@ -228,6 +235,7 @@ mod py_mod_err { mod py_mod_loading { use std::fmt::Display; use std::path::PathBuf; + use semver::{Version, VersionReq}; use pyo3::prelude::*; use pyo3::types::{PyTuple, PyDict}; use hyperon::*; @@ -239,6 +247,29 @@ mod py_mod_loading { use hyperon::common::shared::Shared; use crate::ReplParams; + /// Load the hyperon module, and get the "__version__" attribute + pub fn get_hyperonpy_version() -> Result { + Python::with_gil(|py| -> PyResult { + let hyperon_mod = PyModule::import(py, "hyperon")?; + let version_obj = hyperon_mod.getattr("__version__")?; + Ok(version_obj.str()?.to_str()?.into()) + }).map_err(|err| { + format!("{err}") + }) + } + + pub fn confirm_hyperonpy_version(req_str: &str) -> Result<(), String> { + + let req = VersionReq::parse(req_str).unwrap(); + let version_string = get_hyperonpy_version()?; + let version = Version::parse(&version_string).map_err(|e| format!("Error parsing HyperonPy version: '{version_string}', {e}"))?; + if req.matches(&version) { + Ok(()) + } else { + Err(format!("MeTTa repl requires HyperonPy version matching '{req}'. Found version: '{version}'")) + } + } + pub fn load_python_module_from_mod_or_file(repl_params: &ReplParams, metta: &Metta, module_name: &str) -> Result<(), String> { // First, see if the module is already registered with Python From 256b2bd4cef52ea89c7c4a4dda390829f1d7ddac Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 16:47:01 +0900 Subject: [PATCH 18/25] Re-enabling Python version check in repl --- repl/src/metta_shim.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index c1a43d5b5..aed73c3be 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -72,8 +72,7 @@ impl MettaShim { #[cfg(feature = "python")] { //Confirm the hyperonpy version is compatible - //TODO, re-enable this check when hyperonpy version is semver-compatible - // py_mod_loading::confirm_hyperonpy_version(">=0.1.0, <0.2.0").unwrap(); + py_mod_loading::confirm_hyperonpy_version(">=0.1.0, <0.2.0").unwrap(); //Load the hyperonpy Python stdlib py_mod_loading::load_python_module(&new_shim.metta, "hyperon.stdlib").unwrap(); From 21708caf763e209764997a1cf36f67029eaf463d Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 17:35:06 +0900 Subject: [PATCH 19/25] Updating default config creation --- repl/src/config_params.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/repl/src/config_params.rs b/repl/src/config_params.rs index ef724e848..3b3f29da3 100644 --- a/repl/src/config_params.rs +++ b/repl/src/config_params.rs @@ -40,20 +40,21 @@ impl ReplParams { } }; + //Create the modules dir inside the config dir, if it doesn't already exist + let modules_dir = config_dir.join("modules"); + std::fs::create_dir_all(&modules_dir).unwrap(); + //Create the default config.metta file, if none exists let config_metta_path = config_dir.join("config.metta"); if !config_metta_path.exists() { let mut file = fs::OpenOptions::new() .create(true) .write(true) - .open(&config_metta_path).unwrap(); + .open(&config_metta_path) + .expect(&format!("Error creating default config file at {config_metta_path:?}")); file.write_all(&DEFAULT_CONFIG_METTA).unwrap(); } - //Create the modules dir inside the config dir, if it doesn't already exist - let modules_dir = config_dir.join("modules"); - std::fs::create_dir_all(&modules_dir).unwrap(); - //Push the "modules" dir, as the last place to search after the paths specified on the cmd line //TODO: the config.metta file will be able to append / modify the search paths, and can choose not to // include the "modules" dir in the future. From 8c6258b01234fe3213217ad021d2d2e9a9ea1254 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 18:26:27 +0900 Subject: [PATCH 20/25] Changing var name to be more explicit --- repl/src/main.rs | 8 ++++---- repl/src/metta_shim.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/repl/src/main.rs b/repl/src/main.rs index 995a16ec5..978d45e2b 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -67,8 +67,8 @@ fn main() -> Result<()> { thread::spawn(move || { for _sig in signals.forever() { //Assume SIGINT, since that's the only registered handler - let mut signal_state = SIGINT_RECEIVED_COUNT.lock().unwrap(); - match *signal_state { + let mut signal_received_cnt = SIGINT_RECEIVED_COUNT.lock().unwrap(); + match *signal_received_cnt { 0 => println!("Interrupt received, stopping MeTTa..."), 1 => println!("Stopping in progress. Please wait..."), _ => { @@ -76,8 +76,8 @@ fn main() -> Result<()> { exit(-1); }, } - *signal_state += 1; - drop(signal_state); + *signal_received_cnt += 1; + drop(signal_received_cnt); } }); diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index aed73c3be..0675c5007 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -117,12 +117,12 @@ impl MettaShim { while !runner_state.is_complete() { //If we received an interrupt, then clear it and break the loop - let mut signal_state = SIGINT_RECEIVED_COUNT.lock().unwrap(); - if *signal_state > 0 { - *signal_state = 0; + let mut signal_received_cnt = SIGINT_RECEIVED_COUNT.lock().unwrap(); + if *signal_received_cnt > 0 { + *signal_received_cnt = 0; break; } - drop(signal_state); + drop(signal_received_cnt); //Run the next step self.metta.run_step(&mut parser, &mut runner_state) From 452e46fe43f3bcf1d9bf258bc14bb0b7bd570e0e Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 19:35:34 +0900 Subject: [PATCH 21/25] Tweaking behavior around multi-line entry, so user is provided with a syntax error only if they attempt to submit an erroneous line twice. --- repl/src/interactive_helper.rs | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 207f9ba65..41ec3173f 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -22,6 +22,7 @@ pub struct ReplHelper { hinter: HistoryHinter, pub colored_prompt: String, cursor_bracket: std::cell::Cell>, // If the cursor is over or near a bracket to match + checked_line: std::cell::RefCell, style: StyleSettings, } @@ -165,6 +166,11 @@ impl Highlighter for ReplHelper { impl Validator for ReplHelper { fn validate(&self, ctx: &mut ValidationContext) -> Result { + //This validator implements the following behavior: + // * if user hits enter and line is valid, it will be submitted. + // * if user hits enter and line is invalid, it will treat it as a newline + // * If user hits enter twice in a row, it will report a syntax error + let mut validation_result = ValidationResult::Incomplete; self.metta.borrow_mut().inside_env(|metta| { let mut parser = SExprParser::new(ctx.input()); @@ -175,12 +181,21 @@ impl Validator for ReplHelper { Ok(Some(_atom)) => (), Ok(None) => { validation_result = ValidationResult::Valid(None); + *self.checked_line.borrow_mut() = "".to_string(); break }, Err(err) => { - validation_result = ValidationResult::Invalid(Some( - format!(" - \x1b[0;{}m{}\x1b[0m", self.style.error_style, err) - )); + let input = ctx.input(); + if input.len() < 1 { + break; + } + if *self.checked_line.borrow() != &input[0..input.len()-1] { + *self.checked_line.borrow_mut() = ctx.input().to_string(); + } else { + validation_result = ValidationResult::Invalid(Some( + format!(" - \x1b[0;{}m{}\x1b[0m", self.style.error_style, err) + )); + } break; } } @@ -202,6 +217,7 @@ impl ReplHelper { hinter: HistoryHinter {}, colored_prompt: "".to_owned(), cursor_bracket: std::cell::Cell::new(None), + checked_line: std::cell::RefCell::new(String::new()), style, } } From 0d51318ebba5903176835ef8841612f6dc1d6339 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 19:40:50 +0900 Subject: [PATCH 22/25] Adding clarifying comment around signal state synchronization --- repl/src/metta_shim.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index 0675c5007..d3bd77628 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -112,7 +112,10 @@ impl MettaShim { let mut parser = SExprParser::new(line); let mut runner_state = self.metta.start_run(); - //We don't want any leftover interrupts to break us this time + // This clears any leftover count that might have happened if the user pressed Ctrl+C just after MeTTa + // interpreter finished processing, but before control returned to rustyline's prompt. That signal is + // not intended for the new execution we are about to begin. + //See https://github.com/trueagi-io/hyperon-experimental/pull/419#discussion_r1315598220 for more details *SIGINT_RECEIVED_COUNT.lock().unwrap() = 0; while !runner_state.is_complete() { From c27685903e97fc7859a1b85dad0af30136523ccb Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 19:48:49 +0900 Subject: [PATCH 23/25] Accessing config by querying metta.space(), rather than sub-space associated with specific config module --- repl/src/metta_shim.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/repl/src/metta_shim.rs b/repl/src/metta_shim.rs index d3bd77628..6bfadcebb 100644 --- a/repl/src/metta_shim.rs +++ b/repl/src/metta_shim.rs @@ -1,11 +1,12 @@ use std::path::PathBuf; -use hyperon::{sym, expr, ExpressionAtom}; +use hyperon::ExpressionAtom; use hyperon::Atom; use hyperon::atom::VariableAtom; use hyperon::space::*; use hyperon::space::grounding::GroundingSpace; +use hyperon::metta::*; use hyperon::metta::runner::Metta; use hyperon::metta::runner::stdlib::register_rust_tokens; use hyperon::metta::text::Tokenizer; @@ -24,7 +25,7 @@ use crate::SIGINT_RECEIVED_COUNT; pub struct MettaShim { pub metta: Metta, pub result: Vec>, - repl_params: Shared, + _repl_params: Shared, //TODO: We'll likely want this back soon, but so I'm not un-plumbing it just yet } #[macro_export] @@ -65,7 +66,7 @@ impl MettaShim { let mut new_shim = Self { metta: Metta::from_space(space, tokenizer, repl_params.borrow().modules_search_paths().collect()), result: vec![], - repl_params: repl_params.clone(), + _repl_params: repl_params.clone(), }; //Init HyperonPy if the repl includes Python support @@ -144,13 +145,10 @@ impl MettaShim { pub fn get_config_atom(&mut self, config_name: &str) -> Option { let mut result = None; metta_shim_env!{{ - let repl_params = self.repl_params.borrow(); - let config_metta_path = repl_params.config_metta_path(); - let metta_modules = self.metta.modules().borrow(); - let config_space = metta_modules.get(config_metta_path).unwrap(); - let bindings_set = config_space.query(&Atom::expr(vec![sym!("="), Atom::sym(config_name.to_string()), expr!(val)])); + let val = VariableAtom::new("val"); + let bindings_set = self.metta.space().query(&Atom::expr(vec![EQUAL_SYMBOL, Atom::sym(config_name.to_string()), Atom::Variable(val.clone())])); if let Some(bindings) = bindings_set.into_iter().next() { - result = bindings.resolve(&VariableAtom::new("val")); + result = bindings.resolve(&val); } }} result From fb13d4f3c9a581247c92b66786757f6d17183c13 Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Tue, 5 Sep 2023 19:52:34 +0900 Subject: [PATCH 24/25] Small behavior tweak around multi-line entry, when we're detecting when to accept a newline and when to report syntax errors --- repl/src/interactive_helper.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 41ec3173f..56488be20 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -170,7 +170,6 @@ impl Validator for ReplHelper { // * if user hits enter and line is valid, it will be submitted. // * if user hits enter and line is invalid, it will treat it as a newline // * If user hits enter twice in a row, it will report a syntax error - let mut validation_result = ValidationResult::Incomplete; self.metta.borrow_mut().inside_env(|metta| { let mut parser = SExprParser::new(ctx.input()); @@ -189,7 +188,7 @@ impl Validator for ReplHelper { if input.len() < 1 { break; } - if *self.checked_line.borrow() != &input[0..input.len()-1] { + if *self.checked_line.borrow() != &input[0..input.len()-1] || input.as_bytes()[input.len()-1] != b'\n' { *self.checked_line.borrow_mut() = ctx.input().to_string(); } else { validation_result = ValidationResult::Invalid(Some( From 327738f5227ffa2c5468568c9d0c8ba9ab4a58ad Mon Sep 17 00:00:00 2001 From: Luke Peterson Date: Wed, 6 Sep 2023 11:11:55 +0900 Subject: [PATCH 25/25] Changing repl config Metta atoms to contain the "Repl" prefix Changing enter-key behavior when cursor isn't at the end --- repl/src/config.default.metta | 20 ++++++++++---------- repl/src/interactive_helper.rs | 14 +++++++------- repl/src/main.rs | 21 ++++++++++++++++++--- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/repl/src/config.default.metta b/repl/src/config.default.metta index ec129c8c9..73247a498 100644 --- a/repl/src/config.default.metta +++ b/repl/src/config.default.metta @@ -1,16 +1,16 @@ ; TODO: Let the "includePaths" be modifiable, but I want better string manipulation atoms -(= DefaultPrompt "> ") -; (= StyledPrompt "\x1b[1;32m> \x1b[0m") ; TODO: currently the MeTTa string parser doesn't resolve escape chars, although perhaps it should +(= ReplDefaultPrompt "> ") +; (= ReplStyledPrompt "\x1b[1;32m> \x1b[0m") ; TODO: currently the MeTTa string parser doesn't resolve escape chars, although perhaps it should ; TODO: somebody with better design sense should tweak these, and also provide dark-mode setings ; ANSI escape codes to configure the syntax highlighter -(= BracketStyles ("94" "93" "95" "96")) -(= CommentStyle "32") -(= VariableStyle "33") -(= SymbolStyle "34") -(= StringStyle "31") -(= ErrorStyle "91") -(= BracketMatchStyle "1;7") -; (= BracketMatchEnabled True) ;TODO: enable this when I have a reliable value interchange path built. Another use for https://github.com/trueagi-io/hyperon-experimental/issues/351 +(= ReplBracketStyles ("94" "93" "95" "96")) +(= ReplCommentStyle "32") +(= ReplVariableStyle "33") +(= ReplSymbolStyle "34") +(= ReplStringStyle "31") +(= ReplErrorStyle "91") +(= ReplBracketMatchStyle "1;7") +; (= ReplBracketMatchEnabled True) ;TODO: enable this when I have a reliable value interchange path built. Another use for https://github.com/trueagi-io/hyperon-experimental/issues/351 diff --git a/repl/src/interactive_helper.rs b/repl/src/interactive_helper.rs index 56488be20..60e7f8fc9 100644 --- a/repl/src/interactive_helper.rs +++ b/repl/src/interactive_helper.rs @@ -225,13 +225,13 @@ impl ReplHelper { impl StyleSettings { pub fn new(metta_shim: &mut MettaShim) -> Self { Self { - bracket_styles: metta_shim.get_config_expr_vec("BracketStyles").unwrap_or(vec!["94".to_string(), "93".to_string(), "95".to_string(), "96".to_string()]), - comment_style: metta_shim.get_config_string("CommentStyle").unwrap_or("32".to_string()), - variable_style: metta_shim.get_config_string("VariableStyle").unwrap_or("33".to_string()), - symbol_style: metta_shim.get_config_string("SymbolStyle").unwrap_or("34".to_string()), - string_style: metta_shim.get_config_string("StringStyle").unwrap_or("31".to_string()), - error_style: metta_shim.get_config_string("ErrorStyle").unwrap_or("91".to_string()), - bracket_match_style: metta_shim.get_config_string("BracketMatchStyle").unwrap_or("1;7".to_string()), + bracket_styles: metta_shim.get_config_expr_vec("ReplBracketStyles").unwrap_or(vec!["94".to_string(), "93".to_string(), "95".to_string(), "96".to_string()]), + comment_style: metta_shim.get_config_string("ReplCommentStyle").unwrap_or("32".to_string()), + variable_style: metta_shim.get_config_string("ReplVariableStyle").unwrap_or("33".to_string()), + symbol_style: metta_shim.get_config_string("ReplSymbolStyle").unwrap_or("34".to_string()), + string_style: metta_shim.get_config_string("ReplStringStyle").unwrap_or("31".to_string()), + error_style: metta_shim.get_config_string("ReplErrorStyle").unwrap_or("91".to_string()), + bracket_match_style: metta_shim.get_config_string("ReplBracketMatchStyle").unwrap_or("1;7".to_string()), } } } diff --git a/repl/src/main.rs b/repl/src/main.rs index 978d45e2b..e93620a27 100644 --- a/repl/src/main.rs +++ b/repl/src/main.rs @@ -5,7 +5,7 @@ use std::process::exit; use std::sync::Mutex; use rustyline::error::ReadlineError; -use rustyline::{Cmd, CompletionType, Config, EditMode, Editor, KeyEvent}; +use rustyline::{Cmd, CompletionType, Config, EditMode, Editor, KeyEvent, KeyCode, Modifiers}; use anyhow::Result; use clap::Parser; @@ -119,7 +119,22 @@ fn start_interactive_mode(repl_params: Shared, metta: MettaShim) -> .build(); let helper = ReplHelper::new(metta); let mut rl = Editor::with_config(config)?; + + //QUESTION: Should we provide a config to use vi key bindings vs. Emacs? + rl.set_helper(Some(helper)); + //KEY BEHAVIOR: Enter and ctrl-M will add a newline when the cursor is in the middle of a line, while + // ctrl-J will submit the line. + //TODO: Rustyline seems to have a bug where this is only true sometimes. Needs to be debugged. + // Ideally Rustyline could just subsume the whole "accept_in_the_middle" behavior with a design that + // allows the Validator to access the key event, so the Validator could make the decision without + // special logic inside rustyline. + rl.bind_sequence(KeyEvent( KeyCode::Enter, Modifiers::NONE ), Cmd::AcceptOrInsertLine { + accept_in_the_middle: false, + }); + rl.bind_sequence(KeyEvent::ctrl('j'), Cmd::AcceptOrInsertLine { + accept_in_the_middle: true, + }); rl.bind_sequence(KeyEvent::alt('n'), Cmd::HistorySearchForward); rl.bind_sequence(KeyEvent::alt('p'), Cmd::HistorySearchBackward); if let Some(history_path) = &repl_params.borrow().history_file { @@ -135,8 +150,8 @@ fn start_interactive_mode(repl_params: Shared, metta: MettaShim) -> let prompt = { let helper = rl.helper_mut().unwrap(); let mut metta = helper.metta.borrow_mut(); - let prompt = metta.get_config_string("DefaultPrompt").unwrap_or("> ".to_string()); - let styled_prompt = metta.get_config_string("StyledPrompt").unwrap_or(format!("\x1b[1;32m{prompt}\x1b[0m")); + let prompt = metta.get_config_string("ReplDefaultPrompt").unwrap_or("> ".to_string()); + let styled_prompt = metta.get_config_string("ReplStyledPrompt").unwrap_or(format!("\x1b[1;32m{prompt}\x1b[0m")); helper.colored_prompt = styled_prompt; prompt };