From 639ce57ad53d9dcf8ec02e61d50e9f1fc6a688aa Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Thu, 22 Jun 2023 16:16:40 +0800
Subject: [PATCH 01/42] rename ParserTree to ParseTree

---
 pag-parser/src/fusion.rs          | 14 +++++++-------
 pag-parser/src/lib.rs             |  2 +-
 tests/arith-expr/src/lib.rs       |  2 +-
 tests/sexpr-calculator/src/lib.rs |  2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/pag-parser/src/fusion.rs b/pag-parser/src/fusion.rs
index e66e6ca..b98d1a2 100644
--- a/pag-parser/src/fusion.rs
+++ b/pag-parser/src/fusion.rs
@@ -35,14 +35,14 @@ fn generate_tag_enum(parser: &Parser<'_, '_>) -> TokenStream {
 fn generate_parse_tree() -> TokenStream {
     quote! {
         #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-        pub struct ParserTree<'a> {
+        pub struct ParseTree<'a> {
             tag: Tag,
             src: &'a str,
             span: core::ops::Range<usize>,
             children: alloc::vec::Vec<Self>
         }
 
-        impl <'a> ParserTree<'a> {
+        impl <'a> ParseTree<'a> {
             pub fn new(tag: Tag, src: &'a str) -> Self {
                 Self {
                     tag,
@@ -169,7 +169,7 @@ fn generate_empty_actions(active: bool, symbols: &[Symbol<'_>]) -> Vec<TokenStre
                 format_ident!("parent")
             };
             quote! {{
-                let mut subtree = ParserTree::new(Tag::#tag, src);
+                let mut subtree = ParseTree::new(Tag::#tag, src);
                 subtree.set_span(cursor..cursor);
                 #target.add_child(subtree);
             }}
@@ -209,7 +209,7 @@ fn generate_children<'src>(
             if let Some(sym) = next_tree_indices.get(&0) {
                 let tag = format_ident!("{}", sym.name());
                 actions.push(quote! {
-                    let mut subtree = ParserTree::new(Tag::#tag, src);
+                    let mut subtree = ParseTree::new(Tag::#tag, src);
                 });
                 subtree = true;
             }
@@ -341,7 +341,7 @@ fn generate_inactive_parser<'src>(
         fn #parser_name<'a>(
             src: &'a str,
             mut offset: usize,
-            parent: &mut ParserTree<'a>,
+            parent: &mut ParseTree<'a>,
         ) -> Result<usize, Error> {
             #expect
             let mut cursor;
@@ -400,9 +400,9 @@ fn generate_active_parser<'src>(
         fn #parser_name(
             src: &str,
             mut offset: usize,
-        ) -> Result<ParserTree, Error> {
+        ) -> Result<ParseTree, Error> {
             #expect
-            let mut tree = ParserTree::new(Tag::#tag_ident, src);
+            let mut tree = ParseTree::new(Tag::#tag_ident, src);
             let mut cursor;
             'parser: loop {
                 cursor = offset;
diff --git a/pag-parser/src/lib.rs b/pag-parser/src/lib.rs
index db02afd..355eb87 100644
--- a/pag-parser/src/lib.rs
+++ b/pag-parser/src/lib.rs
@@ -282,7 +282,7 @@ pub fn generate_parser(input: &str) -> Result<TokenStream, Error> {
             clippy::match_single_binding,
         )]
         #parser_routines
-        pub fn parse(input: &str) -> Result<ParserTree, Error> {
+        pub fn parse(input: &str) -> Result<ParseTree, Error> {
             #entrypoint(input, 0)
         }
     })
diff --git a/tests/arith-expr/src/lib.rs b/tests/arith-expr/src/lib.rs
index b528f03..f6def5e 100644
--- a/tests/arith-expr/src/lib.rs
+++ b/tests/arith-expr/src/lib.rs
@@ -6,7 +6,7 @@ use std::num::Wrapping;
 mod parser;
 
 #[allow(dead_code)]
-fn eval(tree: &parser::ParserTree) -> Wrapping<usize> {
+fn eval(tree: &parser::ParseTree) -> Wrapping<usize> {
     match tree.tag() {
         parser::Tag::expr => tree.children()[..].iter().map(eval).sum(),
         parser::Tag::mult => tree.children()[..].iter().map(eval).product(),
diff --git a/tests/sexpr-calculator/src/lib.rs b/tests/sexpr-calculator/src/lib.rs
index ef9fa51..e3a768c 100644
--- a/tests/sexpr-calculator/src/lib.rs
+++ b/tests/sexpr-calculator/src/lib.rs
@@ -6,7 +6,7 @@ use std::num::Wrapping;
 mod parser;
 
 #[allow(dead_code)]
-fn eval(tree: &parser::ParserTree) -> Wrapping<usize> {
+fn eval(tree: &parser::ParseTree) -> Wrapping<usize> {
     match tree.tag() {
         parser::Tag::sexpr => eval(&tree.children()[0]),
         parser::Tag::int => Wrapping(tree.as_slice().parse::<usize>().unwrap()),

From 9d92fd05ec9a6aa99690bee2872532a6526422db Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Thu, 22 Jun 2023 18:36:50 +0800
Subject: [PATCH 02/42] avoid name conflicts

---
 pag-parser/src/fusion.rs | 8 +++-----
 pag-parser/src/lib.rs    | 2 +-
 pag-parser/src/nf.rs     | 6 +-----
 3 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/pag-parser/src/fusion.rs b/pag-parser/src/fusion.rs
index b98d1a2..f8c12b2 100644
--- a/pag-parser/src/fusion.rs
+++ b/pag-parser/src/fusion.rs
@@ -303,8 +303,7 @@ fn generate_inactive_parser<'src>(
     rules: &[&NormalForm<'src>],
     loop_optimizer: &mut LoopOptimizer,
 ) -> TokenStream {
-    let tag_name = format!("{tag}");
-    let parser_name = format_ident!("parse_{tag_name}");
+    let parser_name = format_ident!("parse_{tag}");
     let expect = generate_expect(rules);
 
     let success_actions = generate_children(&tag, false, parser, rules)
@@ -362,9 +361,8 @@ fn generate_active_parser<'src>(
     rules: &[&NormalForm<'src>],
     loop_optimizer: &mut LoopOptimizer,
 ) -> TokenStream {
-    let tag_name = format!("{tag}");
-    let tag_ident = format_ident!("{tag_name}");
-    let parser_name = format_ident!("parse_{tag_name}");
+    let tag_ident = format_ident!("{}", tag.symbol().name());
+    let parser_name = format_ident!("parse_{tag}");
     let expect = generate_expect(rules);
 
     let success_actions = generate_children(&tag, true, parser, rules)
diff --git a/pag-parser/src/lib.rs b/pag-parser/src/lib.rs
index 355eb87..a980cfc 100644
--- a/pag-parser/src/lib.rs
+++ b/pag-parser/src/lib.rs
@@ -268,7 +268,7 @@ pub fn generate_parser(input: &str) -> Result<TokenStream, Error> {
     merge_inactive_rules(&mut nfs, &parser, &nf_arena);
     remove_unreachable_rules(&mut nfs, &parser);
     let parser_routines = fusion_parser(&nfs, &parser);
-    let entrypoint = format_ident!("parse_{}", parser.entrypoint.name());
+    let entrypoint = format_ident!("parse_{}_0", parser.entrypoint.name());
     Ok(quote::quote! {
         #![allow(
             dead_code,
diff --git a/pag-parser/src/nf.rs b/pag-parser/src/nf.rs
index 1b05d52..47433e8 100644
--- a/pag-parser/src/nf.rs
+++ b/pag-parser/src/nf.rs
@@ -38,11 +38,7 @@ impl<'src> Tag<'src> {
 
 impl<'src> Display for Tag<'src> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        self.symbol.fmt(f)?;
-        if self.version > 0 {
-            write!(f, "_{}", self.version)?;
-        }
-        Ok(())
+        write!(f, "{}_{}", self.symbol, self.version)
     }
 }
 

From f5b18668d0ac73b9b4912c83fd70115d3e39c389 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Mon, 17 Jul 2023 23:36:28 +0800
Subject: [PATCH 03/42] make clippy happy

---
 pag-lexer/src/utilities.rs | 2 +-
 pag-parser/src/lib.rs      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pag-lexer/src/utilities.rs b/pag-lexer/src/utilities.rs
index b14a70d..adb2440 100644
--- a/pag-lexer/src/utilities.rs
+++ b/pag-lexer/src/utilities.rs
@@ -10,7 +10,7 @@ where
     }
     #[cfg(debug_assertions)]
     {
-        let mut vec = Vec::from_iter(data.into_iter());
+        let mut vec = Vec::from_iter(data);
         vec.sort_unstable_by_key(_f);
         vec.into_iter()
     }
diff --git a/pag-parser/src/lib.rs b/pag-parser/src/lib.rs
index a980cfc..0687b7b 100644
--- a/pag-parser/src/lib.rs
+++ b/pag-parser/src/lib.rs
@@ -80,7 +80,7 @@ impl<'src> Error<'src> {
                         Report::build(ReportKind::Error, input_name, span.start)
                             .with_message("Syntax error in grammar definition")
                             .with_label(Label::new((input_name, span))
-                                .with_message(format!("{}", x.variant.message()))
+                                .with_message(x.variant.message())
                                 .with_color(Color::Red))
                             .finish()
                     },
@@ -88,7 +88,7 @@ impl<'src> Error<'src> {
                         Report::build(ReportKind::Error, input_name, span.start())
                             .with_message("Format error in grammar definition")
                             .with_label(Label::new((input_name, span.start()..span.end()))
-                                .with_message(format!("{}", message))
+                                .with_message(message)
                                 .with_color(Color::Red))
                             .finish()
                     },

From f7792d64fa8ac1085d3e7eb61b178db060d03ada Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Tue, 25 Jul 2023 04:02:08 +0800
Subject: [PATCH 04/42] impl prototype of new frontend (wip)

---
 Cargo.toml                        |  11 +-
 pag-parser2/Cargo.toml            |  26 +++++
 pag-parser2/src/frontend/ast.rs   |  56 ++++++++++
 pag-parser2/src/frontend/mod.rs   |  13 +++
 pag-parser2/src/frontend/parse.rs | 165 ++++++++++++++++++++++++++++++
 pag-parser2/src/lib.rs            |   9 ++
 6 files changed, 270 insertions(+), 10 deletions(-)
 create mode 100644 pag-parser2/Cargo.toml
 create mode 100644 pag-parser2/src/frontend/ast.rs
 create mode 100644 pag-parser2/src/frontend/mod.rs
 create mode 100644 pag-parser2/src/frontend/parse.rs
 create mode 100644 pag-parser2/src/lib.rs

diff --git a/Cargo.toml b/Cargo.toml
index adb3388..7a4017e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,16 +7,7 @@
 # modified, or distributed except according to those terms.
 
 [workspace]
-members = [
-    "pag-lexer",
-    "pag-parser",
-    "pag-compiler",
-    "tests/sexpr-calculator",
-    "tests/arith-expr",
-    "tests/tokenizer",
-    "benches/csv",
-    "benches/json",
-]
+members = ["pag-*", "tests/*", "benches/*"]
 resolver = "2"
 
 [workspace.package]
diff --git a/pag-parser2/Cargo.toml b/pag-parser2/Cargo.toml
new file mode 100644
index 0000000..ce26614
--- /dev/null
+++ b/pag-parser2/Cargo.toml
@@ -0,0 +1,26 @@
+# Copyright (c) 2023 Paguroidea Developers
+#
+# Licensed under the Apache License, Version 2.0
+# <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+# license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. All files in the project carrying such notice may not be copied,
+# modified, or distributed except according to those terms.
+
+[package]
+name = "pag-parser2"
+keywords = ["parser", "cfg", "grammar"]
+description = "Parser-lexer fusion generator (parser generator)"
+documentation = "https://docs.rs/pag-parser/"
+
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+exclude.workspace = true
+categories.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+readme.workspace = true
+
+[dependencies]
+syn = "2.0.27"
diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
new file mode 100644
index 0000000..f591cbf
--- /dev/null
+++ b/pag-parser2/src/frontend/ast.rs
@@ -0,0 +1,56 @@
+// Copyright (c) 2023 Paguroidea Developers
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use std::collections::HashMap;
+
+pub struct Ast {
+    pub entry: syn::Ident,
+    pub skip: Option<LexerTree>,
+    pub lexer_map: HashMap<syn::Ident, LexerTree>,
+    pub parser_map: HashMap<syn::Ident, ParserDef>,
+}
+
+pub struct ParserDef {
+    pub ty: syn::Type,
+    pub rules: Vec<ParserRule>,
+}
+
+pub struct ParserRule {
+    pub bindings: Vec<ParserBinding>,
+    pub action: Option<syn::Block>,
+}
+
+pub struct ParserBinding {
+    pub name: Option<syn::Ident>,
+    pub ty: Option<syn::Type>,
+    pub tree: ParserTree,
+}
+
+// TODO: how to express "bottom" & "empty"?
+pub enum LexerTree {
+    Alt(Vec<Box<Self>>),
+    Seq(Vec<Box<Self>>),
+    And(Vec<Box<Self>>),
+    Star(Box<Self>),
+    Plus(Box<Self>),
+    Opt(Box<Self>),
+    Not(Box<Self>),
+    Ref(syn::Ident),
+    Str(syn::LitStr),
+    Range(syn::LitChar, syn::LitChar),
+}
+
+// TODO: how to express "select" & "ignore"?
+pub enum ParserTree {
+    Seq(Vec<Box<Self>>),
+    Star(Box<Self>),
+    Plus(Box<Self>),
+    Opt(Box<Self>),
+    LexerRef(syn::Ident),
+    ParserRef(syn::Ident),
+}
diff --git a/pag-parser2/src/frontend/mod.rs b/pag-parser2/src/frontend/mod.rs
new file mode 100644
index 0000000..777e86d
--- /dev/null
+++ b/pag-parser2/src/frontend/mod.rs
@@ -0,0 +1,13 @@
+// Copyright (c) 2023 Paguroidea Developers
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+mod ast;
+mod parse;
+
+pub use ast::*;
+pub use parse::*;
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
new file mode 100644
index 0000000..1eb6c61
--- /dev/null
+++ b/pag-parser2/src/frontend/parse.rs
@@ -0,0 +1,165 @@
+// Copyright (c) 2023 Paguroidea Developers
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+use super::ast::*;
+
+use syn::parse::{Parse, ParseStream};
+use syn::punctuated::Punctuated;
+use syn::{parse_quote, Token};
+
+use std::collections::HashMap;
+
+enum IdentKind {
+    LexerName,
+    ParserName,
+    Invalid,
+}
+
+fn ident_kind(ident: &syn::Ident) -> IdentKind {
+    let s = ident.to_string(); // TODO: should we add a `.unraw()` ?
+    if s.chars().all(|c| matches!(c, 'A'..='Z' | '0'..='9' | '_')) {
+        return IdentKind::LexerName;
+    }
+    if s.chars().all(|c| matches!(c, 'a'..='z' | '0'..='9' | '_')) {
+        return IdentKind::ParserName;
+    }
+    IdentKind::Invalid
+}
+
+impl Parse for Ast {
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        let mut entry = None;
+        let mut skip = None;
+        let mut lexer_map = HashMap::new();
+        let mut parser_map = HashMap::new();
+
+        while !input.is_empty() {
+            if input.peek(Token![%]) {
+                // parse keyword
+                input.parse::<Token![%]>()?;
+                let ident = input.parse::<syn::Ident>()?;
+                match ident.to_string().as_str() {
+                    "entry" => {
+                        input.parse::<Token![=]>()?;
+                        entry = Some(input.parse::<syn::Ident>()?);
+                    }
+                    "skip" => {
+                        input.parse::<Token![=]>()?;
+                        skip = Some(input.parse::<LexerTree>()?);
+                    }
+                    _ => return Err(syn::Error::new(ident.span(), "invalid keyword")),
+                }
+            } else {
+                // parse lexer / parser definitions
+                let ident = input.parse::<syn::Ident>()?;
+                match ident_kind(&ident) {
+                    IdentKind::LexerName => {
+                        input.parse::<Token![=]>()?;
+                        lexer_map.insert(ident, input.parse::<LexerTree>()?);
+                    }
+                    IdentKind::ParserName => {
+                        parser_map.insert(ident, input.parse::<ParserDef>()?);
+                    }
+                    _ => return Err(syn::Error::new(ident.span(), "invalid ident")),
+                }
+            }
+            input.parse::<Token![;]>()?;
+        }
+
+        Ok(Self {
+            entry: entry.ok_or_else(|| input.error("missing %entry"))?,
+            skip,
+            lexer_map,
+            parser_map,
+        })
+    }
+}
+
+impl Parse for ParserDef {
+    // (":" syn::Type)? = (ParserRule)|+
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        let ty = match input.parse::<Token![:]>() {
+            Ok(_) => input.parse::<syn::Type>()?,
+            Err(_) => parse_quote!(&'src str),
+        };
+
+        input.parse::<Token![=]>()?;
+
+        // let mut rules = Vec::new();
+        // loop {
+        //     rules.push(input.parse::<ParserRule>()?);
+        //     if !input.peek(Token![|]) {
+        //         break;
+        //     }
+        //     input.parse::<Token![|]>();
+        // }
+
+        // TODO: check whether this is in-place
+        let rules = Punctuated::<ParserRule, Token![|]>::parse_separated_nonempty(input)?
+            .into_iter()
+            .collect();
+
+        Ok(Self { ty, rules })
+    }
+}
+
+impl Parse for ParserRule {
+    // (ParserBinding)+ syn::Block?
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        let mut bindings = Vec::new();
+        while !input.peek(syn::token::Brace) && !input.peek(Token![|]) && !input.peek(Token![;]) {
+            bindings.push(input.parse::<ParserBinding>()?);
+        }
+
+        let mut action = None;
+        if input.peek(syn::token::Brace) {
+            action = Some(input.parse::<syn::Block>()?);
+        }
+
+        Ok(Self { bindings, action })
+    }
+}
+
+impl Parse for ParserBinding {
+    // ("$" syn::Ident ("<" syn::Type ">")? ":")? ParserTree
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        let mut name = None;
+        let mut ty = None;
+        // Optional `$name<Type>:` prefix; when it is absent both fields stay `None`.
+        if input.peek(Token![$]) {
+            input.parse::<Token![$]>()?;
+            name = Some(input.parse::<syn::Ident>()?);
+            // Optional explicit type for the bound variable.
+            if input.peek(Token![<]) {
+                input.parse::<Token![<]>()?;
+                ty = Some(input.parse::<syn::Type>()?);
+                input.parse::<Token![>]>()?;
+            }
+            // Once a `$` prefix has started, the closing `:` is mandatory.
+            input.parse::<Token![:]>()?;
+        }
+
+        let tree = input.parse::<ParserTree>()?;
+
+        Ok(Self { name, ty, tree })
+    }
+}
+
+impl Parse for LexerTree {
+    // pratt parsing
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        todo!()
+    }
+}
+
+impl Parse for ParserTree {
+    // pratt parsing
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        todo!()
+    }
+}
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
new file mode 100644
index 0000000..3602390
--- /dev/null
+++ b/pag-parser2/src/lib.rs
@@ -0,0 +1,9 @@
+// Copyright (c) 2023 Paguroidea Developers
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
+mod frontend;

From 6386241c259102ef1909cef39570cbe68ba3bebe Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Mon, 24 Jul 2023 23:54:05 -0400
Subject: [PATCH 05/42] initial work to add semact

---
 pag-parser2/Cargo.toml            |  1 +
 pag-parser2/src/frontend/ast.rs   |  8 +++---
 pag-parser2/src/frontend/parse.rs |  4 +--
 pag-parser2/src/lib.rs            |  1 +
 pag-parser2/src/nf/mod.rs         | 45 +++++++++++++++++++++++++++++++
 pag-parser2/src/nf/semact.rs      | 41 ++++++++++++++++++++++++++++
 6 files changed, 94 insertions(+), 6 deletions(-)
 create mode 100644 pag-parser2/src/nf/mod.rs
 create mode 100644 pag-parser2/src/nf/semact.rs

diff --git a/pag-parser2/Cargo.toml b/pag-parser2/Cargo.toml
index ce26614..925356d 100644
--- a/pag-parser2/Cargo.toml
+++ b/pag-parser2/Cargo.toml
@@ -24,3 +24,4 @@ readme.workspace = true
 
 [dependencies]
 syn = "2.0.27"
+quote = "1.0.9"
\ No newline at end of file
diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index f591cbf..caadc3c 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -33,9 +33,9 @@ pub struct ParserBinding {
 
 // TODO: how to express "bottom" & "empty"?
 pub enum LexerTree {
-    Alt(Vec<Box<Self>>),
-    Seq(Vec<Box<Self>>),
-    And(Vec<Box<Self>>),
+    Alt(Vec<Self>),
+    Seq(Vec<Self>),
+    And(Vec<Self>),
     Star(Box<Self>),
     Plus(Box<Self>),
     Opt(Box<Self>),
@@ -47,7 +47,7 @@ pub enum LexerTree {
 
 // TODO: how to express "select" & "ignore"?
 pub enum ParserTree {
-    Seq(Vec<Box<Self>>),
+    Seq(Vec<Self>),
     Star(Box<Self>),
     Plus(Box<Self>),
     Opt(Box<Self>),
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 1eb6c61..8f81715 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -152,14 +152,14 @@ impl Parse for ParserBinding {
 
 impl Parse for LexerTree {
     // pratt parsing
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(_input: ParseStream) -> syn::Result<Self> {
         todo!()
     }
 }
 
 impl Parse for ParserTree {
     // pratt parsing
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(_input: ParseStream) -> syn::Result<Self> {
         todo!()
     }
 }
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
index 3602390..11e33c1 100644
--- a/pag-parser2/src/lib.rs
+++ b/pag-parser2/src/lib.rs
@@ -7,3 +7,4 @@
 // modified, or distributed except according to those terms.
 
 mod frontend;
+mod nf;
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
new file mode 100644
index 0000000..02a0655
--- /dev/null
+++ b/pag-parser2/src/nf/mod.rs
@@ -0,0 +1,45 @@
+use quote::format_ident;
+use syn::Ident;
+mod semact;
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Tag {
+    Toplevel(Ident),
+    Anonymous(usize),
+}
+
+impl Tag {
+    pub fn toplevel(ident: Ident) -> Self {
+        Self::Toplevel(ident)
+    }
+    pub fn anonymous(index: usize) -> Self {
+        Self::Anonymous(index)
+    }
+    /// Identifier of the parser routine.
+    pub fn parser_name(&self) -> Ident {
+        match self {
+            Self::Anonymous(index) => format_ident!("__anonymous_{}", index),
+            Self::Toplevel(ident) => format_ident!("parse_{}", ident),
+        }
+    }
+}
+
+/// Action in the normal form.
+/// If this subroutine's return value is taken, it should mark [`Action::output`] as `true`.
+/// There is no need to assign an ident to a subroutine. As we are always
+/// reducing from left to right, we maintain the context of which the current
+/// semantic action to reduce, and always assign "__0", "__1", "__2". When a [`Reduce`] is
+/// encountered, we start over from "__0".
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Action {
+    Shift {
+        /// Parser routine to call.
+        tag: Tag,
+        output: bool,
+    },
+    Reduce {
+        /// Reduction routine to call.
+        tag: Tag,
+        output: bool,
+    },
+}
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
new file mode 100644
index 0000000..1673dae
--- /dev/null
+++ b/pag-parser2/src/nf/semact.rs
@@ -0,0 +1,41 @@
+use std::collections::HashMap;
+
+use quote::format_ident;
+use syn::{parse_quote, Expr, ExprCall, Stmt, Type};
+
+use super::Tag;
+
+pub type SemActTable = HashMap<Tag, SemAct>;
+pub struct SemAct {
+    /// Identifier of the semantic action routine.
+    function: Expr,
+    /// Type annotation
+    ty: Option<Type>,
+    /// Number of arguments
+    arity: usize,
+}
+
+impl SemAct {
+    fn generate_call(&self) -> ExprCall {
+        let exprs = (0..self.arity).map(|i| format_ident!("__{}", i));
+        let function = &self.function;
+        parse_quote!(
+            #function(#(#exprs),*)
+        )
+    }
+    pub fn generate_statement(&self, output: Option<usize>) -> Stmt {
+        let expr = self.generate_call();
+        match output {
+            None => parse_quote!(
+                #expr;
+            ),
+            Some(index) => {
+                let ty = self.ty.iter();
+                let output = format_ident!("__{}", index);
+                parse_quote!(
+                    let #output #(: #ty)* = #expr;
+                )
+            }
+        }
+    }
+}

From 4e87b65295d874b5adde42687843908ee0f91c98 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Tue, 25 Jul 2023 17:28:54 +0800
Subject: [PATCH 06/42] minor update

---
 pag-parser/src/frontend/mod.rs         |  4 ++-
 pag-parser/src/frontend/syntax.rs      |  2 +-
 pag-parser/src/fusion.rs               |  4 ++-
 pag-parser/src/nf.rs                   | 13 ++++++--
 pag-parser/src/type_system/fixpoint.rs |  4 ++-
 pag-parser/src/utilities.rs            |  4 ++-
 pag-parser2/Cargo.toml                 |  4 +--
 pag-parser2/src/frontend/ast.rs        | 16 +++++-----
 pag-parser2/src/frontend/parse.rs      | 42 +++++++++++---------------
 pag-parser2/src/nf/mod.rs              |  8 +++++
 pag-parser2/src/nf/semact.rs           |  8 +++++
 11 files changed, 67 insertions(+), 42 deletions(-)

diff --git a/pag-parser/src/frontend/mod.rs b/pag-parser/src/frontend/mod.rs
index 8fa04b7..067624e 100644
--- a/pag-parser/src/frontend/mod.rs
+++ b/pag-parser/src/frontend/mod.rs
@@ -493,7 +493,9 @@ mod test {
         dbg!(size_of::<NormalForm>());
         let pairs = GrammarParser::parse(Rule::grammar, TEST).unwrap();
         let tree = parse_surface_syntax(pairs, &PRATT_PARSER, TEST).unwrap();
-        let Grammar { lexer, parser } = &tree.node else { unreachable!() };
+        let Grammar { lexer, parser } = &tree.node else {
+            unreachable!()
+        };
 
         println!("\n---------< construct lexer database >----------");
         let database = construct_lexer_database(lexer).unwrap();
diff --git a/pag-parser/src/frontend/syntax.rs b/pag-parser/src/frontend/syntax.rs
index 6119d92..b1ee998 100644
--- a/pag-parser/src/frontend/syntax.rs
+++ b/pag-parser/src/frontend/syntax.rs
@@ -71,7 +71,7 @@ pub fn construct_parser<'src, 'arena>(
     };
     let mut errs = Vec::new();
     for rule in rules {
-        let ParserRuleDef { active, name, expr, } = &rule.node else {
+        let ParserRuleDef { active, name, expr } = &rule.node else {
             unreachable_branch!("parser should only contain rule definitions")
         };
         match construct_core_syntax_tree(&parser, expr) {
diff --git a/pag-parser/src/fusion.rs b/pag-parser/src/fusion.rs
index f8c12b2..a6a289b 100644
--- a/pag-parser/src/fusion.rs
+++ b/pag-parser/src/fusion.rs
@@ -199,7 +199,9 @@ fn generate_children<'src>(
         .iter()
         .filter(|x| !matches!(x, NormalForm::Empty(..)))
         .map(|nf| {
-            let NormalForm::Sequence { nonterminals, .. } = nf else { unreachable!() };
+            let NormalForm::Sequence { nonterminals, .. } = nf else {
+                unreachable!()
+            };
 
             let mut add_continue = false;
             let mut actions = Vec::new();
diff --git a/pag-parser/src/nf.rs b/pag-parser/src/nf.rs
index 47433e8..3332861 100644
--- a/pag-parser/src/nf.rs
+++ b/pag-parser/src/nf.rs
@@ -297,7 +297,10 @@ pub fn merge_inactive_rules<'src, 'nf>(
                 let NormalForm::Sequence {
                     terminal,
                     nonterminals,
-                } = j else { continue };
+                } = j
+                else {
+                    continue;
+                };
                 if nonterminals.contains(&Action::Subroutine(tag)) {
                     *j = &*arena.alloc(NormalForm::Sequence {
                         terminal: *terminal,
@@ -328,9 +331,13 @@ pub fn remove_unreachable_rules<'src>(nfs: &mut NormalForms<'src, '_>, parser: &
             return;
         }
         visited.insert(current);
-        let Some(tag) = nfs.entries.get(&current) else { return };
+        let Some(tag) = nfs.entries.get(&current) else {
+            return;
+        };
         for i in tag {
-            let NormalForm::Sequence { nonterminals, .. } = i else { continue };
+            let NormalForm::Sequence { nonterminals, .. } = i else {
+                continue;
+            };
             for i in nonterminals {
                 let Action::Subroutine(x) = i else { continue };
                 dfs(nfs, *x, visited);
diff --git a/pag-parser/src/type_system/fixpoint.rs b/pag-parser/src/type_system/fixpoint.rs
index a6e297a..3e4ca85 100644
--- a/pag-parser/src/type_system/fixpoint.rs
+++ b/pag-parser/src/type_system/fixpoint.rs
@@ -39,7 +39,9 @@ fn find_neighbors(
         Term::Fix(_, expr) => find_neighbors(expr, neighbors, sym_to_id),
         Term::ParserRef(symbol) => {
             // unexisted IDs refer to implicit fixpoints
-            let Some(&id) = sym_to_id.get(symbol) else { return };
+            let Some(&id) = sym_to_id.get(symbol) else {
+                return;
+            };
             neighbors.push(id);
         }
         _ => {}
diff --git a/pag-parser/src/utilities.rs b/pag-parser/src/utilities.rs
index c1beeca..487e503 100644
--- a/pag-parser/src/utilities.rs
+++ b/pag-parser/src/utilities.rs
@@ -43,7 +43,9 @@ fn is_ascii_ident_head(x: &u8) -> bool {
 }
 
 fn is_ascii_ident(s: &str) -> bool {
-    let [x, xs@..] = s.as_bytes() else { return false };
+    let [x, xs @ ..] = s.as_bytes() else {
+        return false;
+    };
     is_ascii_ident_head(x) && xs.iter().all(is_ascii_ident_body)
 }
 
diff --git a/pag-parser2/Cargo.toml b/pag-parser2/Cargo.toml
index 925356d..e9a6228 100644
--- a/pag-parser2/Cargo.toml
+++ b/pag-parser2/Cargo.toml
@@ -23,5 +23,5 @@ authors.workspace = true
 readme.workspace = true
 
 [dependencies]
-syn = "2.0.27"
-quote = "1.0.9"
\ No newline at end of file
+syn = { version = "2.0.27", features = ["full"] }
+quote = "1.0.9"
diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index caadc3c..f8dd109 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -10,8 +10,8 @@ use std::collections::HashMap;
 
 pub struct Ast {
     pub entry: syn::Ident,
-    pub skip: Option<LexerTree>,
-    pub lexer_map: HashMap<syn::Ident, LexerTree>,
+    pub skip: Option<LexerExpr>,
+    pub lexer_map: HashMap<syn::Ident, LexerExpr>,
     pub parser_map: HashMap<syn::Ident, ParserDef>,
 }
 
@@ -21,18 +21,18 @@ pub struct ParserDef {
 }
 
 pub struct ParserRule {
-    pub bindings: Vec<ParserBinding>,
+    pub vars: Vec<VarBinding>,
     pub action: Option<syn::Block>,
 }
 
-pub struct ParserBinding {
+pub struct VarBinding {
     pub name: Option<syn::Ident>,
     pub ty: Option<syn::Type>,
-    pub tree: ParserTree,
+    pub expr: ParserExpr,
 }
 
-// TODO: how to express "bottom" & "empty"?
-pub enum LexerTree {
+// TODO: how to express "bottom" & "any"?
+pub enum LexerExpr {
     Alt(Vec<Self>),
     Seq(Vec<Self>),
     And(Vec<Self>),
@@ -46,7 +46,7 @@ pub enum LexerTree {
 }
 
 // TODO: how to express "select" & "ignore"?
-pub enum ParserTree {
+pub enum ParserExpr {
     Seq(Vec<Self>),
     Star(Box<Self>),
     Plus(Box<Self>),
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 8f81715..23ae51b 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -9,7 +9,6 @@
 use super::ast::*;
 
 use syn::parse::{Parse, ParseStream};
-use syn::punctuated::Punctuated;
 use syn::{parse_quote, Token};
 
 use std::collections::HashMap;
@@ -50,7 +49,7 @@ impl Parse for Ast {
                     }
                     "skip" => {
                         input.parse::<Token![=]>()?;
-                        skip = Some(input.parse::<LexerTree>()?);
+                        skip = Some(input.parse::<LexerExpr>()?);
                     }
                     _ => return Err(syn::Error::new(ident.span(), "invalid keyword")),
                 }
@@ -60,7 +59,7 @@ impl Parse for Ast {
                 match ident_kind(&ident) {
                     IdentKind::LexerName => {
                         input.parse::<Token![=]>()?;
-                        lexer_map.insert(ident, input.parse::<LexerTree>()?);
+                        lexer_map.insert(ident, input.parse::<LexerExpr>()?);
                     }
                     IdentKind::ParserName => {
                         parser_map.insert(ident, input.parse::<ParserDef>()?);
@@ -90,19 +89,14 @@ impl Parse for ParserDef {
 
         input.parse::<Token![=]>()?;
 
-        // let mut rules = Vec::new();
-        // loop {
-        //     rules.push(input.parse::<ParserRule>()?);
-        //     if !input.peek(Token![|]) {
-        //         break;
-        //     }
-        //     input.parse::<Token![|]>();
-        // }
-
-        // TODO: check whether this is in-place
-        let rules = Punctuated::<ParserRule, Token![|]>::parse_separated_nonempty(input)?
-            .into_iter()
-            .collect();
+        let mut rules = Vec::new();
+        loop {
+            rules.push(input.parse::<ParserRule>()?);
+            if !input.peek(Token![|]) {
+                break;
+            }
+            input.parse::<Token![|]>()?;
+        }
 
         Ok(Self { ty, rules })
     }
@@ -111,9 +105,9 @@ impl Parse for ParserDef {
 impl Parse for ParserRule {
     // (ParserBinding)+ syn::Block?
     fn parse(input: ParseStream) -> syn::Result<Self> {
-        let mut bindings = Vec::new();
+        let mut vars = Vec::new();
         while !input.peek(syn::token::Brace) && !input.peek(Token![|]) && !input.peek(Token![;]) {
-            bindings.push(input.parse::<ParserBinding>()?);
+            vars.push(input.parse::<VarBinding>()?);
         }
 
         let mut action = None;
@@ -121,11 +115,11 @@ impl Parse for ParserRule {
             action = Some(input.parse::<syn::Block>()?);
         }
 
-        Ok(Self { bindings, action })
+        Ok(Self { vars, action })
     }
 }
 
-impl Parse for ParserBinding {
+impl Parse for VarBinding {
     // ("$" syn::Ident ("<" syn::Type ">")? ":")? ParserTree
     fn parse(input: ParseStream) -> syn::Result<Self> {
         let mut name = None;
@@ -144,20 +138,20 @@ impl Parse for ParserBinding {
             input.parse::<Token![:]>()?;
         }
 
-        let tree = input.parse::<ParserTree>()?;
+        let expr = input.parse::<ParserExpr>()?;
 
-        Ok(Self { name, ty, tree })
+        Ok(Self { name, ty, expr })
     }
 }
 
-impl Parse for LexerTree {
+impl Parse for LexerExpr {
     // pratt parsing
     fn parse(_input: ParseStream) -> syn::Result<Self> {
         todo!()
     }
 }
 
-impl Parse for ParserTree {
+impl Parse for ParserExpr {
     // pratt parsing
     fn parse(_input: ParseStream) -> syn::Result<Self> {
         todo!()
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 02a0655..29d6c92 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -1,3 +1,11 @@
+// Copyright (c) 2023 Paguroidea Developers
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
 use quote::format_ident;
 use syn::Ident;
 mod semact;
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index 1673dae..15d5c42 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -1,3 +1,11 @@
+// Copyright (c) 2023 Paguroidea Developers
+//
+// Licensed under the Apache License, Version 2.0
+// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. All files in the project carrying such notice may not be copied,
+// modified, or distributed except according to those terms.
+
 use std::collections::HashMap;
 
 use quote::format_ident;

From cc5b341ef728d646676d649fe5e3f7b7786675ed Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Tue, 25 Jul 2023 19:34:11 +0800
Subject: [PATCH 07/42] impl LexerExpr::parse

---
 pag-parser2/src/frontend/ast.rs   |   8 +-
 pag-parser2/src/frontend/parse.rs | 127 ++++++++++++++++++++++++++++--
 2 files changed, 126 insertions(+), 9 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index f8dd109..a09b1df 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -33,9 +33,9 @@ pub struct VarBinding {
 
 // TODO: how to express "bottom" & "any"?
 pub enum LexerExpr {
-    Alt(Vec<Self>),
-    Seq(Vec<Self>),
-    And(Vec<Self>),
+    Alt(Box<Self>, Box<Self>),
+    Seq(Box<Self>, Box<Self>),
+    And(Box<Self>, Box<Self>),
     Star(Box<Self>),
     Plus(Box<Self>),
     Opt(Box<Self>),
@@ -47,7 +47,7 @@ pub enum LexerExpr {
 
 // TODO: how to express "select" & "ignore"?
 pub enum ParserExpr {
-    Seq(Vec<Self>),
+    Seq(Box<Self>, Box<Self>),
     Star(Box<Self>),
     Plus(Box<Self>),
     Opt(Box<Self>),
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 23ae51b..c80e68c 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -8,11 +8,13 @@
 
 use super::ast::*;
 
+use syn::ext::IdentExt;
 use syn::parse::{Parse, ParseStream};
-use syn::{parse_quote, Token};
+use syn::{parenthesized, parse_quote, Token};
 
 use std::collections::HashMap;
 
+#[derive(PartialEq, Eq)]
 enum IdentKind {
     LexerName,
     ParserName,
@@ -20,7 +22,7 @@ enum IdentKind {
 }
 
 fn ident_kind(ident: &syn::Ident) -> IdentKind {
-    let s = ident.to_string(); // TODO: should we add a `.unraw()` ?
+    let s = ident.unraw().to_string();
     if s.chars().all(|c| matches!(c, 'A'..='Z' | '0'..='9' | '_')) {
         return IdentKind::LexerName;
     }
@@ -145,15 +147,130 @@ impl Parse for VarBinding {
 }
 
 impl Parse for LexerExpr {
-    // pratt parsing
-    fn parse(_input: ParseStream) -> syn::Result<Self> {
-        todo!()
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        parse_lexer_expr(input, 0)
     }
 }
 
+// pratt parsing
+fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> syn::Result<LexerExpr> {
+    let mut lhs = 'lhs: {
+        if input.peek(syn::Ident) {
+            let ident = input.parse::<syn::Ident>()?;
+            if ident_kind(&ident) != IdentKind::LexerName {
+                return Err(syn::Error::new(ident.span(), "invalid ident"));
+            }
+            break 'lhs LexerExpr::Ref(ident);
+        }
+        if input.peek(syn::LitStr) {
+            let str = input.parse::<syn::LitStr>()?;
+            break 'lhs LexerExpr::Str(str);
+        }
+        if input.peek(syn::LitChar) {
+            let l = input.parse::<syn::LitChar>()?;
+            input.parse::<Token![..]>()?;
+            let r = input.parse::<syn::LitChar>()?;
+            break 'lhs LexerExpr::Range(l, r);
+        }
+        if input.peek(syn::token::Paren) {
+            let content;
+            parenthesized!(content in input);
+            break 'lhs content.parse::<LexerExpr>()?;
+        }
+        if input.peek(Token![!]) {
+            input.parse::<Token![!]>()?;
+            let r_bp = 60;
+            let rhs = parse_lexer_expr(input, r_bp)?;
+            break 'lhs LexerExpr::Not(Box::new(rhs));
+        }
+        return Err(input.error("expect lexer expression"));
+    };
+
+    loop {
+        if input.peek(Token![|]) {
+            let (l_bp, r_bp) = (30, 31);
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![|]>()?;
+            let rhs = parse_lexer_expr(input, r_bp)?;
+            lhs = LexerExpr::Alt(Box::new(lhs), Box::new(rhs));
+            continue;
+        }
+        if input.peek(syn::Ident)
+            || input.peek(syn::LitStr)
+            || input.peek(syn::LitChar)
+            || input.peek(syn::token::Paren)
+            || input.peek(syn::token::Paren)
+            || input.peek(Token![!])
+        {
+            let (l_bp, r_bp) = (40, 41);
+            if l_bp < min_bp {
+                break;
+            }
+            let rhs = parse_lexer_expr(input, r_bp)?;
+            lhs = LexerExpr::Seq(Box::new(lhs), Box::new(rhs));
+            continue;
+        }
+        if input.peek(Token![&]) {
+            let (l_bp, r_bp) = (50, 51);
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![&]>()?;
+            let rhs = parse_lexer_expr(input, r_bp)?;
+            lhs = LexerExpr::And(Box::new(lhs), Box::new(rhs));
+            continue;
+        }
+        if input.peek(Token![*]) {
+            let l_bp = 70;
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![*]>()?;
+            lhs = LexerExpr::Star(Box::new(lhs));
+            continue;
+        }
+        if input.peek(Token![+]) {
+            let l_bp = 80;
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![+]>()?;
+            lhs = LexerExpr::Plus(Box::new(lhs));
+            continue;
+        }
+        if input.peek(Token![?]) {
+            let l_bp = 90;
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![?]>()?;
+            lhs = LexerExpr::Opt(Box::new(lhs));
+            continue;
+        }
+        break;
+    }
+
+    Ok(lhs)
+}
+
 impl Parse for ParserExpr {
     // pratt parsing
     fn parse(_input: ParseStream) -> syn::Result<Self> {
         todo!()
     }
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_lexer_expr() {
+        syn::parse_str::<LexerExpr>(r#"("abc" 'a'..'z') r#A | B & C | D* E+ F? !G"#).unwrap();
+    }
+
+    #[test]
+    fn test_parser_expr() {}
+}

From 17561e830bcfc6408ebb8283973c4444b4079f41 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Tue, 25 Jul 2023 19:45:51 +0800
Subject: [PATCH 08/42] impl ParserExpr::parse

---
 pag-parser2/src/frontend/parse.rs | 76 +++++++++++++++++++++++++++----
 1 file changed, 67 insertions(+), 9 deletions(-)

diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index c80e68c..0b0859e 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -22,7 +22,7 @@ enum IdentKind {
 }
 
 fn ident_kind(ident: &syn::Ident) -> IdentKind {
-    let s = ident.unraw().to_string();
+    let s = ident.to_string();
     if s.chars().all(|c| matches!(c, 'A'..='Z' | '0'..='9' | '_')) {
         return IdentKind::LexerName;
     }
@@ -43,7 +43,7 @@ impl Parse for Ast {
             if input.peek(Token![%]) {
                 // parse keyword
                 input.parse::<Token![%]>()?;
-                let ident = input.parse::<syn::Ident>()?;
+                let ident = input.parse::<syn::Ident>()?.unraw();
                 match ident.to_string().as_str() {
                     "entry" => {
                         input.parse::<Token![=]>()?;
@@ -57,7 +57,7 @@ impl Parse for Ast {
                 }
             } else {
                 // parse lexer / parser definitions
-                let ident = input.parse::<syn::Ident>()?;
+                let ident = input.parse::<syn::Ident>()?.unraw();
                 match ident_kind(&ident) {
                     IdentKind::LexerName => {
                         input.parse::<Token![=]>()?;
@@ -129,7 +129,7 @@ impl Parse for VarBinding {
 
         if input.peek(Token![$]) {
             input.parse::<Token![%]>()?;
-            name = Some(input.parse::<syn::Ident>()?);
+            name = Some(input.parse::<syn::Ident>()?.unraw());
 
             if input.peek(Token![<]) {
                 input.parse::<Token![<]>()?;
@@ -156,7 +156,7 @@ impl Parse for LexerExpr {
 fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> syn::Result<LexerExpr> {
     let mut lhs = 'lhs: {
         if input.peek(syn::Ident) {
-            let ident = input.parse::<syn::Ident>()?;
+            let ident = input.parse::<syn::Ident>()?.unraw();
             if ident_kind(&ident) != IdentKind::LexerName {
                 return Err(syn::Error::new(ident.span(), "invalid ident"));
             }
@@ -256,10 +256,66 @@ fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> syn::Result<LexerExpr> {
 }
 
 impl Parse for ParserExpr {
-    // pratt parsing
-    fn parse(_input: ParseStream) -> syn::Result<Self> {
-        todo!()
+    fn parse(input: ParseStream) -> syn::Result<Self> {
+        parse_parser_expr(input, 0)
+    }
+}
+
+// pratt parsing
+fn parse_parser_expr(input: ParseStream, min_bp: u32) -> syn::Result<ParserExpr> {
+    let mut lhs = 'lhs: {
+        if input.peek(syn::Ident) {
+            let ident = input.parse::<syn::Ident>()?.unraw();
+            match ident_kind(&ident) {
+                IdentKind::LexerName => break 'lhs ParserExpr::LexerRef(ident),
+                IdentKind::ParserName => break 'lhs ParserExpr::ParserRef(ident),
+                _ => return Err(syn::Error::new(ident.span(), "invalid ident")),
+            }
+        }
+        return Err(input.error("expect lexer expression"));
+    };
+
+    loop {
+        if input.peek(syn::Ident) {
+            let (l_bp, r_bp) = (40, 41);
+            if l_bp < min_bp {
+                break;
+            }
+            let rhs = parse_parser_expr(input, r_bp)?;
+            lhs = ParserExpr::Seq(Box::new(lhs), Box::new(rhs));
+            continue;
+        }
+        if input.peek(Token![*]) {
+            let l_bp = 70;
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![*]>()?;
+            lhs = ParserExpr::Star(Box::new(lhs));
+            continue;
+        }
+        if input.peek(Token![+]) {
+            let l_bp = 80;
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![+]>()?;
+            lhs = ParserExpr::Plus(Box::new(lhs));
+            continue;
+        }
+        if input.peek(Token![?]) {
+            let l_bp = 90;
+            if l_bp < min_bp {
+                break;
+            }
+            input.parse::<Token![?]>()?;
+            lhs = ParserExpr::Opt(Box::new(lhs));
+            continue;
+        }
+        break;
     }
+
+    Ok(lhs)
 }
 
 #[cfg(test)]
@@ -272,5 +328,7 @@ mod test {
     }
 
     #[test]
-    fn test_parser_expr() {}
+    fn test_parser_expr() {
+        syn::parse_str::<ParserExpr>(r#"A? b c* D+ F?"#).unwrap();
+    }
 }

From e7d40ff505e3c7798ee2ada6b8a824a6e4692a27 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Tue, 25 Jul 2023 20:13:43 +0800
Subject: [PATCH 09/42] finish new frontend parser

---
 pag-parser2/src/frontend/parse.rs | 39 ++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 0b0859e..0df6e2e 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -10,7 +10,7 @@ use super::ast::*;
 
 use syn::ext::IdentExt;
 use syn::parse::{Parse, ParseStream};
-use syn::{parenthesized, parse_quote, Token};
+use syn::{bracketed, parenthesized, parse_quote, Token};
 
 use std::collections::HashMap;
 
@@ -105,7 +105,7 @@ impl Parse for ParserDef {
 }
 
 impl Parse for ParserRule {
-    // (ParserBinding)+ syn::Block?
+    // (VarBinding)+ syn::Block?
     fn parse(input: ParseStream) -> syn::Result<Self> {
         let mut vars = Vec::new();
         while !input.peek(syn::token::Brace) && !input.peek(Token![|]) && !input.peek(Token![;]) {
@@ -122,19 +122,19 @@ impl Parse for ParserRule {
 }
 
 impl Parse for VarBinding {
-    // ("$" syn::Ident ("<" syn::Type ">")? ":")? ParserTree
+    // ("$" syn::Ident ("<" syn::Type ">")? ":")? ParserExpr
     fn parse(input: ParseStream) -> syn::Result<Self> {
         let mut name = None;
         let mut ty = None;
 
         if input.peek(Token![$]) {
-            input.parse::<Token![%]>()?;
+            input.parse::<Token![$]>()?;
             name = Some(input.parse::<syn::Ident>()?.unraw());
 
-            if input.peek(Token![<]) {
-                input.parse::<Token![<]>()?;
-                ty = Some(input.parse::<syn::Type>()?);
-                input.parse::<Token![>]>()?;
+            if input.peek(syn::token::Bracket) {
+                let content;
+                bracketed!(content in input);
+                ty = Some(content.parse::<syn::Type>()?);
             }
 
             input.parse::<Token![:]>()?;
@@ -331,4 +331,27 @@ mod test {
     fn test_parser_expr() {
         syn::parse_str::<ParserExpr>(r#"A? b c* D+ F?"#).unwrap();
     }
+
+    #[test]
+    fn test_full() {
+        syn::parse_str::<Ast>(
+            r#"
+            %entry = sexpr;
+
+            BLANK  = " ";
+            DIGIT  = '0'..'9';
+            ALPHA  = 'a'..'z' | 'A'..'Z';
+            LPAREN = "(";
+            RPAREN = ")";
+            ATOM   = ALPHA (ALPHA | DIGIT)*;
+            %skip  = (BLANK | "\t" | "\n" | "\r")+;
+
+            compound: SExp = LPAREN $sexp[Vec<_>]:sexp+ RPAREN { SExp::Compound(sexp) };
+            atom    : SExp = $atom:ATOM { SExp::Atom(atom) };
+            sexp    : SExp = compound
+                           | atom;
+            "#,
+        )
+        .unwrap();
+    }
 }

From 98d4da65587b84cb5e983d5ac9933a5fc379bfe1 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Tue, 25 Jul 2023 22:00:07 +0800
Subject: [PATCH 10/42] impl post-fix var binding

---
 pag-parser2/src/frontend/ast.rs   |  2 +-
 pag-parser2/src/frontend/parse.rs | 37 ++++++++++++++++---------------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index a09b1df..1b40038 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -26,9 +26,9 @@ pub struct ParserRule {
 }
 
 pub struct VarBinding {
+    pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
     pub ty: Option<syn::Type>,
-    pub expr: ParserExpr,
 }
 
 // TODO: how to express "bottom" & "any"?
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 0df6e2e..a7078c5 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -122,27 +122,29 @@ impl Parse for ParserRule {
 }
 
 impl Parse for VarBinding {
-    // ("$" syn::Ident ("<" syn::Type ">")? ":")? ParserExpr
+    // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> syn::Result<Self> {
+        let expr = input.parse::<ParserExpr>()?;
+
         let mut name = None;
         let mut ty = None;
 
-        if input.peek(Token![$]) {
-            input.parse::<Token![$]>()?;
-            name = Some(input.parse::<syn::Ident>()?.unraw());
+        if input.peek(syn::token::Bracket) {
+            let content;
+            bracketed!(content in input);
+            name = Some(content.parse::<syn::Ident>()?.unraw());
 
-            if input.peek(syn::token::Bracket) {
-                let content;
-                bracketed!(content in input);
+            if content.peek(Token![:]) {
+                content.parse::<Token![:]>()?;
                 ty = Some(content.parse::<syn::Type>()?);
             }
 
-            input.parse::<Token![:]>()?;
+            if !content.is_empty() {
+                return Err(content.error("expected `]`"));
+            }
         }
 
-        let expr = input.parse::<ParserExpr>()?;
-
-        Ok(Self { name, ty, expr })
+        Ok(Self { expr, name, ty })
     }
 }
 
@@ -183,7 +185,7 @@ fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> syn::Result<LexerExpr> {
             let rhs = parse_lexer_expr(input, r_bp)?;
             break 'lhs LexerExpr::Not(Box::new(rhs));
         }
-        return Err(input.error("expect lexer expression"));
+        return Err(input.error("expected lexer expression"));
     };
 
     loop {
@@ -272,7 +274,7 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> syn::Result<ParserExpr>
                 _ => return Err(syn::Error::new(ident.span(), "invalid ident")),
             }
         }
-        return Err(input.error("expect lexer expression"));
+        return Err(input.error("expected parser expression"));
     };
 
     loop {
@@ -336,18 +338,17 @@ mod test {
     fn test_full() {
         syn::parse_str::<Ast>(
             r#"
-            %entry = sexpr;
+            %entry = sexp;
 
-            BLANK  = " ";
             DIGIT  = '0'..'9';
             ALPHA  = 'a'..'z' | 'A'..'Z';
             LPAREN = "(";
             RPAREN = ")";
             ATOM   = ALPHA (ALPHA | DIGIT)*;
-            %skip  = (BLANK | "\t" | "\n" | "\r")+;
+            %skip  = (" " | "\t" | "\n" | "\r")+;
 
-            compound: SExp = LPAREN $sexp[Vec<_>]:sexp+ RPAREN { SExp::Compound(sexp) };
-            atom    : SExp = $atom:ATOM { SExp::Atom(atom) };
+            compound: SExp = LPAREN sexp+[sexp:Vec<_>] RPAREN { SExp::Compound(sexp) };
+            atom    : SExp = ATOM[atom] { SExp::Atom(atom) };
             sexp    : SExp = compound
                            | atom;
             "#,

From 4994ecb588325a83b6d43021a68a2a15e9e99d1b Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Tue, 25 Jul 2023 15:38:42 -0400
Subject: [PATCH 11/42] add more design details of semact

---
 pag-parser2/src/nf/semact.rs | 123 +++++++++++++++++++++++++++--------
 1 file changed, 96 insertions(+), 27 deletions(-)

diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index 15d5c42..bec6291 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -8,42 +8,111 @@
 
 use std::collections::HashMap;
 
-use quote::format_ident;
-use syn::{parse_quote, Expr, ExprCall, Stmt, Type};
-
 use super::Tag;
+use syn::{parse_quote, Expr, Type};
 
 pub type SemActTable = HashMap<Tag, SemAct>;
-pub struct SemAct {
-    /// Identifier of the semantic action routine.
-    function: Expr,
-    /// Type annotation
-    ty: Option<Type>,
-    /// Number of arguments
-    arity: usize,
+
+///
+/// ```
+/// trait Collector<T> {
+///     pub type Output;
+///     fn finalize(self) -> Self::Output;
+///     fn collect(&mut self, data: T);
+/// }
+///
+/// ```
+pub enum SemAct {
+    CustomizedRoutine {
+        /// Identifier of the semantic action routine.
+        function: Expr,
+        /// Type annotation
+        ret_type: Type,
+        /// Number of arguments
+        arity: usize,
+    },
+    /// Specialized for the inner of @(@a, @b, @c). Return an Tuple of the inner routine.
+    Tuple,
+    /// Specialized for `inner?`. Return an Option of the inner routine
+    Option { inner_type: Type },
+    /// Specialized for `i*`
+    /// Initialize a `Collector`  (requires `Default + Collector<T>`) and return the result from `Collector::finalize`.
+    ZeroOrMore { collector: Type },
+    /// Specialized for `i+` = `i ~ i*`.
+    /// Initialize a `Collector`  (requires `From<T> + Collector<T>`), pass it to the recursive routine
+    /// and return the result from `Collector::finalize`.
+    OneOrMoreToplevel { collector: Type },
+    /// Specialized for `i+` = `i ~ i*`.
+    /// Accepts a `&mut Collector`
+    OneOrMoreNested { collector: Type },
 }
 
 impl SemAct {
-    fn generate_call(&self) -> ExprCall {
-        let exprs = (0..self.arity).map(|i| format_ident!("__{}", i));
-        let function = &self.function;
-        parse_quote!(
-            #function(#(#exprs),*)
-        )
-    }
-    pub fn generate_statement(&self, output: Option<usize>) -> Stmt {
-        let expr = self.generate_call();
-        match output {
-            None => parse_quote!(
-                #expr;
-            ),
-            Some(index) => {
-                let ty = self.ty.iter();
-                let output = format_ident!("__{}", index);
+    /// Generate inlined expr for reduce action `terminal shift [reduce] shift shift`
+    pub fn generate_inline_expr<'a, I: IntoIterator<Item = &'a Expr>>(
+        &self,
+        exprs: I,
+        delayed_func: Option<Expr>,
+    ) -> Expr {
+        debug_assert_eq!(
+            delayed_func.is_some(),
+            matches!(self, Self::OneOrMoreToplevel { .. })
+        );
+        match self {
+            Self::CustomizedRoutine {
+                function,
+                ret_type: _,
+                arity: _,
+            } => {
+                let exprs = exprs.into_iter();
                 parse_quote!(
-                    let #output #(: #ty)* = #expr;
+                    #function(#(#exprs,)*)
                 )
             }
+            Self::Tuple => {
+                let exprs = exprs.into_iter();
+                parse_quote!(
+                    (#(#exprs,)*)
+                )
+            }
+            Self::Option { .. } => {
+                unreachable!("Option can never be inlined, otherwise there is sequential ambiguity")
+            }
+
+            Self::ZeroOrMore { .. } => unreachable!(
+                "ZeroOrMore can never be inlined, otherwise there is sequential ambiguity"
+            ),
+
+            Self::OneOrMoreNested { .. } => unreachable!(
+                "OneOrMoreNested can never be inlined because it never appears in the first place"
+            ),
+
+            Self::OneOrMoreToplevel { collector } => {
+                let exprs = exprs.into_iter();
+                let delayed_func = delayed_func.unwrap();
+                // TODO: src, offset
+                parse_quote! {
+                    {
+                        let mut collector = #collector::from(#(#exprs)*);
+                        #delayed_func(&mut collector, src, offset);
+                        collector.finalize()
+                    }
+                }
+            }
         }
     }
+
+    /// This function is useful in the following cases:
+    /// - If a shift routine is nested one or more, we does not emit the call to it immediately. Instead, we wait until
+    /// [`Self::generate_inlin_expr`] is called.
+    /// - If a parser routine has a semact [`Self::OneOrMoreNested`], it should be parametized by `C : Collector` in type
+    /// and its has `&mut C` as its first input param.
+    pub fn is_nested_one_or_more(&self) -> bool {
+        matches!(self, Self::OneOrMoreNested { .. })
+    }
+
+    /// Check if we should generate loops for TCO.
+    pub fn should_tco(&self) -> bool {
+        matches!(self, Self::ZeroOrMore { .. } | Self::OneOrMoreNested { .. })
+    }
 }

From a0608326046c4270f6438e52c512178f294ab179 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Tue, 25 Jul 2023 16:15:34 -0400
Subject: [PATCH 12/42] address comments on trait design

---
 pag-parser2/src/nf/semact.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index bec6291..e89c8c4 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -15,9 +15,7 @@ pub type SemActTable = HashMap<Tag, SemAct>;
 
 ///
 /// ```
-/// trait Collector<T> {
-///     pub type Output;
-///     fn finalize(self) -> Self::Output;
+/// trait Collector<T> : Default {
 ///     fn collect(&mut self, data: T);
 /// }
 ///
@@ -104,7 +102,7 @@ impl SemAct {
 
     /// This function is useful in the following cases:
     /// - If a shift routine is nested one or more, we does not emit the call to it immediately. Instead, we wait until
-    /// [`Self::generate_inlin_expr`] is called.
+    /// [`Self::generate_inline_expr`] is called.
     /// - If a parser routine has a semact [`Self::OneOrMoreNested`], it should be parametized by `C : Collector` in type
     /// and its has `&mut C` as its first input param.
     pub fn is_nested_one_or_more(&self) -> bool {

From b4706a21f241fb95ece9d6123087950bb8e6d782 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Wed, 26 Jul 2023 19:28:29 +0800
Subject: [PATCH 13/42] record lexer idx

---
 pag-parser2/src/frontend/ast.rs   |  7 ++++-
 pag-parser2/src/frontend/parse.rs | 44 ++++++++++++++++++++-----------
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 1b40038..3215246 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -11,10 +11,15 @@ use std::collections::HashMap;
 pub struct Ast {
     pub entry: syn::Ident,
     pub skip: Option<LexerExpr>,
-    pub lexer_map: HashMap<syn::Ident, LexerExpr>,
+    pub lexer_map: HashMap<syn::Ident, LexerDef>,
     pub parser_map: HashMap<syn::Ident, ParserDef>,
 }
 
+pub struct LexerDef {
+    pub idx: u32,
+    pub expr: LexerExpr,
+}
+
 pub struct ParserDef {
     pub ty: syn::Type,
     pub rules: Vec<ParserRule>,
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index a7078c5..fb4c924 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -10,7 +10,7 @@ use super::ast::*;
 
 use syn::ext::IdentExt;
 use syn::parse::{Parse, ParseStream};
-use syn::{bracketed, parenthesized, parse_quote, Token};
+use syn::{bracketed, parenthesized, parse_quote, Error, Result, Token};
 
 use std::collections::HashMap;
 
@@ -33,7 +33,7 @@ fn ident_kind(ident: &syn::Ident) -> IdentKind {
 }
 
 impl Parse for Ast {
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(input: ParseStream) -> Result<Self> {
         let mut entry = None;
         let mut skip = None;
         let mut lexer_map = HashMap::new();
@@ -41,32 +41,46 @@ impl Parse for Ast {
 
         while !input.is_empty() {
             if input.peek(Token![%]) {
-                // parse keyword
+                // parse keywords
                 input.parse::<Token![%]>()?;
                 let ident = input.parse::<syn::Ident>()?.unraw();
                 match ident.to_string().as_str() {
                     "entry" => {
+                        if entry.is_some() {
+                            return Err(Error::new(ident.span(), "duplicate %entry definition"));
+                        }
                         input.parse::<Token![=]>()?;
                         entry = Some(input.parse::<syn::Ident>()?);
                     }
                     "skip" => {
+                        if skip.is_some() {
+                            return Err(Error::new(ident.span(), "duplicate %skip definition"));
+                        }
                         input.parse::<Token![=]>()?;
                         skip = Some(input.parse::<LexerExpr>()?);
                     }
-                    _ => return Err(syn::Error::new(ident.span(), "invalid keyword")),
+                    _ => return Err(Error::new(ident.span(), "invalid keyword")),
                 }
             } else {
                 // parse lexer / parser definitions
                 let ident = input.parse::<syn::Ident>()?.unraw();
                 match ident_kind(&ident) {
                     IdentKind::LexerName => {
+                        if lexer_map.contains_key(&ident) {
+                            return Err(Error::new(ident.span(), "duplicate lexer definition"));
+                        }
                         input.parse::<Token![=]>()?;
-                        lexer_map.insert(ident, input.parse::<LexerExpr>()?);
+                        let idx = lexer_map.len() as _;
+                        let expr = input.parse::<LexerExpr>()?;
+                        lexer_map.insert(ident, LexerDef { idx, expr });
                     }
                     IdentKind::ParserName => {
+                        if parser_map.contains_key(&ident) {
+                            return Err(Error::new(ident.span(), "duplicate parser definition"));
+                        }
                         parser_map.insert(ident, input.parse::<ParserDef>()?);
                     }
-                    _ => return Err(syn::Error::new(ident.span(), "invalid ident")),
+                    _ => return Err(Error::new(ident.span(), "invalid ident")),
                 }
             }
             input.parse::<Token![;]>()?;
@@ -83,7 +97,7 @@ impl Parse for Ast {
 
 impl Parse for ParserDef {
     // (":" syn::Type)? = (ParserRule)|+
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(input: ParseStream) -> Result<Self> {
         let ty = match input.parse::<Token![:]>() {
             Ok(_) => input.parse::<syn::Type>()?,
             Err(_) => parse_quote!(&'src str),
@@ -106,7 +120,7 @@ impl Parse for ParserDef {
 
 impl Parse for ParserRule {
     // (VarBinding)+ syn::Block?
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(input: ParseStream) -> Result<Self> {
         let mut vars = Vec::new();
         while !input.peek(syn::token::Brace) && !input.peek(Token![|]) && !input.peek(Token![;]) {
             vars.push(input.parse::<VarBinding>()?);
@@ -123,7 +137,7 @@ impl Parse for ParserRule {
 
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(input: ParseStream) -> Result<Self> {
         let expr = input.parse::<ParserExpr>()?;
 
         let mut name = None;
@@ -149,18 +163,18 @@ impl Parse for VarBinding {
 }
 
 impl Parse for LexerExpr {
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(input: ParseStream) -> Result<Self> {
         parse_lexer_expr(input, 0)
     }
 }
 
 // pratt parsing
-fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> syn::Result<LexerExpr> {
+fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> Result<LexerExpr> {
     let mut lhs = 'lhs: {
         if input.peek(syn::Ident) {
             let ident = input.parse::<syn::Ident>()?.unraw();
             if ident_kind(&ident) != IdentKind::LexerName {
-                return Err(syn::Error::new(ident.span(), "invalid ident"));
+                return Err(Error::new(ident.span(), "invalid ident"));
             }
             break 'lhs LexerExpr::Ref(ident);
         }
@@ -258,20 +272,20 @@ fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> syn::Result<LexerExpr> {
 }
 
 impl Parse for ParserExpr {
-    fn parse(input: ParseStream) -> syn::Result<Self> {
+    fn parse(input: ParseStream) -> Result<Self> {
         parse_parser_expr(input, 0)
     }
 }
 
 // pratt parsing
-fn parse_parser_expr(input: ParseStream, min_bp: u32) -> syn::Result<ParserExpr> {
+fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
     let mut lhs = 'lhs: {
         if input.peek(syn::Ident) {
             let ident = input.parse::<syn::Ident>()?.unraw();
             match ident_kind(&ident) {
                 IdentKind::LexerName => break 'lhs ParserExpr::LexerRef(ident),
                 IdentKind::ParserName => break 'lhs ParserExpr::ParserRef(ident),
-                _ => return Err(syn::Error::new(ident.span(), "invalid ident")),
+                _ => return Err(Error::new(ident.span(), "invalid ident")),
             }
         }
         return Err(input.error("expected parser expression"));

From f59d893492cf9386b9d98a15a5b7f6a1a3c16359 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Wed, 26 Jul 2023 21:32:39 +0800
Subject: [PATCH 14/42] support ignore in parser expr

---
 pag-parser2/src/frontend/ast.rs   |  1 +
 pag-parser2/src/frontend/parse.rs | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 3215246..8de4969 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -58,4 +58,5 @@ pub enum ParserExpr {
     Opt(Box<Self>),
     LexerRef(syn::Ident),
     ParserRef(syn::Ident),
+    Ignore(Box<Self>),
 }
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index fb4c924..bf3bb53 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -288,11 +288,22 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
                 _ => return Err(Error::new(ident.span(), "invalid ident")),
             }
         }
+        if input.peek(syn::token::Paren) {
+            let content;
+            parenthesized!(content in input);
+            break 'lhs content.parse::<ParserExpr>()?;
+        }
+        if input.peek(Token![#]) {
+            input.parse::<Token![#]>()?;
+            let r_bp = 60;
+            let rhs = parse_parser_expr(input, r_bp)?;
+            break 'lhs ParserExpr::Ignore(Box::new(rhs));
+        }
         return Err(input.error("expected parser expression"));
     };
 
     loop {
-        if input.peek(syn::Ident) {
+        if input.peek(syn::Ident) || input.peek(Token![#]) {
             let (l_bp, r_bp) = (40, 41);
             if l_bp < min_bp {
                 break;
@@ -338,6 +349,11 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
 mod test {
     use super::*;
 
+    #[test]
+    fn test_var_binding() {
+        syn::parse_str::<VarBinding>(r#"(#LPAREN expr #RPAREN)?[e]"#).unwrap();
+    }
+
     #[test]
     fn test_lexer_expr() {
         syn::parse_str::<LexerExpr>(r#"("abc" 'a'..'z') r#A | B & C | D* E+ F? !G"#).unwrap();

From 7faa80df5ccbe5de1adeeab240ae55818b43e5fa Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Tue, 25 Jul 2023 20:10:06 -0400
Subject: [PATCH 15/42] add some debug facilities

---
 pag-parser2/Cargo.toml    |   5 ++
 pag-parser2/src/debug.rs  |  22 +++++++++
 pag-parser2/src/lib.rs    |   2 +
 pag-parser2/src/nf/mod.rs | 101 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 130 insertions(+)
 create mode 100644 pag-parser2/src/debug.rs

diff --git a/pag-parser2/Cargo.toml b/pag-parser2/Cargo.toml
index e9a6228..0513c4c 100644
--- a/pag-parser2/Cargo.toml
+++ b/pag-parser2/Cargo.toml
@@ -22,6 +22,11 @@ rust-version.workspace = true
 authors.workspace = true
 readme.workspace = true
 
+[features]
+ansi-debug = ["nu-ansi-term", "debug"]
+debug = []
+
 [dependencies]
 syn = { version = "2.0.27", features = ["full"] }
 quote = "1.0.9"
+nu-ansi-term = { version = "0.49.0", optional = true }
\ No newline at end of file
diff --git a/pag-parser2/src/debug.rs b/pag-parser2/src/debug.rs
new file mode 100644
index 0000000..fc99fdb
--- /dev/null
+++ b/pag-parser2/src/debug.rs
@@ -0,0 +1,22 @@
+#[cfg(feature = "ansi-debug")]
+macro_rules! styled {
+    ($style:expr, $($arg:tt)*) => {
+        {
+            use nu_ansi_term::*;
+            $style.paint(format!($($arg)*))
+        }
+    };
+}
+#[cfg(not(feature = "ansi-debug"))]
+macro_rules! styled {
+    ($style:expr, $($arg:tt)*) => {format!($($arg)*)};
+}
+
+macro_rules! styled_write {
+    ($dst:expr, $($arg:tt)*) => {
+        write!($dst, "{}", $crate::debug::styled!($($arg)*))
+    };
+}
+
+pub(crate) use styled;
+pub(crate) use styled_write;
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
index 11e33c1..85c0851 100644
--- a/pag-parser2/src/lib.rs
+++ b/pag-parser2/src/lib.rs
@@ -6,5 +6,7 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
+#[cfg(feature = "debug")]
+mod debug;
 mod frontend;
 mod nf;
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 29d6c92..10aa16e 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -8,6 +8,9 @@
 
 use quote::format_ident;
 use syn::Ident;
+
+#[cfg(feature = "debug")]
+use crate::debug::styled_write;
 mod semact;
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -32,6 +35,16 @@ impl Tag {
     }
 }
 
+#[cfg(feature = "debug")]
+impl std::fmt::Display for Tag {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Tag::Toplevel(ident) => write!(f, "{}", ident),
+            Tag::Anonymous(index) => write!(f, "{{{}}}", index),
+        }
+    }
+}
+
 /// Action in the normal form.
 /// If this subroutine's return value is taken, it should mark [`Action::output`] as `true`.
 /// There is no need to assign an ident to a subroutine. As we are always
@@ -51,3 +64,91 @@ pub enum Action {
         output: bool,
     },
 }
+
+#[cfg(feature = "debug")]
+impl std::fmt::Display for Action {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Reduce { tag, output } => {
+                if *output {
+                    styled_write!(f, Color::Red, "[{tag}]")
+                } else {
+                    styled_write!(f, Color::Blue, "[{tag}]")
+                }
+            }
+            Self::Shift { tag, output } => {
+                if *output {
+                    styled_write!(f, Color::Red, "{tag}")
+                } else {
+                    styled_write!(f, Color::Blue, "{tag}")
+                }
+            }
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum NormalForm {
+    Empty(Vec<(Tag, bool)>),
+    Unexpanded(Vec<Action>),
+    Sequence(Ident, Vec<Action>),
+}
+
+#[cfg(feature = "debug")]
+impl std::fmt::Display for NormalForm {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Empty(actions) => {
+                write!(f, "ε")?;
+                for (tag, output) in actions.iter() {
+                    if *output {
+                        styled_write!(f, Color::Red, "[{tag}]")?;
+                    } else {
+                        styled_write!(f, Color::Blue, "[{tag}]")?;
+                    }
+                }
+            }
+            Self::Unexpanded(actions) => {
+                write!(f, "{}", actions[0])?;
+                for action in &actions[1..] {
+                    write!(f, " {}", action)?;
+                }
+            }
+            Self::Sequence(terminal, actions) => {
+                styled_write!(f, Color::Yellow.bold(), "{terminal}")?;
+                for action in actions.iter() {
+                    write!(f, " {}", action)?;
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+#[cfg(all(feature = "debug", test))]
+#[test]
+fn debug_print_test() {
+    use quote::format_ident;
+    let sequence = NormalForm::Sequence(
+        format_ident!("TEST"),
+        vec![
+            Action::Shift {
+                tag: Tag::Toplevel(format_ident!("a")),
+                output: false,
+            },
+            Action::Reduce {
+                tag: Tag::Toplevel(format_ident!("b")),
+                output: true,
+            },
+            Action::Shift {
+                tag: Tag::Toplevel(format_ident!("c")),
+                output: true,
+            },
+            Action::Reduce {
+                tag: Tag::Anonymous(1),
+                output: false,
+            },
+        ],
+    );
+    println!("{}", sequence);
+}

From 550d0ee801958e511cd83d37e2f1a14be1623613 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Tue, 25 Jul 2023 21:16:32 -0400
Subject: [PATCH 16/42] print normal form table

---
 pag-parser2/Cargo.toml    |   5 +-
 pag-parser2/src/nf/mod.rs | 108 ++++++++++++++++++++++++++++++++++----
 2 files changed, 100 insertions(+), 13 deletions(-)

diff --git a/pag-parser2/Cargo.toml b/pag-parser2/Cargo.toml
index 0513c4c..b6160ca 100644
--- a/pag-parser2/Cargo.toml
+++ b/pag-parser2/Cargo.toml
@@ -24,9 +24,10 @@ readme.workspace = true
 
 [features]
 ansi-debug = ["nu-ansi-term", "debug"]
-debug = []
+debug = ["term_size"]
 
 [dependencies]
 syn = { version = "2.0.27", features = ["full"] }
 quote = "1.0.9"
-nu-ansi-term = { version = "0.49.0", optional = true }
\ No newline at end of file
+nu-ansi-term = { version = "0.49.0", optional = true }
+term_size = { version = "0.3", optional = true }
\ No newline at end of file
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 10aa16e..9f32362 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -6,9 +6,12 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
+use std::{collections::HashMap, ops::Deref};
+
 use quote::format_ident;
 use syn::Ident;
 
+use crate::debug::styled;
 #[cfg(feature = "debug")]
 use crate::debug::styled_write;
 mod semact;
@@ -39,8 +42,8 @@ impl Tag {
 impl std::fmt::Display for Tag {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Tag::Toplevel(ident) => write!(f, "{}", ident),
-            Tag::Anonymous(index) => write!(f, "{{{}}}", index),
+            Tag::Toplevel(ident) => write!(f, "{ident}"),
+            Tag::Anonymous(index) => styled_write!(f, Style::new().italic(), "_{index}"),
         }
     }
 }
@@ -71,16 +74,16 @@ impl std::fmt::Display for Action {
         match self {
             Self::Reduce { tag, output } => {
                 if *output {
-                    styled_write!(f, Color::Red, "[{tag}]")
+                    styled_write!(f, Color::Blue.underline(), "{tag}")
                 } else {
-                    styled_write!(f, Color::Blue, "[{tag}]")
+                    styled_write!(f, Color::Blue, "{tag}")
                 }
             }
             Self::Shift { tag, output } => {
                 if *output {
-                    styled_write!(f, Color::Red, "{tag}")
+                    styled_write!(f, Color::Red.underline(), "{tag}")
                 } else {
-                    styled_write!(f, Color::Blue, "{tag}")
+                    styled_write!(f, Color::Red, "{tag}")
                 }
             }
         }
@@ -102,22 +105,22 @@ impl std::fmt::Display for NormalForm {
                 write!(f, "ε")?;
                 for (tag, output) in actions.iter() {
                     if *output {
-                        styled_write!(f, Color::Red, "[{tag}]")?;
+                        styled_write!(f, Color::Blue.underline(), "\t{tag}")?;
                     } else {
-                        styled_write!(f, Color::Blue, "[{tag}]")?;
+                        styled_write!(f, Color::Blue, "\t{tag}")?;
                     }
                 }
             }
             Self::Unexpanded(actions) => {
                 write!(f, "{}", actions[0])?;
                 for action in &actions[1..] {
-                    write!(f, " {}", action)?;
+                    write!(f, "\t{}", action)?;
                 }
             }
             Self::Sequence(terminal, actions) => {
-                styled_write!(f, Color::Yellow.bold(), "{terminal}")?;
+                styled_write!(f, Color::Yellow, "{terminal}")?;
                 for action in actions.iter() {
-                    write!(f, " {}", action)?;
+                    write!(f, "\t{}", action)?;
                 }
             }
         }
@@ -152,3 +155,86 @@ fn debug_print_test() {
     );
     println!("{}", sequence);
 }
+
+/// Well, it is not the notorious firewall.
+pub struct NFTable(HashMap<Tag, Vec<NormalForm>>);
+
+impl Deref for NFTable {
+    type Target = HashMap<Tag, Vec<NormalForm>>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+#[cfg(feature = "debug")]
+impl std::fmt::Display for NFTable {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let width = term_size::dimensions().map(|x| x.0).unwrap_or(0);
+        writeln!(f, "┏{}┓", "━".repeat(width.saturating_sub(2)))?;
+        writeln!(
+            f,
+            "\t{}\t{}\t{}\t{}\n",
+            styled!(Color::Red.bold(), "Shift"),
+            styled!(Color::Blue.bold(), "Reduce"),
+            styled!(Style::new().underline().bold(), "Output"),
+            styled!(Style::new().italic().bold(), "Anonymous"),
+        )?;
+        for (tag, forms) in self.iter() {
+            writeln!(
+                f,
+                "\t{}\t=\t{}",
+                styled!(Style::new().underline(), "{tag}"),
+                forms[0]
+            )?;
+            for form in &forms[1..] {
+                writeln!(f, "\t\t|\t{}", form)?;
+            }
+            writeln!(f)?;
+        }
+        writeln!(f, "┗{}┛", "━".repeat(width.saturating_sub(2)))
+    }
+}
+
+#[cfg(all(feature = "debug", test))]
+#[test]
+fn debug_print_nf_table() {
+    use quote::format_ident;
+    let sequence = NormalForm::Sequence(
+        format_ident!("TEST"),
+        vec![
+            Action::Shift {
+                tag: Tag::Toplevel(format_ident!("a")),
+                output: false,
+            },
+            Action::Reduce {
+                tag: Tag::Toplevel(format_ident!("b")),
+                output: true,
+            },
+            Action::Shift {
+                tag: Tag::Toplevel(format_ident!("c")),
+                output: true,
+            },
+            Action::Reduce {
+                tag: Tag::Anonymous(1),
+                output: false,
+            },
+        ],
+    );
+    let empty = NormalForm::Empty(vec![
+        (Tag::Toplevel(format_ident!("a")), false),
+        (Tag::Toplevel(format_ident!("b")), true),
+    ]);
+    let table = NFTable(
+        vec![
+            (
+                Tag::Toplevel(format_ident!("TEST1")),
+                vec![sequence.clone(), empty.clone()],
+            ),
+            (Tag::Toplevel(format_ident!("TEST2")), vec![sequence, empty]),
+        ]
+        .into_iter()
+        .collect(),
+    );
+    println!("{}", table);
+}

From 8b81928b7be5c19250619832bfc0928d2a94988a Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Wed, 26 Jul 2023 21:53:16 +0800
Subject: [PATCH 17/42] fix use error

---
 pag-parser2/src/nf/mod.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 9f32362..3f04e7f 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -6,15 +6,15 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
+mod semact;
+
 use std::{collections::HashMap, ops::Deref};
 
 use quote::format_ident;
 use syn::Ident;
 
-use crate::debug::styled;
 #[cfg(feature = "debug")]
-use crate::debug::styled_write;
-mod semact;
+use crate::debug::{styled, styled_write};
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Tag {

From ce21ff6d9e4d3da4e980163bf7f4f0bbf754a2fc Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Wed, 26 Jul 2023 22:09:28 +0800
Subject: [PATCH 18/42] fix parser bug

---
 pag-parser2/src/frontend/parse.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index bf3bb53..093842c 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -217,7 +217,6 @@ fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> Result<LexerExpr> {
             || input.peek(syn::LitStr)
             || input.peek(syn::LitChar)
             || input.peek(syn::token::Paren)
-            || input.peek(syn::token::Paren)
             || input.peek(Token![!])
         {
             let (l_bp, r_bp) = (40, 41);
@@ -303,7 +302,7 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
     };
 
     loop {
-        if input.peek(syn::Ident) || input.peek(Token![#]) {
+        if input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#]) {
             let (l_bp, r_bp) = (40, 41);
             if l_bp < min_bp {
                 break;
@@ -351,7 +350,7 @@ mod test {
 
     #[test]
     fn test_var_binding() {
-        syn::parse_str::<VarBinding>(r#"(#LPAREN expr #RPAREN)?[e]"#).unwrap();
+        syn::parse_str::<VarBinding>(r#"(ident (#COLON expr)?)*[e]"#).unwrap();
     }
 
     #[test]

From 58ecd55a238a75aefc64528045dd98554ceb575e Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Wed, 26 Jul 2023 16:52:17 -0400
Subject: [PATCH 19/42] add `HKT` support and type inference prototype

---
 pag-parser2/src/frontend/ast.rs   |   9 +-
 pag-parser2/src/frontend/parse.rs |  24 +++-
 pag-parser2/src/nf/inference.rs   | 214 ++++++++++++++++++++++++++++++
 pag-parser2/src/nf/mod.rs         |  77 ++++++++---
 pag-parser2/src/nf/semact.rs      | 102 +++-----------
 5 files changed, 313 insertions(+), 113 deletions(-)
 create mode 100644 pag-parser2/src/nf/inference.rs

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 8de4969..0290189 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -21,7 +21,7 @@ pub struct LexerDef {
 }
 
 pub struct ParserDef {
-    pub ty: syn::Type,
+    pub ty: TypeAnnotation,
     pub rules: Vec<ParserRule>,
 }
 
@@ -29,11 +29,16 @@ pub struct ParserRule {
     pub vars: Vec<VarBinding>,
     pub action: Option<syn::Block>,
 }
+#[derive(Clone)]
+pub enum TypeAnnotation {
+    Concrete(syn::Type),
+    HigherKind(syn::Path),
+}
 
 pub struct VarBinding {
     pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
-    pub ty: Option<syn::Type>,
+    pub ty: Option<TypeAnnotation>,
 }
 
 // TODO: how to express "bottom" & "any"?
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 093842c..43570f2 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -99,7 +99,7 @@ impl Parse for ParserDef {
     // (":" syn::Type)? = (ParserRule)|+
     fn parse(input: ParseStream) -> Result<Self> {
         let ty = match input.parse::<Token![:]>() {
-            Ok(_) => input.parse::<syn::Type>()?,
+            Ok(_) => input.parse::<TypeAnnotation>()?,
             Err(_) => parse_quote!(&'src str),
         };
 
@@ -135,6 +135,18 @@ impl Parse for ParserRule {
     }
 }
 
+impl Parse for TypeAnnotation {
+    fn parse(input: ParseStream) -> Result<Self> {
+        if input.peek(Token![@]) {
+            input.parse::<Token![@]>()?;
+            let path = input.parse::<syn::Path>()?;
+            Ok(Self::HigherKind(path))
+        } else {
+            Ok(Self::Concrete(input.parse::<syn::Type>()?))
+        }
+    }
+}
+
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> Result<Self> {
@@ -150,7 +162,7 @@ impl Parse for VarBinding {
 
             if content.peek(Token![:]) {
                 content.parse::<Token![:]>()?;
-                ty = Some(content.parse::<syn::Type>()?);
+                ty = Some(content.parse::<TypeAnnotation>()?);
             }
 
             if !content.is_empty() {
@@ -363,6 +375,12 @@ mod test {
         syn::parse_str::<ParserExpr>(r#"A? b c* D+ F?"#).unwrap();
     }
 
+    #[test]
+    fn test_parser_type_annotatopn() {
+        syn::parse_str::<TypeAnnotation>(r#"@Vec"#).unwrap();
+        syn::parse_str::<TypeAnnotation>(r#"Vec<u128>"#).unwrap();
+    }
+
     #[test]
     fn test_full() {
         syn::parse_str::<Ast>(
@@ -376,7 +394,7 @@ mod test {
             ATOM   = ALPHA (ALPHA | DIGIT)*;
             %skip  = (" " | "\t" | "\n" | "\r")+;
 
-            compound: SExp = LPAREN sexp+[sexp:Vec<_>] RPAREN { SExp::Compound(sexp) };
+            compound: SExp = LPAREN sexp+[sexp:@Vec] RPAREN { SExp::Compound(sexp) };
             atom    : SExp = ATOM[atom] { SExp::Atom(atom) };
             sexp    : SExp = compound
                            | atom;
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
new file mode 100644
index 0000000..b1f51e8
--- /dev/null
+++ b/pag-parser2/src/nf/inference.rs
@@ -0,0 +1,214 @@
+// If there is no semantic action, the routine is a plain scan over the input. Thus, the type is unit.
+// ⊢ x = ..., SemAct[x] = ∅
+// -------------------
+// ⊢ x : ()
+
+// A Customized Routine must have a type annotation
+// ⊢ x = ..., SemAct[x] = Customized(𝜏)
+// -------------------
+// ⊢ x : 𝜏
+
+// A Token action gives the span of a terminal
+// ⊢ x = T, SemAct[x] = Token
+// -------------------
+// ⊢ x : Span
+
+// Fully normalized Option must be in the following form:
+// x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
+//
+// Thus, the rule should be:
+//
+// Γ ⊢ x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
+// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
+// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 = ...
+// SemAct[x] = Option
+// -------------------
+//  Γ ⊢ x : Option<𝜏>
+
+// Fully normalized ZeroOrMore must be in the following form:
+// x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
+//
+// Thus, the rule should be:
+//
+// Γ ⊢ x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
+// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
+// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 =...
+// SemAct[x] = ZeroOrMore(Σ ∈ Collector<𝜏>)
+// -------------------
+//  Γ ⊢ x : Σ
+
+// Fully normalized OneOrMoreToplevel must be in the following form:
+// x = T_0 ...[r_0] t | T_1 ... [r_1] t | ..
+//
+// Thus, the rule should be:
+//
+// Γ ⊢ x = T_0 ...[r_0] t | T_1 ... [r_1] t | ..
+// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
+// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 = ...
+// SemAct[x] = OneOrMoreToplevel(Σ ∈ Collector<𝜏>)
+// -------------------
+//  Γ ⊢ x : Σ
+
+// Fully normalized OneOrMoreNested must be in the following form:
+// x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
+//
+// Thus, the rule should be:
+//
+// Γ ⊢ x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
+// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
+// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 =...
+// SemAct[x] = OneOrMoreNested
+// -------------------
+//  Γ ⊢ x : () -- Notice that x accepts &mut C ∈ Collector<𝜏> instead
+
+// Fully normalized Tuple must be in the following form:
+// x = T_0 ... [r_0] x00 _x01 x02 | ..
+// let η_i be the type tuple of everything including last reduce that gives an output.
+// x = T_0 ... [r_0] x00 _x01 x02 | ..
+// Γ ⊢ ║ η_0 ║ = ║ η_1 ║ = ...
+// Γ ⊢ ∀i.∀j.∀k. η_i.k = η_j.k
+// SemAct[x] = Gather
+// -------------------
+// Γ ⊢ x : η
+
+use std::{
+    cell::UnsafeCell,
+    collections::{hash_map::Entry, HashMap},
+};
+
+use syn::{parse_quote, Type};
+
+use crate::{frontend::TypeAnnotation};
+
+use super::{
+    semact::{SemAct, SemActTable},
+    NormalForm, Tag,
+};
+
+pub struct InferenceContext<'a> {
+    /// Typed tags
+    gamma: UnsafeCell<HashMap<Tag, Type>>,
+    /// Type annotations from user
+    annotations: &'a HashMap<Tag, TypeAnnotation>,
+    /// Semantic action table
+    semact: &'a SemActTable,
+    /// Fully normalized terms
+    nforms: &'a HashMap<Tag, Vec<NormalForm>>,
+}
+impl<'a> InferenceContext<'a> {
+    /// Create a new inference context
+    pub fn new(
+        annotations: &'a HashMap<Tag, TypeAnnotation>,
+        semact: &'a SemActTable,
+        nforms: &'a HashMap<Tag, Vec<NormalForm>>,
+    ) -> Self {
+        Self {
+            gamma: UnsafeCell::new(HashMap::new()),
+            annotations,
+            semact,
+            nforms,
+        }
+    }
+    fn infer_gather<'i, I: Iterator<Item = &'i Tag>>(&self, mut tags: I) -> Option<Type> {
+        if let Some(tag) = tags.next() {
+            let mut types = vec![self.infer(tag)?];
+            for t in tags {
+                // If any inference fails, the whole inference fails
+                let ty = self.infer(t)?;
+                types.push(ty);
+            }
+            if types.len() == 1 {
+                // If there is only one field, no need to wrap in a tuple
+                Some(types.pop().unwrap())
+            } else {
+                // Otherwise, wrap in a tuple
+                Some(parse_quote!((#(#types),*)))
+            }
+        } else {
+            // no field, unit type
+            Some(parse_quote!(()))
+        }
+    }
+    fn infer(&self, tag: &Tag) -> Option<Type> {
+        match unsafe { (*self.gamma.get()).entry(tag.clone()) } {
+            // If a tag has been inferred, return its type directly
+            Entry::Occupied(entry) => Some(entry.get().clone()),
+            Entry::Vacant(slot) => Some(
+                slot.insert({
+                    // If a concrete type annotation is provided, use it directly
+                    if let Some(x) = self.annotations.get(tag).and_then(|anno| match anno {
+                        TypeAnnotation::Concrete(ty) => Some(ty.clone()),
+                        _ => None,
+                    }) {
+                        x
+                    } else {
+                        let semact = self.semact.get(tag);
+                        match semact {
+                            // No semantic action, the type is unit
+                            None => parse_quote!(()),
+                            // Token semantic action, the type is Span
+                            Some(SemAct::Token) => parse_quote!(::pag_runtime::Span<'src>),
+                            // Customized routine without type annotation -- inference failed
+                            Some(SemAct::CustomizedRoutine(..)) => return None,
+                            // Nested routine for one or more, the type is unit.
+                            Some(SemAct::OneOrMoreNested) => parse_quote!(()),
+                            Some(SemAct::Gather) => {
+                                let nfs = self.nforms.get(tag)?;
+                                let mut inferred = None;
+                                // find first subexpression that fulfills inference
+                                for i in nfs.iter() {
+                                    let visible = i.visible_bindings(0);
+                                    if let Some(gather_type) =
+                                        self.infer_gather(visible.into_iter().map(|x| x.1))
+                                    {
+                                        inferred.replace(gather_type);
+                                        break;
+                                    }
+                                }
+                                inferred?
+                            }
+                            Some(SemAct::ZeroOrMore) | Some(SemAct::Option) | Some(SemAct::OneOrMoreToplevel) => {
+                                let nfs = self.nforms.get(tag)?;
+                                let TypeAnnotation::HigherKind(path) = self
+                                    .annotations.get(tag).cloned().unwrap_or_else(||
+                                        if matches!(semact, Some(SemAct::Option)) {
+                                            TypeAnnotation::HigherKind(parse_quote!(::std::option::Option))
+                                        } else {
+                                        TypeAnnotation::HigherKind(parse_quote!(::std::collections::VecDeque)) })
+                                    else { unreachable!("must be higher kind type") };
+                                let mut inferred = None;
+                                // find first subexpression that fulfills inference
+                                for i in nfs.iter() {
+                                    // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
+                                    if let NormalForm::Empty(x) = i {
+                                        if x.is_empty() {
+                                            continue;
+                                        }
+                                    }
+                                    // skip the trailing part of OneOrMoreToplevel
+                                    let visible = i.visible_bindings(
+                                        if matches!(semact, Some(SemAct::OneOrMoreToplevel)) {
+                                            1
+                                        } else {
+                                            0
+                                        },
+                                    );
+                                    if let Some(gather_type) =
+                                        self.infer_gather(visible.into_iter().map(|x| x.1))
+                                    {
+                                        inferred.replace(
+                                            parse_quote!(#path<#gather_type>),
+                                        );
+                                        break;
+                                    }
+                                }
+                                inferred?
+                            }
+                        }
+                    }
+                })
+                .clone(),
+            ),
+        }
+    }
+}
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 3f04e7f..214d2fb 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -6,9 +6,13 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
+mod inference;
 mod semact;
 
-use std::{collections::HashMap, ops::Deref};
+use std::{
+    collections::{HashMap, VecDeque},
+    ops::{Deref},
+};
 
 use quote::format_ident;
 use syn::Ident;
@@ -59,12 +63,12 @@ pub enum Action {
     Shift {
         /// Parser routine to call.
         tag: Tag,
-        output: bool,
+        output: Option<Ident>,
     },
     Reduce {
         /// Reduction routine to call.
         tag: Tag,
-        output: bool,
+        output: Option<Ident>,
     },
 }
 
@@ -73,15 +77,15 @@ impl std::fmt::Display for Action {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::Reduce { tag, output } => {
-                if *output {
-                    styled_write!(f, Color::Blue.underline(), "{tag}")
+                if let Some(name) = output {
+                    styled_write!(f, Color::Blue, "{tag}[{name}]")
                 } else {
                     styled_write!(f, Color::Blue, "{tag}")
                 }
             }
             Self::Shift { tag, output } => {
-                if *output {
-                    styled_write!(f, Color::Red.underline(), "{tag}")
+                if let Some(name) = output {
+                    styled_write!(f, Color::Red, "{tag}[{name}]")
                 } else {
                     styled_write!(f, Color::Red, "{tag}")
                 }
@@ -92,11 +96,42 @@ impl std::fmt::Display for Action {
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum NormalForm {
-    Empty(Vec<(Tag, bool)>),
+    Empty(Vec<(Tag, Option<Ident>)>),
     Unexpanded(Vec<Action>),
     Sequence(Ident, Vec<Action>),
 }
 
+impl NormalForm {
+    pub fn visible_bindings(&self, skip: usize) -> Vec<(&Ident, &Tag)> {
+        match self {
+            Self::Empty(actions) => actions
+                .last()
+                .and_then(|(tag, ident)| Some((ident.as_ref()?, tag)))
+                .into_iter()
+                .collect(),
+            Self::Unexpanded(actions) | Self::Sequence(_, actions) => {
+                let mut acc = VecDeque::new();
+                for act in actions.iter().rev().skip(skip) {
+                    match act {
+                        Action::Shift { tag, output } => {
+                            if let Some(ident) = output {
+                                acc.push_front((ident, tag));
+                            }
+                        }
+                        Action::Reduce { tag, output } => {
+                            if let Some(ident) = output {
+                                acc.push_front((ident, tag));
+                            }
+                            break;
+                        }
+                    }
+                }
+                acc.into_iter().collect()
+            }
+        }
+    }
+}
+
 #[cfg(feature = "debug")]
 impl std::fmt::Display for NormalForm {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -104,8 +139,8 @@ impl std::fmt::Display for NormalForm {
             Self::Empty(actions) => {
                 write!(f, "ε")?;
                 for (tag, output) in actions.iter() {
-                    if *output {
-                        styled_write!(f, Color::Blue.underline(), "\t{tag}")?;
+                    if let Some(name) = output {
+                        styled_write!(f, Color::Blue, "\t{tag}[{name}]")?;
                     } else {
                         styled_write!(f, Color::Blue, "\t{tag}")?;
                     }
@@ -137,19 +172,19 @@ fn debug_print_test() {
         vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
-                output: false,
+                output: None,
             },
             Action::Reduce {
                 tag: Tag::Toplevel(format_ident!("b")),
-                output: true,
+                output: Some(format_ident!("x")),
             },
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("c")),
-                output: true,
+                output: Some(format_ident!("y")),
             },
             Action::Reduce {
                 tag: Tag::Anonymous(1),
-                output: false,
+                output: None,
             },
         ],
     );
@@ -177,7 +212,7 @@ impl std::fmt::Display for NFTable {
             "\t{}\t{}\t{}\t{}\n",
             styled!(Color::Red.bold(), "Shift"),
             styled!(Color::Blue.bold(), "Reduce"),
-            styled!(Style::new().underline().bold(), "Output"),
+            styled!(Style::new().bold(), "[Output]"),
             styled!(Style::new().italic().bold(), "Anonymous"),
         )?;
         for (tag, forms) in self.iter() {
@@ -205,25 +240,25 @@ fn debug_print_nf_table() {
         vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
-                output: false,
+                output: None,
             },
             Action::Reduce {
                 tag: Tag::Toplevel(format_ident!("b")),
-                output: true,
+                output: Some(format_ident!("x")),
             },
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("c")),
-                output: true,
+                output: Some(format_ident!("y")),
             },
             Action::Reduce {
                 tag: Tag::Anonymous(1),
-                output: false,
+                output: None,
             },
         ],
     );
     let empty = NormalForm::Empty(vec![
-        (Tag::Toplevel(format_ident!("a")), false),
-        (Tag::Toplevel(format_ident!("b")), true),
+        (Tag::Toplevel(format_ident!("a")), None),
+        (Tag::Toplevel(format_ident!("b")), Some(format_ident!("x"))),
     ]);
     let table = NFTable(
         vec![
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index e89c8c4..0e137ad 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -9,7 +9,7 @@
 use std::collections::HashMap;
 
 use super::Tag;
-use syn::{parse_quote, Expr, Type};
+
 
 pub type SemActTable = HashMap<Tag, SemAct>;
 
@@ -20,97 +20,25 @@ pub type SemActTable = HashMap<Tag, SemAct>;
 /// }
 ///
 /// ```
+
+// those normal form without SemAct will be treated as plain scanner.
 pub enum SemAct {
-    CustomizedRoutine {
-        /// Identifier of the semantic action routine.
-        function: Expr,
-        /// Type annotation
-        ret_type: Type,
-        /// Number of arguments
-        arity: usize,
-    },
-    /// Specialized for the inner of @(@a, @b, @c). Return an Tuple of the inner routine.
-    Tuple,
+    CustomizedRoutine(syn::Block),
+    /// Gather inner data. If multiple is selected, return a tuple.
+    /// If only one is selected, return target data.
+    Gather,
     /// Specialized for `inner?`. Return an Option of the inner routine
-    Option { inner_type: Type },
+    Option,
     /// Specialized for `i*`
-    /// Initialize a `Collector`  (requires `Default + Collector<T>`) and return the result from `Collector::finalize`.
-    ZeroOrMore { collector: Type },
+    /// Initialize a `Collector`  (requires `Collector<T>`) and return the result from `Collector::finalize`.
+    ZeroOrMore,
     /// Specialized for `i+` = `i ~ i*`.
-    /// Initialize a `Collector`  (requires `From<T> + Collector<T>`), pass it to the recursive routine
+    /// Initialize a `Collector`  (requires `Collector<T>`), pass it to the recursive routine
     /// and return the result from `Collector::finalize`.
-    OneOrMoreToplevel { collector: Type },
+    OneOrMoreToplevel,
     /// Specialized for `i+` = `i ~ i*`.
     /// Accepts a `&mut Collector`
-    OneOrMoreNested { collector: Type },
-}
-
-impl SemAct {
-    /// Generate inlined expr for reduce action `terminal shift [reduce] shift shift`
-    pub fn generate_inline_expr<'a, I: IntoIterator<Item = &'a Expr>>(
-        &self,
-        exprs: I,
-        delayed_func: Option<Expr>,
-    ) -> Expr {
-        debug_assert_eq!(
-            delayed_func.is_some(),
-            matches!(self, Self::OneOrMoreToplevel { .. })
-        );
-        match self {
-            Self::CustomizedRoutine {
-                function,
-                ret_type: _,
-                arity: _,
-            } => {
-                let exprs = exprs.into_iter();
-                parse_quote!(
-                    #function(#(#exprs,)*)
-                )
-            }
-            Self::Tuple => {
-                let exprs = exprs.into_iter();
-                parse_quote!(
-                    (#(#exprs,)*)
-                )
-            }
-            Self::Option { .. } => {
-                unreachable!("Option can never be inlined, otherwise there is sequential ambiguity")
-            }
-
-            Self::ZeroOrMore { .. } => unreachable!(
-                "ZeroOrMore can never be inlined, otherwise there is sequential ambiguity"
-            ),
-
-            Self::OneOrMoreNested { .. } => unreachable!(
-                "OneOrMoreNested can never be inlined because it never appears in the first place"
-            ),
-
-            Self::OneOrMoreToplevel { collector } => {
-                let exprs = exprs.into_iter();
-                let delayed_func = delayed_func.unwrap();
-                // TODO: src, offset
-                parse_quote! {
-                    {
-                        let mut collector = #collector::from(#(#exprs)*);
-                        #delayed_func(&mut collector, src, offset);
-                        collector.finalize()
-                    }
-                }
-            }
-        }
-    }
-
-    /// This function is useful in the following cases:
-    /// - If a shift routine is nested one or more, we does not emit the call to it immediately. Instead, we wait until
-    /// [`Self::generate_inline_expr`] is called.
-    /// - If a parser routine has a semact [`Self::OneOrMoreNested`], it should be parametized by `C : Collector` in type
-    /// and its has `&mut C` as its first input param.
-    pub fn is_nested_one_or_more(&self) -> bool {
-        matches!(self, Self::OneOrMoreNested { .. })
-    }
-
-    /// Check if we should generate loops for TCO.
-    pub fn should_tco(&self) -> bool {
-        matches!(self, Self::ZeroOrMore { .. } | Self::OneOrMoreNested { .. })
-    }
+    OneOrMoreNested,
+    /// Yield a token span,
+    Token,
 }

From 203739ef0cda248f3367a0764f83c87e6fdc9949 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Thu, 27 Jul 2023 01:00:17 -0400
Subject: [PATCH 20/42] expose infer_all_type interface

---
 pag-parser2/src/nf/inference.rs | 15 ++++++++++++++-
 pag-parser2/src/nf/mod.rs       |  2 +-
 pag-parser2/src/nf/semact.rs    |  1 -
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index b1f51e8..98b7b0c 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -78,7 +78,7 @@ use std::{
 
 use syn::{parse_quote, Type};
 
-use crate::{frontend::TypeAnnotation};
+use crate::frontend::TypeAnnotation;
 
 use super::{
     semact::{SemAct, SemActTable},
@@ -129,6 +129,19 @@ impl<'a> InferenceContext<'a> {
             Some(parse_quote!(()))
         }
     }
+    /// try infer all types, but may fail with incomplete type information.
+    pub fn infer_all_types(mut self) -> HashMap<Tag, Type> {
+        let mut typed = 0;
+        while typed < self.nforms.len() {
+            typed = 0;
+            for i in self.nforms.keys() {
+                if self.infer(i).is_some() {
+                    typed += 1;
+                }
+            }
+        }
+        std::mem::take(self.gamma.get_mut())
+    }
     fn infer(&self, tag: &Tag) -> Option<Type> {
         match unsafe { (*self.gamma.get()).entry(tag.clone()) } {
             // If a tag has been inferred, return its type directly
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 214d2fb..c26aa3f 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -11,7 +11,7 @@ mod semact;
 
 use std::{
     collections::{HashMap, VecDeque},
-    ops::{Deref},
+    ops::Deref,
 };
 
 use quote::format_ident;
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index 0e137ad..0dfd64b 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -10,7 +10,6 @@ use std::collections::HashMap;
 
 use super::Tag;
 
-
 pub type SemActTable = HashMap<Tag, SemAct>;
 
 ///

From d2d45b85cc28938fbdb7310fefa50cdd497af2f3 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Thu, 27 Jul 2023 03:04:35 -0400
Subject: [PATCH 21/42] address QC's method

---
 pag-parser2/src/frontend/ast.rs   |  9 +----
 pag-parser2/src/frontend/parse.rs | 24 ++---------
 pag-parser2/src/nf/inference.rs   | 67 +++++++++++++++++--------------
 pag-parser2/src/nf/mod.rs         |  2 +-
 4 files changed, 43 insertions(+), 59 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 0290189..8de4969 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -21,7 +21,7 @@ pub struct LexerDef {
 }
 
 pub struct ParserDef {
-    pub ty: TypeAnnotation,
+    pub ty: syn::Type,
     pub rules: Vec<ParserRule>,
 }
 
@@ -29,16 +29,11 @@ pub struct ParserRule {
     pub vars: Vec<VarBinding>,
     pub action: Option<syn::Block>,
 }
-#[derive(Clone)]
-pub enum TypeAnnotation {
-    Concrete(syn::Type),
-    HigherKind(syn::Path),
-}
 
 pub struct VarBinding {
     pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
-    pub ty: Option<TypeAnnotation>,
+    pub ty: Option<syn::Type>,
 }
 
 // TODO: how to express "bottom" & "any"?
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 43570f2..093842c 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -99,7 +99,7 @@ impl Parse for ParserDef {
     // (":" syn::Type)? = (ParserRule)|+
     fn parse(input: ParseStream) -> Result<Self> {
         let ty = match input.parse::<Token![:]>() {
-            Ok(_) => input.parse::<TypeAnnotation>()?,
+            Ok(_) => input.parse::<syn::Type>()?,
             Err(_) => parse_quote!(&'src str),
         };
 
@@ -135,18 +135,6 @@ impl Parse for ParserRule {
     }
 }
 
-impl Parse for TypeAnnotation {
-    fn parse(input: ParseStream) -> Result<Self> {
-        if input.peek(Token![@]) {
-            input.parse::<Token![@]>()?;
-            let path = input.parse::<syn::Path>()?;
-            Ok(Self::HigherKind(path))
-        } else {
-            Ok(Self::Concrete(input.parse::<syn::Type>()?))
-        }
-    }
-}
-
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> Result<Self> {
@@ -162,7 +150,7 @@ impl Parse for VarBinding {
 
             if content.peek(Token![:]) {
                 content.parse::<Token![:]>()?;
-                ty = Some(content.parse::<TypeAnnotation>()?);
+                ty = Some(content.parse::<syn::Type>()?);
             }
 
             if !content.is_empty() {
@@ -375,12 +363,6 @@ mod test {
         syn::parse_str::<ParserExpr>(r#"A? b c* D+ F?"#).unwrap();
     }
 
-    #[test]
-    fn test_parser_type_annotatopn() {
-        syn::parse_str::<TypeAnnotation>(r#"@Vec"#).unwrap();
-        syn::parse_str::<TypeAnnotation>(r#"Vec<u128>"#).unwrap();
-    }
-
     #[test]
     fn test_full() {
         syn::parse_str::<Ast>(
@@ -394,7 +376,7 @@ mod test {
             ATOM   = ALPHA (ALPHA | DIGIT)*;
             %skip  = (" " | "\t" | "\n" | "\r")+;
 
-            compound: SExp = LPAREN sexp+[sexp:@Vec] RPAREN { SExp::Compound(sexp) };
+            compound: SExp = LPAREN sexp+[sexp:Vec<_>] RPAREN { SExp::Compound(sexp) };
             atom    : SExp = ATOM[atom] { SExp::Atom(atom) };
             sexp    : SExp = compound
                            | atom;
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index 98b7b0c..75dd0a3 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -78,18 +78,24 @@ use std::{
 
 use syn::{parse_quote, Type};
 
-use crate::frontend::TypeAnnotation;
-
 use super::{
     semact::{SemAct, SemActTable},
     NormalForm, Tag,
 };
 
+#[derive(Clone)]
+pub enum InferredType {
+    Concrete(Type),
+    Collector(Box<Self>),
+    Option(Box<Self>),
+    Tuple(Vec<Self>),
+}
+
 pub struct InferenceContext<'a> {
     /// Typed tags
-    gamma: UnsafeCell<HashMap<Tag, Type>>,
-    /// Type annotations from user
-    annotations: &'a HashMap<Tag, TypeAnnotation>,
+    gamma: UnsafeCell<HashMap<Tag, InferredType>>,
+    /// Type annotations from user (for toplevel)
+    annotations: &'a HashMap<Tag, Type>,
     /// Semantic action table
     semact: &'a SemActTable,
     /// Fully normalized terms
@@ -98,7 +104,7 @@ pub struct InferenceContext<'a> {
 impl<'a> InferenceContext<'a> {
     /// Create a new inference context
     pub fn new(
-        annotations: &'a HashMap<Tag, TypeAnnotation>,
+        annotations: &'a HashMap<Tag, Type>,
         semact: &'a SemActTable,
         nforms: &'a HashMap<Tag, Vec<NormalForm>>,
     ) -> Self {
@@ -109,7 +115,7 @@ impl<'a> InferenceContext<'a> {
             nforms,
         }
     }
-    fn infer_gather<'i, I: Iterator<Item = &'i Tag>>(&self, mut tags: I) -> Option<Type> {
+    fn infer_gather<'i, I: Iterator<Item = &'i Tag>>(&self, mut tags: I) -> Option<InferredType> {
         if let Some(tag) = tags.next() {
             let mut types = vec![self.infer(tag)?];
             for t in tags {
@@ -122,15 +128,15 @@ impl<'a> InferenceContext<'a> {
                 Some(types.pop().unwrap())
             } else {
                 // Otherwise, wrap in a tuple
-                Some(parse_quote!((#(#types),*)))
+                Some(InferredType::Tuple(types))
             }
         } else {
             // no field, unit type
-            Some(parse_quote!(()))
+            Some(InferredType::Concrete(parse_quote! {()}))
         }
     }
     /// try infer all types, but may fail with incomplete type information.
-    pub fn infer_all_types(mut self) -> HashMap<Tag, Type> {
+    pub fn infer_all_types(mut self) -> HashMap<Tag, InferredType> {
         let mut typed = 0;
         while typed < self.nforms.len() {
             typed = 0;
@@ -142,29 +148,30 @@ impl<'a> InferenceContext<'a> {
         }
         std::mem::take(self.gamma.get_mut())
     }
-    fn infer(&self, tag: &Tag) -> Option<Type> {
+    fn infer(&self, tag: &Tag) -> Option<InferredType> {
         match unsafe { (*self.gamma.get()).entry(tag.clone()) } {
             // If a tag has been inferred, return its type directly
             Entry::Occupied(entry) => Some(entry.get().clone()),
             Entry::Vacant(slot) => Some(
                 slot.insert({
                     // If a concrete type annotation is provided, use it directly
-                    if let Some(x) = self.annotations.get(tag).and_then(|anno| match anno {
-                        TypeAnnotation::Concrete(ty) => Some(ty.clone()),
-                        _ => None,
-                    }) {
-                        x
+                    if let Some(x) = self.annotations.get(tag) {
+                        InferredType::Concrete(x.clone())
                     } else {
                         let semact = self.semact.get(tag);
                         match semact {
                             // No semantic action, the type is unit
-                            None => parse_quote!(()),
+                            None => InferredType::Concrete(parse_quote!(())),
                             // Token semantic action, the type is Span
-                            Some(SemAct::Token) => parse_quote!(::pag_runtime::Span<'src>),
+                            Some(SemAct::Token) => {
+                                InferredType::Concrete(parse_quote!(::pag_runtime::Span<'src>))
+                            }
                             // Customized routine without type annotation -- inference failed
                             Some(SemAct::CustomizedRoutine(..)) => return None,
                             // Nested routine for one or more, the type is unit.
-                            Some(SemAct::OneOrMoreNested) => parse_quote!(()),
+                            Some(SemAct::OneOrMoreNested) => {
+                                InferredType::Concrete(parse_quote!(()))
+                            }
                             Some(SemAct::Gather) => {
                                 let nfs = self.nforms.get(tag)?;
                                 let mut inferred = None;
@@ -180,15 +187,17 @@ impl<'a> InferenceContext<'a> {
                                 }
                                 inferred?
                             }
-                            Some(SemAct::ZeroOrMore) | Some(SemAct::Option) | Some(SemAct::OneOrMoreToplevel) => {
+                            Some(SemAct::ZeroOrMore)
+                            | Some(SemAct::Option)
+                            | Some(SemAct::OneOrMoreToplevel) => {
                                 let nfs = self.nforms.get(tag)?;
-                                let TypeAnnotation::HigherKind(path) = self
-                                    .annotations.get(tag).cloned().unwrap_or_else(||
-                                        if matches!(semact, Some(SemAct::Option)) {
-                                            TypeAnnotation::HigherKind(parse_quote!(::std::option::Option))
-                                        } else {
-                                        TypeAnnotation::HigherKind(parse_quote!(::std::collections::VecDeque)) })
-                                    else { unreachable!("must be higher kind type") };
+                                let mapper = |ty: InferredType| {
+                                    if matches!(semact, Some(SemAct::Option)) {
+                                        InferredType::Option(Box::new(ty.clone()))
+                                    } else {
+                                        InferredType::Collector(Box::new(ty.clone()))
+                                    }
+                                };
                                 let mut inferred = None;
                                 // find first subexpression that fulfills inference
                                 for i in nfs.iter() {
@@ -209,9 +218,7 @@ impl<'a> InferenceContext<'a> {
                                     if let Some(gather_type) =
                                         self.infer_gather(visible.into_iter().map(|x| x.1))
                                     {
-                                        inferred.replace(
-                                            parse_quote!(#path<#gather_type>),
-                                        );
+                                        inferred.replace(mapper(gather_type));
                                         break;
                                     }
                                 }
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index c26aa3f..ea57f14 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -11,7 +11,7 @@ mod semact;
 
 use std::{
     collections::{HashMap, VecDeque},
-    ops::Deref,
+    ops::{ControlFlow, Deref},
 };
 
 use quote::format_ident;

From e670899ec84889f2085d3a5a9714d0dd6191e476 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Thu, 27 Jul 2023 09:28:43 -0400
Subject: [PATCH 22/42] never coding again in the midnight

---
 pag-parser2/src/nf/inference.rs | 163 ++++++++++++++++----------------
 pag-parser2/src/nf/mod.rs       |   2 +-
 2 files changed, 81 insertions(+), 84 deletions(-)

diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index 75dd0a3..7838780 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -71,10 +71,7 @@
 // -------------------
 // Γ ⊢ x : η
 
-use std::{
-    cell::UnsafeCell,
-    collections::{hash_map::Entry, HashMap},
-};
+use std::collections::{HashMap};
 
 use syn::{parse_quote, Type};
 
@@ -93,7 +90,7 @@ pub enum InferredType {
 
 pub struct InferenceContext<'a> {
     /// Typed tags
-    gamma: UnsafeCell<HashMap<Tag, InferredType>>,
+    gamma: HashMap<Tag, InferredType>,
     /// Type annotations from user (for toplevel)
     annotations: &'a HashMap<Tag, Type>,
     /// Semantic action table
@@ -109,13 +106,16 @@ impl<'a> InferenceContext<'a> {
         nforms: &'a HashMap<Tag, Vec<NormalForm>>,
     ) -> Self {
         Self {
-            gamma: UnsafeCell::new(HashMap::new()),
+            gamma: HashMap::new(),
             annotations,
             semact,
             nforms,
         }
     }
-    fn infer_gather<'i, I: Iterator<Item = &'i Tag>>(&self, mut tags: I) -> Option<InferredType> {
+    fn infer_gather<'i, I: Iterator<Item = &'i Tag>>(
+        &mut self,
+        mut tags: I,
+    ) -> Option<InferredType> {
         if let Some(tag) = tags.next() {
             let mut types = vec![self.infer(tag)?];
             for t in tags {
@@ -146,89 +146,86 @@ impl<'a> InferenceContext<'a> {
                 }
             }
         }
-        std::mem::take(self.gamma.get_mut())
+        self.gamma
     }
-    fn infer(&self, tag: &Tag) -> Option<InferredType> {
-        match unsafe { (*self.gamma.get()).entry(tag.clone()) } {
-            // If a tag has been inferred, return its type directly
-            Entry::Occupied(entry) => Some(entry.get().clone()),
-            Entry::Vacant(slot) => Some(
-                slot.insert({
-                    // If a concrete type annotation is provided, use it directly
-                    if let Some(x) = self.annotations.get(tag) {
-                        InferredType::Concrete(x.clone())
-                    } else {
-                        let semact = self.semact.get(tag);
-                        match semact {
-                            // No semantic action, the type is unit
-                            None => InferredType::Concrete(parse_quote!(())),
-                            // Token semantic action, the type is Span
-                            Some(SemAct::Token) => {
-                                InferredType::Concrete(parse_quote!(::pag_runtime::Span<'src>))
+    fn infer(&mut self, tag: &Tag) -> Option<InferredType> {
+        if let Some(x) = self.gamma.get(tag) {
+            return Some(x.clone());
+        }
+        let target =
+             // If a concrete type annotation is provided, use it directly
+             if let Some(x) = self.annotations.get(tag) {
+                InferredType::Concrete(x.clone())
+            } else {
+                let semact = self.semact.get(tag);
+                match semact {
+                    // No semantic action, the type is unit
+                    None => InferredType::Concrete(parse_quote!(())),
+                    // Token semantic action, the type is Span
+                    Some(SemAct::Token) => {
+                        InferredType::Concrete(parse_quote!(::pag_runtime::Span<'src>))
+                    }
+                    // Customized routine without type annotation -- inference failed
+                    Some(SemAct::CustomizedRoutine(..)) => return None,
+                    // Nested routine for one or more, the type is unit.
+                    Some(SemAct::OneOrMoreNested) => {
+                        InferredType::Concrete(parse_quote!(()))
+                    }
+                    Some(SemAct::Gather) => {
+                        let nfs = self.nforms.get(tag)?;
+                        let mut inferred = None;
+                        // find first subexpression that fulfills inference
+                        for i in nfs.iter() {
+                            let visible = i.visible_bindings(0);
+                            if let Some(gather_type) =
+                                self.infer_gather(visible.into_iter().map(|x| x.1))
+                            {
+                                inferred.replace(gather_type);
+                                break;
                             }
-                            // Customized routine without type annotation -- inference failed
-                            Some(SemAct::CustomizedRoutine(..)) => return None,
-                            // Nested routine for one or more, the type is unit.
-                            Some(SemAct::OneOrMoreNested) => {
-                                InferredType::Concrete(parse_quote!(()))
+                        }
+                        inferred?
+                    }
+                    Some(SemAct::ZeroOrMore)
+                    | Some(SemAct::Option)
+                    | Some(SemAct::OneOrMoreToplevel) => {
+                        let nfs = self.nforms.get(tag)?;
+                        let mapper = |ty: InferredType| {
+                            if matches!(semact, Some(SemAct::Option)) {
+                                InferredType::Option(Box::new(ty))
+                            } else {
+                                InferredType::Collector(Box::new(ty))
                             }
-                            Some(SemAct::Gather) => {
-                                let nfs = self.nforms.get(tag)?;
-                                let mut inferred = None;
-                                // find first subexpression that fulfills inference
-                                for i in nfs.iter() {
-                                    let visible = i.visible_bindings(0);
-                                    if let Some(gather_type) =
-                                        self.infer_gather(visible.into_iter().map(|x| x.1))
-                                    {
-                                        inferred.replace(gather_type);
-                                        break;
-                                    }
+                        };
+                        let mut inferred = None;
+                        // find first subexpression that fulfills inference
+                        for i in nfs.iter() {
+                            // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
+                            if let NormalForm::Empty(x) = i {
+                                if x.is_empty() {
+                                    continue;
                                 }
-                                inferred?
                             }
-                            Some(SemAct::ZeroOrMore)
-                            | Some(SemAct::Option)
-                            | Some(SemAct::OneOrMoreToplevel) => {
-                                let nfs = self.nforms.get(tag)?;
-                                let mapper = |ty: InferredType| {
-                                    if matches!(semact, Some(SemAct::Option)) {
-                                        InferredType::Option(Box::new(ty.clone()))
-                                    } else {
-                                        InferredType::Collector(Box::new(ty.clone()))
-                                    }
-                                };
-                                let mut inferred = None;
-                                // find first subexpression that fulfills inference
-                                for i in nfs.iter() {
-                                    // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
-                                    if let NormalForm::Empty(x) = i {
-                                        if x.is_empty() {
-                                            continue;
-                                        }
-                                    }
-                                    // skip the trailing part of OneOrMoreToplevel
-                                    let visible = i.visible_bindings(
-                                        if matches!(semact, Some(SemAct::OneOrMoreToplevel)) {
-                                            1
-                                        } else {
-                                            0
-                                        },
-                                    );
-                                    if let Some(gather_type) =
-                                        self.infer_gather(visible.into_iter().map(|x| x.1))
-                                    {
-                                        inferred.replace(mapper(gather_type));
-                                        break;
-                                    }
-                                }
-                                inferred?
+                            // skip the trailing part of OneOrMoreToplevel
+                            let visible = i.visible_bindings(
+                                if matches!(semact, Some(SemAct::OneOrMoreToplevel)) {
+                                    1
+                                } else {
+                                    0
+                                },
+                            );
+                            if let Some(gather_type) =
+                                self.infer_gather(visible.into_iter().map(|x| x.1))
+                            {
+                                inferred.replace(mapper(gather_type));
+                                break;
                             }
                         }
+                        inferred?
                     }
-                })
-                .clone(),
-            ),
-        }
+                }
+        };
+        self.gamma.insert(tag.clone(), target.clone());
+        Some(target)
     }
 }
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index ea57f14..c26aa3f 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -11,7 +11,7 @@ mod semact;
 
 use std::{
     collections::{HashMap, VecDeque},
-    ops::{ControlFlow, Deref},
+    ops::Deref,
 };
 
 use quote::format_ident;

From 9654bfc60ca8395ecd4f3f6a350636623af8dad0 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Thu, 27 Jul 2023 02:52:49 +0800
Subject: [PATCH 23/42] add simd tail handling

---
 pag-lexer/src/lookahead.rs | 42 ++++++++++++++++++++------------------
 pag-lexer/src/vector.rs    |  8 ++++++--
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index 52b236d..61e93d8 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -33,17 +33,13 @@ fn generate_lut_routine(index: usize) -> TokenStream {
 
 fn byte_simd(byte: u8) -> TokenStream {
     let byte = byte_char(byte);
-    quote! {
-        data.simd_eq(u8x16::splat(#byte))
-    }
+    quote! { data.simd_eq(u8x16::splat(#byte)) }
 }
 
 fn range_simd(min: u8, max: u8) -> TokenStream {
     let min = byte_char(min);
     let max = byte_char(max);
-    quote! {
-        data.simd_ge(u8x16::splat(#min)) & data.simd_le(u8x16::splat(#max))
-    }
+    quote! { data.simd_ge(u8x16::splat(#min)) & data.simd_le(u8x16::splat(#max)) }
 }
 
 fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
@@ -60,25 +56,31 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
         .reduce(|acc, x| quote! { #acc | #x })
         .map(|x| {
             if cfg!(target_arch = "aarch64") {
-                quote! {{
-                    let mask : u128 = unsafe { core::mem::transmute(#x) };
-                    mask.#count_act() / 8
-                }}
+                quote! { unsafe { core::mem::transmute::<_, u128>(#x).#count_act() / 8 } }
             } else {
-                quote! {
-                    (#x).to_bitmask().#count_act()
-                }
+                quote! { (#x).to_bitmask().#count_act() }
             }
         });
+        let tail_act = match kind {
+            Kind::Positive => quote! {
+                while matches!(input.get(idx), Some(#intervals)) { idx += 1; }
+            },
+            Kind::Negative => quote! {
+                while !matches!(input.get(idx), Some(#intervals) | None) { idx += 1; }
+            },
+        };
     quote! {
-        for i in input[idx..].array_chunks::<16>() {
-            use core::simd::*;
-            let data = u8x16::from_slice(i);
-            let idx_offset = #idx_offset;
-            idx += idx_offset as usize;
-            if core::intrinsics::unlikely(idx_offset != 16) {
-                break;
+        'lookahead: {
+            for i in input[idx..].array_chunks::<16>() {
+                use core::simd::*;
+                let data = u8x16::from_slice(i);
+                let idx_offset = #idx_offset;
+                idx += idx_offset as usize;
+                if core::intrinsics::unlikely(idx_offset != 16) {
+                    break 'lookahead;
+                }
             }
+            #tail_act
         }
     }
 }
diff --git a/pag-lexer/src/vector.rs b/pag-lexer/src/vector.rs
index c513811..61488ae 100644
--- a/pag-lexer/src/vector.rs
+++ b/pag-lexer/src/vector.rs
@@ -151,16 +151,20 @@ impl Vector {
                     },
                 };
             }
+            let lookahead = optimizer.generate_lookahead(&dfa, state);
             let transitions = info.transitions.iter().map(|(interval, target)| {
                 if leaf_states.contains(target) {
                     let rule_idx = target.last_success.unwrap();
                     let on_success = &success_actions[rule_idx];
                     return quote! { Some(#interval) => { cursor = idx + 1; #on_success }, };
                 }
-                let target_label = format_ident!("S{}", dfa[target].state_id);
+                let target_id = dfa[target].state_id;
+                if lookahead.is_some() && info.state_id == target_id {
+                    return quote! {};
+                }
+                let target_label = format_ident!("S{}", target_id);
                 quote! { Some(#interval) => state = State::#target_label, }
             });
-            let lookahead = optimizer.generate_lookahead(&dfa, state);
             let otherwise = state
                 .last_success
                 .and_then(|x| success_actions.get(x))

From e0622ee22250d8ddee76704c90f4a5df9c2d188e Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Thu, 27 Jul 2023 03:12:09 +0800
Subject: [PATCH 24/42] adjust lookahead generation

---
 README.md                         |  2 --
 benches/csv/src/lib.rs            |  3 +--
 benches/json/src/lib.rs           |  3 +--
 pag-lexer/src/lib.rs              |  3 +--
 pag-lexer/src/lookahead.rs        | 44 +++++++++++++++----------------
 pag-parser/src/fusion.rs          |  2 +-
 tests/arith-expr/src/lib.rs       |  3 +--
 tests/sexpr-calculator/src/lib.rs |  3 +--
 tests/tokenizer/src/lib.rs        |  3 +--
 9 files changed, 29 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 44cf57e..1aa2db3 100644
--- a/README.md
+++ b/README.md
@@ -121,8 +121,6 @@ For some reasons (mostly performance issues), only nightly rust (1.71+) is suppo
 should be annotated with
 ```rust
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
 ```
 </details>
 
diff --git a/benches/csv/src/lib.rs b/benches/csv/src/lib.rs
index a598426..d0b5b15 100644
--- a/benches/csv/src/lib.rs
+++ b/benches/csv/src/lib.rs
@@ -1,6 +1,5 @@
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
+
 mod parser;
 
 pub use parser::parse;
diff --git a/benches/json/src/lib.rs b/benches/json/src/lib.rs
index 92b0dbe..aa04492 100644
--- a/benches/json/src/lib.rs
+++ b/benches/json/src/lib.rs
@@ -1,6 +1,5 @@
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
+
 mod parser;
 
 pub use parser::parse;
diff --git a/pag-lexer/src/lib.rs b/pag-lexer/src/lib.rs
index ab93457..45a0c1c 100644
--- a/pag-lexer/src/lib.rs
+++ b/pag-lexer/src/lib.rs
@@ -5,9 +5,8 @@
 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
+
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
 
 pub mod congruence;
 pub mod derivative;
diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index 61e93d8..8f6c825 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -61,26 +61,24 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
                 quote! { (#x).to_bitmask().#count_act() }
             }
         });
-        let tail_act = match kind {
-            Kind::Positive => quote! {
-                while matches!(input.get(idx), Some(#intervals)) { idx += 1; }
-            },
-            Kind::Negative => quote! {
-                while !matches!(input.get(idx), Some(#intervals) | None) { idx += 1; }
-            },
-        };
+    let tail_match = match kind {
+        Kind::Positive => quote! { matches!(input.get(idx), Some(#intervals)) },
+        Kind::Negative => quote! { !matches!(input.get(idx), Some(#intervals) | None) },
+    };
     quote! {
         'lookahead: {
-            for i in input[idx..].array_chunks::<16>() {
+            for chunk in input[idx..].chunks_exact(16) {
                 use core::simd::*;
-                let data = u8x16::from_slice(i);
+                let data = u8x16::from_slice(chunk);
                 let idx_offset = #idx_offset;
                 idx += idx_offset as usize;
-                if core::intrinsics::unlikely(idx_offset != 16) {
+                if idx_offset != 16 {
                     break 'lookahead;
                 }
             }
-            #tail_act
+            while #tail_match {
+                idx += 1;
+            }
         }
     }
 }
@@ -141,20 +139,22 @@ impl LoopOptimizer {
     }
 
     pub fn generate_lookahead(&mut self, dfa: &DfaTable, state: &DfaState) -> Option<TokenStream> {
-        let limit = 4;
+        let limit = 8;
 
         let positives = direct_self_loops(dfa, state)?;
-        if estimated_cost(&positives) <= limit {
-            return Some(generate_lookahead_routine(&positives, Kind::Positive));
-        }
-
         let negatives = positives.complement()?;
-        if estimated_cost(&negatives) <= limit {
-            return Some(generate_lookahead_routine(&negatives, Kind::Negative));
-        }
+        let pos_cost = estimated_cost(&positives);
+        let neg_cost = estimated_cost(&negatives);
 
-        let index = self.assign_table(&negatives);
-        Some(generate_lut_routine(index))
+        if pos_cost.min(neg_cost) > limit {
+            let index = self.assign_table(&negatives);
+            return Some(generate_lut_routine(index));
+        }
+        if pos_cost < neg_cost {
+            Some(generate_lookahead_routine(&positives, Kind::Positive))
+        } else {
+            Some(generate_lookahead_routine(&negatives, Kind::Negative))
+        }
     }
 }
 
diff --git a/pag-parser/src/fusion.rs b/pag-parser/src/fusion.rs
index a6a289b..12bef15 100644
--- a/pag-parser/src/fusion.rs
+++ b/pag-parser/src/fusion.rs
@@ -96,7 +96,7 @@ fn generate_error() -> TokenStream {
                 let expect = match self.expecting {
                     [head] => head.to_string(),
                     [init @ .., last] => format!("{} or {last}", init.join(", ")),
-                    _ => unsafe { std::intrinsics::unreachable() },
+                    _ => unsafe { std::hint::unreachable_unchecked() },
                 };
                 write!(
                     f,
diff --git a/tests/arith-expr/src/lib.rs b/tests/arith-expr/src/lib.rs
index f6def5e..c59f0e4 100644
--- a/tests/arith-expr/src/lib.rs
+++ b/tests/arith-expr/src/lib.rs
@@ -1,6 +1,5 @@
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
+
 use std::num::Wrapping;
 
 mod parser;
diff --git a/tests/sexpr-calculator/src/lib.rs b/tests/sexpr-calculator/src/lib.rs
index e3a768c..ee7bfc0 100644
--- a/tests/sexpr-calculator/src/lib.rs
+++ b/tests/sexpr-calculator/src/lib.rs
@@ -1,6 +1,5 @@
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
+
 use std::num::Wrapping;
 
 mod parser;
diff --git a/tests/tokenizer/src/lib.rs b/tests/tokenizer/src/lib.rs
index 0b27ef4..650fedf 100644
--- a/tests/tokenizer/src/lib.rs
+++ b/tests/tokenizer/src/lib.rs
@@ -1,6 +1,5 @@
 #![feature(portable_simd)]
-#![feature(core_intrinsics)]
-#![feature(array_chunks)]
+
 mod comment_and_string;
 mod common_prefix;
 mod generated;

From 2eaac6d7bb0819081251e1f657da5a5e441e6821 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Thu, 27 Jul 2023 23:01:29 +0800
Subject: [PATCH 25/42] try to solve aarch64 performance regression

---
 benches/json/Cargo.toml    |  2 +-
 pag-lexer/src/lookahead.rs | 84 +++++++++++++++++++++++++-------------
 pag-lexer/src/vector.rs    |  1 +
 3 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/benches/json/Cargo.toml b/benches/json/Cargo.toml
index c72e9d6..84b3803 100644
--- a/benches/json/Cargo.toml
+++ b/benches/json/Cargo.toml
@@ -17,7 +17,7 @@ lalrpop = "0.20.0"
 [dev-dependencies]
 criterion = { version = "0.4", features = ["html_reports"] }
 snmalloc-rs = { version = "0.3", features = ["build_cc"] }
-pest = { version = "2.5.7", features = [ "std", "memchr" ] }
+pest = { version = "2.5.7", features = ["std", "memchr"] }
 pest_derive = "2.5.7"
 lalrpop-util = { version = "0.20.0", features = ["lexer", "unicode"] }
 logos = "0.13.0"
diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index 8f6c825..bc120d3 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -31,36 +31,27 @@ fn generate_lut_routine(index: usize) -> TokenStream {
     }
 }
 
-fn byte_simd(byte: u8) -> TokenStream {
-    let byte = byte_char(byte);
-    quote! { data.simd_eq(u8x16::splat(#byte)) }
-}
-
-fn range_simd(min: u8, max: u8) -> TokenStream {
-    let min = byte_char(min);
-    let max = byte_char(max);
-    quote! { data.simd_ge(u8x16::splat(#min)) & data.simd_le(u8x16::splat(#max)) }
-}
-
+#[cfg(not(target_arch = "aarch64"))]
 fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
-    let count_act = match kind {
-        Kind::Positive => quote! { trailing_ones },
-        Kind::Negative => quote! { trailing_zeros },
-    };
-    let idx_offset = intervals
+    let mask = intervals
         .iter()
         .map(|&Interval(l, r)| match l == r {
-            true => byte_simd(l),
-            false => range_simd(l, r),
+            true => {
+                let l = byte_char(l);
+                quote! { data.simd_eq(u8x16::splat(#l)) }
+            }
+            false => {
+                let l = byte_char(l);
+                let r = byte_char(r);
+                quote! { data.simd_ge(u8x16::splat(#l)) & data.simd_le(u8x16::splat(#r)) }
+            }
         })
         .reduce(|acc, x| quote! { #acc | #x })
-        .map(|x| {
-            if cfg!(target_arch = "aarch64") {
-                quote! { unsafe { core::mem::transmute::<_, u128>(#x).#count_act() / 8 } }
-            } else {
-                quote! { (#x).to_bitmask().#count_act() }
-            }
-        });
+        .unwrap();
+    let count_act = match kind {
+        Kind::Positive => quote! { trailing_ones },
+        Kind::Negative => quote! { trailing_zeros },
+    };
     let tail_match = match kind {
         Kind::Positive => quote! { matches!(input.get(idx), Some(#intervals)) },
         Kind::Negative => quote! { !matches!(input.get(idx), Some(#intervals) | None) },
@@ -70,7 +61,8 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
             for chunk in input[idx..].chunks_exact(16) {
                 use core::simd::*;
                 let data = u8x16::from_slice(chunk);
-                let idx_offset = #idx_offset;
+                let mask = #mask;
+                let idx_offset = mask.to_bitmask().#count_act();
                 idx += idx_offset as usize;
                 if idx_offset != 16 {
                     break 'lookahead;
@@ -83,10 +75,46 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
     }
 }
 
+#[cfg(target_arch = "aarch64")]
+fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
+    let mask = intervals
+        .iter()
+        .map(|&Interval(l, r)| match l == r {
+            true => {
+                let l = byte_char(l);
+                quote! { data.simd_eq(u8x16::splat(#l)) }
+            }
+            false => {
+                let l = byte_char(l);
+                let r = byte_char(r);
+                quote! { data.simd_ge(u8x16::splat(#l)) & data.simd_le(u8x16::splat(#r)) }
+            }
+        })
+        .reduce(|acc, x| quote! { #acc | #x })
+        .unwrap();
+    let count_act = match kind {
+        Kind::Positive => quote! { trailing_ones },
+        Kind::Negative => quote! { trailing_zeros },
+    };
+    quote! {
+        for chunk in input[idx..].chunks_exact(16) {
+            use core::simd::*;
+            let data = u8x16::from_slice(chunk);
+            let mask = #mask;
+            let mask = unsafe { core::mem::transmute::<_, u128>(mask) };
+            let idx_offset = mask.#count_act() / 8;
+            idx += idx_offset as usize;
+            if idx_offset != 16 {
+                break;
+            }
+        }
+    }
+}
+
 fn estimated_cost(intervals: &Intervals) -> u32 {
     intervals
         .iter()
-        .map(|Interval(l, r)| if l == r { 1 } else { 2 })
+        .map(|Interval(l, r)| 1 + (l != r) as u32)
         .sum()
 }
 
@@ -139,7 +167,7 @@ impl LoopOptimizer {
     }
 
     pub fn generate_lookahead(&mut self, dfa: &DfaTable, state: &DfaState) -> Option<TokenStream> {
-        let limit = 8;
+        let limit = 4;
 
         let positives = direct_self_loops(dfa, state)?;
         let negatives = positives.complement()?;
diff --git a/pag-lexer/src/vector.rs b/pag-lexer/src/vector.rs
index 61488ae..e23ee50 100644
--- a/pag-lexer/src/vector.rs
+++ b/pag-lexer/src/vector.rs
@@ -159,6 +159,7 @@ impl Vector {
                     return quote! { Some(#interval) => { cursor = idx + 1; #on_success }, };
                 }
                 let target_id = dfa[target].state_id;
+                #[cfg(not(target_arch = "aarch64"))]
                 if lookahead.is_some() && info.state_id == target_id {
                     return quote! {};
                 }

From 91b97100f53a419c92ec8ae9519f2870b476b3b7 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Sun, 30 Jul 2023 04:03:10 +0800
Subject: [PATCH 26/42] optimize lut lookahead

---
 pag-lexer/src/lookahead.rs | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index bc120d3..b72d17c 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -19,15 +19,36 @@ enum Kind {
 }
 
 fn generate_lut_routine(index: usize) -> TokenStream {
+    // TODO: put the code to `pag_util::lookahead_lut` to reduce stack size under debug build
     let table = index / 8;
     let shift = index % 8;
     let bit = 1u8 << shift;
     quote! {
-        idx = idx
-            + input[idx..]
+        'lookahead: {
+            for chunk in input[idx..].chunks_exact(8) {
+                if GLOBAL_LUT[#table][chunk[0] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[1] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[2] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[3] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[4] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[5] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[6] as usize] & #bit == 0 {
+                if GLOBAL_LUT[#table][chunk[7] as usize] & #bit == 0 {
+                    idx += 8; continue; }
+                    idx += 7; break 'lookahead; }
+                    idx += 6; break 'lookahead; }
+                    idx += 5; break 'lookahead; }
+                    idx += 4; break 'lookahead; }
+                    idx += 3; break 'lookahead; }
+                    idx += 2; break 'lookahead; }
+                    idx += 1; break 'lookahead; }
+                break 'lookahead;
+            }
+            idx += input[idx..]
                 .iter()
                 .position(|x| GLOBAL_LUT[#table][*x as usize] & #bit > 0)
-                .unwrap_or(input.len() - idx);
+                .unwrap_or(input[idx..].len());
+        }
     }
 }
 

From fada7c6f6061519fd2690ef6a7fb519c879fe6b5 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Sun, 30 Jul 2023 07:46:11 +0800
Subject: [PATCH 27/42] fix stack size issue

---
 benches/csv/Cargo.toml            |  1 +
 benches/json/Cargo.toml           |  1 +
 pag-lexer/src/lookahead.rs        | 30 +----------------------
 pag-util/Cargo.toml               | 23 ++++++++++++++++++
 pag-util/src/lib.rs               | 40 +++++++++++++++++++++++++++++++
 tests/arith-expr/Cargo.toml       |  1 +
 tests/sexpr-calculator/Cargo.toml |  1 +
 tests/tokenizer/Cargo.toml        |  1 +
 8 files changed, 69 insertions(+), 29 deletions(-)
 create mode 100644 pag-util/Cargo.toml
 create mode 100644 pag-util/src/lib.rs

diff --git a/benches/csv/Cargo.toml b/benches/csv/Cargo.toml
index e00dc46..8333262 100644
--- a/benches/csv/Cargo.toml
+++ b/benches/csv/Cargo.toml
@@ -7,6 +7,7 @@ publish = false
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
+pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
 rand = { version = "0.8" }
 snmalloc-rs = { version = "0.3", features = ["build_cc"] }
 
diff --git a/benches/json/Cargo.toml b/benches/json/Cargo.toml
index 84b3803..dc705b3 100644
--- a/benches/json/Cargo.toml
+++ b/benches/json/Cargo.toml
@@ -7,6 +7,7 @@ publish = false
 autobenches = false
 
 [dependencies]
+pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
 rand = { version = "0.8" }
 serde_json = "1.0"
 
diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index b72d17c..0950294 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -19,37 +19,9 @@ enum Kind {
 }
 
 fn generate_lut_routine(index: usize) -> TokenStream {
-    // TODO: put the code to `pag_util::lookahead_lut` to reduce stack size under debug build
     let table = index / 8;
     let shift = index % 8;
-    let bit = 1u8 << shift;
-    quote! {
-        'lookahead: {
-            for chunk in input[idx..].chunks_exact(8) {
-                if GLOBAL_LUT[#table][chunk[0] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[1] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[2] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[3] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[4] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[5] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[6] as usize] & #bit == 0 {
-                if GLOBAL_LUT[#table][chunk[7] as usize] & #bit == 0 {
-                    idx += 8; continue; }
-                    idx += 7; break 'lookahead; }
-                    idx += 6; break 'lookahead; }
-                    idx += 5; break 'lookahead; }
-                    idx += 4; break 'lookahead; }
-                    idx += 3; break 'lookahead; }
-                    idx += 2; break 'lookahead; }
-                    idx += 1; break 'lookahead; }
-                break 'lookahead;
-            }
-            idx += input[idx..]
-                .iter()
-                .position(|x| GLOBAL_LUT[#table][*x as usize] & #bit > 0)
-                .unwrap_or(input[idx..].len());
-        }
-    }
+    quote! { idx = ::pag_util::lookahead_lut(input, idx, &GLOBAL_LUT[#table], #shift); }
 }
 
 #[cfg(not(target_arch = "aarch64"))]
diff --git a/pag-util/Cargo.toml b/pag-util/Cargo.toml
new file mode 100644
index 0000000..0a87fdf
--- /dev/null
+++ b/pag-util/Cargo.toml
@@ -0,0 +1,23 @@
+# Copyright (c) 2023 Paguroidea Developers
+#
+# Licensed under the Apache License, Version 2.0
+# <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
+# license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. All files in the project carrying such notice may not be copied,
+# modified, or distributed except according to those terms.
+
+[package]
+name = "pag-util"
+keywords = ["parser", "cfg", "grammar"]
+description = "Parser-lexer fusion generator (utilities)"
+documentation = "https://docs.rs/pag-util/"
+
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+exclude.workspace = true
+categories.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+readme.workspace = true
diff --git a/pag-util/src/lib.rs b/pag-util/src/lib.rs
new file mode 100644
index 0000000..3cf6031
--- /dev/null
+++ b/pag-util/src/lib.rs
@@ -0,0 +1,40 @@
+use std::hint::unreachable_unchecked;
+
+#[doc(hidden)]
+#[inline]
+pub unsafe fn assume(cond: bool) {
+    if !cond {
+        unreachable_unchecked()
+    }
+}
+
+#[doc(hidden)]
+#[inline]
+#[rustfmt::skip]
+pub fn lookahead_lut(input: &[u8], mut idx: usize, table: &[u8; 256], shift: usize) -> usize {
+    let mask = 1 << shift;
+    for chunk in input[idx..].chunks_exact(8) {
+        if table[chunk[0] as usize] & mask == 0 {
+        if table[chunk[1] as usize] & mask == 0 {
+        if table[chunk[2] as usize] & mask == 0 {
+        if table[chunk[3] as usize] & mask == 0 {
+        if table[chunk[4] as usize] & mask == 0 {
+        if table[chunk[5] as usize] & mask == 0 {
+        if table[chunk[6] as usize] & mask == 0 {
+        if table[chunk[7] as usize] & mask == 0 {
+            idx += 8; continue; }
+            idx += 7; return idx; }
+            idx += 6; return idx; }
+            idx += 5; return idx; }
+            idx += 4; return idx; }
+            idx += 3; return idx; }
+            idx += 2; return idx; }
+            idx += 1; return idx; }
+        return idx;
+    }
+    unsafe { assume(idx <= input.len()) };
+    idx + input[idx..]
+        .iter()
+        .position(|x| table[*x as usize] & mask > 0)
+        .unwrap_or(input[idx..].len())
+}
diff --git a/tests/arith-expr/Cargo.toml b/tests/arith-expr/Cargo.toml
index c761cf0..d495c11 100644
--- a/tests/arith-expr/Cargo.toml
+++ b/tests/arith-expr/Cargo.toml
@@ -6,6 +6,7 @@ build = "build.rs"
 publish = false
 
 [dependencies]
+pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
 rand = { version = "0.8" }
 
 [build-dependencies]
diff --git a/tests/sexpr-calculator/Cargo.toml b/tests/sexpr-calculator/Cargo.toml
index 2440e44..cbe7938 100644
--- a/tests/sexpr-calculator/Cargo.toml
+++ b/tests/sexpr-calculator/Cargo.toml
@@ -6,6 +6,7 @@ build = "build.rs"
 publish = false
 
 [dependencies]
+pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
 rand = { version = "0.8" }
 
 [build-dependencies]
diff --git a/tests/tokenizer/Cargo.toml b/tests/tokenizer/Cargo.toml
index 4de8101..d8cc23f 100644
--- a/tests/tokenizer/Cargo.toml
+++ b/tests/tokenizer/Cargo.toml
@@ -6,6 +6,7 @@ build = "build.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
 rand = { version = "0.8" }
 
 [build-dependencies]

From 997a0fdfd7927fa7986d146e7436e389b0dc6a29 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Sun, 30 Jul 2023 08:16:45 +0800
Subject: [PATCH 28/42] add assume all over the generated code

---
 pag-lexer/src/lookahead.rs | 2 ++
 pag-lexer/src/vector.rs    | 1 +
 pag-util/src/lib.rs        | 1 +
 3 files changed, 4 insertions(+)

diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index 0950294..c1cad8c 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -51,6 +51,7 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
     };
     quote! {
         'lookahead: {
+            unsafe { ::pag_util::assume(idx <= input.len()) };
             for chunk in input[idx..].chunks_exact(16) {
                 use core::simd::*;
                 let data = u8x16::from_slice(chunk);
@@ -90,6 +91,7 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
         Kind::Negative => quote! { trailing_zeros },
     };
     quote! {
+        unsafe { ::pag_util::assume(idx <= input.len()) };
         for chunk in input[idx..].chunks_exact(16) {
             use core::simd::*;
             let data = u8x16::from_slice(chunk);
diff --git a/pag-lexer/src/vector.rs b/pag-lexer/src/vector.rs
index e23ee50..11421bf 100644
--- a/pag-lexer/src/vector.rs
+++ b/pag-lexer/src/vector.rs
@@ -142,6 +142,7 @@ impl Vector {
                 let on_success = &success_actions[rule_idx];
                 return quote! {
                     State::#label => {
+                        unsafe { ::pag_util::assume(idx <= input.len()) };
                         if input[idx..].starts_with(#literal) {
                             cursor = idx + #length;
                             #on_success
diff --git a/pag-util/src/lib.rs b/pag-util/src/lib.rs
index 3cf6031..6e2520f 100644
--- a/pag-util/src/lib.rs
+++ b/pag-util/src/lib.rs
@@ -13,6 +13,7 @@ pub unsafe fn assume(cond: bool) {
 #[rustfmt::skip]
 pub fn lookahead_lut(input: &[u8], mut idx: usize, table: &[u8; 256], shift: usize) -> usize {
     let mask = 1 << shift;
+    unsafe { assume(idx <= input.len()) };
     for chunk in input[idx..].chunks_exact(8) {
         if table[chunk[0] as usize] & mask == 0 {
         if table[chunk[1] as usize] & mask == 0 {

From b701d640feefaffacfd41e7ec6cefda73ec7edee Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Thu, 27 Jul 2023 16:22:44 -0400
Subject: [PATCH 29/42] refactor semact structure

---
 pag-parser2/src/frontend/ast.rs     |  58 ++++++++++++-
 pag-parser2/src/frontend/parse.rs   |   4 +-
 pag-parser2/src/nf/inference.rs     | 127 +++++++++++++---------------
 pag-parser2/src/nf/mod.rs           |  71 ++++++++++++----
 pag-parser2/src/nf/normalization.rs |   1 +
 pag-parser2/src/nf/semact.rs        |  36 +++++++-
 pag-parser2/src/nf/translation.rs   |  64 ++++++++++++++
 7 files changed, 270 insertions(+), 91 deletions(-)
 create mode 100644 pag-parser2/src/nf/normalization.rs
 create mode 100644 pag-parser2/src/nf/translation.rs

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 8de4969..4dce9d9 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -7,6 +7,7 @@
 // modified, or distributed except according to those terms.
 
 use std::collections::HashMap;
+use std::rc::Rc;
 
 pub struct Ast {
     pub entry: syn::Ident,
@@ -15,6 +16,36 @@ pub struct Ast {
     pub parser_map: HashMap<syn::Ident, ParserDef>,
 }
 
+#[derive(Clone)]
+#[repr(transparent)]
+pub struct CustomizedBlock(pub Rc<syn::Block>);
+
+impl PartialEq for CustomizedBlock {
+    fn eq(&self, other: &Self) -> bool {
+        Rc::ptr_eq(&self.0, &other.0)
+    }
+}
+
+impl Eq for CustomizedBlock {}
+
+impl PartialOrd for CustomizedBlock {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Rc::as_ptr(&self.0).partial_cmp(&Rc::as_ptr(&other.0))
+    }
+}
+
+impl Ord for CustomizedBlock {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        Rc::as_ptr(&self.0).cmp(&Rc::as_ptr(&other.0))
+    }
+}
+
+impl std::hash::Hash for CustomizedBlock {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        Rc::as_ptr(&self.0).hash(state)
+    }
+}
+
 pub struct LexerDef {
     pub idx: u32,
     pub expr: LexerExpr,
@@ -27,7 +58,7 @@ pub struct ParserDef {
 
 pub struct ParserRule {
     pub vars: Vec<VarBinding>,
-    pub action: Option<syn::Block>,
+    pub action: Option<CustomizedBlock>,
 }
 
 pub struct VarBinding {
@@ -60,3 +91,28 @@ pub enum ParserExpr {
     ParserRef(syn::Ident),
     Ignore(Box<Self>),
 }
+
+pub struct RightDeepIterator<'a> {
+    seq: Option<&'a LexerExpr>,
+}
+
+impl<'a> From<&'a LexerExpr> for RightDeepIterator<'a> {
+    fn from(expr: &'a LexerExpr) -> Self {
+        Self { seq: Some(expr) }
+    }
+}
+
+impl<'a> Iterator for RightDeepIterator<'a> {
+    type Item = &'a LexerExpr;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.seq {
+            Some(LexerExpr::Seq(a, b)) => {
+                self.seq = Some(b);
+                Some(a)
+            }
+            Some(_) => self.seq.take(),
+            None => None,
+        }
+    }
+}
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 093842c..9b782ad 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -128,7 +128,9 @@ impl Parse for ParserRule {
 
         let mut action = None;
         if input.peek(syn::token::Brace) {
-            action = Some(input.parse::<syn::Block>()?);
+            action = Some(CustomizedBlock(std::rc::Rc::new(
+                input.parse::<syn::Block>()?,
+            )));
         }
 
         Ok(Self { vars, action })
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index 7838780..a474b3d 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -71,14 +71,11 @@
 // -------------------
 // Γ ⊢ x : η
 
-use std::collections::{HashMap};
+use std::collections::HashMap;
 
 use syn::{parse_quote, Type};
 
-use super::{
-    semact::{SemAct, SemActTable},
-    NormalForm, Tag,
-};
+use super::{semact::SemAct, BoundTarget, NormalForm, Tag};
 
 #[derive(Clone)]
 pub enum InferredType {
@@ -93,8 +90,6 @@ pub struct InferenceContext<'a> {
     gamma: HashMap<Tag, InferredType>,
     /// Type annotations from user (for toplevel)
     annotations: &'a HashMap<Tag, Type>,
-    /// Semantic action table
-    semact: &'a SemActTable,
     /// Fully normalized terms
     nforms: &'a HashMap<Tag, Vec<NormalForm>>,
 }
@@ -102,25 +97,31 @@ impl<'a> InferenceContext<'a> {
     /// Create a new inference context
     pub fn new(
         annotations: &'a HashMap<Tag, Type>,
-        semact: &'a SemActTable,
         nforms: &'a HashMap<Tag, Vec<NormalForm>>,
     ) -> Self {
         Self {
             gamma: HashMap::new(),
             annotations,
-            semact,
             nforms,
         }
     }
-    fn infer_gather<'i, I: Iterator<Item = &'i Tag>>(
+    fn infer_gather<'i, I: Iterator<Item = BoundTarget<'i>>>(
         &mut self,
         mut tags: I,
     ) -> Option<InferredType> {
         if let Some(tag) = tags.next() {
-            let mut types = vec![self.infer(tag)?];
+            let mut types = vec![if let BoundTarget::Tag(tag) = tag {
+                self.infer(tag)?
+            } else {
+                InferredType::Concrete(parse_quote! {::pag_runtime::Span<'src>})
+            }];
             for t in tags {
                 // If any inference fails, the whole inference fails
-                let ty = self.infer(t)?;
+                let ty = if let BoundTarget::Tag(t) = t {
+                    self.infer(t)?
+                } else {
+                    InferredType::Concrete(parse_quote! {::pag_runtime::Span<'src>})
+                };
                 types.push(ty);
             }
             if types.len() == 1 {
@@ -152,78 +153,70 @@ impl<'a> InferenceContext<'a> {
         if let Some(x) = self.gamma.get(tag) {
             return Some(x.clone());
         }
-        let target =
-             // If a concrete type annotation is provided, use it directly
-             if let Some(x) = self.annotations.get(tag) {
-                InferredType::Concrete(x.clone())
-            } else {
-                let semact = self.semact.get(tag);
+        let target = if let Some(x) = self.annotations.get(tag) {
+            // If a concrete type annotation is provided, use it directly
+            InferredType::Concrete(x.clone())
+        } else {
+            // find first subexpression that fulfills inference
+            let nfs = self.nforms.get(tag)?;
+            let mut inferred = None;
+            for i in nfs.iter() {
+                let semact = i.semact();
                 match semact {
-                    // No semantic action, the type is unit
-                    None => InferredType::Concrete(parse_quote!(())),
                     // Token semantic action, the type is Span
-                    Some(SemAct::Token) => {
-                        InferredType::Concrete(parse_quote!(::pag_runtime::Span<'src>))
+                    SemAct::Token => {
+                        inferred.replace(InferredType::Concrete(parse_quote!(
+                            ::pag_runtime::Span<'src>
+                        )));
+                        break;
                     }
-                    // Customized routine without type annotation -- inference failed
-                    Some(SemAct::CustomizedRoutine(..)) => return None,
+                    // Customized routine without type annotation, cannot infer
+                    SemAct::CustomizedRoutine(..) => continue,
                     // Nested routine for one or more, the type is unit.
-                    Some(SemAct::OneOrMoreNested) => {
-                        InferredType::Concrete(parse_quote!(()))
+                    SemAct::OneOrMoreNested => {
+                        inferred.replace(InferredType::Concrete(parse_quote!(())));
+                        break;
                     }
-                    Some(SemAct::Gather) => {
-                        let nfs = self.nforms.get(tag)?;
-                        let mut inferred = None;
-                        // find first subexpression that fulfills inference
-                        for i in nfs.iter() {
-                            let visible = i.visible_bindings(0);
-                            if let Some(gather_type) =
-                                self.infer_gather(visible.into_iter().map(|x| x.1))
-                            {
-                                inferred.replace(gather_type);
-                                break;
-                            }
+                    SemAct::Gather => {
+                        let visible = i.visible_bindings(0);
+                        if let Some(gather_type) =
+                            self.infer_gather(visible.into_iter().map(|x| x.1))
+                        {
+                            inferred.replace(gather_type);
+                            break;
                         }
-                        inferred?
                     }
-                    Some(SemAct::ZeroOrMore)
-                    | Some(SemAct::Option)
-                    | Some(SemAct::OneOrMoreToplevel) => {
-                        let nfs = self.nforms.get(tag)?;
+                    SemAct::ZeroOrMore | SemAct::Option | SemAct::OneOrMoreToplevel => {
                         let mapper = |ty: InferredType| {
-                            if matches!(semact, Some(SemAct::Option)) {
+                            if matches!(semact, SemAct::Option) {
                                 InferredType::Option(Box::new(ty))
                             } else {
                                 InferredType::Collector(Box::new(ty))
                             }
                         };
-                        let mut inferred = None;
-                        // find first subexpression that fulfills inference
-                        for i in nfs.iter() {
-                            // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
-                            if let NormalForm::Empty(x) = i {
-                                if x.is_empty() {
-                                    continue;
-                                }
-                            }
-                            // skip the trailing part of OneOrMoreToplevel
-                            let visible = i.visible_bindings(
-                                if matches!(semact, Some(SemAct::OneOrMoreToplevel)) {
-                                    1
-                                } else {
-                                    0
-                                },
-                            );
-                            if let Some(gather_type) =
-                                self.infer_gather(visible.into_iter().map(|x| x.1))
-                            {
-                                inferred.replace(mapper(gather_type));
-                                break;
+                        // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
+                        if let NormalForm::Empty(x, _) = i {
+                            if x.is_empty() {
+                                continue;
                             }
                         }
-                        inferred?
+                        // skip the trailing part of OneOrMoreToplevel
+                        let visible =
+                            i.visible_bindings(if matches!(semact, SemAct::OneOrMoreToplevel) {
+                                1
+                            } else {
+                                0
+                            });
+                        if let Some(gather_type) =
+                            self.infer_gather(visible.into_iter().map(|x| x.1))
+                        {
+                            inferred.replace(mapper(gather_type));
+                            break;
+                        }
                     }
                 }
+            }
+            inferred?
         };
         self.gamma.insert(tag.clone(), target.clone());
         Some(target)
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index c26aa3f..764d18c 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -7,7 +7,9 @@
 // modified, or distributed except according to those terms.
 
 mod inference;
+mod normalization;
 mod semact;
+mod translation;
 
 use std::{
     collections::{HashMap, VecDeque},
@@ -20,6 +22,9 @@ use syn::Ident;
 #[cfg(feature = "debug")]
 use crate::debug::{styled, styled_write};
 
+
+use self::semact::SemAct;
+
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Tag {
     Toplevel(Ident),
@@ -94,38 +99,57 @@ impl std::fmt::Display for Action {
     }
 }
 
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum NormalForm {
-    Empty(Vec<(Tag, Option<Ident>)>),
-    Unexpanded(Vec<Action>),
-    Sequence(Ident, Vec<Action>),
+    Empty(Vec<(Tag, Option<Ident>)>, SemAct),
+    Unexpanded(Vec<Action>, SemAct),
+    Sequence(Ident, Option<Ident>, Vec<Action>, SemAct),
+}
+
+pub enum BoundTarget<'a> {
+    Tag(&'a Tag),
+    Token,
 }
 
 impl NormalForm {
-    pub fn visible_bindings(&self, skip: usize) -> Vec<(&Ident, &Tag)> {
+    pub fn semact(&self) -> &SemAct {
+        match self {
+            Self::Empty(_, semact)
+            | Self::Unexpanded(_, semact)
+            | Self::Sequence(_, _, _, semact) => semact,
+        }
+    }
+    pub fn visible_bindings(&self, skip: usize) -> Vec<(&Ident, BoundTarget)> {
         match self {
-            Self::Empty(actions) => actions
+            Self::Empty(actions, _) => actions
                 .last()
-                .and_then(|(tag, ident)| Some((ident.as_ref()?, tag)))
+                .and_then(|(tag, ident)| Some((ident.as_ref()?, BoundTarget::Tag(tag))))
                 .into_iter()
                 .collect(),
-            Self::Unexpanded(actions) | Self::Sequence(_, actions) => {
+            Self::Unexpanded(actions, _) | Self::Sequence(_, _, actions, _) => {
                 let mut acc = VecDeque::new();
                 for act in actions.iter().rev().skip(skip) {
                     match act {
                         Action::Shift { tag, output } => {
                             if let Some(ident) = output {
-                                acc.push_front((ident, tag));
+                                acc.push_front((ident, BoundTarget::Tag(tag)));
                             }
                         }
                         Action::Reduce { tag, output } => {
                             if let Some(ident) = output {
-                                acc.push_front((ident, tag));
+                                acc.push_front((ident, BoundTarget::Tag(tag)));
                             }
                             break;
                         }
                     }
                 }
+                if let Self::Sequence(_, Some(tk), _, _) = self {
+                    if acc.len() == actions.len() - skip
+                        && !matches!(actions.first(), Some(Action::Reduce { .. }))
+                    {
+                        acc.push_front((tk, BoundTarget::Token));
+                    }
+                }
                 acc.into_iter().collect()
             }
         }
@@ -136,7 +160,7 @@ impl NormalForm {
 impl std::fmt::Display for NormalForm {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Self::Empty(actions) => {
+            Self::Empty(actions, _) => {
                 write!(f, "ε")?;
                 for (tag, output) in actions.iter() {
                     if let Some(name) = output {
@@ -146,14 +170,18 @@ impl std::fmt::Display for NormalForm {
                     }
                 }
             }
-            Self::Unexpanded(actions) => {
+            Self::Unexpanded(actions, _) => {
                 write!(f, "{}", actions[0])?;
                 for action in &actions[1..] {
                     write!(f, "\t{}", action)?;
                 }
             }
-            Self::Sequence(terminal, actions) => {
-                styled_write!(f, Color::Yellow, "{terminal}")?;
+            Self::Sequence(terminal, var, actions, _) => {
+                if let Some(tk) = var {
+                    styled_write!(f, Color::Yellow, "{terminal}[{tk}]")?;
+                } else {
+                    styled_write!(f, Color::Yellow, "{terminal}")?;
+                }
                 for action in actions.iter() {
                     write!(f, "\t{}", action)?;
                 }
@@ -169,6 +197,7 @@ fn debug_print_test() {
     use quote::format_ident;
     let sequence = NormalForm::Sequence(
         format_ident!("TEST"),
+        Some(format_ident!("x")),
         vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
@@ -187,6 +216,7 @@ fn debug_print_test() {
                 output: None,
             },
         ],
+        SemAct::Gather,
     );
     println!("{}", sequence);
 }
@@ -237,6 +267,7 @@ fn debug_print_nf_table() {
     use quote::format_ident;
     let sequence = NormalForm::Sequence(
         format_ident!("TEST"),
+        Some(format_ident!("x")),
         vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
@@ -255,11 +286,15 @@ fn debug_print_nf_table() {
                 output: None,
             },
         ],
+        SemAct::Gather,
+    );
+    let empty = NormalForm::Empty(
+        vec![
+            (Tag::Toplevel(format_ident!("a")), None),
+            (Tag::Toplevel(format_ident!("b")), Some(format_ident!("x"))),
+        ],
+        SemAct::Gather,
     );
-    let empty = NormalForm::Empty(vec![
-        (Tag::Toplevel(format_ident!("a")), None),
-        (Tag::Toplevel(format_ident!("b")), Some(format_ident!("x"))),
-    ]);
     let table = NFTable(
         vec![
             (
diff --git a/pag-parser2/src/nf/normalization.rs b/pag-parser2/src/nf/normalization.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/pag-parser2/src/nf/normalization.rs
@@ -0,0 +1 @@
+
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index 0dfd64b..c4b1e3e 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -6,11 +6,11 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
-use std::collections::HashMap;
 
-use super::Tag;
 
-pub type SemActTable = HashMap<Tag, SemAct>;
+
+
+use crate::frontend::{CustomizedBlock, ParserExpr};
 
 ///
 /// ```
@@ -21,8 +21,9 @@ pub type SemActTable = HashMap<Tag, SemAct>;
 /// ```
 
 // those normal form without SemAct will be treated as plain scanner.
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum SemAct {
-    CustomizedRoutine(syn::Block),
+    CustomizedRoutine(CustomizedBlock),
     /// Gather inner data. If multiple is selected, return a tuple.
     /// If only one is selected, return target data.
     Gather,
@@ -41,3 +42,30 @@ pub enum SemAct {
     /// Yield a token span,
     Token,
 }
+
+impl SemAct {
+    pub fn infer(expr: &ParserExpr) -> Self {
+        match expr {
+            ParserExpr::LexerRef(_) => SemAct::Token,
+            ParserExpr::Plus(_) => SemAct::OneOrMoreToplevel,
+            ParserExpr::Opt(_) => SemAct::Option,
+            ParserExpr::Star(_) => SemAct::ZeroOrMore,
+            _ => SemAct::Gather,
+        }
+    }
+}
+
+#[cfg(feature = "debug")]
+impl std::fmt::Display for SemAct {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SemAct::CustomizedRoutine(x) => write!(f, "{:?}", std::rc::Rc::as_ptr(&x.0)),
+            SemAct::Gather => write!(f, "Gather"),
+            SemAct::Option => write!(f, "Option"),
+            SemAct::ZeroOrMore => write!(f, "ZeroOrMore"),
+            SemAct::OneOrMoreToplevel => write!(f, "OneOrMoreToplevel"),
+            SemAct::OneOrMoreNested => write!(f, "OneOrMoreNested"),
+            SemAct::Token => write!(f, "Token"),
+        }
+    }
+}
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
new file mode 100644
index 0000000..5ef09ce
--- /dev/null
+++ b/pag-parser2/src/nf/translation.rs
@@ -0,0 +1,64 @@
+//!
+//! Transform from surface syntax to semi-normalized form
+//!
+
+use std::collections::HashMap;
+
+use quote::format_ident;
+use syn::{Ident, Type};
+
+use crate::frontend::{ParserDef};
+
+use super::{semact::SemAct, NormalForm, Tag};
+
+struct Translation {
+    /// Table of semi-normalized production rules
+    semi_nfs: HashMap<Tag, Vec<NormalForm>>,
+    /// Toplevel type annotations
+    annotations: HashMap<Tag, Type>,
+    /// Type hints when calling inner routines (collector)
+    hints: HashMap<Tag, Type>,
+    /// Counter of assigned non-explicit variable names
+    output_cnt: usize,
+    /// Counter of assigned anonymous routines
+    anonymous_cnt: usize,
+}
+
+impl Translation {
+    // Allocate a new symbol for unamed variable bindings.
+    fn new_output_sym(&mut self) -> Ident {
+        let result = format_ident!("_{}", self.output_cnt);
+        self.output_cnt += 1;
+        result
+    }
+    // Allocate a new tag for anonymous routines.
+    fn new_anonymous_tag(&mut self) -> Tag {
+        let result = Tag::Anonymous(self.anonymous_cnt);
+        self.anonymous_cnt += 1;
+        result
+    }
+    // Translate a top-level definitioin
+    fn add_toplevel_def(&mut self, name: Ident, def: &ParserDef) {
+        let tag = Tag::Toplevel(name);
+        self.annotations.insert(tag.clone(), def.ty.clone());
+        let rules = def
+            .rules
+            .iter()
+            .map(|rule| {
+                let semact = if let Some(x) = rule.action.clone() {
+                    SemAct::CustomizedRoutine(x)
+                } else if rule.vars.len() == 1 {
+                    SemAct::infer(&rule.vars[0].expr)
+                } else {
+                    SemAct::Gather
+                };
+                match semact {
+                    SemAct::Gather | SemAct::CustomizedRoutine(_) => {}
+                    _ => {}
+                }
+                todo!()
+            })
+            .collect();
+        self.semi_nfs.insert(tag, rules);
+    }
+}

From 0e1dab09eba445a980b9ada49259ad839a96987a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Thu, 27 Jul 2023 22:20:51 -0400
Subject: [PATCH 30/42] allow inner collector to be hinted

---
 pag-parser2/src/frontend/ast.rs   |  2 +-
 pag-parser2/src/frontend/parse.rs | 20 +++++++++++++++++---
 pag-parser2/src/nf/mod.rs         |  1 -
 pag-parser2/src/nf/semact.rs      |  4 ----
 pag-parser2/src/nf/translation.rs |  4 ++--
 5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 4dce9d9..9912d84 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -64,7 +64,6 @@ pub struct ParserRule {
 pub struct VarBinding {
     pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
-    pub ty: Option<syn::Type>,
 }
 
 // TODO: how to express "bottom" & "any"?
@@ -90,6 +89,7 @@ pub enum ParserExpr {
     LexerRef(syn::Ident),
     ParserRef(syn::Ident),
     Ignore(Box<Self>),
+    Hinted(Box<Self>, syn::Type),
 }
 
 pub struct RightDeepIterator<'a> {
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 9b782ad..e70a51b 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -140,7 +140,7 @@ impl Parse for ParserRule {
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> Result<Self> {
-        let expr = input.parse::<ParserExpr>()?;
+        let mut expr = input.parse::<ParserExpr>()?;
 
         let mut name = None;
         let mut ty = None;
@@ -159,8 +159,10 @@ impl Parse for VarBinding {
                 return Err(content.error("expected `]`"));
             }
         }
-
-        Ok(Self { expr, name, ty })
+        if let Some(ty) = ty {
+            expr = ParserExpr::Hinted(Box::new(expr), ty);
+        }
+        Ok(Self { expr, name })
     }
 }
 
@@ -313,6 +315,15 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
             lhs = ParserExpr::Seq(Box::new(lhs), Box::new(rhs));
             continue;
         }
+        fn peek_and_parse_type(input: ParseStream, expr: ParserExpr) -> Result<ParserExpr> {
+            Ok(if input.peek(Token!(:)) {
+                input.parse::<Token!(:)>()?;
+                let ty = input.parse::<syn::Type>()?;
+                ParserExpr::Hinted(Box::new(expr), ty)
+            } else {
+                expr
+            })
+        }
         if input.peek(Token![*]) {
             let l_bp = 70;
             if l_bp < min_bp {
@@ -320,6 +331,7 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
             }
             input.parse::<Token![*]>()?;
             lhs = ParserExpr::Star(Box::new(lhs));
+            lhs = peek_and_parse_type(input, lhs)?;
             continue;
         }
         if input.peek(Token![+]) {
@@ -329,6 +341,7 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
             }
             input.parse::<Token![+]>()?;
             lhs = ParserExpr::Plus(Box::new(lhs));
+            lhs = peek_and_parse_type(input, lhs)?;
             continue;
         }
         if input.peek(Token![?]) {
@@ -363,6 +376,7 @@ mod test {
     #[test]
     fn test_parser_expr() {
         syn::parse_str::<ParserExpr>(r#"A? b c* D+ F?"#).unwrap();
+        syn::parse_str::<ParserExpr>(r#"A? b c* (key value*:Vec<_>)+:HashMap<_, _> F?"#).unwrap();
     }
 
     #[test]
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 764d18c..6a19f75 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -22,7 +22,6 @@ use syn::Ident;
 #[cfg(feature = "debug")]
 use crate::debug::{styled, styled_write};
 
-
 use self::semact::SemAct;
 
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index c4b1e3e..69fa6d2 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -6,10 +6,6 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
-
-
-
-
 use crate::frontend::{CustomizedBlock, ParserExpr};
 
 ///
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 5ef09ce..f93cf18 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -7,7 +7,7 @@ use std::collections::HashMap;
 use quote::format_ident;
 use syn::{Ident, Type};
 
-use crate::frontend::{ParserDef};
+use crate::frontend::ParserDef;
 
 use super::{semact::SemAct, NormalForm, Tag};
 
@@ -37,7 +37,7 @@ impl Translation {
         self.anonymous_cnt += 1;
         result
     }
-    // Translate a top-level definitioin
+    // Translate a top-level definition
     fn add_toplevel_def(&mut self, name: Ident, def: &ParserDef) {
         let tag = Tag::Toplevel(name);
         self.annotations.insert(tag.clone(), def.ty.clone());

From a76362ad5bd00874388ade21a950967a8ccfe391 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Fri, 28 Jul 2023 00:36:55 -0400
Subject: [PATCH 31/42] stage work for translation

---
 pag-parser2/src/nf/translation.rs | 54 +++++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index f93cf18..0dccb81 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -7,9 +7,9 @@ use std::collections::HashMap;
 use quote::format_ident;
 use syn::{Ident, Type};
 
-use crate::frontend::ParserDef;
+use crate::frontend::{ParserDef, ParserExpr};
 
-use super::{semact::SemAct, NormalForm, Tag};
+use super::{semact::SemAct, NormalForm, Tag, Action};
 
 struct Translation {
     /// Table of semi-normalized production rules
@@ -37,6 +37,56 @@ impl Translation {
         self.anonymous_cnt += 1;
         result
     }
+    fn construct_actions_from_expr_sequence<'a, I>(&mut self, stream: I) -> Vec<Action> 
+    where
+        I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
+    {
+        stream.map(|(expr, output )| {
+            match expr {
+                ParserExpr::ParserRef(rule) => 
+                    Action::Shift { tag: Tag::Toplevel(rule.clone()), output: output.or_else(|| Some(self.new_output_sym())) },
+                ParserExpr::LexerRef(_)  => 
+                    Action::Shift { tag: self.add_anonymous_rule(expr), output }, 
+                ParserExpr::Ignore(inner)  => 
+                    Action::Shift { tag: self.add_anonymous_rule(inner), output: None }, 
+                ParserExpr::Hinted( inner, ty) => {
+                    let tag = self.add_anonymous_rule(inner);
+                    self.hints.insert(tag.clone(), ty.clone());
+                    Action::Shift { tag, output: None }
+                }
+                _ => 
+                    Action::Shift { tag: self.add_anonymous_rule(expr), output: output.or_else(|| Some(self.new_output_sym())) },    
+            }
+        }).collect()
+    }
+    fn construct_nf_from_expr_sequence<'a, I>(&mut self, mut stream: I, semact: SemAct) -> NormalForm
+    where
+        I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
+    {
+        let head = stream.next();
+        match head {
+            None => NormalForm::Empty(vec![], semact),
+            // Token rule is ignored on default, but can be used to specify the label.
+            Some((ParserExpr::LexerRef(token), label)) => {
+                let actions = self.construct_actions_from_expr_sequence(stream);
+                NormalForm::Sequence(token.clone(), label, actions, semact)
+            }
+            Some(_) => {
+                let recovered = head.into_iter().chain(stream);
+                let actions = self.construct_actions_from_expr_sequence(recovered);
+                NormalForm::Unexpanded(actions, semact)
+            }
+        }
+    }
+
+    fn add_anonymous_rule(&mut self, expr: &ParserExpr) -> Tag {
+        // Must be primitive rules
+
+        let tag = self.new_anonymous_tag();
+        let semact = SemAct::infer(expr);
+        
+    }
+
     // Translate a top-level definition
     fn add_toplevel_def(&mut self, name: Ident, def: &ParserDef) {
         let tag = Tag::Toplevel(name);

From 3e767f08a5237a5b8f1b450e8557a8415a6cb882 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Sat, 29 Jul 2023 14:54:04 -0400
Subject: [PATCH 32/42] add markers for tailcall

---
 pag-parser2/src/frontend/ast.rs   |  10 +-
 pag-parser2/src/lib.rs            |   1 +
 pag-parser2/src/nf/inference.rs   |   4 +
 pag-parser2/src/nf/mod.rs         |   4 +
 pag-parser2/src/nf/semact.rs      |   3 +
 pag-parser2/src/nf/translation.rs | 314 +++++++++++++++++++++++++-----
 6 files changed, 286 insertions(+), 50 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 9912d84..9877b44 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -93,21 +93,21 @@ pub enum ParserExpr {
 }
 
 pub struct RightDeepIterator<'a> {
-    seq: Option<&'a LexerExpr>,
+    seq: Option<&'a ParserExpr>,
 }
 
-impl<'a> From<&'a LexerExpr> for RightDeepIterator<'a> {
-    fn from(expr: &'a LexerExpr) -> Self {
+impl<'a> From<&'a ParserExpr> for RightDeepIterator<'a> {
+    fn from(expr: &'a ParserExpr) -> Self {
         Self { seq: Some(expr) }
     }
 }
 
 impl<'a> Iterator for RightDeepIterator<'a> {
-    type Item = &'a LexerExpr;
+    type Item = &'a ParserExpr;
 
     fn next(&mut self) -> Option<Self::Item> {
         match self.seq {
-            Some(LexerExpr::Seq(a, b)) => {
+            Some(ParserExpr::Seq(a, b)) => {
                 self.seq = Some(b);
                 Some(a)
             }
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
index 85c0851..932e93c 100644
--- a/pag-parser2/src/lib.rs
+++ b/pag-parser2/src/lib.rs
@@ -6,6 +6,7 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
+#![feature(box_patterns)]
 #[cfg(feature = "debug")]
 mod debug;
 mod frontend;
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index a474b3d..b614faa 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -163,6 +163,10 @@ impl<'a> InferenceContext<'a> {
             for i in nfs.iter() {
                 let semact = i.semact();
                 match semact {
+                    SemAct::Recognize => {
+                        inferred.replace(InferredType::Concrete(parse_quote!(())));
+                        break;
+                    }
                     // Token semantic action, the type is Span
                     SemAct::Token => {
                         inferred.replace(InferredType::Concrete(parse_quote!(
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 6a19f75..f314d0f 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -74,6 +74,8 @@ pub enum Action {
         tag: Tag,
         output: Option<Ident>,
     },
+    /// Specialized action for tail call optimization.
+    TailCall
 }
 
 #[cfg(feature = "debug")]
@@ -94,6 +96,7 @@ impl std::fmt::Display for Action {
                     styled_write!(f, Color::Red, "{tag}")
                 }
             }
+            Self::TailCall => styled_write!(f, Color::Green, "↻"),
         }
     }
 }
@@ -140,6 +143,7 @@ impl NormalForm {
                             }
                             break;
                         }
+                        Action::TailCall => continue,
                     }
                 }
                 if let Self::Sequence(_, Some(tk), _, _) = self {
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index 69fa6d2..9dec982 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -37,6 +37,8 @@ pub enum SemAct {
     OneOrMoreNested,
     /// Yield a token span,
     Token,
+    /// Recognize without generate any data.
+    Recognize,
 }
 
 impl SemAct {
@@ -62,6 +64,7 @@ impl std::fmt::Display for SemAct {
             SemAct::OneOrMoreToplevel => write!(f, "OneOrMoreToplevel"),
             SemAct::OneOrMoreNested => write!(f, "OneOrMoreNested"),
             SemAct::Token => write!(f, "Token"),
+            SemAct::Recognize => write!(f, "Recognize"),
         }
     }
 }
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 0dccb81..17d22bf 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -7,10 +7,10 @@ use std::collections::HashMap;
 use quote::format_ident;
 use syn::{Ident, Type};
 
+use super::{semact::SemAct, Action, NormalForm, Tag};
+use crate::frontend::RightDeepIterator;
 use crate::frontend::{ParserDef, ParserExpr};
 
-use super::{semact::SemAct, NormalForm, Tag, Action};
-
 struct Translation {
     /// Table of semi-normalized production rules
     semi_nfs: HashMap<Tag, Vec<NormalForm>>,
@@ -22,9 +22,18 @@ struct Translation {
     output_cnt: usize,
     /// Counter of assigned anonymous routines
     anonymous_cnt: usize,
+    /// Whether we are currently ignoring the output
+    ignoring: bool,
 }
 
 impl Translation {
+    fn start_ignoring(&mut self) {
+        self.ignoring = true;
+    }
+    fn end_ignore(&mut self) {
+        self.ignoring = false;
+    }
+
     // Allocate a new symbol for unamed variable bindings.
     fn new_output_sym(&mut self) -> Ident {
         let result = format_ident!("_{}", self.output_cnt);
@@ -37,54 +46,269 @@ impl Translation {
         self.anonymous_cnt += 1;
         result
     }
-    fn construct_actions_from_expr_sequence<'a, I>(&mut self, stream: I) -> Vec<Action> 
-    where
-        I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
-    {
-        stream.map(|(expr, output )| {
-            match expr {
-                ParserExpr::ParserRef(rule) => 
-                    Action::Shift { tag: Tag::Toplevel(rule.clone()), output: output.or_else(|| Some(self.new_output_sym())) },
-                ParserExpr::LexerRef(_)  => 
-                    Action::Shift { tag: self.add_anonymous_rule(expr), output }, 
-                ParserExpr::Ignore(inner)  => 
-                    Action::Shift { tag: self.add_anonymous_rule(inner), output: None }, 
-                ParserExpr::Hinted( inner, ty) => {
-                    let tag = self.add_anonymous_rule(inner);
-                    self.hints.insert(tag.clone(), ty.clone());
-                    Action::Shift { tag, output: None }
-                }
-                _ => 
-                    Action::Shift { tag: self.add_anonymous_rule(expr), output: output.or_else(|| Some(self.new_output_sym())) },    
-            }
-        }).collect()
+    fn add_nf(&mut self, tag: Tag, nf: NormalForm) {
+        self.semi_nfs.entry(tag).or_default().push(nf);
     }
-    fn construct_nf_from_expr_sequence<'a, I>(&mut self, mut stream: I, semact: SemAct) -> NormalForm
-    where
-        I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
-    {
-        let head = stream.next();
-        match head {
-            None => NormalForm::Empty(vec![], semact),
-            // Token rule is ignored on default, but can be used to specify the label.
-            Some((ParserExpr::LexerRef(token), label)) => {
-                let actions = self.construct_actions_from_expr_sequence(stream);
-                NormalForm::Sequence(token.clone(), label, actions, semact)
+    fn add_nf_from_anonymous_expr(&mut self, expr: &ParserExpr, tag: &Tag) {
+        match expr {
+            ParserExpr::Seq(box ParserExpr::Ignore(box ParserExpr::LexerRef(head)), tail) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                self.add_nf(
+                    tag.clone(),
+                    NormalForm::Sequence(
+                        head.clone(),
+                        None,
+                        tail_actions,
+                        if self.ignoring {
+                            SemAct::Recognize
+                        } else {
+                            SemAct::Gather
+                        },
+                    ),
+                );
+            }
+            ParserExpr::Seq(box ParserExpr::LexerRef(head), tail) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Sequence(
+                    head.clone(),
+                    if self.ignoring {
+                        None
+                    } else {
+                        Some(self.new_output_sym())
+                    },
+                    tail_actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::Gather
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+            }
+            ParserExpr::Seq(_, _) => {
+                let actions = RightDeepIterator::from(expr)
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Unexpanded(
+                    actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::Gather
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+            }
+            ParserExpr::Opt(box ParserExpr::Seq(
+                box ParserExpr::Ignore(box ParserExpr::LexerRef(head)),
+                tail,
+            )) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                self.add_nf(
+                    tag.clone(),
+                    NormalForm::Sequence(
+                        head.clone(),
+                        None,
+                        tail_actions,
+                        if self.ignoring {
+                            SemAct::Recognize
+                        } else {
+                            SemAct::Option
+                        },
+                    ),
+                );
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::Option));
+            }
+            ParserExpr::Opt(box ParserExpr::Seq(box ParserExpr::LexerRef(head), tail)) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Sequence(
+                    head.clone(),
+                    if self.ignoring {
+                        None
+                    } else {
+                        Some(self.new_output_sym())
+                    },
+                    tail_actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::Option
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::Option));
+            }
+            ParserExpr::Opt(inner) => {
+                let actions = RightDeepIterator::from(inner.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Unexpanded(
+                    actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::Option
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::Option));
             }
-            Some(_) => {
-                let recovered = head.into_iter().chain(stream);
-                let actions = self.construct_actions_from_expr_sequence(recovered);
-                NormalForm::Unexpanded(actions, semact)
+            ParserExpr::Star(box ParserExpr::Seq(
+                box ParserExpr::Ignore(box ParserExpr::LexerRef(head)),
+                tail,
+            )) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                self.add_nf(
+                    tag.clone(),
+                    NormalForm::Sequence(
+                        head.clone(),
+                        None,
+                        tail_actions,
+                        if self.ignoring {
+                            SemAct::Recognize
+                        } else {
+                            SemAct::ZeroOrMore
+                        },
+                    ),
+                );
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
             }
+            ParserExpr::Star(box ParserExpr::Seq(box ParserExpr::LexerRef(head), tail)) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Sequence(
+                    head.clone(),
+                    if self.ignoring {
+                        None
+                    } else {
+                        Some(self.new_output_sym())
+                    },
+                    tail_actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::ZeroOrMore
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+            }
+            ParserExpr::Star(inner) => {
+                let actions = RightDeepIterator::from(inner.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Unexpanded(
+                    actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::ZeroOrMore
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+            }
+            ParserExpr::Plus(box ParserExpr::Seq(
+                box ParserExpr::Ignore(box ParserExpr::LexerRef(head)),
+                tail,
+            )) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                self.add_nf(
+                    tag.clone(),
+                    NormalForm::Sequence(
+                        head.clone(),
+                        None,
+                        tail_actions,
+                        if self.ignoring {
+                            SemAct::Recognize
+                        } else {
+                            SemAct::OneOrMoreToplevel
+                        },
+                    ),
+                );
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+            }
+            ParserExpr::Plus(box ParserExpr::Seq(box ParserExpr::LexerRef(head), tail)) => {
+                let tail_actions = RightDeepIterator::from(tail.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Sequence(
+                    head.clone(),
+                    if self.ignoring {
+                        None
+                    } else {
+                        Some(self.new_output_sym())
+                    },
+                    tail_actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::ZeroOrMore
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+            }
+            ParserExpr::Plus(inner) => {
+                let actions = RightDeepIterator::from(inner.as_ref())
+                    .map(|inner| self.add_anonymous_rule(inner))
+                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .collect();
+                let nf = NormalForm::Unexpanded(
+                    actions,
+                    if self.ignoring {
+                        SemAct::Recognize
+                    } else {
+                        SemAct::ZeroOrMore
+                    },
+                );
+                self.add_nf(tag.clone(), nf);
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+            }
+            ParserExpr::LexerRef(ident) => {
+                let nf = if self.ignoring {
+                    NormalForm::Sequence(ident.clone(), None, vec![], SemAct::Recognize)
+                } else {
+                    NormalForm::Sequence(
+                        ident.clone(),
+                        Some(self.new_output_sym()),
+                        vec![],
+                        SemAct::Token,
+                    )
+                };
+                self.add_nf(tag.clone(), nf);
+            }
+            ParserExpr::ParserRef(_) => unreachable!("cannot create nf from parser ref"),
+            ParserExpr::Ignore(_) => unreachable!("cannot create nf from ignore"),
+            ParserExpr::Hinted(_, _) => unreachable!("cannot create nf from hinted"),
         }
     }
-
-    fn add_anonymous_rule(&mut self, expr: &ParserExpr) -> Tag {
-        // Must be primitive rules
-
-        let tag = self.new_anonymous_tag();
-        let semact = SemAct::infer(expr);
-        
+    fn add_anonymous_rule(&mut self, expr: &ParserExpr) -> (Tag, Option<Ident>) {
+        todo!()
     }
 
     // Translate a top-level definition

From a9a61ca7abe1513e25aa83e3d1cfbbd2bfb207fc Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Sat, 29 Jul 2023 23:07:11 -0400
Subject: [PATCH 33/42] implement translation

---
 pag-parser2/src/frontend/parse.rs |  12 +-
 pag-parser2/src/nf/mod.rs         |  50 +++-
 pag-parser2/src/nf/translation.rs | 444 ++++++++++++++----------------
 3 files changed, 263 insertions(+), 243 deletions(-)

diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index e70a51b..32bba36 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -140,7 +140,7 @@ impl Parse for ParserRule {
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> Result<Self> {
-        let mut expr = input.parse::<ParserExpr>()?;
+        let mut expr = parse_parser_expr(input, 0, true)?;
 
         let mut name = None;
         let mut ty = None;
@@ -276,12 +276,12 @@ fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> Result<LexerExpr> {
 
 impl Parse for ParserExpr {
     fn parse(input: ParseStream) -> Result<Self> {
-        parse_parser_expr(input, 0)
+        parse_parser_expr(input, 0, false)
     }
 }
 
 // pratt parsing
-fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
+fn parse_parser_expr(input: ParseStream, min_bp: u32, is_toplevel: bool) -> Result<ParserExpr> {
     let mut lhs = 'lhs: {
         if input.peek(syn::Ident) {
             let ident = input.parse::<syn::Ident>()?.unraw();
@@ -299,19 +299,19 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32) -> Result<ParserExpr> {
         if input.peek(Token![#]) {
             input.parse::<Token![#]>()?;
             let r_bp = 60;
-            let rhs = parse_parser_expr(input, r_bp)?;
+            let rhs = parse_parser_expr(input, r_bp, is_toplevel)?;
             break 'lhs ParserExpr::Ignore(Box::new(rhs));
         }
         return Err(input.error("expected parser expression"));
     };
 
     loop {
-        if input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#]) {
+        if !is_toplevel && (input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#])) {
             let (l_bp, r_bp) = (40, 41);
             if l_bp < min_bp {
                 break;
             }
-            let rhs = parse_parser_expr(input, r_bp)?;
+            let rhs = parse_parser_expr(input, r_bp, is_toplevel)?;
             lhs = ParserExpr::Seq(Box::new(lhs), Box::new(rhs));
             continue;
         }
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index f314d0f..f1e1ceb 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -13,7 +13,7 @@ mod translation;
 
 use std::{
     collections::{HashMap, VecDeque},
-    ops::Deref,
+    ops::{Deref, DerefMut},
 };
 
 use quote::format_ident;
@@ -51,7 +51,7 @@ impl std::fmt::Display for Tag {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Tag::Toplevel(ident) => write!(f, "{ident}"),
-            Tag::Anonymous(index) => styled_write!(f, Style::new().italic(), "_{index}"),
+            Tag::Anonymous(index) => styled_write!(f, Style::new().italic(), "_A{index}"),
         }
     }
 }
@@ -75,7 +75,9 @@ pub enum Action {
         output: Option<Ident>,
     },
     /// Specialized action for tail call optimization.
-    TailCall
+    TailCall,
+    /// Specialized action for passing collector to subroutines.
+    PassCollector(Tag),
 }
 
 #[cfg(feature = "debug")]
@@ -97,6 +99,7 @@ impl std::fmt::Display for Action {
                 }
             }
             Self::TailCall => styled_write!(f, Color::Green, "↻"),
+            Self::PassCollector(tag) => styled_write!(f, Color::Green, "⇒{tag}"),
         }
     }
 }
@@ -121,6 +124,39 @@ impl NormalForm {
             | Self::Sequence(_, _, _, semact) => semact,
         }
     }
+    pub fn semact_mut(&mut self) -> &mut SemAct {
+        match self {
+            Self::Empty(_, semact)
+            | Self::Unexpanded(_, semact)
+            | Self::Sequence(_, _, _, semact) => semact,
+        }
+    }
+    pub fn append_tailcall(&mut self) {
+        match self {
+            Self::Empty(_actions, _) => {
+                unreachable!("empty cannot be tail called, otherwise there will be ambiguity")
+            }
+            Self::Unexpanded(actions, _) => {
+                actions.push(Action::TailCall);
+            }
+            Self::Sequence(_, _, actions, _) => {
+                actions.push(Action::TailCall);
+            }
+        }
+    }
+    pub fn append_pass_collector(&mut self, tag: Tag) {
+        match self {
+            Self::Empty(_actions, _) => {
+                unreachable!("empty cannot be followed by another subroutine, otherwise there will be ambiguity")
+            }
+            Self::Unexpanded(actions, _) => {
+                actions.push(Action::PassCollector(tag));
+            }
+            Self::Sequence(_, _, actions, _) => {
+                actions.push(Action::PassCollector(tag));
+            }
+        }
+    }
     pub fn visible_bindings(&self, skip: usize) -> Vec<(&Ident, BoundTarget)> {
         match self {
             Self::Empty(actions, _) => actions
@@ -143,6 +179,7 @@ impl NormalForm {
                             }
                             break;
                         }
+                        Action::PassCollector(..) => continue,
                         Action::TailCall => continue,
                     }
                 }
@@ -224,6 +261,7 @@ fn debug_print_test() {
     println!("{}", sequence);
 }
 
+#[derive(Default, Clone)]
 /// Well, it is not the notorius firewall.
 pub struct NFTable(HashMap<Tag, Vec<NormalForm>>);
 
@@ -235,6 +273,12 @@ impl Deref for NFTable {
     }
 }
 
+impl DerefMut for NFTable {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
 #[cfg(feature = "debug")]
 impl std::fmt::Display for NFTable {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 17d22bf..90a8823 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -7,13 +7,15 @@ use std::collections::HashMap;
 use quote::format_ident;
 use syn::{Ident, Type};
 
+use super::NFTable;
 use super::{semact::SemAct, Action, NormalForm, Tag};
 use crate::frontend::RightDeepIterator;
 use crate::frontend::{ParserDef, ParserExpr};
 
+#[derive(Default)]
 struct Translation {
     /// Table of semi-normalized production rules
-    semi_nfs: HashMap<Tag, Vec<NormalForm>>,
+    semi_nfs: NFTable,
     /// Toplevel type annotations
     annotations: HashMap<Tag, Type>,
     /// Type hints when calling inner routines (collector)
@@ -23,274 +25,162 @@ struct Translation {
     /// Counter of assigned anonymous routines
     anonymous_cnt: usize,
     /// Whether we are currently ignoring the output
-    ignoring: bool,
+    ignoring_cnt: usize,
 }
 
 impl Translation {
+    /// Enter ignoring mode
     fn start_ignoring(&mut self) {
-        self.ignoring = true;
+        self.ignoring_cnt += 1;
     }
-    fn end_ignore(&mut self) {
-        self.ignoring = false;
+    /// Exit ignoring mode
+    fn end_ignoring(&mut self) {
+        self.ignoring_cnt -= 1;
     }
 
-    // Allocate a new symbol for unamed variable bindings.
+    fn ignoring(&mut self) -> bool {
+        self.ignoring_cnt > 0
+    }
+
+    /// Allocate a new symbol for unnamed variable bindings.
     fn new_output_sym(&mut self) -> Ident {
         let result = format_ident!("_{}", self.output_cnt);
         self.output_cnt += 1;
         result
     }
-    // Allocate a new tag for anonymous routines.
+    /// Allocate a new tag for anonymous routines.
     fn new_anonymous_tag(&mut self) -> Tag {
         let result = Tag::Anonymous(self.anonymous_cnt);
         self.anonymous_cnt += 1;
         result
     }
-    fn add_nf(&mut self, tag: Tag, nf: NormalForm) {
-        self.semi_nfs.entry(tag).or_default().push(nf);
-    }
-    fn add_nf_from_anonymous_expr(&mut self, expr: &ParserExpr, tag: &Tag) {
-        match expr {
-            ParserExpr::Seq(box ParserExpr::Ignore(box ParserExpr::LexerRef(head)), tail) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
+    /// Construct a normal form from a sequence of parser expressions. The semact is always `Recognize`.
+    fn partial_nf_from_sequence<
+        'a,
+        const IGNORE_UNNAMED: bool,
+        I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
+    >(
+        &mut self,
+        mut iter: I,
+    ) -> NormalForm {
+        match iter.next() {
+            None => NormalForm::Empty(vec![], SemAct::Recognize),
+            Some((ParserExpr::Ignore(box ParserExpr::LexerRef(token)), _)) => {
+                let tail = iter
+                    .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
                     .map(|(tag, output)| Action::Shift { tag, output })
                     .collect();
-                self.add_nf(
-                    tag.clone(),
-                    NormalForm::Sequence(
-                        head.clone(),
-                        None,
-                        tail_actions,
-                        if self.ignoring {
-                            SemAct::Recognize
-                        } else {
-                            SemAct::Gather
-                        },
-                    ),
-                );
+                NormalForm::Sequence(token.clone(), None, tail, SemAct::Recognize)
             }
-            ParserExpr::Seq(box ParserExpr::LexerRef(head), tail) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
+            Some((ParserExpr::LexerRef(token), named)) => {
+                let tail = iter
+                    .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
                     .map(|(tag, output)| Action::Shift { tag, output })
                     .collect();
-                let nf = NormalForm::Sequence(
-                    head.clone(),
-                    if self.ignoring {
+                NormalForm::Sequence(
+                    token.clone(),
+                    if self.ignoring() {
                         None
+                    } else if IGNORE_UNNAMED {
+                        named
                     } else {
-                        Some(self.new_output_sym())
-                    },
-                    tail_actions,
-                    if self.ignoring {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::Gather
+                        named.or_else(|| Some(self.new_output_sym()))
                     },
-                );
-                self.add_nf(tag.clone(), nf);
+                    tail,
+                    SemAct::Recognize,
+                )
             }
-            ParserExpr::Seq(_, _) => {
-                let actions = RightDeepIterator::from(expr)
-                    .map(|inner| self.add_anonymous_rule(inner))
+            Some((expr, named)) => {
+                let sequence = [(expr, named)]
+                    .into_iter()
+                    .chain(iter)
+                    .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
                     .map(|(tag, output)| Action::Shift { tag, output })
                     .collect();
-                let nf = NormalForm::Unexpanded(
-                    actions,
-                    if self.ignoring {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::Gather
-                    },
-                );
-                self.add_nf(tag.clone(), nf);
-            }
-            ParserExpr::Opt(box ParserExpr::Seq(
-                box ParserExpr::Ignore(box ParserExpr::LexerRef(head)),
-                tail,
-            )) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                self.add_nf(
-                    tag.clone(),
-                    NormalForm::Sequence(
-                        head.clone(),
-                        None,
-                        tail_actions,
-                        if self.ignoring {
-                            SemAct::Recognize
-                        } else {
-                            SemAct::Option
-                        },
-                    ),
-                );
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::Option));
+                NormalForm::Unexpanded(sequence, SemAct::Recognize)
             }
-            ParserExpr::Opt(box ParserExpr::Seq(box ParserExpr::LexerRef(head), tail)) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                let nf = NormalForm::Sequence(
-                    head.clone(),
-                    if self.ignoring {
-                        None
-                    } else {
-                        Some(self.new_output_sym())
-                    },
-                    tail_actions,
-                    if self.ignoring {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::Option
-                    },
+        }
+    }
+    fn add_nf(&mut self, tag: Tag, nf: NormalForm) {
+        self.semi_nfs.entry(tag).or_default().push(nf);
+    }
+    fn add_nf_from_anonymous_expr(&mut self, expr: &ParserExpr, tag: &Tag) {
+        match expr {
+            ParserExpr::Seq(..) => {
+                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
+                    RightDeepIterator::from(expr).map(|expr| (expr, None)),
                 );
-                self.add_nf(tag.clone(), nf);
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::Option));
+                *partial_nf.semact_mut() = if self.ignoring() {
+                    SemAct::Recognize
+                } else {
+                    SemAct::Gather
+                };
+                self.add_nf(tag.clone(), partial_nf);
             }
             ParserExpr::Opt(inner) => {
-                let actions = RightDeepIterator::from(inner.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                let nf = NormalForm::Unexpanded(
-                    actions,
-                    if self.ignoring {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::Option
-                    },
-                );
-                self.add_nf(tag.clone(), nf);
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::Option));
-            }
-            ParserExpr::Star(box ParserExpr::Seq(
-                box ParserExpr::Ignore(box ParserExpr::LexerRef(head)),
-                tail,
-            )) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                self.add_nf(
-                    tag.clone(),
-                    NormalForm::Sequence(
-                        head.clone(),
-                        None,
-                        tail_actions,
-                        if self.ignoring {
-                            SemAct::Recognize
-                        } else {
-                            SemAct::ZeroOrMore
-                        },
-                    ),
+                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
+                    RightDeepIterator::from(inner.as_ref()).map(|expr| (expr, None)),
                 );
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
-            }
-            ParserExpr::Star(box ParserExpr::Seq(box ParserExpr::LexerRef(head), tail)) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                let nf = NormalForm::Sequence(
-                    head.clone(),
-                    if self.ignoring {
-                        None
-                    } else {
-                        Some(self.new_output_sym())
-                    },
-                    tail_actions,
-                    if self.ignoring {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::ZeroOrMore
-                    },
-                );
-                self.add_nf(tag.clone(), nf);
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+                let semact = if self.ignoring() {
+                    SemAct::Recognize
+                } else {
+                    SemAct::Option
+                };
+                *partial_nf.semact_mut() = semact.clone();
+                self.add_nf(tag.clone(), partial_nf);
+                // add one more rule for empty
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], semact));
             }
             ParserExpr::Star(inner) => {
-                let actions = RightDeepIterator::from(inner.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                let nf = NormalForm::Unexpanded(
-                    actions,
-                    if self.ignoring {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::ZeroOrMore
-                    },
+                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
+                    RightDeepIterator::from(inner.as_ref()).map(|expr| (expr, None)),
                 );
-                self.add_nf(tag.clone(), nf);
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+                let semact = if self.ignoring() {
+                    SemAct::Recognize
+                } else {
+                    SemAct::ZeroOrMore
+                };
+                *partial_nf.semact_mut() = semact.clone();
+                self.add_nf(tag.clone(), partial_nf);
+                // add one more rule for empty
+                self.add_nf(tag.clone(), NormalForm::Empty(vec![], semact));
             }
-            ParserExpr::Plus(box ParserExpr::Seq(
-                box ParserExpr::Ignore(box ParserExpr::LexerRef(head)),
-                tail,
-            )) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                self.add_nf(
-                    tag.clone(),
-                    NormalForm::Sequence(
-                        head.clone(),
-                        None,
-                        tail_actions,
-                        if self.ignoring {
-                            SemAct::Recognize
-                        } else {
-                            SemAct::OneOrMoreToplevel
-                        },
-                    ),
+            ParserExpr::Plus(inner) => {
+                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
+                    RightDeepIterator::from(inner.as_ref()).map(|expr| (expr, None)),
                 );
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
-            }
-            ParserExpr::Plus(box ParserExpr::Seq(box ParserExpr::LexerRef(head), tail)) => {
-                let tail_actions = RightDeepIterator::from(tail.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                let nf = NormalForm::Sequence(
-                    head.clone(),
-                    if self.ignoring {
-                        None
-                    } else {
-                        Some(self.new_output_sym())
-                    },
-                    tail_actions,
-                    if self.ignoring {
+                let nested_tag = self.new_anonymous_tag();
+                // the nested routine
+                {
+                    let semact = if self.ignoring() {
                         SemAct::Recognize
                     } else {
-                        SemAct::ZeroOrMore
-                    },
-                );
-                self.add_nf(tag.clone(), nf);
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
-            }
-            ParserExpr::Plus(inner) => {
-                let actions = RightDeepIterator::from(inner.as_ref())
-                    .map(|inner| self.add_anonymous_rule(inner))
-                    .map(|(tag, output)| Action::Shift { tag, output })
-                    .collect();
-                let nf = NormalForm::Unexpanded(
-                    actions,
-                    if self.ignoring {
+                        SemAct::OneOrMoreNested
+                    };
+
+                    self.add_nf(nested_tag.clone(), {
+                        let mut nf = partial_nf.clone();
+                        nf.append_tailcall();
+                        *nf.semact_mut() = semact.clone();
+                        nf
+                    });
+
+                    self.add_nf(nested_tag.clone(), NormalForm::Empty(vec![], semact));
+                }
+                // the toplevel routine
+                {
+                    let semact = if self.ignoring() {
                         SemAct::Recognize
                     } else {
-                        SemAct::ZeroOrMore
-                    },
-                );
-                self.add_nf(tag.clone(), nf);
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], SemAct::ZeroOrMore));
+                        SemAct::OneOrMoreToplevel
+                    };
+                    partial_nf.append_pass_collector(nested_tag);
+                    *partial_nf.semact_mut() = semact;
+                    self.add_nf(tag.clone(), partial_nf);
+                }
             }
             ParserExpr::LexerRef(ident) => {
-                let nf = if self.ignoring {
+                let nf = if self.ignoring() {
                     NormalForm::Sequence(ident.clone(), None, vec![], SemAct::Recognize)
                 } else {
                     NormalForm::Sequence(
@@ -307,8 +197,49 @@ impl Translation {
             ParserExpr::Hinted(_, _) => unreachable!("cannot create nf from hinted"),
         }
     }
-    fn add_anonymous_rule(&mut self, expr: &ParserExpr) -> (Tag, Option<Ident>) {
-        todo!()
+    fn add_anonymous_rule<const IGNORE_UNNAMED: bool>(
+        &mut self,
+        expr: &ParserExpr,
+        named: Option<Ident>,
+    ) -> (Tag, Option<Ident>) {
+        let is_unnamed = named.is_none();
+        if IGNORE_UNNAMED && is_unnamed {
+            self.start_ignoring();
+        }
+        let result = match expr {
+            ParserExpr::ParserRef(x) => {
+                let tag = Tag::Toplevel(x.clone());
+                if self.ignoring() {
+                    (tag, None)
+                } else {
+                    (tag, named.or_else(|| Some(self.new_output_sym())))
+                }
+            }
+            ParserExpr::Ignore(expr) => {
+                self.start_ignoring();
+                let (tag, output) = self.add_anonymous_rule::<IGNORE_UNNAMED>(expr, named);
+                self.end_ignoring();
+                (tag, output)
+            }
+            ParserExpr::Hinted(expr, hint) => {
+                let (tag, output) = self.add_anonymous_rule::<IGNORE_UNNAMED>(expr, named);
+                self.hints.insert(tag.clone(), hint.clone());
+                (tag, output)
+            }
+            _ => {
+                let tag = self.new_anonymous_tag();
+                self.add_nf_from_anonymous_expr(expr, &tag);
+                if self.ignoring() {
+                    (tag, None)
+                } else {
+                    (tag, named.or_else(|| Some(self.new_output_sym())))
+                }
+            }
+        };
+        if IGNORE_UNNAMED && is_unnamed {
+            self.end_ignoring();
+        }
+        result
     }
 
     // Translate a top-level definition
@@ -319,20 +250,65 @@ impl Translation {
             .rules
             .iter()
             .map(|rule| {
-                let semact = if let Some(x) = rule.action.clone() {
-                    SemAct::CustomizedRoutine(x)
+                let semact = if let Some(action) = &rule.action {
+                    SemAct::CustomizedRoutine(action.clone())
                 } else if rule.vars.len() == 1 {
                     SemAct::infer(&rule.vars[0].expr)
                 } else {
                     SemAct::Gather
                 };
-                match semact {
-                    SemAct::Gather | SemAct::CustomizedRoutine(_) => {}
-                    _ => {}
-                }
-                todo!()
+                let mut partial_nf = if matches!(semact, SemAct::CustomizedRoutine(..)) {
+                    self.partial_nf_from_sequence::<true, _>(
+                        rule.vars
+                            .iter()
+                            .map(|binding| (&binding.expr, binding.name.clone())),
+                    )
+                } else {
+                    self.partial_nf_from_sequence::<false, _>(
+                        rule.vars
+                            .iter()
+                            .map(|binding| (&binding.expr, binding.name.clone())),
+                    )
+                };
+                *partial_nf.semact_mut() = semact;
+                partial_nf
             })
             .collect();
         self.semi_nfs.insert(tag, rules);
     }
 }
+
+#[cfg(test)]
+mod test {
+    use crate::frontend::Ast;
+
+    use super::Translation;
+
+    #[test]
+    fn sexpr() {
+        let ast = syn::parse_str::<Ast>(
+            r#"
+            %entry = sexp;
+
+            DIGIT  = '0'..'9';
+            ALPHA  = 'a'..'z' | 'A'..'Z';
+            LPAREN = "(";
+            RPAREN = ")";
+            ATOM   = ALPHA (ALPHA | DIGIT)*;
+            %skip  = (" " | "\t" | "\n" | "\r")+;
+
+            compound: SExp = LPAREN sexp+[sexp:Vec<_>] RPAREN { SExp::Compound(sexp) };
+            atom    : SExp = ATOM[atom] { SExp::Atom(atom) };
+            sexp    : SExp = compound
+                           | atom;
+            "#,
+        )
+        .unwrap();
+        let mut translation = Translation::default();
+        for (name, def) in ast.parser_map.iter() {
+            translation.add_toplevel_def(name.clone(), def);
+        }
+        #[cfg(feature = "debug")]
+        println!("{}", translation.semi_nfs);
+    }
+}

From c00774eb8abe567cd281a5fe9fd947c4dd34b7cf Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Sun, 30 Jul 2023 11:48:46 +0800
Subject: [PATCH 34/42] disallow # in toplevel

---
 pag-lexer/src/lookahead.rs        |  2 +-
 pag-parser2/src/frontend/parse.rs | 20 +++++++++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index c1cad8c..1db0c39 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -157,7 +157,7 @@ impl LoopOptimizer {
         let table_size = self.global_lut.len();
         let table = self.global_lut.iter().map(|x| quote!([#(#x,)*]));
         Some(quote! {
-            const GLOBAL_LUT : [[u8; 256]; #table_size] = [ #(#table,)* ];
+            const GLOBAL_LUT: [[u8; 256]; #table_size] = [ #(#table,)* ];
         })
     }
 
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 32bba36..685cc3a 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -140,7 +140,7 @@ impl Parse for ParserRule {
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> Result<Self> {
-        let mut expr = parse_parser_expr(input, 0, true)?;
+        let mut expr = input.parse::<ParserExpr>()?;
 
         let mut name = None;
         let mut ty = None;
@@ -276,7 +276,7 @@ fn parse_lexer_expr(input: ParseStream, min_bp: u32) -> Result<LexerExpr> {
 
 impl Parse for ParserExpr {
     fn parse(input: ParseStream) -> Result<Self> {
-        parse_parser_expr(input, 0, false)
+        parse_parser_expr(input, 0, true)
     }
 }
 
@@ -294,24 +294,26 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32, is_toplevel: bool) -> Resu
         if input.peek(syn::token::Paren) {
             let content;
             parenthesized!(content in input);
-            break 'lhs content.parse::<ParserExpr>()?;
+            break 'lhs parse_parser_expr(&content, 0, false)?;
         }
-        if input.peek(Token![#]) {
+        if !is_toplevel && input.peek(Token![#]) {
             input.parse::<Token![#]>()?;
             let r_bp = 60;
-            let rhs = parse_parser_expr(input, r_bp, is_toplevel)?;
+            let rhs = parse_parser_expr(input, r_bp, false)?;
             break 'lhs ParserExpr::Ignore(Box::new(rhs));
         }
         return Err(input.error("expected parser expression"));
     };
 
     loop {
-        if !is_toplevel && (input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#])) {
+        if !is_toplevel
+            && (input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#]))
+        {
             let (l_bp, r_bp) = (40, 41);
             if l_bp < min_bp {
                 break;
             }
-            let rhs = parse_parser_expr(input, r_bp, is_toplevel)?;
+            let rhs = parse_parser_expr(input, r_bp, false)?;
             lhs = ParserExpr::Seq(Box::new(lhs), Box::new(rhs));
             continue;
         }
@@ -375,8 +377,8 @@ mod test {
 
     #[test]
     fn test_parser_expr() {
-        syn::parse_str::<ParserExpr>(r#"A? b c* D+ F?"#).unwrap();
-        syn::parse_str::<ParserExpr>(r#"A? b c* (key value*:Vec<_>)+:HashMap<_, _> F?"#).unwrap();
+        syn::parse_str::<ParserExpr>(r#"(A? b c* D+ F?)"#).unwrap();
+        syn::parse_str::<ParserExpr>(r#"(A? b c* (key value*:Vec<_>)+:HashMap<_, _> F?)"#).unwrap();
     }
 
     #[test]

From 86e93d3e1ad577cfa0cc638fb57a9899e6ebe94f Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Sun, 30 Jul 2023 12:35:16 +0800
Subject: [PATCH 35/42] remove `Hinted`

---
 pag-parser2/src/frontend/ast.rs   |   2 +-
 pag-parser2/src/frontend/parse.rs |  22 +----
 pag-parser2/src/nf/inference.rs   | 137 ++++++++++++++----------------
 pag-parser2/src/nf/mod.rs         |   6 +-
 pag-parser2/src/nf/translation.rs |  12 +--
 5 files changed, 81 insertions(+), 98 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 9877b44..456f8e3 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -64,6 +64,7 @@ pub struct ParserRule {
 pub struct VarBinding {
     pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
+    pub ty: Option<syn::Type>
 }
 
 // TODO: how to express "bottom" & "any"?
@@ -89,7 +90,6 @@ pub enum ParserExpr {
     LexerRef(syn::Ident),
     ParserRef(syn::Ident),
     Ignore(Box<Self>),
-    Hinted(Box<Self>, syn::Type),
 }
 
 pub struct RightDeepIterator<'a> {
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 685cc3a..9f18550 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -100,7 +100,7 @@ impl Parse for ParserDef {
     fn parse(input: ParseStream) -> Result<Self> {
         let ty = match input.parse::<Token![:]>() {
             Ok(_) => input.parse::<syn::Type>()?,
-            Err(_) => parse_quote!(&'src str),
+            Err(_) => parse_quote!(::pag_util::Span<'src>),
         };
 
         input.parse::<Token![=]>()?;
@@ -140,7 +140,7 @@ impl Parse for ParserRule {
 impl Parse for VarBinding {
     // ParserExpr ("[" syn::Ident (":" syn::Type)? "]")?
     fn parse(input: ParseStream) -> Result<Self> {
-        let mut expr = input.parse::<ParserExpr>()?;
+        let expr = input.parse::<ParserExpr>()?;
 
         let mut name = None;
         let mut ty = None;
@@ -159,10 +159,8 @@ impl Parse for VarBinding {
                 return Err(content.error("expected `]`"));
             }
         }
-        if let Some(ty) = ty {
-            expr = ParserExpr::Hinted(Box::new(expr), ty);
-        }
-        Ok(Self { expr, name })
+
+        Ok(Self { expr, name, ty })
     }
 }
 
@@ -317,15 +315,6 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32, is_toplevel: bool) -> Resu
             lhs = ParserExpr::Seq(Box::new(lhs), Box::new(rhs));
             continue;
         }
-        fn peek_and_parse_type(input: ParseStream, expr: ParserExpr) -> Result<ParserExpr> {
-            Ok(if input.peek(Token!(:)) {
-                input.parse::<Token!(:)>()?;
-                let ty = input.parse::<syn::Type>()?;
-                ParserExpr::Hinted(Box::new(expr), ty)
-            } else {
-                expr
-            })
-        }
         if input.peek(Token![*]) {
             let l_bp = 70;
             if l_bp < min_bp {
@@ -333,7 +322,6 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32, is_toplevel: bool) -> Resu
             }
             input.parse::<Token![*]>()?;
             lhs = ParserExpr::Star(Box::new(lhs));
-            lhs = peek_and_parse_type(input, lhs)?;
             continue;
         }
         if input.peek(Token![+]) {
@@ -343,7 +331,6 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32, is_toplevel: bool) -> Resu
             }
             input.parse::<Token![+]>()?;
             lhs = ParserExpr::Plus(Box::new(lhs));
-            lhs = peek_and_parse_type(input, lhs)?;
             continue;
         }
         if input.peek(Token![?]) {
@@ -378,7 +365,6 @@ mod test {
     #[test]
     fn test_parser_expr() {
         syn::parse_str::<ParserExpr>(r#"(A? b c* D+ F?)"#).unwrap();
-        syn::parse_str::<ParserExpr>(r#"(A? b c* (key value*:Vec<_>)+:HashMap<_, _> F?)"#).unwrap();
     }
 
     #[test]
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index b614faa..ac75948 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -88,23 +88,23 @@ pub enum InferredType {
 pub struct InferenceContext<'a> {
     /// Typed tags
     gamma: HashMap<Tag, InferredType>,
-    /// Type annotations from user (for toplevel)
-    annotations: &'a HashMap<Tag, Type>,
     /// Fully normalized terms
     nforms: &'a HashMap<Tag, Vec<NormalForm>>,
 }
+
 impl<'a> InferenceContext<'a> {
     /// Create a new inference context
     pub fn new(
         annotations: &'a HashMap<Tag, Type>,
         nforms: &'a HashMap<Tag, Vec<NormalForm>>,
     ) -> Self {
-        Self {
-            gamma: HashMap::new(),
-            annotations,
-            nforms,
-        }
+        let gamma = annotations
+            .iter()
+            .map(|(k, v)| (k.clone(), InferredType::Concrete(v.clone())))
+            .collect();
+        Self { gamma, nforms }
     }
+
     fn infer_gather<'i, I: Iterator<Item = BoundTarget<'i>>>(
         &mut self,
         mut tags: I,
@@ -113,14 +113,14 @@ impl<'a> InferenceContext<'a> {
             let mut types = vec![if let BoundTarget::Tag(tag) = tag {
                 self.infer(tag)?
             } else {
-                InferredType::Concrete(parse_quote! {::pag_runtime::Span<'src>})
+                InferredType::Concrete(parse_quote!(::pag_util::Span<'src>))
             }];
             for t in tags {
                 // If any inference fails, the whole inference fails
                 let ty = if let BoundTarget::Tag(t) = t {
                     self.infer(t)?
                 } else {
-                    InferredType::Concrete(parse_quote! {::pag_runtime::Span<'src>})
+                    InferredType::Concrete(parse_quote!(::pag_util::Span<'src>))
                 };
                 types.push(ty);
             }
@@ -133,9 +133,10 @@ impl<'a> InferenceContext<'a> {
             }
         } else {
             // no field, unit type
-            Some(InferredType::Concrete(parse_quote! {()}))
+            Some(InferredType::Concrete(parse_quote!(())))
         }
     }
+
     /// try infer all types, but may fail with incomplete type information.
     pub fn infer_all_types(mut self) -> HashMap<Tag, InferredType> {
         let mut typed = 0;
@@ -149,79 +150,71 @@ impl<'a> InferenceContext<'a> {
         }
         self.gamma
     }
+
     fn infer(&mut self, tag: &Tag) -> Option<InferredType> {
         if let Some(x) = self.gamma.get(tag) {
             return Some(x.clone());
         }
-        let target = if let Some(x) = self.annotations.get(tag) {
-            // If a concrete type annotation is provided, use it directly
-            InferredType::Concrete(x.clone())
-        } else {
-            // find first subexpression that fulfills inference
-            let nfs = self.nforms.get(tag)?;
-            let mut inferred = None;
-            for i in nfs.iter() {
-                let semact = i.semact();
-                match semact {
-                    SemAct::Recognize => {
-                        inferred.replace(InferredType::Concrete(parse_quote!(())));
-                        break;
-                    }
-                    // Token semantic action, the type is Span
-                    SemAct::Token => {
-                        inferred.replace(InferredType::Concrete(parse_quote!(
-                            ::pag_runtime::Span<'src>
-                        )));
-                        break;
-                    }
-                    // Customized routine without type annotation, cannot infer
-                    SemAct::CustomizedRoutine(..) => continue,
-                    // Nested routine for one or more, the type is unit.
-                    SemAct::OneOrMoreNested => {
-                        inferred.replace(InferredType::Concrete(parse_quote!(())));
+
+        // Find first subexpression that fulfills inference
+        let nfs = self.nforms.get(tag)?;
+        let mut inferred = None;
+        for i in nfs.iter() {
+            let semact = i.semact();
+            match semact {
+                SemAct::Recognize => {
+                    inferred.replace(InferredType::Concrete(parse_quote!(())));
+                    break;
+                }
+                // Token semantic action, the type is Span
+                SemAct::Token => {
+                    inferred.replace(InferredType::Concrete(parse_quote!(::pag_util::Span<'src>)));
+                    break;
+                }
+                // Customized routine without type annotation, cannot infer
+                SemAct::CustomizedRoutine(..) => continue,
+                // Nested routine for one or more, the type is unit.
+                SemAct::OneOrMoreNested => {
+                    inferred.replace(InferredType::Concrete(parse_quote!(())));
+                    break;
+                }
+                SemAct::Gather => {
+                    let visible = i.visible_bindings(0);
+                    if let Some(gather_type) = self.infer_gather(visible.into_iter().map(|x| x.1)) {
+                        inferred.replace(gather_type);
                         break;
                     }
-                    SemAct::Gather => {
-                        let visible = i.visible_bindings(0);
-                        if let Some(gather_type) =
-                            self.infer_gather(visible.into_iter().map(|x| x.1))
-                        {
-                            inferred.replace(gather_type);
-                            break;
-                        }
-                    }
-                    SemAct::ZeroOrMore | SemAct::Option | SemAct::OneOrMoreToplevel => {
-                        let mapper = |ty: InferredType| {
-                            if matches!(semact, SemAct::Option) {
-                                InferredType::Option(Box::new(ty))
-                            } else {
-                                InferredType::Collector(Box::new(ty))
-                            }
-                        };
-                        // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
-                        if let NormalForm::Empty(x, _) = i {
-                            if x.is_empty() {
-                                continue;
-                            }
+                }
+                SemAct::ZeroOrMore | SemAct::Option | SemAct::OneOrMoreToplevel => {
+                    let mapper = |ty: InferredType| {
+                        if matches!(semact, SemAct::Option) {
+                            InferredType::Option(Box::new(ty))
+                        } else {
+                            InferredType::Collector(Box::new(ty))
                         }
-                        // skip the trailing part of OneOrMoreToplevel
-                        let visible =
-                            i.visible_bindings(if matches!(semact, SemAct::OneOrMoreToplevel) {
-                                1
-                            } else {
-                                0
-                            });
-                        if let Some(gather_type) =
-                            self.infer_gather(visible.into_iter().map(|x| x.1))
-                        {
-                            inferred.replace(mapper(gather_type));
-                            break;
+                    };
+                    // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
+                    if let NormalForm::Empty(x, _) = i {
+                        if x.is_empty() {
+                            continue;
                         }
                     }
+                    // skip the trailing part of OneOrMoreToplevel
+                    let visible =
+                        i.visible_bindings(if matches!(semact, SemAct::OneOrMoreToplevel) {
+                            1
+                        } else {
+                            0
+                        });
+                    if let Some(gather_type) = self.infer_gather(visible.into_iter().map(|x| x.1)) {
+                        inferred.replace(mapper(gather_type));
+                        break;
+                    }
                 }
             }
-            inferred?
-        };
+        }
+
+        let target = inferred?;
         self.gamma.insert(tag.clone(), target.clone());
         Some(target)
     }
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index f1e1ceb..46b16ae 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -124,6 +124,7 @@ impl NormalForm {
             | Self::Sequence(_, _, _, semact) => semact,
         }
     }
+
     pub fn semact_mut(&mut self) -> &mut SemAct {
         match self {
             Self::Empty(_, semact)
@@ -131,6 +132,7 @@ impl NormalForm {
             | Self::Sequence(_, _, _, semact) => semact,
         }
     }
+
     pub fn append_tailcall(&mut self) {
         match self {
             Self::Empty(_actions, _) => {
@@ -144,6 +146,7 @@ impl NormalForm {
             }
         }
     }
+
     pub fn append_pass_collector(&mut self, tag: Tag) {
         match self {
             Self::Empty(_actions, _) => {
@@ -157,6 +160,7 @@ impl NormalForm {
             }
         }
     }
+
     pub fn visible_bindings(&self, skip: usize) -> Vec<(&Ident, BoundTarget)> {
         match self {
             Self::Empty(actions, _) => actions
@@ -261,8 +265,8 @@ fn debug_print_test() {
     println!("{}", sequence);
 }
 
-#[derive(Default, Clone)]
 /// Well, it is not the notorius firewall.
+#[derive(Default, Clone)]
 pub struct NFTable(HashMap<Tag, Vec<NormalForm>>);
 
 impl Deref for NFTable {
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 90a8823..7a1120b 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -33,6 +33,7 @@ impl Translation {
     fn start_ignoring(&mut self) {
         self.ignoring_cnt += 1;
     }
+
     /// Exit ignoring mode
     fn end_ignoring(&mut self) {
         self.ignoring_cnt -= 1;
@@ -48,12 +49,14 @@ impl Translation {
         self.output_cnt += 1;
         result
     }
+
     /// Allocate a new tag for anonymous routines.
     fn new_anonymous_tag(&mut self) -> Tag {
         let result = Tag::Anonymous(self.anonymous_cnt);
         self.anonymous_cnt += 1;
         result
     }
+
     /// Construct a normal form from a sequence of parser expressions. The semact is always `Recognize`.
     fn partial_nf_from_sequence<
         'a,
@@ -101,9 +104,11 @@ impl Translation {
             }
         }
     }
+
     fn add_nf(&mut self, tag: Tag, nf: NormalForm) {
         self.semi_nfs.entry(tag).or_default().push(nf);
     }
+
     fn add_nf_from_anonymous_expr(&mut self, expr: &ParserExpr, tag: &Tag) {
         match expr {
             ParserExpr::Seq(..) => {
@@ -194,9 +199,9 @@ impl Translation {
             }
             ParserExpr::ParserRef(_) => unreachable!("cannot create nf from parser ref"),
             ParserExpr::Ignore(_) => unreachable!("cannot create nf from ignore"),
-            ParserExpr::Hinted(_, _) => unreachable!("cannot create nf from hinted"),
         }
     }
+
     fn add_anonymous_rule<const IGNORE_UNNAMED: bool>(
         &mut self,
         expr: &ParserExpr,
@@ -221,11 +226,6 @@ impl Translation {
                 self.end_ignoring();
                 (tag, output)
             }
-            ParserExpr::Hinted(expr, hint) => {
-                let (tag, output) = self.add_anonymous_rule::<IGNORE_UNNAMED>(expr, named);
-                self.hints.insert(tag.clone(), hint.clone());
-                (tag, output)
-            }
             _ => {
                 let tag = self.new_anonymous_tag();
                 self.add_nf_from_anonymous_expr(expr, &tag);

From f03a5991a5fb0ac8509a6e02e50e453780ed8096 Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Sun, 30 Jul 2023 14:42:34 +0800
Subject: [PATCH 36/42] reformat

---
 pag-parser2/src/frontend/ast.rs   | 19 +++++++++----------
 pag-parser2/src/frontend/parse.rs |  4 +---
 pag-parser2/src/nf/inference.rs   |  2 +-
 pag-parser2/src/nf/semact.rs      |  6 +++---
 pag-parser2/src/nf/translation.rs |  4 ++--
 5 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 456f8e3..fb40153 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -17,30 +17,29 @@ pub struct Ast {
 }
 
 #[derive(Clone)]
-#[repr(transparent)]
-pub struct CustomizedBlock(pub Rc<syn::Block>);
+pub struct CodeBlock(pub Rc<syn::Block>);
 
-impl PartialEq for CustomizedBlock {
+impl PartialEq for CodeBlock {
     fn eq(&self, other: &Self) -> bool {
         Rc::ptr_eq(&self.0, &other.0)
     }
 }
 
-impl Eq for CustomizedBlock {}
+impl Eq for CodeBlock {}
 
-impl PartialOrd for CustomizedBlock {
+impl PartialOrd for CodeBlock {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         Rc::as_ptr(&self.0).partial_cmp(&Rc::as_ptr(&other.0))
     }
 }
 
-impl Ord for CustomizedBlock {
+impl Ord for CodeBlock {
     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
         Rc::as_ptr(&self.0).cmp(&Rc::as_ptr(&other.0))
     }
 }
 
-impl std::hash::Hash for CustomizedBlock {
+impl std::hash::Hash for CodeBlock {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
         Rc::as_ptr(&self.0).hash(state)
     }
@@ -52,19 +51,19 @@ pub struct LexerDef {
 }
 
 pub struct ParserDef {
-    pub ty: syn::Type,
+    pub ty: syn::Type, // TODO: syn::Type is huge, maybe we should box it or only keep the span
     pub rules: Vec<ParserRule>,
 }
 
 pub struct ParserRule {
     pub vars: Vec<VarBinding>,
-    pub action: Option<CustomizedBlock>,
+    pub action: Option<CodeBlock>,
 }
 
 pub struct VarBinding {
     pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
-    pub ty: Option<syn::Type>
+    pub ty: Option<syn::Type>, // TODO: syn::Type is huge, maybe we should box it or only keep the span
 }
 
 // TODO: how to express "bottom" & "any"?
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 9f18550..1758763 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -128,9 +128,7 @@ impl Parse for ParserRule {
 
         let mut action = None;
         if input.peek(syn::token::Brace) {
-            action = Some(CustomizedBlock(std::rc::Rc::new(
-                input.parse::<syn::Block>()?,
-            )));
+            action = Some(CodeBlock(std::rc::Rc::new(input.parse::<syn::Block>()?)));
         }
 
         Ok(Self { vars, action })
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
index ac75948..d2d9f86 100644
--- a/pag-parser2/src/nf/inference.rs
+++ b/pag-parser2/src/nf/inference.rs
@@ -172,7 +172,7 @@ impl<'a> InferenceContext<'a> {
                     break;
                 }
                 // Customized routine without type annotation, cannot infer
-                SemAct::CustomizedRoutine(..) => continue,
+                SemAct::Customized(..) => continue,
                 // Nested routine for one or more, the type is unit.
                 SemAct::OneOrMoreNested => {
                     inferred.replace(InferredType::Concrete(parse_quote!(())));
diff --git a/pag-parser2/src/nf/semact.rs b/pag-parser2/src/nf/semact.rs
index 9dec982..9e44a4f 100644
--- a/pag-parser2/src/nf/semact.rs
+++ b/pag-parser2/src/nf/semact.rs
@@ -6,7 +6,7 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
-use crate::frontend::{CustomizedBlock, ParserExpr};
+use crate::frontend::{CodeBlock, ParserExpr};
 
 ///
 /// ```
@@ -19,7 +19,7 @@ use crate::frontend::{CustomizedBlock, ParserExpr};
 // those normal form without SemAct will be treated as plain scanner.
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum SemAct {
-    CustomizedRoutine(CustomizedBlock),
+    Customized(CodeBlock),
     /// Gather inner data. If multiple is selected, return a tuple.
     /// If only one is selected, return target data.
     Gather,
@@ -57,7 +57,7 @@ impl SemAct {
 impl std::fmt::Display for SemAct {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            SemAct::CustomizedRoutine(x) => write!(f, "{:?}", std::rc::Rc::as_ptr(&x.0)),
+            SemAct::Customized(x) => write!(f, "{:?}", std::rc::Rc::as_ptr(&x.0)),
             SemAct::Gather => write!(f, "Gather"),
             SemAct::Option => write!(f, "Option"),
             SemAct::ZeroOrMore => write!(f, "ZeroOrMore"),
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 7a1120b..e12adb2 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -251,13 +251,13 @@ impl Translation {
             .iter()
             .map(|rule| {
                 let semact = if let Some(action) = &rule.action {
-                    SemAct::CustomizedRoutine(action.clone())
+                    SemAct::Customized(action.clone())
                 } else if rule.vars.len() == 1 {
                     SemAct::infer(&rule.vars[0].expr)
                 } else {
                     SemAct::Gather
                 };
-                let mut partial_nf = if matches!(semact, SemAct::CustomizedRoutine(..)) {
+                let mut partial_nf = if matches!(semact, SemAct::Customized(..)) {
                     self.partial_nf_from_sequence::<true, _>(
                         rule.vars
                             .iter()

From b545ec31a62ea1ae150e77941a7c6ae12179894f Mon Sep 17 00:00:00 2001
From: QuarticCat <QuarticCat@pm.me>
Date: Mon, 31 Jul 2023 02:57:15 +0800
Subject: [PATCH 37/42] parse ParserExpr::Seq to vector

---
 pag-parser2/src/frontend/ast.rs   | 45 +++++++------------------------
 pag-parser2/src/frontend/parse.rs | 21 ++++++++-------
 pag-parser2/src/lib.rs            |  2 +-
 3 files changed, 23 insertions(+), 45 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index fb40153..00ef1b5 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -9,13 +9,6 @@
 use std::collections::HashMap;
 use std::rc::Rc;
 
-pub struct Ast {
-    pub entry: syn::Ident,
-    pub skip: Option<LexerExpr>,
-    pub lexer_map: HashMap<syn::Ident, LexerDef>,
-    pub parser_map: HashMap<syn::Ident, ParserDef>,
-}
-
 #[derive(Clone)]
 pub struct CodeBlock(pub Rc<syn::Block>);
 
@@ -45,13 +38,20 @@ impl std::hash::Hash for CodeBlock {
     }
 }
 
+pub struct Ast {
+    pub entry: syn::Ident,
+    pub skip: Option<LexerExpr>,
+    pub lexer_map: HashMap<syn::Ident, LexerDef>,
+    pub parser_map: HashMap<syn::Ident, ParserDef>,
+}
+
 pub struct LexerDef {
     pub idx: u32,
     pub expr: LexerExpr,
 }
 
 pub struct ParserDef {
-    pub ty: syn::Type, // TODO: syn::Type is huge, maybe we should box it or only keep the span
+    pub ty: Rc<syn::Type>,
     pub rules: Vec<ParserRule>,
 }
 
@@ -63,7 +63,7 @@ pub struct ParserRule {
 pub struct VarBinding {
     pub expr: ParserExpr,
     pub name: Option<syn::Ident>,
-    pub ty: Option<syn::Type>, // TODO: syn::Type is huge, maybe we should box it or only keep the span
+    pub ty: Option<Rc<syn::Type>>,
 }
 
 // TODO: how to express "bottom" & "any"?
@@ -82,7 +82,7 @@ pub enum LexerExpr {
 
 // TODO: how to express "select" & "ignore"?
 pub enum ParserExpr {
-    Seq(Box<Self>, Box<Self>),
+    Seq(Vec<Self>),
     Star(Box<Self>),
     Plus(Box<Self>),
     Opt(Box<Self>),
@@ -90,28 +90,3 @@ pub enum ParserExpr {
     ParserRef(syn::Ident),
     Ignore(Box<Self>),
 }
-
-pub struct RightDeepIterator<'a> {
-    seq: Option<&'a ParserExpr>,
-}
-
-impl<'a> From<&'a ParserExpr> for RightDeepIterator<'a> {
-    fn from(expr: &'a ParserExpr) -> Self {
-        Self { seq: Some(expr) }
-    }
-}
-
-impl<'a> Iterator for RightDeepIterator<'a> {
-    type Item = &'a ParserExpr;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.seq {
-            Some(ParserExpr::Seq(a, b)) => {
-                self.seq = Some(b);
-                Some(a)
-            }
-            Some(_) => self.seq.take(),
-            None => None,
-        }
-    }
-}
diff --git a/pag-parser2/src/frontend/parse.rs b/pag-parser2/src/frontend/parse.rs
index 1758763..c082c49 100644
--- a/pag-parser2/src/frontend/parse.rs
+++ b/pag-parser2/src/frontend/parse.rs
@@ -13,6 +13,7 @@ use syn::parse::{Parse, ParseStream};
 use syn::{bracketed, parenthesized, parse_quote, Error, Result, Token};
 
 use std::collections::HashMap;
+use std::rc::Rc;
 
 #[derive(PartialEq, Eq)]
 enum IdentKind {
@@ -98,10 +99,10 @@ impl Parse for Ast {
 impl Parse for ParserDef {
     // (":" syn::Type)? = (ParserRule)|+
     fn parse(input: ParseStream) -> Result<Self> {
-        let ty = match input.parse::<Token![:]>() {
+        let ty = Rc::new(match input.parse::<Token![:]>() {
             Ok(_) => input.parse::<syn::Type>()?,
             Err(_) => parse_quote!(::pag_util::Span<'src>),
-        };
+        });
 
         input.parse::<Token![=]>()?;
 
@@ -150,7 +151,7 @@ impl Parse for VarBinding {
 
             if content.peek(Token![:]) {
                 content.parse::<Token![:]>()?;
-                ty = Some(content.parse::<syn::Type>()?);
+                ty = Some(Rc::new(content.parse::<syn::Type>()?));
             }
 
             if !content.is_empty() {
@@ -305,13 +306,17 @@ fn parse_parser_expr(input: ParseStream, min_bp: u32, is_toplevel: bool) -> Resu
         if !is_toplevel
             && (input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#]))
         {
             let (l_bp, r_bp) = (40, 41);
             if l_bp < min_bp {
                 break;
             }
-            let rhs = parse_parser_expr(input, r_bp, false)?;
-            lhs = ParserExpr::Seq(Box::new(lhs), Box::new(rhs));
-            continue;
+            // The binding-power guard above must run before `lhs` is moved into the vector,
+            // otherwise a tighter-binding caller would get back a singleton `Seq([lhs])`.
+            let mut seq = vec![lhs];
+            while input.peek(syn::Ident) || input.peek(syn::token::Paren) || input.peek(Token![#]) {
+                seq.push(parse_parser_expr(input, r_bp, false)?);
+            }
+            return Ok(ParserExpr::Seq(seq));
         }
         if input.peek(Token![*]) {
             let l_bp = 70;
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
index 932e93c..a5b4fe7 100644
--- a/pag-parser2/src/lib.rs
+++ b/pag-parser2/src/lib.rs
@@ -10,4 +10,4 @@
 #[cfg(feature = "debug")]
 mod debug;
 mod frontend;
-mod nf;
+// mod nf;

From dc192386d872e8714858a545b0f7e73a38cb45a1 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Sun, 30 Jul 2023 17:09:30 -0400
Subject: [PATCH 38/42] embed type info into NF

---
 pag-parser2/src/debug.rs          |  22 ---
 pag-parser2/src/lib.rs            |   6 +-
 pag-parser2/src/nf/inference.rs   | 221 ------------------------------
 pag-parser2/src/nf/mod.rs         | 202 ++++++++++++++-------------
 pag-parser2/src/nf/translation.rs |   1 -
 pag-parser2/src/utils.rs          |  70 ++++++++++
 6 files changed, 182 insertions(+), 340 deletions(-)
 delete mode 100644 pag-parser2/src/debug.rs
 delete mode 100644 pag-parser2/src/nf/inference.rs
 create mode 100644 pag-parser2/src/utils.rs

diff --git a/pag-parser2/src/debug.rs b/pag-parser2/src/debug.rs
deleted file mode 100644
index fc99fdb..0000000
--- a/pag-parser2/src/debug.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-#[cfg(feature = "ansi-debug")]
-macro_rules! styled {
-    ($style:expr, $($arg:tt)*) => {
-        {
-            use nu_ansi_term::*;
-            $style.paint(format!($($arg)*))
-        }
-    };
-}
-#[cfg(not(feature = "ansi-debug"))]
-macro_rules! styled {
-    ($style:expr, $($arg:tt)*) => {format!($($arg)*)};
-}
-
-macro_rules! styled_write {
-    ($dst:expr, $($arg:tt)*) => {
-        write!($dst, "{}", $crate::debug::styled!($($arg)*))
-    };
-}
-
-pub(crate) use styled;
-pub(crate) use styled_write;
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
index a5b4fe7..781a7a8 100644
--- a/pag-parser2/src/lib.rs
+++ b/pag-parser2/src/lib.rs
@@ -6,8 +6,6 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
-#![feature(box_patterns)]
-#[cfg(feature = "debug")]
-mod debug;
 mod frontend;
-// mod nf;
+mod nf;
+mod utils;
diff --git a/pag-parser2/src/nf/inference.rs b/pag-parser2/src/nf/inference.rs
deleted file mode 100644
index d2d9f86..0000000
--- a/pag-parser2/src/nf/inference.rs
+++ /dev/null
@@ -1,221 +0,0 @@
-// If there is no semantic action, the routine is plain scan over. Thus, the type is unit.
-// ⊢ x = ..., SemAct[x] = ∅
-// -------------------
-// ⊢ x : ()
-
-// A Customized Routine must have type annotation
-// ⊢ x = ..., SemAct[x] = Customized(𝜏)
-// -------------------
-// ⊢ x : 𝜏
-
-// A Token action gives the span of a terminal
-// ⊢ x = T, SemAct[x] = Token
-// -------------------
-// ⊢ x : Span
-
-// Fully normalized Option must be in the following form:
-// x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
-//
-// Thus, the rule should be:
-//
-// Γ ⊢ x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
-// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
-// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 = ...
-// SemAct[x] = Option
-// -------------------
-//  Γ ⊢ x : Option<𝜏>
-
-// Fully normalized ZeroOrMore must be in the following form:
-// x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
-//
-// Thus, the rule should be:
-//
-// Γ ⊢ x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
-// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
-// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 =...
-// SemAct[x] = ZeroOrMore(Σ ∈ Collector<𝜏>)
-// -------------------
-//  Γ ⊢ x : Σ
-
-// Fully normalized OneOrMoreToplevel must be in the following form:
-// x = T_0 ...[r_0] t | T_1 ... [r_1] t | ..
-//
-// Thus, the rule should be:
-//
-// Γ ⊢ x = T_0 ...[r_0] t | T_1 ... [r_1] t | ..
-// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
-// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 = ...
-// SemAct[x] = OneOrMoreToplevel
-// -------------------
-//  Γ ⊢ x : Σ
-
-// Fully normalized OneOrMoreNested must be in the following form:
-// x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
-//
-// Thus, the rule should be:
-//
-// Γ ⊢ x = T_0 ... [r_0] | T_1 ... [r_1] | .. | ε
-// Γ ⊢ r_0 : 𝜏_0, r_1 : 𝜏_1 ...
-// Γ ⊢ 𝜏 = 𝜏_0 = 𝜏_1 =...
-// SemAct[x] = ZeroOrMore
-// -------------------
-//  Γ ⊢ x : () -- Notice that x accept &mut C ∈ Collector<𝜏> instead
-
-// Fully normalized Tuple must be in the following form:
-// x = T_0 ... [r_0] x00 _x01 x02 | ..
-// let η_i be the type tuple of everything including last reduce that gives an output.
-// x = T_0 ... [r_0] x00 _x01 x02 | ..
-// Γ ⊢ ║ η_0 ║ = ║ η_1 ║ = ...
-// Γ ⊢ ∀i.∀j.∀k. η_i.k = η_j.k
-// SemAct[x] = Gather
-// -------------------
-// Γ ⊢ x : η
-
-use std::collections::HashMap;
-
-use syn::{parse_quote, Type};
-
-use super::{semact::SemAct, BoundTarget, NormalForm, Tag};
-
-#[derive(Clone)]
-pub enum InferredType {
-    Concrete(Type),
-    Collector(Box<Self>),
-    Option(Box<Self>),
-    Tuple(Vec<Self>),
-}
-
-pub struct InferenceContext<'a> {
-    /// Typed tags
-    gamma: HashMap<Tag, InferredType>,
-    /// Fully normalized terms
-    nforms: &'a HashMap<Tag, Vec<NormalForm>>,
-}
-
-impl<'a> InferenceContext<'a> {
-    /// Create a new inference context
-    pub fn new(
-        annotations: &'a HashMap<Tag, Type>,
-        nforms: &'a HashMap<Tag, Vec<NormalForm>>,
-    ) -> Self {
-        let gamma = annotations
-            .iter()
-            .map(|(k, v)| (k.clone(), InferredType::Concrete(v.clone())))
-            .collect();
-        Self { gamma, nforms }
-    }
-
-    fn infer_gather<'i, I: Iterator<Item = BoundTarget<'i>>>(
-        &mut self,
-        mut tags: I,
-    ) -> Option<InferredType> {
-        if let Some(tag) = tags.next() {
-            let mut types = vec![if let BoundTarget::Tag(tag) = tag {
-                self.infer(tag)?
-            } else {
-                InferredType::Concrete(parse_quote!(::pag_util::Span<'src>))
-            }];
-            for t in tags {
-                // If any inference fails, the whole inference fails
-                let ty = if let BoundTarget::Tag(t) = t {
-                    self.infer(t)?
-                } else {
-                    InferredType::Concrete(parse_quote!(::pag_util::Span<'src>))
-                };
-                types.push(ty);
-            }
-            if types.len() == 1 {
-                // If there is only one field, no need to wrap in a tuple
-                Some(types.pop().unwrap())
-            } else {
-                // Otherwise, wrap in a tuple
-                Some(InferredType::Tuple(types))
-            }
-        } else {
-            // no field, unit type
-            Some(InferredType::Concrete(parse_quote!(())))
-        }
-    }
-
-    /// try infer all types, but may fail with incomplete type information.
-    pub fn infer_all_types(mut self) -> HashMap<Tag, InferredType> {
-        let mut typed = 0;
-        while typed < self.nforms.len() {
-            typed = 0;
-            for i in self.nforms.keys() {
-                if self.infer(i).is_some() {
-                    typed += 1;
-                }
-            }
-        }
-        self.gamma
-    }
-
-    fn infer(&mut self, tag: &Tag) -> Option<InferredType> {
-        if let Some(x) = self.gamma.get(tag) {
-            return Some(x.clone());
-        }
-
-        // Find first subexpression that fulfills inference
-        let nfs = self.nforms.get(tag)?;
-        let mut inferred = None;
-        for i in nfs.iter() {
-            let semact = i.semact();
-            match semact {
-                SemAct::Recognize => {
-                    inferred.replace(InferredType::Concrete(parse_quote!(())));
-                    break;
-                }
-                // Token semantic action, the type is Span
-                SemAct::Token => {
-                    inferred.replace(InferredType::Concrete(parse_quote!(::pag_util::Span<'src>)));
-                    break;
-                }
-                // Customized routine without type annotation, cannot infer
-                SemAct::Customized(..) => continue,
-                // Nested routine for one or more, the type is unit.
-                SemAct::OneOrMoreNested => {
-                    inferred.replace(InferredType::Concrete(parse_quote!(())));
-                    break;
-                }
-                SemAct::Gather => {
-                    let visible = i.visible_bindings(0);
-                    if let Some(gather_type) = self.infer_gather(visible.into_iter().map(|x| x.1)) {
-                        inferred.replace(gather_type);
-                        break;
-                    }
-                }
-                SemAct::ZeroOrMore | SemAct::Option | SemAct::OneOrMoreToplevel => {
-                    let mapper = |ty: InferredType| {
-                        if matches!(semact, SemAct::Option) {
-                            InferredType::Option(Box::new(ty))
-                        } else {
-                            InferredType::Collector(Box::new(ty))
-                        }
-                    };
-                    // Skip epsilon production, this is safe since OneOrMoreToplevel will never be empty
-                    if let NormalForm::Empty(x, _) = i {
-                        if x.is_empty() {
-                            continue;
-                        }
-                    }
-                    // skip the trailing part of OneOrMoreToplevel
-                    let visible =
-                        i.visible_bindings(if matches!(semact, SemAct::OneOrMoreToplevel) {
-                            1
-                        } else {
-                            0
-                        });
-                    if let Some(gather_type) = self.infer_gather(visible.into_iter().map(|x| x.1)) {
-                        inferred.replace(mapper(gather_type));
-                        break;
-                    }
-                }
-            }
-        }
-
-        let target = inferred?;
-        self.gamma.insert(tag.clone(), target.clone());
-        Some(target)
-    }
-}
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 46b16ae..6de59d6 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -6,21 +6,23 @@
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
 
-mod inference;
 mod normalization;
 mod semact;
-mod translation;
+//mod translation;
+
+use crate::utils::Appendix;
 
 use std::{
     collections::{HashMap, VecDeque},
     ops::{Deref, DerefMut},
+    rc::Rc,
 };
 
 use quote::format_ident;
 use syn::Ident;
 
 #[cfg(feature = "debug")]
-use crate::debug::{styled, styled_write};
+use crate::utils::{styled, styled_write};
 
 use self::semact::SemAct;
 
@@ -104,11 +106,34 @@ impl std::fmt::Display for Action {
     }
 }
 
+#[derive(Clone)]
+pub enum AbstractType {
+    /// Concrete type without any type parameter.
+    Concrete(Rc<syn::Type>),
+    Option(Box<Self>),
+    Tuple(Vec<Self>),
+    Collector(Box<Self>),
+}
+
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum NormalForm {
-    Empty(Vec<(Tag, Option<Ident>)>, SemAct),
-    Unexpanded(Vec<Action>, SemAct),
-    Sequence(Ident, Option<Ident>, Vec<Action>, SemAct),
+    Empty {
+        actions: Vec<Action>,
+        semact: SemAct,
+        ty: Appendix<AbstractType>,
+    },
+    Unexpanded {
+        actions: Vec<Action>,
+        semact: SemAct,
+        ty: Appendix<AbstractType>,
+    },
+    Sequence {
+        token: Ident,
+        token_output: Option<Ident>,
+        actions: Vec<Action>,
+        semact: SemAct,
+        ty: Appendix<AbstractType>,
+    },
 }
 
 pub enum BoundTarget<'a> {
@@ -119,84 +144,67 @@ pub enum BoundTarget<'a> {
 impl NormalForm {
     pub fn semact(&self) -> &SemAct {
         match self {
-            Self::Empty(_, semact)
-            | Self::Unexpanded(_, semact)
-            | Self::Sequence(_, _, _, semact) => semact,
+            Self::Empty { semact, .. } => semact,
+            Self::Unexpanded { semact, .. } => semact,
+            Self::Sequence { semact, .. } => semact,
         }
     }
 
     pub fn semact_mut(&mut self) -> &mut SemAct {
         match self {
-            Self::Empty(_, semact)
-            | Self::Unexpanded(_, semact)
-            | Self::Sequence(_, _, _, semact) => semact,
+            Self::Empty { semact, .. } => semact,
+            Self::Unexpanded { semact, .. } => semact,
+            Self::Sequence { semact, .. } => semact,
         }
     }
 
-    pub fn append_tailcall(&mut self) {
+    pub fn actions(&self) -> &[Action] {
         match self {
-            Self::Empty(_actions, _) => {
-                unreachable!("empty cannot be tail called, otherwise there will be ambiguity")
-            }
-            Self::Unexpanded(actions, _) => {
-                actions.push(Action::TailCall);
-            }
-            Self::Sequence(_, _, actions, _) => {
-                actions.push(Action::TailCall);
-            }
+            Self::Empty { actions, .. } => actions,
+            Self::Unexpanded { actions, .. } => actions,
+            Self::Sequence { actions, .. } => actions,
         }
     }
 
-    pub fn append_pass_collector(&mut self, tag: Tag) {
+    pub fn actions_mut(&mut self) -> &mut Vec<Action> {
         match self {
-            Self::Empty(_actions, _) => {
-                unreachable!("empty cannot be followed by another subroutine, otherwise there will be ambiguity")
-            }
-            Self::Unexpanded(actions, _) => {
-                actions.push(Action::PassCollector(tag));
-            }
-            Self::Sequence(_, _, actions, _) => {
-                actions.push(Action::PassCollector(tag));
-            }
+            Self::Empty { actions, .. } => actions,
+            Self::Unexpanded { actions, .. } => actions,
+            Self::Sequence { actions, .. } => actions,
         }
     }
 
-    pub fn visible_bindings(&self, skip: usize) -> Vec<(&Ident, BoundTarget)> {
-        match self {
-            Self::Empty(actions, _) => actions
-                .last()
-                .and_then(|(tag, ident)| Some((ident.as_ref()?, BoundTarget::Tag(tag))))
-                .into_iter()
-                .collect(),
-            Self::Unexpanded(actions, _) | Self::Sequence(_, _, actions, _) => {
-                let mut acc = VecDeque::new();
-                for act in actions.iter().rev().skip(skip) {
-                    match act {
-                        Action::Shift { tag, output } => {
-                            if let Some(ident) = output {
-                                acc.push_front((ident, BoundTarget::Tag(tag)));
-                            }
-                        }
-                        Action::Reduce { tag, output } => {
-                            if let Some(ident) = output {
-                                acc.push_front((ident, BoundTarget::Tag(tag)));
-                            }
-                            break;
-                        }
-                        Action::PassCollector(..) => continue,
-                        Action::TailCall => continue,
+    pub fn visible_bindings(&self, skip: usize) -> Box<[(&Ident, BoundTarget)]> {
+        let mut acc = VecDeque::new();
+        for act in self.actions().iter().rev().skip(skip) {
+            match act {
+                Action::Shift { tag, output } => {
+                    if let Some(ident) = output {
+                        acc.push_front((ident, BoundTarget::Tag(tag)));
                     }
                 }
-                if let Self::Sequence(_, Some(tk), _, _) = self {
-                    if acc.len() == actions.len() - skip
-                        && !matches!(actions.first(), Some(Action::Reduce { .. }))
-                    {
-                        acc.push_front((tk, BoundTarget::Token));
+                Action::Reduce { tag, output } => {
+                    if let Some(ident) = output {
+                        acc.push_front((ident, BoundTarget::Tag(tag)));
                     }
+                    break;
                 }
-                acc.into_iter().collect()
+                Action::PassCollector(..) => continue,
+                Action::TailCall => continue,
+            }
+        }
+        if let Self::Sequence {
+            token_output: Some(tk),
+            ..
+        } = self
+        {
+            if acc.len() == self.actions().len() - skip
+                && !matches!(self.actions().first(), Some(Action::Reduce { .. }))
+            {
+                acc.push_front((tk, BoundTarget::Token));
             }
         }
+        acc.into_iter().collect()
     }
 }
 
@@ -204,27 +212,28 @@ impl NormalForm {
 impl std::fmt::Display for NormalForm {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Self::Empty(actions, _) => {
+            Self::Empty { actions, .. } => {
                 write!(f, "ε")?;
-                for (tag, output) in actions.iter() {
-                    if let Some(name) = output {
-                        styled_write!(f, Color::Blue, "\t{tag}[{name}]")?;
-                    } else {
-                        styled_write!(f, Color::Blue, "\t{tag}")?;
-                    }
+                for action in actions {
+                    write!(f, "\t{}", action)?;
                 }
             }
-            Self::Unexpanded(actions, _) => {
+            Self::Unexpanded { actions, .. } => {
                 write!(f, "{}", actions[0])?;
                 for action in &actions[1..] {
                     write!(f, "\t{}", action)?;
                 }
             }
-            Self::Sequence(terminal, var, actions, _) => {
-                if let Some(tk) = var {
-                    styled_write!(f, Color::Yellow, "{terminal}[{tk}]")?;
+            Self::Sequence {
+                token,
+                token_output,
+                actions,
+                ..
+            } => {
+                if let Some(tk) = token_output {
+                    styled_write!(f, Color::Yellow, "{token}[{tk}]")?;
                 } else {
-                    styled_write!(f, Color::Yellow, "{terminal}")?;
+                    styled_write!(f, Color::Yellow, "{token}")?;
                 }
                 for action in actions.iter() {
                     write!(f, "\t{}", action)?;
@@ -239,10 +248,10 @@ impl std::fmt::Display for NormalForm {
 #[test]
 fn debug_print_test() {
     use quote::format_ident;
-    let sequence = NormalForm::Sequence(
-        format_ident!("TEST"),
-        Some(format_ident!("x")),
-        vec![
+    let sequence = NormalForm::Sequence {
+        token: format_ident!("TEST"),
+        token_output: Some(format_ident!("x")),
+        actions: vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
                 output: None,
@@ -260,8 +269,9 @@ fn debug_print_test() {
                 output: None,
             },
         ],
-        SemAct::Gather,
-    );
+        semact: SemAct::Gather,
+        ty: Appendix(AbstractType::Concrete(Rc::new(syn::parse_quote!(u32)))),
+    };
     println!("{}", sequence);
 }
 
@@ -316,10 +326,10 @@ impl std::fmt::Display for NFTable {
 #[test]
 fn debug_print_nf_table() {
     use quote::format_ident;
-    let sequence = NormalForm::Sequence(
-        format_ident!("TEST"),
-        Some(format_ident!("x")),
-        vec![
+    let sequence = NormalForm::Sequence {
+        token: format_ident!("TEST"),
+        token_output: Some(format_ident!("x")),
+        actions: vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
                 output: None,
@@ -337,15 +347,23 @@ fn debug_print_nf_table() {
                 output: None,
             },
         ],
-        SemAct::Gather,
-    );
-    let empty = NormalForm::Empty(
-        vec![
-            (Tag::Toplevel(format_ident!("a")), None),
-            (Tag::Toplevel(format_ident!("b")), Some(format_ident!("x"))),
+        semact: SemAct::Gather,
+        ty: Appendix(AbstractType::Concrete(Rc::new(syn::parse_quote!(u32)))),
+    };
+    let empty = NormalForm::Empty {
+        actions: vec![
+            Action::Reduce {
+                tag: Tag::Toplevel(format_ident!("b")),
+                output: Some(format_ident!("x")),
+            },
+            Action::Reduce {
+                tag: Tag::Toplevel(format_ident!("c")),
+                output: Some(format_ident!("y")),
+            },
         ],
-        SemAct::Gather,
-    );
+        semact: SemAct::Gather,
+        ty: Appendix(AbstractType::Concrete(Rc::new(syn::parse_quote!(u32)))),
+    };
     let table = NFTable(
         vec![
             (
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index e12adb2..a871361 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -9,7 +9,6 @@ use syn::{Ident, Type};
 
 use super::NFTable;
 use super::{semact::SemAct, Action, NormalForm, Tag};
-use crate::frontend::RightDeepIterator;
 use crate::frontend::{ParserDef, ParserExpr};
 
 #[derive(Default)]
diff --git a/pag-parser2/src/utils.rs b/pag-parser2/src/utils.rs
new file mode 100644
index 0000000..01a9511
--- /dev/null
+++ b/pag-parser2/src/utils.rs
@@ -0,0 +1,70 @@
+use std::ops::{Deref, DerefMut};
+
+#[cfg(feature = "ansi-debug")]
+macro_rules! styled {
+    ($style:expr, $($arg:tt)*) => {
+        {
+            use nu_ansi_term::*;
+            $style.paint(format!($($arg)*))
+        }
+    };
+}
+#[cfg(not(feature = "ansi-debug"))]
+macro_rules! styled {
+    ($style:expr, $($arg:tt)*) => {format!($($arg)*)};
+}
+
+#[cfg(feature = "debug")]
+macro_rules! styled_write {
+    ($dst:expr, $($arg:tt)*) => {
+        write!($dst, "{}", $crate::utils::styled!($($arg)*))
+    };
+}
+
+#[cfg(feature = "debug")]
+pub(crate) use styled;
+
+#[cfg(feature = "debug")]
+pub(crate) use styled_write;
+
+/// Appendix data that is ignored by equality, ordering, and hashing.
+#[derive(Clone)]
+pub struct Appendix<T>(pub T);
+
+impl<T> Deref for Appendix<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl<T> DerefMut for Appendix<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
+
+impl<T> PartialEq for Appendix<T> {
+    fn eq(&self, _other: &Self) -> bool {
+        true
+    }
+}
+
+impl<T> Eq for Appendix<T> {}
+
+impl<T> PartialOrd for Appendix<T> {
+    fn partial_cmp(&self, _other: &Self) -> Option<std::cmp::Ordering> {
+        Some(std::cmp::Ordering::Equal)
+    }
+}
+
+impl<T> Ord for Appendix<T> {
+    fn cmp(&self, _other: &Self) -> std::cmp::Ordering {
+        std::cmp::Ordering::Equal
+    }
+}
+
+impl<T> std::hash::Hash for Appendix<T> {
+    fn hash<H: std::hash::Hasher>(&self, _state: &mut H) {}
+}

From 4865e766c4f5d3bda0ac4b8f7aed6ac21869a375 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Sun, 30 Jul 2023 18:48:27 -0400
Subject: [PATCH 39/42] translation with type info

---
 pag-parser2/src/frontend/ast.rs   |  31 +++
 pag-parser2/src/lib.rs            |   2 +-
 pag-parser2/src/nf/mod.rs         |  44 ++++-
 pag-parser2/src/nf/translation.rs | 308 +++++++++++++++++++++++-------
 pag-parser2/src/utils.rs          |   6 +
 5 files changed, 313 insertions(+), 78 deletions(-)

diff --git a/pag-parser2/src/frontend/ast.rs b/pag-parser2/src/frontend/ast.rs
index 00ef1b5..98bc33d 100644
--- a/pag-parser2/src/frontend/ast.rs
+++ b/pag-parser2/src/frontend/ast.rs
@@ -90,3 +90,34 @@ pub enum ParserExpr {
     ParserRef(syn::Ident),
     Ignore(Box<Self>),
 }
+
+pub enum SequenceIterator<'a> { // iterator state for walking a `ParserExpr` as a sequence of expressions
+    End, // exhausted: `next` returns `None`
+    Singleton(&'a ParserExpr), // exactly one expression still to be yielded
+    Multiple(std::slice::Iter<'a, ParserExpr>), // remaining elements of a `ParserExpr::Seq`
+}
+
+impl<'a> From<&'a ParserExpr> for SequenceIterator<'a> { // builds the iterator from a borrowed expression
+    fn from(value: &'a ParserExpr) -> Self {
+        match value {
+            ParserExpr::Seq(inner) => Self::Multiple(inner.iter()), // a sequence yields each of its elements in order
+            _ => Self::Singleton(value), // any other expression is yielded once, as itself
+        }
+    }
+}
+
+impl<'a> Iterator for SequenceIterator<'a> { // yields borrowed `ParserExpr`s according to the current state
+    type Item = &'a ParserExpr;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            SequenceIterator::End => None, // nothing left to yield
+            SequenceIterator::Singleton(result) => {
+                let result = *result; // copy the reference out before `self` is overwritten
+                *self = Self::End; // the single item is consumed, so later calls return `None`
+                Some(result)
+            }
+            SequenceIterator::Multiple(ref mut iter) => iter.next(), // delegate to the underlying slice iterator
+        }
+    }
+}
diff --git a/pag-parser2/src/lib.rs b/pag-parser2/src/lib.rs
index 781a7a8..0394c10 100644
--- a/pag-parser2/src/lib.rs
+++ b/pag-parser2/src/lib.rs
@@ -5,7 +5,7 @@
 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
-
+#![feature(box_patterns)]
 mod frontend;
 mod nf;
 mod utils;
diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index 6de59d6..d6d49f7 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -8,7 +8,7 @@
 
 mod normalization;
 mod semact;
-//mod translation;
+mod translation;
 
 use crate::utils::Appendix;
 
@@ -115,6 +115,20 @@ pub enum AbstractType {
     Collector(Box<Self>),
 }
 
+thread_local! { // per-thread prototypes cloned by `AbstractType::unit_type` / `AbstractType::span_type`; NOTE(review): presumably thread-local because the `Rc` payload cannot live in a plain `static` - confirm
+    static UNIT_TYPE: AbstractType = AbstractType::Concrete(Rc::new(syn::parse_quote!(()))); // concrete type `()`
+    static SPAN_TYPE: AbstractType = AbstractType::Concrete(Rc::new(syn::parse_quote!(::pag_util::Span<'src>))); // concrete type `::pag_util::Span<'src>`
+}
+
+impl AbstractType { // constructors for the two cached concrete types
+    pub fn unit_type() -> Self { // returns a clone of the thread-local `UNIT_TYPE`
+        UNIT_TYPE.with(Self::clone)
+    }
+    pub fn span_type() -> Self { // returns a clone of the thread-local `SPAN_TYPE`
+        SPAN_TYPE.with(Self::clone)
+    }
+}
+
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum NormalForm {
     Empty {
@@ -174,6 +188,22 @@ impl NormalForm {
         }
     }
 
+    pub fn ty(&self) -> &Appendix<AbstractType> { // shared access to the `ty` field, whichever variant this is
+        match self {
+            Self::Empty { ty, .. } => ty, // every variant carries a `ty` field, so the match only selects it
+            Self::Unexpanded { ty, .. } => ty,
+            Self::Sequence { ty, .. } => ty,
+        }
+    }
+
+    pub fn ty_mut(&mut self) -> &mut Appendix<AbstractType> { // mutable access to the `ty` field, whichever variant this is
+        match self {
+            Self::Empty { ty, .. } => ty, // same variant-by-variant projection as `ty`, but through `&mut self`
+            Self::Unexpanded { ty, .. } => ty,
+            Self::Sequence { ty, .. } => ty,
+        }
+    }
+
     pub fn visible_bindings(&self, skip: usize) -> Box<[(&Ident, BoundTarget)]> {
         let mut acc = VecDeque::new();
         for act in self.actions().iter().rev().skip(skip) {
@@ -212,22 +242,29 @@ impl NormalForm {
 impl std::fmt::Display for NormalForm {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Self::Empty { actions, .. } => {
+            Self::Empty {
+                actions, semact, ..
+            } => {
                 write!(f, "ε")?;
                 for action in actions {
                     write!(f, "\t{}", action)?;
                 }
+                write!(f, "\t{{{}}}", semact)?;
             }
-            Self::Unexpanded { actions, .. } => {
+            Self::Unexpanded {
+                actions, semact, ..
+            } => {
                 write!(f, "{}", actions[0])?;
                 for action in &actions[1..] {
                     write!(f, "\t{}", action)?;
                 }
+                write!(f, "\t{{{}}}", semact)?;
             }
             Self::Sequence {
                 token,
                 token_output,
                 actions,
+                semact,
                 ..
             } => {
                 if let Some(tk) = token_output {
@@ -238,6 +275,7 @@ impl std::fmt::Display for NormalForm {
                 for action in actions.iter() {
                     write!(f, "\t{}", action)?;
                 }
+                write!(f, "\t{{{}}}", semact)?;
             }
         }
         Ok(())
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index a871361..469e90a 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -3,22 +3,22 @@
 //!
 
 use std::collections::HashMap;
+use std::rc::Rc;
 
 use quote::format_ident;
 use syn::{Ident, Type};
 
-use super::NFTable;
 use super::{semact::SemAct, Action, NormalForm, Tag};
-use crate::frontend::{ParserDef, ParserExpr};
+use super::{AbstractType, NFTable};
+use crate::frontend::{Ast, ParserDef, ParserExpr, SequenceIterator};
 
-#[derive(Default)]
 struct Translation {
     /// Table of semi-normalized production rules
     semi_nfs: NFTable,
     /// Toplevel type annotations
-    annotations: HashMap<Tag, Type>,
+    annotations: HashMap<Tag, Rc<Type>>,
     /// Type hints when calling inner routines (collector)
-    hints: HashMap<Tag, Type>,
+    hints: HashMap<Tag, Rc<Type>>,
     /// Counter of assigned non-explicit variable names
     output_cnt: usize,
     /// Counter of assigned anonymous routines
@@ -27,6 +27,30 @@ struct Translation {
     ignoring_cnt: usize,
 }
 
+type NFAttrTuple = (Tag, Option<Ident>, AbstractType);
+
+impl From<&'_ Ast> for Translation {
+    fn from(value: &Ast) -> Self {
+        let annotations = value
+            .parser_map
+            .iter()
+            .map(|(ident, def)| (Tag::Toplevel(ident.clone()), def.ty.clone()))
+            .collect();
+        let mut translation = Self {
+            semi_nfs: Default::default(),
+            annotations,
+            hints: Default::default(),
+            output_cnt: 0,
+            anonymous_cnt: 0,
+            ignoring_cnt: 0,
+        };
+        for (i, def) in &value.parser_map {
+            translation.add_toplevel_def(i.clone(), def);
+        }
+        translation
+    }
+}
+
 impl Translation {
     /// Enter ignoring mode
     fn start_ignoring(&mut self) {
@@ -56,50 +80,130 @@ impl Translation {
         result
     }
 
-    /// Construct a normal form from a sequence of parser expressions. The semact is always `Recognize`.
-    fn partial_nf_from_sequence<
+    fn infer_type<I: ExactSizeIterator<Item = AbstractType>>(
+        &self,
+        mut inner_types: I,
+        semact: &SemAct,
+        tag: &Tag,
+    ) -> AbstractType {
+        match semact {
+            SemAct::Customized(_) => AbstractType::Concrete(self.annotations[tag].clone()),
+            SemAct::Gather => match inner_types.len() {
+                0 => AbstractType::unit_type(),
+                1 => inner_types.next().unwrap(),
+                _ => AbstractType::Tuple(inner_types.collect()),
+            },
+            SemAct::Option => match inner_types.len() {
+                0 => AbstractType::Option(Box::new(AbstractType::unit_type())),
+                1 => AbstractType::Option(Box::new(inner_types.next().unwrap())),
+                _ => AbstractType::Option(Box::new(AbstractType::Tuple(inner_types.collect()))),
+            },
+            SemAct::ZeroOrMore => match inner_types.len() {
+                0 => AbstractType::Collector(Box::new(AbstractType::unit_type())),
+                1 => AbstractType::Collector(Box::new(inner_types.next().unwrap())),
+                _ => AbstractType::Collector(Box::new(AbstractType::Tuple(inner_types.collect()))),
+            },
+            SemAct::OneOrMoreToplevel => match inner_types.len() {
+                0 => AbstractType::Collector(Box::new(AbstractType::unit_type())),
+                1 => AbstractType::Collector(Box::new(inner_types.next().unwrap())),
+                _ => AbstractType::Collector(Box::new(AbstractType::Tuple(inner_types.collect()))),
+            },
+            SemAct::OneOrMoreNested => AbstractType::unit_type(),
+            SemAct::Token => AbstractType::span_type(),
+            SemAct::Recognize => AbstractType::unit_type(),
+        }
+    }
+
+    /// Construct a normal form from a sequence of parser expressions.
+    fn create_nf_from_sequence<
         'a,
         const IGNORE_UNNAMED: bool,
         I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
     >(
         &mut self,
         mut iter: I,
+        semact: SemAct,
+        tag: &Tag,
     ) -> NormalForm {
+        debug_assert_eq!(
+            self.ignoring(),
+            matches!(semact, SemAct::Recognize),
+            "semact must be Recognize in ignoring mode"
+        );
         match iter.next() {
-            None => NormalForm::Empty(vec![], SemAct::Recognize),
+            None => NormalForm::Empty {
+                actions: vec![],
+                semact,
+                ty: AbstractType::unit_type().into(),
+            },
             Some((ParserExpr::Ignore(box ParserExpr::LexerRef(token)), _)) => {
-                let tail = iter
+                let mut types = Vec::new();
+                let actions = iter
                     .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
-                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .map(|(tag, output, ty)| {
+                        if output.is_some() {
+                            types.push(ty);
+                        }
+                        Action::Shift { tag, output }
+                    })
                     .collect();
-                NormalForm::Sequence(token.clone(), None, tail, SemAct::Recognize)
+                let ty = self.infer_type(types.into_iter(), &semact, tag).into();
+                NormalForm::Sequence {
+                    token: token.clone(),
+                    token_output: None,
+                    actions,
+                    semact,
+                    ty,
+                }
             }
             Some((ParserExpr::LexerRef(token), named)) => {
-                let tail = iter
+                let mut types = Vec::new();
+                if named.is_some() {
+                    types.push(AbstractType::span_type())
+                }
+                let actions = iter
                     .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
-                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .map(|(tag, output, ty)| {
+                        if output.is_some() {
+                            types.push(ty);
+                        }
+                        Action::Shift { tag, output }
+                    })
                     .collect();
-                NormalForm::Sequence(
-                    token.clone(),
-                    if self.ignoring() {
+                let ty = self.infer_type(types.into_iter(), &semact, tag).into();
+                NormalForm::Sequence {
+                    token: token.clone(),
+                    token_output: if matches!(semact, SemAct::Recognize) {
                         None
                     } else if IGNORE_UNNAMED {
                         named
                     } else {
                         named.or_else(|| Some(self.new_output_sym()))
                     },
-                    tail,
-                    SemAct::Recognize,
-                )
+                    actions,
+                    semact,
+                    ty,
+                }
             }
             Some((expr, named)) => {
-                let sequence = [(expr, named)]
+                let mut types = Vec::new();
+                let actions = [(expr, named)]
                     .into_iter()
                     .chain(iter)
                     .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
-                    .map(|(tag, output)| Action::Shift { tag, output })
+                    .map(|(tag, output, ty)| {
+                        if output.is_some() {
+                            types.push(ty);
+                        }
+                        Action::Shift { tag, output }
+                    })
                     .collect();
-                NormalForm::Unexpanded(sequence, SemAct::Recognize)
+                let ty = self.infer_type(types.into_iter(), &semact, tag).into();
+                NormalForm::Unexpanded {
+                    actions,
+                    semact,
+                    ty,
+                }
             }
         }
     }
@@ -108,50 +212,83 @@ impl Translation {
         self.semi_nfs.entry(tag).or_default().push(nf);
     }
 
-    fn add_nf_from_anonymous_expr(&mut self, expr: &ParserExpr, tag: &Tag) {
+    fn add_nf_from_anonymous_expr(&mut self, expr: &ParserExpr, tag: &Tag) -> AbstractType {
         match expr {
-            ParserExpr::Seq(..) => {
-                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
-                    RightDeepIterator::from(expr).map(|expr| (expr, None)),
-                );
-                *partial_nf.semact_mut() = if self.ignoring() {
+            ParserExpr::Seq(exprs) => {
+                let semact = if self.ignoring() {
                     SemAct::Recognize
                 } else {
                     SemAct::Gather
                 };
+                let partial_nf = self.create_nf_from_sequence::<false, _>(
+                    exprs.iter().map(|expr| (expr, None)),
+                    semact,
+                    tag,
+                );
+                let ty = partial_nf.ty().0.clone();
                 self.add_nf(tag.clone(), partial_nf);
+                ty
             }
             ParserExpr::Opt(inner) => {
-                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
-                    RightDeepIterator::from(inner.as_ref()).map(|expr| (expr, None)),
-                );
                 let semact = if self.ignoring() {
                     SemAct::Recognize
                 } else {
                     SemAct::Option
                 };
+                let mut partial_nf = self.create_nf_from_sequence::<false, _>(
+                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None)),
+                    semact.clone(),
+                    tag,
+                );
+                let ty = partial_nf.ty().clone();
                 *partial_nf.semact_mut() = semact.clone();
                 self.add_nf(tag.clone(), partial_nf);
                 // add one more rule for empty
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], semact));
+                self.add_nf(
+                    tag.clone(),
+                    NormalForm::Empty {
+                        actions: vec![],
+                        semact,
+                        ty: ty.clone(),
+                    },
+                );
+                ty.0
             }
             ParserExpr::Star(inner) => {
-                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
-                    RightDeepIterator::from(inner.as_ref()).map(|expr| (expr, None)),
-                );
                 let semact = if self.ignoring() {
-                    SemAct::Recognize
-                } else {
                     SemAct::ZeroOrMore
+                } else {
+                    SemAct::Option
                 };
+                let mut partial_nf = self.create_nf_from_sequence::<false, _>(
+                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None)),
+                    semact.clone(),
+                    tag,
+                );
+                let ty = partial_nf.ty().clone();
                 *partial_nf.semact_mut() = semact.clone();
                 self.add_nf(tag.clone(), partial_nf);
                 // add one more rule for empty
-                self.add_nf(tag.clone(), NormalForm::Empty(vec![], semact));
+                self.add_nf(
+                    tag.clone(),
+                    NormalForm::Empty {
+                        actions: vec![],
+                        semact,
+                        ty: ty.clone(),
+                    },
+                );
+                ty.0
             }
             ParserExpr::Plus(inner) => {
-                let mut partial_nf = self.partial_nf_from_sequence::<false, _>(
-                    RightDeepIterator::from(inner.as_ref()).map(|expr| (expr, None)),
+                let semact = if self.ignoring() {
+                    SemAct::Recognize
+                } else {
+                    SemAct::OneOrMoreToplevel
+                };
+                let mut partial_nf = self.create_nf_from_sequence::<false, _>(
+                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None)),
+                    semact.clone(),
+                    tag,
                 );
                 let nested_tag = self.new_anonymous_tag();
                 // the nested routine
@@ -164,37 +301,51 @@ impl Translation {
 
                     self.add_nf(nested_tag.clone(), {
                         let mut nf = partial_nf.clone();
-                        nf.append_tailcall();
+                        nf.actions_mut().push(Action::TailCall);
                         *nf.semact_mut() = semact.clone();
                         nf
                     });
 
-                    self.add_nf(nested_tag.clone(), NormalForm::Empty(vec![], semact));
+                    self.add_nf(
+                        nested_tag.clone(),
+                        NormalForm::Empty {
+                            actions: vec![],
+                            semact,
+                            ty: AbstractType::unit_type().into(),
+                        },
+                    );
                 }
                 // the toplevel routine
                 {
-                    let semact = if self.ignoring() {
-                        SemAct::Recognize
-                    } else {
-                        SemAct::OneOrMoreToplevel
-                    };
-                    partial_nf.append_pass_collector(nested_tag);
-                    *partial_nf.semact_mut() = semact;
+                    partial_nf
+                        .actions_mut()
+                        .push(Action::PassCollector(tag.clone()));
+                    let ty = partial_nf.ty().0.clone();
                     self.add_nf(tag.clone(), partial_nf);
+                    ty
                 }
             }
             ParserExpr::LexerRef(ident) => {
                 let nf = if self.ignoring() {
-                    NormalForm::Sequence(ident.clone(), None, vec![], SemAct::Recognize)
+                    NormalForm::Sequence {
+                        token: ident.clone(),
+                        token_output: None,
+                        actions: vec![],
+                        semact: SemAct::Recognize,
+                        ty: AbstractType::unit_type().into(),
+                    }
                 } else {
-                    NormalForm::Sequence(
-                        ident.clone(),
-                        Some(self.new_output_sym()),
-                        vec![],
-                        SemAct::Token,
-                    )
+                    NormalForm::Sequence {
+                        token: ident.clone(),
+                        token_output: Some(self.new_output_sym()),
+                        actions: vec![],
+                        semact: SemAct::Token,
+                        ty: AbstractType::span_type().into(),
+                    }
                 };
+                let ty = nf.ty().0.clone();
                 self.add_nf(tag.clone(), nf);
+                ty
             }
             ParserExpr::ParserRef(_) => unreachable!("cannot create nf from parser ref"),
             ParserExpr::Ignore(_) => unreachable!("cannot create nf from ignore"),
@@ -205,7 +356,7 @@ impl Translation {
         &mut self,
         expr: &ParserExpr,
         named: Option<Ident>,
-    ) -> (Tag, Option<Ident>) {
+    ) -> NFAttrTuple {
         let is_unnamed = named.is_none();
         if IGNORE_UNNAMED && is_unnamed {
             self.start_ignoring();
@@ -214,24 +365,30 @@ impl Translation {
             ParserExpr::ParserRef(x) => {
                 let tag = Tag::Toplevel(x.clone());
                 if self.ignoring() {
-                    (tag, None)
+                    (tag, None, AbstractType::unit_type())
                 } else {
-                    (tag, named.or_else(|| Some(self.new_output_sym())))
+                    let ty = self
+                        .annotations
+                        .get(&tag)
+                        .map(Rc::clone)
+                        .map(AbstractType::Concrete)
+                        .expect("toplevel rule must be typed");
+                    (tag, named.or_else(|| Some(self.new_output_sym())), ty)
                 }
             }
             ParserExpr::Ignore(expr) => {
                 self.start_ignoring();
-                let (tag, output) = self.add_anonymous_rule::<IGNORE_UNNAMED>(expr, named);
+                let result = self.add_anonymous_rule::<IGNORE_UNNAMED>(expr, named);
                 self.end_ignoring();
-                (tag, output)
+                result
             }
             _ => {
                 let tag = self.new_anonymous_tag();
-                self.add_nf_from_anonymous_expr(expr, &tag);
+                let ty = self.add_nf_from_anonymous_expr(expr, &tag);
                 if self.ignoring() {
-                    (tag, None)
+                    (tag, None, AbstractType::unit_type())
                 } else {
-                    (tag, named.or_else(|| Some(self.new_output_sym())))
+                    (tag, named.or_else(|| Some(self.new_output_sym())), ty)
                 }
             }
         };
@@ -244,7 +401,6 @@ impl Translation {
     // Translate a top-level definition
     fn add_toplevel_def(&mut self, name: Ident, def: &ParserDef) {
         let tag = Tag::Toplevel(name);
-        self.annotations.insert(tag.clone(), def.ty.clone());
         let rules = def
             .rules
             .iter()
@@ -256,20 +412,23 @@ impl Translation {
                 } else {
                     SemAct::Gather
                 };
-                let mut partial_nf = if matches!(semact, SemAct::Customized(..)) {
-                    self.partial_nf_from_sequence::<true, _>(
+                let partial_nf = if matches!(semact, SemAct::Customized(..)) {
+                    self.create_nf_from_sequence::<true, _>(
                         rule.vars
                             .iter()
                             .map(|binding| (&binding.expr, binding.name.clone())),
+                        semact,
+                        &tag,
                     )
                 } else {
-                    self.partial_nf_from_sequence::<false, _>(
+                    self.create_nf_from_sequence::<false, _>(
                         rule.vars
                             .iter()
                             .map(|binding| (&binding.expr, binding.name.clone())),
+                        semact,
+                        &tag,
                     )
                 };
-                *partial_nf.semact_mut() = semact;
                 partial_nf
             })
             .collect();
@@ -303,11 +462,12 @@ mod test {
             "#,
         )
         .unwrap();
-        let mut translation = Translation::default();
-        for (name, def) in ast.parser_map.iter() {
-            translation.add_toplevel_def(name.clone(), def);
-        }
         #[cfg(feature = "debug")]
-        println!("{}", translation.semi_nfs);
+        {
+            let translation = Translation::from(&ast);
+            println!("{}", translation.semi_nfs);
+        }
+        #[cfg(not(feature = "debug"))]
+        let _ = Translation::from(&ast);
     }
 }
diff --git a/pag-parser2/src/utils.rs b/pag-parser2/src/utils.rs
index 01a9511..32ee6dc 100644
--- a/pag-parser2/src/utils.rs
+++ b/pag-parser2/src/utils.rs
@@ -31,6 +31,12 @@ pub(crate) use styled_write;
 #[derive(Clone)]
 pub struct Appendix<T>(pub T);
 
+impl<T> From<T> for Appendix<T> {
+    fn from(x: T) -> Self {
+        Self(x)
+    }
+}
+
 impl<T> Deref for Appendix<T> {
     type Target = T;
 

From e6422995703ab45a2aa524a7736b5cb492a88319 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Sun, 30 Jul 2023 22:47:13 -0400
Subject: [PATCH 40/42] process type hints

---
 pag-lexer/src/lookahead.rs        |  6 ++--
 pag-parser2/src/nf/translation.rs | 58 +++++++++++++++++++------------
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/pag-lexer/src/lookahead.rs b/pag-lexer/src/lookahead.rs
index 1db0c39..b66862c 100644
--- a/pag-lexer/src/lookahead.rs
+++ b/pag-lexer/src/lookahead.rs
@@ -199,7 +199,9 @@ mod test {
     fn test_lookahead_codegen() {
         use crate::intervals;
         let positives = intervals!((b'0', b'9'), (b'0', b'9'), (b'A', b'F'));
-        syn::parse2::<syn::Expr>(generate_lookahead_routine(&positives, Kind::Positive)).unwrap();
-        syn::parse2::<syn::Expr>(generate_lookahead_routine(&positives, Kind::Negative)).unwrap();
+        let positive = generate_lookahead_routine(&positives, Kind::Positive);
+        let _: syn::Expr = syn::parse_quote! { { #positive } };
+        let negative = generate_lookahead_routine(&positives, Kind::Negative);
+        let _: syn::Expr = syn::parse_quote! { { #negative } };
     }
 }
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 469e90a..587ce03 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -118,7 +118,7 @@ impl Translation {
     fn create_nf_from_sequence<
         'a,
         const IGNORE_UNNAMED: bool,
-        I: Iterator<Item = (&'a ParserExpr, Option<Ident>)>,
+        I: Iterator<Item = (&'a ParserExpr, Option<Ident>, Option<Rc<Type>>)>,
     >(
         &mut self,
         mut iter: I,
@@ -136,11 +136,15 @@ impl Translation {
                 semact,
                 ty: AbstractType::unit_type().into(),
             },
-            Some((ParserExpr::Ignore(box ParserExpr::LexerRef(token)), _)) => {
+            Some((ParserExpr::Ignore(box ParserExpr::LexerRef(token)), _, _)) => {
                 let mut types = Vec::new();
                 let actions = iter
-                    .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
-                    .map(|(tag, output, ty)| {
+                    .map(|(inner, named, hint)| {
+                        let (tag, output, ty) =
+                            self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named);
+                        if let Some(x) = hint {
+                            self.hints.insert(tag.clone(), x);
+                        }
                         if output.is_some() {
                             types.push(ty);
                         }
@@ -156,14 +160,18 @@ impl Translation {
                     ty,
                 }
             }
-            Some((ParserExpr::LexerRef(token), named)) => {
+            Some((ParserExpr::LexerRef(token), named, _)) => {
                 let mut types = Vec::new();
                 if named.is_some() {
                     types.push(AbstractType::span_type())
                 }
                 let actions = iter
-                    .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
-                    .map(|(tag, output, ty)| {
+                    .map(|(inner, named, hint)| {
+                        let (tag, output, ty) =
+                            self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named);
+                        if let Some(x) = hint {
+                            self.hints.insert(tag.clone(), x);
+                        }
                         if output.is_some() {
                             types.push(ty);
                         }
@@ -185,13 +193,17 @@ impl Translation {
                     ty,
                 }
             }
-            Some((expr, named)) => {
+            Some((expr, named, hint)) => {
                 let mut types = Vec::new();
-                let actions = [(expr, named)]
+                let actions = [(expr, named, hint)]
                     .into_iter()
                     .chain(iter)
-                    .map(|(inner, named)| self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named))
-                    .map(|(tag, output, ty)| {
+                    .map(|(inner, named, hint)| {
+                        let (tag, output, ty) =
+                            self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named);
+                        if let Some(x) = hint {
+                            self.hints.insert(tag.clone(), x);
+                        }
                         if output.is_some() {
                             types.push(ty);
                         }
@@ -221,7 +233,7 @@ impl Translation {
                     SemAct::Gather
                 };
                 let partial_nf = self.create_nf_from_sequence::<false, _>(
-                    exprs.iter().map(|expr| (expr, None)),
+                    exprs.iter().map(|expr| (expr, None, None)),
                     semact,
                     tag,
                 );
@@ -236,7 +248,7 @@ impl Translation {
                     SemAct::Option
                 };
                 let mut partial_nf = self.create_nf_from_sequence::<false, _>(
-                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None)),
+                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None, None)),
                     semact.clone(),
                     tag,
                 );
@@ -261,7 +273,7 @@ impl Translation {
                     SemAct::Option
                 };
                 let mut partial_nf = self.create_nf_from_sequence::<false, _>(
-                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None)),
+                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None, None)),
                     semact.clone(),
                     tag,
                 );
@@ -286,8 +298,8 @@ impl Translation {
                     SemAct::OneOrMoreToplevel
                 };
                 let mut partial_nf = self.create_nf_from_sequence::<false, _>(
-                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None)),
-                    semact.clone(),
+                    SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None, None)),
+                    semact,
                     tag,
                 );
                 let nested_tag = self.new_anonymous_tag();
@@ -307,7 +319,7 @@ impl Translation {
                     });
 
                     self.add_nf(
-                        nested_tag.clone(),
+                        nested_tag,
                         NormalForm::Empty {
                             actions: vec![],
                             semact,
@@ -414,17 +426,17 @@ impl Translation {
                 };
                 let partial_nf = if matches!(semact, SemAct::Customized(..)) {
                     self.create_nf_from_sequence::<true, _>(
-                        rule.vars
-                            .iter()
-                            .map(|binding| (&binding.expr, binding.name.clone())),
+                        rule.vars.iter().map(|binding| {
+                            (&binding.expr, binding.name.clone(), binding.ty.clone())
+                        }),
                         semact,
                         &tag,
                     )
                 } else {
                     self.create_nf_from_sequence::<false, _>(
-                        rule.vars
-                            .iter()
-                            .map(|binding| (&binding.expr, binding.name.clone())),
+                        rule.vars.iter().map(|binding| {
+                            (&binding.expr, binding.name.clone(), binding.ty.clone())
+                        }),
                         semact,
                         &tag,
                     )

From aa1db2c7f4a518f526dc4548870a3031766e32d5 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Mon, 31 Jul 2023 10:17:27 -0400
Subject: [PATCH 41/42] fix wrong semact

---
 pag-parser2/src/nf/translation.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 587ce03..4f81cc0 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -268,9 +268,9 @@ impl Translation {
             }
             ParserExpr::Star(inner) => {
                 let semact = if self.ignoring() {
-                    SemAct::ZeroOrMore
+                    SemAct::Recognize
                 } else {
-                    SemAct::Option
+                    SemAct::ZeroOrMore
                 };
                 let mut partial_nf = self.create_nf_from_sequence::<false, _>(
                     SequenceIterator::from(inner.as_ref()).map(|expr| (expr, None, None)),

From 0900a4439b1750dd1e036a51a1255c91dc9f3b48 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu@rochester.edu>
Date: Mon, 31 Jul 2023 12:07:45 -0400
Subject: [PATCH 42/42] implement normalization

---
 pag-parser2/src/nf/mod.rs           |  84 +++++------------
 pag-parser2/src/nf/normalization.rs | 135 ++++++++++++++++++++++++++++
 pag-parser2/src/nf/translation.rs   |  28 +++---
 pag-parser2/src/utils.rs            |   1 +
 4 files changed, 171 insertions(+), 77 deletions(-)

diff --git a/pag-parser2/src/nf/mod.rs b/pag-parser2/src/nf/mod.rs
index d6d49f7..fb29a25 100644
--- a/pag-parser2/src/nf/mod.rs
+++ b/pag-parser2/src/nf/mod.rs
@@ -5,21 +5,20 @@
 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. All files in the project carrying such notice may not be copied,
 // modified, or distributed except according to those terms.
-
-mod normalization;
 mod semact;
 mod translation;
+mod normalization;
 
 use crate::utils::Appendix;
 
 use std::{
-    collections::{HashMap, VecDeque},
+    collections::HashMap,
     ops::{Deref, DerefMut},
     rc::Rc,
 };
 
 use quote::format_ident;
-use syn::Ident;
+use syn::{Ident, Type};
 
 #[cfg(feature = "debug")]
 use crate::utils::{styled, styled_write};
@@ -64,7 +63,7 @@ impl std::fmt::Display for Tag {
 /// reducing from left to right, we maintain the context of which the current
 /// semantic action to reduce, and always assign "__0", "__1", "__2". When a [`Reduce`] is
 /// encountered, we start over from "__0".
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Action {
     Shift {
         /// Parser routine to call.
@@ -73,7 +72,8 @@ pub enum Action {
     },
     Reduce {
         /// Reduction routine to call.
-        tag: Tag,
+        semact: SemAct,
+        hint: Option<Appendix<Rc<Type>>>,
         output: Option<Ident>,
     },
     /// Specialized action for tail call optimization.
@@ -86,11 +86,11 @@ pub enum Action {
 impl std::fmt::Display for Action {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Self::Reduce { tag, output } => {
+            Self::Reduce { semact, output, .. } => {
                 if let Some(name) = output {
-                    styled_write!(f, Color::Blue, "{tag}[{name}]")
+                    styled_write!(f, Color::Blue, "{semact}[{name}]")
                 } else {
-                    styled_write!(f, Color::Blue, "{tag}")
+                    styled_write!(f, Color::Blue, "{semact}")
                 }
             }
             Self::Shift { tag, output } => {
@@ -143,18 +143,12 @@ pub enum NormalForm {
     },
     Sequence {
         token: Ident,
-        token_output: Option<Ident>,
         actions: Vec<Action>,
         semact: SemAct,
         ty: Appendix<AbstractType>,
     },
 }
 
-pub enum BoundTarget<'a> {
-    Tag(&'a Tag),
-    Token,
-}
-
 impl NormalForm {
     pub fn semact(&self) -> &SemAct {
         match self {
@@ -203,39 +197,6 @@ impl NormalForm {
             Self::Sequence { ty, .. } => ty,
         }
     }
-
-    pub fn visible_bindings(&self, skip: usize) -> Box<[(&Ident, BoundTarget)]> {
-        let mut acc = VecDeque::new();
-        for act in self.actions().iter().rev().skip(skip) {
-            match act {
-                Action::Shift { tag, output } => {
-                    if let Some(ident) = output {
-                        acc.push_front((ident, BoundTarget::Tag(tag)));
-                    }
-                }
-                Action::Reduce { tag, output } => {
-                    if let Some(ident) = output {
-                        acc.push_front((ident, BoundTarget::Tag(tag)));
-                    }
-                    break;
-                }
-                Action::PassCollector(..) => continue,
-                Action::TailCall => continue,
-            }
-        }
-        if let Self::Sequence {
-            token_output: Some(tk),
-            ..
-        } = self
-        {
-            if acc.len() == self.actions().len() - skip
-                && !matches!(self.actions().first(), Some(Action::Reduce { .. }))
-            {
-                acc.push_front((tk, BoundTarget::Token));
-            }
-        }
-        acc.into_iter().collect()
-    }
 }
 
 #[cfg(feature = "debug")]
@@ -262,16 +223,11 @@ impl std::fmt::Display for NormalForm {
             }
             Self::Sequence {
                 token,
-                token_output,
                 actions,
                 semact,
                 ..
             } => {
-                if let Some(tk) = token_output {
-                    styled_write!(f, Color::Yellow, "{token}[{tk}]")?;
-                } else {
-                    styled_write!(f, Color::Yellow, "{token}")?;
-                }
+                styled_write!(f, Color::Yellow, "{token}")?;
                 for action in actions.iter() {
                     write!(f, "\t{}", action)?;
                 }
@@ -288,14 +244,14 @@ fn debug_print_test() {
     use quote::format_ident;
     let sequence = NormalForm::Sequence {
         token: format_ident!("TEST"),
-        token_output: Some(format_ident!("x")),
         actions: vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
                 output: None,
             },
             Action::Reduce {
-                tag: Tag::Toplevel(format_ident!("b")),
+                semact: SemAct::Gather,
+                hint: None,
                 output: Some(format_ident!("x")),
             },
             Action::Shift {
@@ -303,7 +259,8 @@ fn debug_print_test() {
                 output: Some(format_ident!("y")),
             },
             Action::Reduce {
-                tag: Tag::Anonymous(1),
+                semact: SemAct::Gather,
+                hint: None,
                 output: None,
             },
         ],
@@ -366,14 +323,14 @@ fn debug_print_nf_table() {
     use quote::format_ident;
     let sequence = NormalForm::Sequence {
         token: format_ident!("TEST"),
-        token_output: Some(format_ident!("x")),
         actions: vec![
             Action::Shift {
                 tag: Tag::Toplevel(format_ident!("a")),
                 output: None,
             },
             Action::Reduce {
-                tag: Tag::Toplevel(format_ident!("b")),
+                semact: SemAct::Gather,
+                hint: None,
                 output: Some(format_ident!("x")),
             },
             Action::Shift {
@@ -381,7 +338,8 @@ fn debug_print_nf_table() {
                 output: Some(format_ident!("y")),
             },
             Action::Reduce {
-                tag: Tag::Anonymous(1),
+                semact: SemAct::Gather,
+                hint: None,
                 output: None,
             },
         ],
@@ -391,11 +349,13 @@ fn debug_print_nf_table() {
     let empty = NormalForm::Empty {
         actions: vec![
             Action::Reduce {
-                tag: Tag::Toplevel(format_ident!("b")),
+                semact: SemAct::Gather,
+                hint: None,
                 output: Some(format_ident!("x")),
             },
             Action::Reduce {
-                tag: Tag::Toplevel(format_ident!("c")),
+                semact: SemAct::Gather,
+                hint: None,
                 output: Some(format_ident!("y")),
             },
         ],
diff --git a/pag-parser2/src/nf/normalization.rs b/pag-parser2/src/nf/normalization.rs
index 8b13789..53c74ba 100644
--- a/pag-parser2/src/nf/normalization.rs
+++ b/pag-parser2/src/nf/normalization.rs
@@ -1 +1,136 @@
+use std::{collections::HashMap, rc::Rc};
 
+use syn::Type;
+
+use crate::utils::Appendix;
+
+use super::{NFTable, Tag, translation::Translation, NormalForm, Action};
+
+pub struct Normalized {
+    nfs: NFTable,
+    hints: HashMap<Tag, Rc<Type>>
+}
+
+impl Normalized {
+    fn normalize(&mut self) {
+        loop {
+            let mut updates = Vec::new();
+            for (target, nfs) in self.nfs.iter().map(|(k, v)| (k.clone(), v.clone())) {
+                if !nfs.iter().any(|x| matches!(x, NormalForm::Unexpanded {.. })) {
+                    continue;
+                }
+                let mut stepped = Vec::new();
+                for i in nfs {
+                    let NormalForm::Unexpanded{ actions, semact, ty } = i else {
+                        stepped.push(i);
+                        continue;
+                    };
+                    let first_subroutine = actions.iter().enumerate().find_map(|(index, act)| {
+                        if let Action::Shift { tag, output } = act {
+                            Some((index, tag, output.clone()))
+                        } else {
+                            None
+                        }
+                    });
+                    match first_subroutine {
+                        None => {
+                            stepped.push(NormalForm::Empty{ actions, semact, ty });
+                        }
+                        Some((index, tag, output)) => {
+                            let variable_nf = self.nfs.get(tag).cloned().expect("tag must have associated");
+                            for k in variable_nf {
+                                let head = actions[..index].iter().cloned();
+                                let tail = actions[index + 1..].iter().cloned();
+                                match k {
+                                    NormalForm::Empty { actions: mut expanded_actions, semact: expanded_semact, .. } => {
+                                        let hint = self.hints.get(tag).cloned().map(Appendix);
+                                        expanded_actions.push(Action::Reduce { semact: expanded_semact, hint, output: output.clone()});
+                                        let acts = head.chain(expanded_actions).chain(tail).collect();
+                                        stepped.push(NormalForm::Unexpanded { actions: acts, semact: semact.clone(), ty: ty.clone() });
+                                    }
+                                    NormalForm::Unexpanded { actions: mut expanded_actions, semact: expanded_semact, .. } => {
+                                        let hint = self.hints.get(tag).cloned().map(Appendix);
+                                        expanded_actions.push(Action::Reduce { semact: expanded_semact, hint,  output: output.clone()});
+                                        let acts = head.chain(expanded_actions).chain(tail).collect();
+                                        stepped.push(NormalForm::Unexpanded { actions: acts, semact: semact.clone(), ty: ty.clone() });
+                                    }
+                                    NormalForm::Sequence {
+                                        token,
+                                        actions: mut expanded_actions,
+                                        semact: expanded_semact,
+                                        ..
+                                    } => {
+                                        let hint = self.hints.get(tag).cloned().map(Appendix);
+                                        expanded_actions.push(Action::Reduce { semact: expanded_semact, hint,  output: output.clone()});
+                                        let acts = head.chain(expanded_actions).chain(tail).collect();
+                                        stepped.push(NormalForm::Sequence { token, actions: acts,  semact: semact.clone(), ty: ty.clone() });
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                updates.push((target, stepped));
+            }
+            if updates.is_empty() {
+                break;
+            } else {
+                for (k, v) in updates {
+                    self.nfs.insert(k, v);
+                }
+            }
+        }
+    }
+}
+
+impl From<Translation> for Normalized {
+    fn from(value: Translation) -> Self {
+        let mut normalized = Self {
+            nfs: value.semi_nfs,
+            hints: value.hints
+        };
+        normalized.normalize();
+        normalized
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{frontend::Ast, nf::normalization::Normalized};
+
+    use super::Translation;
+
+    #[test]
+    fn sexpr() {
+        let ast = syn::parse_str::<Ast>(
+            r#"
+            %entry = sexp;
+
+            DIGIT  = '0'..'9';
+            ALPHA  = 'a'..'z' | 'A'..'Z';
+            LPAREN = "(";
+            RPAREN = ")";
+            ATOM   = ALPHA (ALPHA | DIGIT)*;
+            %skip  = (" " | "\t" | "\n" | "\r")+;
+
+            compound: SExp = LPAREN sexp+[sexp:Vec<_>] RPAREN { SExp::Compound(sexp) };
+            atom    : SExp = ATOM[atom] { SExp::Atom(atom) };
+            sexp    : SExp = compound
+                           | atom;
+            "#,
+        )
+        .unwrap();
+        #[cfg(feature = "debug")]
+        {
+            let translation = Translation::from(&ast);
+            println!("{}", translation.semi_nfs);
+            let normalized = Normalized::from(translation);
+            println!("{}", normalized.nfs);
+        }
+        #[cfg(not(feature = "debug"))]
+        {
+            let translation = Translation::from(&ast);
+            let _ = Normalized::from(translation);
+        }
+    }
+}
\ No newline at end of file
diff --git a/pag-parser2/src/nf/translation.rs b/pag-parser2/src/nf/translation.rs
index 4f81cc0..0e64ad3 100644
--- a/pag-parser2/src/nf/translation.rs
+++ b/pag-parser2/src/nf/translation.rs
@@ -3,6 +3,7 @@
 //!
 
 use std::collections::HashMap;
+use std::io::Read;
 use std::rc::Rc;
 
 use quote::format_ident;
@@ -12,13 +13,13 @@ use super::{semact::SemAct, Action, NormalForm, Tag};
 use super::{AbstractType, NFTable};
 use crate::frontend::{Ast, ParserDef, ParserExpr, SequenceIterator};
 
-struct Translation {
+pub struct Translation {
     /// Table of semi-normalized production rules
-    semi_nfs: NFTable,
+    pub semi_nfs: NFTable,
     /// Toplevel type annotations
     annotations: HashMap<Tag, Rc<Type>>,
     /// Type hints when calling inner routines (collector)
-    hints: HashMap<Tag, Rc<Type>>,
+    pub hints: HashMap<Tag, Rc<Type>>,
     /// Counter of assigned non-explicit variable names
     output_cnt: usize,
     /// Counter of assigned anonymous routines
@@ -154,7 +155,6 @@ impl Translation {
                 let ty = self.infer_type(types.into_iter(), &semact, tag).into();
                 NormalForm::Sequence {
                     token: token.clone(),
-                    token_output: None,
                     actions,
                     semact,
                     ty,
@@ -165,7 +165,14 @@ impl Translation {
                 if named.is_some() {
                     types.push(AbstractType::span_type())
                 }
-                let actions = iter
+                let head_action = if matches!(semact, SemAct::Recognize) {
+                    None
+                } else if IGNORE_UNNAMED && named.is_none() {
+                    None
+                } else {
+                    Some( Action::Reduce { semact: SemAct::Option, hint: None, output: named.or_else(|| Some(self.new_output_sym())) } )
+                };
+                let actions = head_action.into_iter().chain(iter
                     .map(|(inner, named, hint)| {
                         let (tag, output, ty) =
                             self.add_anonymous_rule::<IGNORE_UNNAMED>(inner, named);
@@ -176,18 +183,11 @@ impl Translation {
                             types.push(ty);
                         }
                         Action::Shift { tag, output }
-                    })
+                    }))
                     .collect();
                 let ty = self.infer_type(types.into_iter(), &semact, tag).into();
                 NormalForm::Sequence {
                     token: token.clone(),
-                    token_output: if matches!(semact, SemAct::Recognize) {
-                        None
-                    } else if IGNORE_UNNAMED {
-                        named
-                    } else {
-                        named.or_else(|| Some(self.new_output_sym()))
-                    },
                     actions,
                     semact,
                     ty,
@@ -341,7 +341,6 @@ impl Translation {
                 let nf = if self.ignoring() {
                     NormalForm::Sequence {
                         token: ident.clone(),
-                        token_output: None,
                         actions: vec![],
                         semact: SemAct::Recognize,
                         ty: AbstractType::unit_type().into(),
@@ -349,7 +348,6 @@ impl Translation {
                 } else {
                     NormalForm::Sequence {
                         token: ident.clone(),
-                        token_output: Some(self.new_output_sym()),
                         actions: vec![],
                         semact: SemAct::Token,
                         ty: AbstractType::span_type().into(),
diff --git a/pag-parser2/src/utils.rs b/pag-parser2/src/utils.rs
index 32ee6dc..632465d 100644
--- a/pag-parser2/src/utils.rs
+++ b/pag-parser2/src/utils.rs
@@ -28,6 +28,7 @@ pub(crate) use styled;
 pub(crate) use styled_write;
 
 /// Appendix that does not count in equality/ordinality/hashing.
+#[repr(transparent)]
 #[derive(Clone)]
 pub struct Appendix<T>(pub T);