Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

normalization #80

Closed
wants to merge 42 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
639ce57
rename ParserTree to ParseTree
QuarticCat Jun 22, 2023
9d92fd0
avoid name conflicts
QuarticCat Jun 22, 2023
f5b1866
make clippy happy
QuarticCat Jul 17, 2023
f7792d6
impl prototype of new frontend (wip)
QuarticCat Jul 24, 2023
6386241
initial work to add semact
SchrodingerZhu Jul 25, 2023
4e87b65
minor update
QuarticCat Jul 25, 2023
cc5b341
impl LexerExpr::parse
QuarticCat Jul 25, 2023
17561e8
impl ParserExpr::parse
QuarticCat Jul 25, 2023
e7d40ff
finish new frontend parser
QuarticCat Jul 25, 2023
98d4da6
impl post-fix var binding
QuarticCat Jul 25, 2023
4994ecb
add more design details of semact
SchrodingerZhu Jul 25, 2023
a060832
address comments on trait design
SchrodingerZhu Jul 25, 2023
b4706a2
record lexer idx
QuarticCat Jul 26, 2023
f59d893
support ignore in parser expr
QuarticCat Jul 26, 2023
7faa80d
add some debug facilities
SchrodingerZhu Jul 26, 2023
550d0ee
print normal form table
SchrodingerZhu Jul 26, 2023
8b81928
fix use error
QuarticCat Jul 26, 2023
ce21ff6
fix parser bug
QuarticCat Jul 26, 2023
58ecd55
add `HKT` support and type inference prototype
SchrodingerZhu Jul 26, 2023
203739e
expose infer_all_type interface
SchrodingerZhu Jul 27, 2023
d2d45b8
address QC's method
SchrodingerZhu Jul 27, 2023
e670899
never coding again in the midnight
SchrodingerZhu Jul 27, 2023
9654bfc
add simd tail handling
QuarticCat Jul 26, 2023
e0622ee
adjust lookahead generation
QuarticCat Jul 26, 2023
2eaac6d
try to solve aarch64 performance regression
QuarticCat Jul 27, 2023
91b9710
optimize lut lookahead
QuarticCat Jul 29, 2023
fada7c6
fix stack size issue
QuarticCat Jul 29, 2023
997a0fd
add assume all over the generated code
QuarticCat Jul 30, 2023
b701d64
refactor semact structure
SchrodingerZhu Jul 27, 2023
0e1dab0
allow inner collector to be hinted
SchrodingerZhu Jul 28, 2023
a76362a
stage work for translation
SchrodingerZhu Jul 28, 2023
3e767f0
add markers for tailcall
SchrodingerZhu Jul 29, 2023
a9a61ca
implement translation
SchrodingerZhu Jul 30, 2023
c00774e
disallow # in toplevel
QuarticCat Jul 30, 2023
86e93d3
remove `Hinted`
QuarticCat Jul 30, 2023
f03a599
reformat
QuarticCat Jul 30, 2023
b545ec3
parse ParserExpr::Seq to vector
QuarticCat Jul 30, 2023
dc19238
embed type info into NF
SchrodingerZhu Jul 30, 2023
4865e76
translation with type info
SchrodingerZhu Jul 30, 2023
e642299
process type hints
SchrodingerZhu Jul 31, 2023
aa1db2c
fix wrong semact
SchrodingerZhu Jul 31, 2023
0900a44
implement normalization
SchrodingerZhu Jul 31, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,7 @@
# modified, or distributed except according to those terms.

[workspace]
members = [
"pag-lexer",
"pag-parser",
"pag-compiler",
"tests/sexpr-calculator",
"tests/arith-expr",
"tests/tokenizer",
"benches/csv",
"benches/json",
]
members = ["pag-*", "tests/*", "benches/*"]
resolver = "2"

[workspace.package]
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ For some reasons (mostly performance issues), only nightly rust (1.71+) is suppo
should be annotated with
```rust
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]
```
</details>

Expand Down
1 change: 1 addition & 0 deletions benches/csv/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ publish = false

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
rand = { version = "0.8" }
snmalloc-rs = { version = "0.3", features = ["build_cc"] }

Expand Down
3 changes: 1 addition & 2 deletions benches/csv/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]

mod parser;

pub use parser::parse;
Expand Down
3 changes: 2 additions & 1 deletion benches/json/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ publish = false
autobenches = false

[dependencies]
pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
rand = { version = "0.8" }
serde_json = "1.0"

Expand All @@ -17,7 +18,7 @@ lalrpop = "0.20.0"
[dev-dependencies]
criterion = { version = "0.4", features = ["html_reports"] }
snmalloc-rs = { version = "0.3", features = ["build_cc"] }
pest = { version = "2.5.7", features = [ "std", "memchr" ] }
pest = { version = "2.5.7", features = ["std", "memchr"] }
pest_derive = "2.5.7"
lalrpop-util = { version = "0.20.0", features = ["lexer", "unicode"] }
logos = "0.13.0"
Expand Down
3 changes: 1 addition & 2 deletions benches/json/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]

mod parser;

pub use parser::parse;
Expand Down
3 changes: 1 addition & 2 deletions pag-lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]

pub mod congruence;
pub mod derivative;
Expand Down
137 changes: 82 additions & 55 deletions pag-lexer/src/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,62 +21,85 @@ enum Kind {
fn generate_lut_routine(index: usize) -> TokenStream {
let table = index / 8;
let shift = index % 8;
let bit = 1u8 << shift;
quote! {
idx = idx
+ input[idx..]
.iter()
.position(|x| GLOBAL_LUT[#table][*x as usize] & #bit > 0)
.unwrap_or(input.len() - idx);
}
}

fn byte_simd(byte: u8) -> TokenStream {
let byte = byte_char(byte);
quote! {
data.simd_eq(u8x16::splat(#byte))
}
}

fn range_simd(min: u8, max: u8) -> TokenStream {
let min = byte_char(min);
let max = byte_char(max);
quote! {
data.simd_ge(u8x16::splat(#min)) & data.simd_le(u8x16::splat(#max))
}
quote! { idx = ::pag_util::lookahead_lut(input, idx, &GLOBAL_LUT[#table], #shift); }
}

#[cfg(not(target_arch = "aarch64"))]
fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
let mask = intervals
.iter()
.map(|&Interval(l, r)| match l == r {
true => {
let l = byte_char(l);
quote! { data.simd_eq(u8x16::splat(#l)) }
}
false => {
let l = byte_char(l);
let r = byte_char(r);
quote! { data.simd_ge(u8x16::splat(#l)) & data.simd_le(u8x16::splat(#r)) }
}
})
.reduce(|acc, x| quote! { #acc | #x })
.unwrap();
let count_act = match kind {
Kind::Positive => quote! { trailing_ones },
Kind::Negative => quote! { trailing_zeros },
};
let idx_offset = intervals
let tail_match = match kind {
Kind::Positive => quote! { matches!(input.get(idx), Some(#intervals)) },
Kind::Negative => quote! { !matches!(input.get(idx), Some(#intervals) | None) },
};
quote! {
'lookahead: {
unsafe { ::pag_util::assume(idx <= input.len()) };
for chunk in input[idx..].chunks_exact(16) {
use core::simd::*;
let data = u8x16::from_slice(chunk);
let mask = #mask;
let idx_offset = mask.to_bitmask().#count_act();
idx += idx_offset as usize;
if idx_offset != 16 {
break 'lookahead;
}
}
while #tail_match {
idx += 1;
}
}
}
}

#[cfg(target_arch = "aarch64")]
fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
let mask = intervals
.iter()
.map(|&Interval(l, r)| match l == r {
true => byte_simd(l),
false => range_simd(l, r),
true => {
let l = byte_char(l);
quote! { data.simd_eq(u8x16::splat(#l)) }
}
false => {
let l = byte_char(l);
let r = byte_char(r);
quote! { data.simd_ge(u8x16::splat(#l)) & data.simd_le(u8x16::splat(#r)) }
}
})
.reduce(|acc, x| quote! { #acc | #x })
.map(|x| {
if cfg!(target_arch = "aarch64") {
quote! {{
let mask : u128 = unsafe { core::mem::transmute(#x) };
mask.#count_act() / 8
}}
} else {
quote! {
(#x).to_bitmask().#count_act()
}
}
});
.unwrap();
let count_act = match kind {
Kind::Positive => quote! { trailing_ones },
Kind::Negative => quote! { trailing_zeros },
};
quote! {
for i in input[idx..].array_chunks::<16>() {
unsafe { ::pag_util::assume(idx <= input.len()) };
for chunk in input[idx..].chunks_exact(16) {
use core::simd::*;
let data = u8x16::from_slice(i);
let idx_offset = #idx_offset;
let data = u8x16::from_slice(chunk);
let mask = #mask;
let mask = unsafe { core::mem::transmute::<_, u128>(mask) };
let idx_offset = mask.#count_act() / 8;
idx += idx_offset as usize;
if core::intrinsics::unlikely(idx_offset != 16) {
if idx_offset != 16 {
break;
}
}
Expand All @@ -86,7 +109,7 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
fn estimated_cost(intervals: &Intervals) -> u32 {
intervals
.iter()
.map(|Interval(l, r)| if l == r { 1 } else { 2 })
.map(|Interval(l, r)| 1 + (l != r) as u32)
.sum()
}

Expand Down Expand Up @@ -134,25 +157,27 @@ impl LoopOptimizer {
let table_size = self.global_lut.len();
let table = self.global_lut.iter().map(|x| quote!([#(#x,)*]));
Some(quote! {
const GLOBAL_LUT : [[u8; 256]; #table_size] = [ #(#table,)* ];
const GLOBAL_LUT: [[u8; 256]; #table_size] = [ #(#table,)* ];
})
}

pub fn generate_lookahead(&mut self, dfa: &DfaTable, state: &DfaState) -> Option<TokenStream> {
let limit = 4;

let positives = direct_self_loops(dfa, state)?;
if estimated_cost(&positives) <= limit {
return Some(generate_lookahead_routine(&positives, Kind::Positive));
}

let negatives = positives.complement()?;
if estimated_cost(&negatives) <= limit {
return Some(generate_lookahead_routine(&negatives, Kind::Negative));
}
let pos_cost = estimated_cost(&positives);
let neg_cost = estimated_cost(&negatives);

let index = self.assign_table(&negatives);
Some(generate_lut_routine(index))
if pos_cost.min(neg_cost) > limit {
let index = self.assign_table(&negatives);
return Some(generate_lut_routine(index));
}
if pos_cost < neg_cost {
Some(generate_lookahead_routine(&positives, Kind::Positive))
} else {
Some(generate_lookahead_routine(&negatives, Kind::Negative))
}
}
}

Expand All @@ -174,7 +199,9 @@ mod test {
fn test_lookahead_codegen() {
use crate::intervals;
let positives = intervals!((b'0', b'9'), (b'0', b'9'), (b'A', b'F'));
syn::parse2::<syn::Expr>(generate_lookahead_routine(&positives, Kind::Positive)).unwrap();
syn::parse2::<syn::Expr>(generate_lookahead_routine(&positives, Kind::Negative)).unwrap();
let positive = generate_lookahead_routine(&positives, Kind::Positive);
let _: syn::Expr = syn::parse_quote! { { #positive } };
let negative = generate_lookahead_routine(&positives, Kind::Negative);
let _: syn::Expr = syn::parse_quote! { { #negative } };
}
}
2 changes: 1 addition & 1 deletion pag-lexer/src/utilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ where
}
#[cfg(debug_assertions)]
{
let mut vec = Vec::from_iter(data.into_iter());
let mut vec = Vec::from_iter(data);
vec.sort_unstable_by_key(_f);
vec.into_iter()
}
Expand Down
10 changes: 8 additions & 2 deletions pag-lexer/src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ impl Vector {
let on_success = &success_actions[rule_idx];
return quote! {
State::#label => {
unsafe { ::pag_util::assume(idx <= input.len()) };
if input[idx..].starts_with(#literal) {
cursor = idx + #length;
#on_success
Expand All @@ -151,16 +152,21 @@ impl Vector {
},
};
}
let lookahead = optimizer.generate_lookahead(&dfa, state);
let transitions = info.transitions.iter().map(|(interval, target)| {
if leaf_states.contains(target) {
let rule_idx = target.last_success.unwrap();
let on_success = &success_actions[rule_idx];
return quote! { Some(#interval) => { cursor = idx + 1; #on_success }, };
}
let target_label = format_ident!("S{}", dfa[target].state_id);
let target_id = dfa[target].state_id;
#[cfg(not(target_arch = "aarch64"))]
if lookahead.is_some() && info.state_id == target_id {
return quote! {};
}
let target_label = format_ident!("S{}", target_id);
quote! { Some(#interval) => state = State::#target_label, }
});
let lookahead = optimizer.generate_lookahead(&dfa, state);
let otherwise = state
.last_success
.and_then(|x| success_actions.get(x))
Expand Down
4 changes: 3 additions & 1 deletion pag-parser/src/frontend/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,9 @@ mod test {
dbg!(size_of::<NormalForm>());
let pairs = GrammarParser::parse(Rule::grammar, TEST).unwrap();
let tree = parse_surface_syntax(pairs, &PRATT_PARSER, TEST).unwrap();
let Grammar { lexer, parser } = &tree.node else { unreachable!() };
let Grammar { lexer, parser } = &tree.node else {
unreachable!()
};

println!("\n---------< construct lexer database >----------");
let database = construct_lexer_database(lexer).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion pag-parser/src/frontend/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pub fn construct_parser<'src, 'arena>(
};
let mut errs = Vec::new();
for rule in rules {
let ParserRuleDef { active, name, expr, } = &rule.node else {
let ParserRuleDef { active, name, expr } = &rule.node else {
unreachable_branch!("parser should only contain rule definitions")
};
match construct_core_syntax_tree(&parser, expr) {
Expand Down
Loading
Loading