Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

normalization #80

Closed
wants to merge 42 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
639ce57
rename ParserTree to ParseTree
QuarticCat Jun 22, 2023
9d92fd0
avoid name conflicts
QuarticCat Jun 22, 2023
f5b1866
make clippy happy
QuarticCat Jul 17, 2023
f7792d6
impl prototype of new frontend (wip)
QuarticCat Jul 24, 2023
6386241
initial work to add semact
SchrodingerZhu Jul 25, 2023
4e87b65
minor update
QuarticCat Jul 25, 2023
cc5b341
impl LexerExpr::parse
QuarticCat Jul 25, 2023
17561e8
impl ParserExpr::parse
QuarticCat Jul 25, 2023
e7d40ff
finish new frontend parser
QuarticCat Jul 25, 2023
98d4da6
impl post-fix var binding
QuarticCat Jul 25, 2023
4994ecb
add more design details of semact
SchrodingerZhu Jul 25, 2023
a060832
address comments on trait design
SchrodingerZhu Jul 25, 2023
b4706a2
record lexer idx
QuarticCat Jul 26, 2023
f59d893
support ignore in parser expr
QuarticCat Jul 26, 2023
7faa80d
add some debug facilities
SchrodingerZhu Jul 26, 2023
550d0ee
print normal form table
SchrodingerZhu Jul 26, 2023
8b81928
fix use error
QuarticCat Jul 26, 2023
ce21ff6
fix parser bug
QuarticCat Jul 26, 2023
58ecd55
add `HKT` support and type inference prototype
SchrodingerZhu Jul 26, 2023
203739e
expose infer_all_type interface
SchrodingerZhu Jul 27, 2023
d2d45b8
address QC's method
SchrodingerZhu Jul 27, 2023
e670899
never coding again in the midnight
SchrodingerZhu Jul 27, 2023
9654bfc
add simd tail handling
QuarticCat Jul 26, 2023
e0622ee
adjust lookahead generation
QuarticCat Jul 26, 2023
2eaac6d
try to solve aarch64 performance regression
QuarticCat Jul 27, 2023
91b9710
optimize lut lookahead
QuarticCat Jul 29, 2023
fada7c6
fix stack size issue
QuarticCat Jul 29, 2023
997a0fd
add assume all over the generated code
QuarticCat Jul 30, 2023
b701d64
refactor semact structure
SchrodingerZhu Jul 27, 2023
0e1dab0
allow inner collector to be hinted
SchrodingerZhu Jul 28, 2023
a76362a
stage work for translation
SchrodingerZhu Jul 28, 2023
3e767f0
add markers for tailcall
SchrodingerZhu Jul 29, 2023
a9a61ca
implement translation
SchrodingerZhu Jul 30, 2023
c00774e
disallow # in toplevel
QuarticCat Jul 30, 2023
86e93d3
remove `Hinted`
QuarticCat Jul 30, 2023
f03a599
reformat
QuarticCat Jul 30, 2023
b545ec3
parse ParserExpr::Seq to vector
QuarticCat Jul 30, 2023
dc19238
embed type info into NF
SchrodingerZhu Jul 30, 2023
4865e76
translation with type info
SchrodingerZhu Jul 30, 2023
e642299
process type hints
SchrodingerZhu Jul 31, 2023
aa1db2c
fix wrong semact
SchrodingerZhu Jul 31, 2023
0900a44
implement normalization
SchrodingerZhu Jul 31, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,7 @@
# modified, or distributed except according to those terms.

[workspace]
members = [
"pag-lexer",
"pag-parser",
"pag-compiler",
"tests/sexpr-calculator",
"tests/arith-expr",
"tests/tokenizer",
"benches/csv",
"benches/json",
]
members = ["pag-*", "tests/*", "benches/*"]
resolver = "2"

[workspace.package]
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ For some reasons (mostly performance issues), only nightly rust (1.71+) is suppo
should be annotated with
```rust
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]
```
</details>

Expand Down
1 change: 1 addition & 0 deletions benches/csv/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ publish = false

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
rand = { version = "0.8" }
snmalloc-rs = { version = "0.3", features = ["build_cc"] }

Expand Down
3 changes: 1 addition & 2 deletions benches/csv/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]

mod parser;

pub use parser::parse;
Expand Down
3 changes: 2 additions & 1 deletion benches/json/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ publish = false
autobenches = false

[dependencies]
pag-util = { version = "0.1.0-alpha.1", path = "../../pag-util" }
rand = { version = "0.8" }
serde_json = "1.0"

Expand All @@ -17,7 +18,7 @@ lalrpop = "0.20.0"
[dev-dependencies]
criterion = { version = "0.4", features = ["html_reports"] }
snmalloc-rs = { version = "0.3", features = ["build_cc"] }
pest = { version = "2.5.7", features = [ "std", "memchr" ] }
pest = { version = "2.5.7", features = ["std", "memchr"] }
pest_derive = "2.5.7"
lalrpop-util = { version = "0.20.0", features = ["lexer", "unicode"] }
logos = "0.13.0"
Expand Down
3 changes: 1 addition & 2 deletions benches/json/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]

mod parser;

pub use parser::parse;
Expand Down
3 changes: 1 addition & 2 deletions pag-lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

#![feature(portable_simd)]
#![feature(core_intrinsics)]
#![feature(array_chunks)]

pub mod congruence;
pub mod derivative;
Expand Down
137 changes: 82 additions & 55 deletions pag-lexer/src/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,62 +21,85 @@ enum Kind {
fn generate_lut_routine(index: usize) -> TokenStream {
let table = index / 8;
let shift = index % 8;
let bit = 1u8 << shift;
quote! {
idx = idx
+ input[idx..]
.iter()
.position(|x| GLOBAL_LUT[#table][*x as usize] & #bit > 0)
.unwrap_or(input.len() - idx);
}
}

fn byte_simd(byte: u8) -> TokenStream {
let byte = byte_char(byte);
quote! {
data.simd_eq(u8x16::splat(#byte))
}
}

fn range_simd(min: u8, max: u8) -> TokenStream {
let min = byte_char(min);
let max = byte_char(max);
quote! {
data.simd_ge(u8x16::splat(#min)) & data.simd_le(u8x16::splat(#max))
}
quote! { idx = ::pag_util::lookahead_lut(input, idx, &GLOBAL_LUT[#table], #shift); }
}

#[cfg(not(target_arch = "aarch64"))]
fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
let mask = intervals
.iter()
.map(|&Interval(l, r)| match l == r {
true => {
let l = byte_char(l);
quote! { data.simd_eq(u8x16::splat(#l)) }
}
false => {
let l = byte_char(l);
let r = byte_char(r);
quote! { data.simd_ge(u8x16::splat(#l)) & data.simd_le(u8x16::splat(#r)) }
}
})
.reduce(|acc, x| quote! { #acc | #x })
.unwrap();
let count_act = match kind {
Kind::Positive => quote! { trailing_ones },
Kind::Negative => quote! { trailing_zeros },
};
let idx_offset = intervals
let tail_match = match kind {
Kind::Positive => quote! { matches!(input.get(idx), Some(#intervals)) },
Kind::Negative => quote! { !matches!(input.get(idx), Some(#intervals) | None) },
};
quote! {
'lookahead: {
unsafe { ::pag_util::assume(idx <= input.len()) };
for chunk in input[idx..].chunks_exact(16) {
use core::simd::*;
let data = u8x16::from_slice(chunk);
let mask = #mask;
let idx_offset = mask.to_bitmask().#count_act();
idx += idx_offset as usize;
if idx_offset != 16 {
break 'lookahead;
}
}
while #tail_match {
idx += 1;
}
}
}
}

#[cfg(target_arch = "aarch64")]
fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream {
let mask = intervals
.iter()
.map(|&Interval(l, r)| match l == r {
true => byte_simd(l),
false => range_simd(l, r),
true => {
let l = byte_char(l);
quote! { data.simd_eq(u8x16::splat(#l)) }
}
false => {
let l = byte_char(l);
let r = byte_char(r);
quote! { data.simd_ge(u8x16::splat(#l)) & data.simd_le(u8x16::splat(#r)) }
}
})
.reduce(|acc, x| quote! { #acc | #x })
.map(|x| {
if cfg!(target_arch = "aarch64") {
quote! {{
let mask : u128 = unsafe { core::mem::transmute(#x) };
mask.#count_act() / 8
}}
} else {
quote! {
(#x).to_bitmask().#count_act()
}
}
});
.unwrap();
let count_act = match kind {
Kind::Positive => quote! { trailing_ones },
Kind::Negative => quote! { trailing_zeros },
};
quote! {
for i in input[idx..].array_chunks::<16>() {
unsafe { ::pag_util::assume(idx <= input.len()) };
for chunk in input[idx..].chunks_exact(16) {
use core::simd::*;
let data = u8x16::from_slice(i);
let idx_offset = #idx_offset;
let data = u8x16::from_slice(chunk);
let mask = #mask;
let mask = unsafe { core::mem::transmute::<_, u128>(mask) };
let idx_offset = mask.#count_act() / 8;
idx += idx_offset as usize;
if core::intrinsics::unlikely(idx_offset != 16) {
if idx_offset != 16 {
break;
}
}
Expand All @@ -86,7 +109,7 @@ fn generate_lookahead_routine(intervals: &Intervals, kind: Kind) -> TokenStream
fn estimated_cost(intervals: &Intervals) -> u32 {
intervals
.iter()
.map(|Interval(l, r)| if l == r { 1 } else { 2 })
.map(|Interval(l, r)| 1 + (l != r) as u32)
.sum()
}

Expand Down Expand Up @@ -134,25 +157,27 @@ impl LoopOptimizer {
let table_size = self.global_lut.len();
let table = self.global_lut.iter().map(|x| quote!([#(#x,)*]));
Some(quote! {
const GLOBAL_LUT : [[u8; 256]; #table_size] = [ #(#table,)* ];
const GLOBAL_LUT: [[u8; 256]; #table_size] = [ #(#table,)* ];
})
}

pub fn generate_lookahead(&mut self, dfa: &DfaTable, state: &DfaState) -> Option<TokenStream> {
let limit = 4;

let positives = direct_self_loops(dfa, state)?;
if estimated_cost(&positives) <= limit {
return Some(generate_lookahead_routine(&positives, Kind::Positive));
}

let negatives = positives.complement()?;
if estimated_cost(&negatives) <= limit {
return Some(generate_lookahead_routine(&negatives, Kind::Negative));
}
let pos_cost = estimated_cost(&positives);
let neg_cost = estimated_cost(&negatives);

let index = self.assign_table(&negatives);
Some(generate_lut_routine(index))
if pos_cost.min(neg_cost) > limit {
let index = self.assign_table(&negatives);
return Some(generate_lut_routine(index));
}
if pos_cost < neg_cost {
Some(generate_lookahead_routine(&positives, Kind::Positive))
} else {
Some(generate_lookahead_routine(&negatives, Kind::Negative))
}
}
}

Expand All @@ -174,7 +199,9 @@ mod test {
fn test_lookahead_codegen() {
use crate::intervals;
let positives = intervals!((b'0', b'9'), (b'0', b'9'), (b'A', b'F'));
syn::parse2::<syn::Expr>(generate_lookahead_routine(&positives, Kind::Positive)).unwrap();
syn::parse2::<syn::Expr>(generate_lookahead_routine(&positives, Kind::Negative)).unwrap();
let positive = generate_lookahead_routine(&positives, Kind::Positive);
let _: syn::Expr = syn::parse_quote! { { #positive } };
let negative = generate_lookahead_routine(&positives, Kind::Negative);
let _: syn::Expr = syn::parse_quote! { { #negative } };
}
}
2 changes: 1 addition & 1 deletion pag-lexer/src/utilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ where
}
#[cfg(debug_assertions)]
{
let mut vec = Vec::from_iter(data.into_iter());
let mut vec = Vec::from_iter(data);
vec.sort_unstable_by_key(_f);
vec.into_iter()
}
Expand Down
10 changes: 8 additions & 2 deletions pag-lexer/src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ impl Vector {
let on_success = &success_actions[rule_idx];
return quote! {
State::#label => {
unsafe { ::pag_util::assume(idx <= input.len()) };
if input[idx..].starts_with(#literal) {
cursor = idx + #length;
#on_success
Expand All @@ -151,16 +152,21 @@ impl Vector {
},
};
}
let lookahead = optimizer.generate_lookahead(&dfa, state);
let transitions = info.transitions.iter().map(|(interval, target)| {
if leaf_states.contains(target) {
let rule_idx = target.last_success.unwrap();
let on_success = &success_actions[rule_idx];
return quote! { Some(#interval) => { cursor = idx + 1; #on_success }, };
}
let target_label = format_ident!("S{}", dfa[target].state_id);
let target_id = dfa[target].state_id;
#[cfg(not(target_arch = "aarch64"))]
if lookahead.is_some() && info.state_id == target_id {
return quote! {};
}
let target_label = format_ident!("S{}", target_id);
quote! { Some(#interval) => state = State::#target_label, }
});
let lookahead = optimizer.generate_lookahead(&dfa, state);
let otherwise = state
.last_success
.and_then(|x| success_actions.get(x))
Expand Down
4 changes: 3 additions & 1 deletion pag-parser/src/frontend/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,9 @@ mod test {
dbg!(size_of::<NormalForm>());
let pairs = GrammarParser::parse(Rule::grammar, TEST).unwrap();
let tree = parse_surface_syntax(pairs, &PRATT_PARSER, TEST).unwrap();
let Grammar { lexer, parser } = &tree.node else { unreachable!() };
let Grammar { lexer, parser } = &tree.node else {
unreachable!()
};

println!("\n---------< construct lexer database >----------");
let database = construct_lexer_database(lexer).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion pag-parser/src/frontend/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pub fn construct_parser<'src, 'arena>(
};
let mut errs = Vec::new();
for rule in rules {
let ParserRuleDef { active, name, expr, } = &rule.node else {
let ParserRuleDef { active, name, expr } = &rule.node else {
unreachable_branch!("parser should only contain rule definitions")
};
match construct_core_syntax_tree(&parser, expr) {
Expand Down
Loading
Loading