diff --git a/docs/compiler-options.md b/docs/compiler-options.md index 4faa016d7..7ae6f75f7 100644 --- a/docs/compiler-options.md +++ b/docs/compiler-options.md @@ -8,7 +8,8 @@ | `-Oref-to-ref` `-Ono-ref-to-ref` | Disabled | [ref-to-ref](#ref-to-ref) | | `-Oprune` `-Ono-prune` | Disabled | [definition-pruning](#definition-pruning) | | `-Opre-reduce` `-Ono-pre-reduce` | Disabled | [pre-reduce](#pre-reduce) | -| `-Osupercombinators` `-Ono-supercombinators` | Enabled | [supercombinators](#supercombinators) | +| `-Olinearize-matches` `-Olinearize-matches-extra` `-Ono-linearize-matches` | Extra | [linearize-matches](#linearize-matches) | +| `-Ofloat_combinators` `-Ono-float_combinators` | Enabled | [float-combinators](#float-combinators) | | `-Osimplify-main` `-Ono-simplify-main` | Disabled | [simplify-main](#simplify-main) | | `-Omerge` `-Ono-merge` | Disabled | [definition-merging](#definition-merging) | | `-Oinline` `-Ono-inline` | Disabled | [inline](#inline) | @@ -134,7 +135,30 @@ main = (bar) @main = @bar ``` -## Supercombinators +## linearize-matches + +Linearizes the variables between match cases, transforming them into combinators when possible. +When the `linearize-matches` option is used, linearizes only vars that are used in more than one arm. + +Example: +```rs +λa λb match a { 0: b; +: b } + +// Is transformed to +λa λb (match a { 0: λc c; +: λd d } b) +``` + +When the `linearize-matches-extra` option is used, linearizes all vars used in the arms. + +example: +```rs +λa λb λc match a { 0: b; +: c } + +// Is transformed to +λa λb λc (match a { 0: λd λ* d; +: λ* λe e } b c) +``` + +## float-combinators Extracts closed terms to new definitions. See [lazy definitions](lazy-definitions#automatic-optimization). Since HVM-Core is an eager runtime, this pass is enabled by default to prevent infinite expansions. diff --git a/docs/lazy-definitions.md b/docs/lazy-definitions.md index 9c7da9535..31967521a 100644 --- a/docs/lazy-definitions.md +++ b/docs/lazy-definitions.md @@ -49,4 +49,4 @@ The lambda terms without free variables are extracted to new definitions. ToMachine0 = λp (+ 1 (ToMachine p)) ToMachine = λn (n ToMachine0 0) ``` -Definitions are lazy in the runtime. Lifting lambda terms to new definitions will prevent infinite expansion. +Definitions are lazy in the runtime. Floating lambda terms into new definitions will prevent infinite expansion. diff --git a/src/lib.rs b/src/lib.rs index cb03fa796..ad8811f60 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,7 +83,7 @@ pub fn create_host(book: Arc, labels: Arc, compile_opts: CompileOp match term { Term::Str { val } => { println!("{}", val); - }, + } _ => (), } } @@ -134,12 +134,16 @@ pub fn desugar_book(book: &mut Book, opts: CompileOpts) -> Result, ctx.book.desugar_implicit_match_binds(); ctx.check_ctrs_arities()?; - // Must be between [`Book::desugar_implicit_match_binds`] and [`Ctx::linearize_simple_matches`] + // Must be between [`Book::desugar_implicit_match_binds`] and [`Ctx::linearize_matches`] ctx.check_unbound_vars()?; ctx.book.convert_match_def_to_term(); ctx.simplify_matches()?; - ctx.linearize_simple_matches()?; + + if opts.linearize_matches.enabled() { + ctx.linearize_simple_matches(opts.linearize_matches.is_extra())?; + } + ctx.book.encode_simple_matches(opts.adt_encoding); // sanity check @@ -155,8 +159,8 @@ pub fn desugar_book(book: &mut Book, opts: CompileOpts) -> Result, if opts.eta { ctx.book.eta_reduction(); } - if opts.supercombinators { - ctx.book.detach_supercombinators(); + if opts.float_combinators { + ctx.book.float_combinators(); } if opts.ref_to_ref { ctx.simplify_ref_to_ref()?; @@ -331,6 +335,24 @@ impl RunOpts { } } +#[derive(Clone, Copy, Debug, Default)] +pub enum OptLevel { + #[default] + Disabled, + Enabled, + Extra, +} + +impl OptLevel { + pub fn enabled(&self) -> bool { + !matches!(self, OptLevel::Disabled) + } + + pub fn is_extra(&self) -> bool { + matches!(self, OptLevel::Extra) + } +} + #[derive(Clone, Copy, Debug, Default)] pub struct CompileOpts { /// Selects the encoding for the ADT syntax. @@ -348,8 +370,11 @@ pub struct CompileOpts { /// Enables [hvmc_net::pre_reduce]. pub pre_reduce: bool, - /// Enables [term::transform::detach_supercombinators]. - pub supercombinators: bool, + /// Enables [term::transform::linearize_matches]. + pub linearize_matches: OptLevel, + + /// Enables [term::transform::float_combinators]. + pub float_combinators: bool, /// Enables [term::transform::simplify_main_ref]. pub simplify_main: bool, @@ -369,11 +394,12 @@ impl CompileOpts { ref_to_ref: true, prune: true, pre_reduce: true, - supercombinators: true, + float_combinators: true, simplify_main: true, merge: true, inline: true, adt_encoding: Default::default(), + linearize_matches: OptLevel::Extra, } } @@ -387,24 +413,34 @@ impl CompileOpts { Self { adt_encoding: self.adt_encoding, ..Self::default() } } - /// All optimizations disabled, except detach supercombinators. + /// All optimizations disabled, except float_combinators and linearize_matches pub fn light() -> Self { - Self { supercombinators: true, ..Self::default() } + Self { float_combinators: true, linearize_matches: OptLevel::Extra, ..Self::default() } } // Disable optimizations that don't work or are unnecessary on lazy mode pub fn lazy_mode(&mut self) { - self.supercombinators = false; + self.float_combinators = false; + if self.linearize_matches.is_extra() { + self.linearize_matches = OptLevel::Enabled; + } self.pre_reduce = false; } } impl CompileOpts { pub fn check(&self, lazy_mode: bool) { - if !self.supercombinators && !lazy_mode { - println!( - "Warning: Running in strict mode without enabling the supercombinators pass can lead to some functions expanding infinitely." - ); + if !lazy_mode { + if !self.float_combinators { + println!( + "Warning: Running in strict mode without enabling the float_combinators pass can lead to some functions expanding infinitely." + ); + } + if !self.linearize_matches.enabled() { + println!( + "Warning: Running in strict mode without enabling the linearize_matches pass can lead to some functions expanding infinitely." + ); + } } } } diff --git a/src/main.rs b/src/main.rs index 58a59b670..6afe45f8e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,7 @@ use hvml::{ diagnostics::Info, load_file_to_book, run_book, term::{display::display_readback_errors, AdtEncoding, Book, Name}, - CompileOpts, RunInfo, RunOpts, WarnState, WarningOpts, + CompileOpts, OptLevel, RunInfo, RunOpts, WarnState, WarningOpts, }; use std::{ path::{Path, PathBuf}, @@ -38,7 +38,7 @@ enum Mode { value_delimiter = ' ', action = clap::ArgAction::Append, long_help = r#"Enables or disables the given optimizations - supercombinators is enabled by default."#, + float_combinators is enabled by default on strict mode."#, )] comp_opts: Vec, @@ -82,7 +82,7 @@ enum Mode { value_delimiter = ' ', action = clap::ArgAction::Append, long_help = r#"Enables or disables the given optimizations - supercombinators is enabled by default."#, + float_combinators is enabled by default on strict mode."#, )] comp_opts: Vec, @@ -91,12 +91,15 @@ enum Mode { }, /// Runs the lambda-term level desugaring passes. Desugar { + #[arg(short = 'L', help = "Lazy mode")] + lazy_mode: bool, + #[arg( short = 'O', value_delimiter = ' ', action = clap::ArgAction::Append, long_help = r#"Enables or disables the given optimizations - supercombinators is enabled by default."#, + float_combinators is enabled by default on strict mode."#, )] comp_opts: Vec, @@ -185,15 +188,18 @@ fn execute_cli_mode(mut cli: Cli) -> Result<(), Info> { let mut opts = OptArgs::opts_from_cli(&comp_opts); if lazy_mode { - opts.lazy_mode() + opts.lazy_mode(); } let mut book = load_book(&path)?; let compiled = compile_book(&mut book, opts)?; println!("{}", compiled.display_with_warns(warning_opts)?); } - Mode::Desugar { path, comp_opts } => { - let opts = OptArgs::opts_from_cli(&comp_opts); + Mode::Desugar { path, comp_opts, lazy_mode } => { + let mut opts = OptArgs::opts_from_cli(&comp_opts); + if lazy_mode { + opts.lazy_mode(); + } let mut book = load_book(&path)?; // TODO: Shouldn't the desugar have `warn_opts` too? maybe WarningOpts::allow_all() by default let _warns = desugar_book(&mut book, opts)?; @@ -221,7 +227,7 @@ fn execute_cli_mode(mut cli: Cli) -> Result<(), Info> { if lazy_mode { single_core = true; - opts.lazy_mode() + opts.lazy_mode(); } let book = load_book(&path)?; @@ -288,8 +294,11 @@ pub enum OptArgs { NoRefToRef, PreReduce, NoPreReduce, - Supercombinators, - NoSupercombinators, + LinearizeMatches, + LinearizeMatchesExtra, + NoLinearizeMatches, + FloatCombinators, + NoFloatCombinators, SimplifyMain, NoSimplifyMain, Merge, @@ -316,16 +325,21 @@ impl OptArgs { NoRefToRef => opts.ref_to_ref = false, PreReduce => opts.pre_reduce = true, NoPreReduce => opts.pre_reduce = false, - Supercombinators => opts.supercombinators = true, - NoSupercombinators => opts.supercombinators = false, + FloatCombinators => opts.float_combinators = true, + NoFloatCombinators => opts.float_combinators = false, SimplifyMain => opts.simplify_main = true, NoSimplifyMain => opts.simplify_main = false, Merge => opts.merge = true, NoMerge => opts.merge = false, Inline => opts.inline = true, NoInline => opts.inline = false, + AdtScott => opts.adt_encoding = AdtEncoding::Scott, AdtTaggedScott => opts.adt_encoding = AdtEncoding::TaggedScott, + + LinearizeMatches => opts.linearize_matches = OptLevel::Enabled, + LinearizeMatchesExtra => opts.linearize_matches = OptLevel::Extra, + NoLinearizeMatches => opts.linearize_matches = OptLevel::Disabled, } } opts diff --git a/src/term/transform/detach_supercombinators.rs b/src/term/transform/detach_supercombinators.rs deleted file mode 100644 index 7427a30a3..000000000 --- a/src/term/transform/detach_supercombinators.rs +++ /dev/null @@ -1,286 +0,0 @@ -use crate::term::{Book, Definition, Name, Pattern, Rule, Term}; -use std::{ - collections::{BTreeMap, HashSet}, - ops::BitAnd, -}; - -/// Replaces closed Terms (i.e. without free variables) with a Ref to the extracted term -/// Precondition: Vars must have been sanitized -impl Book { - pub fn detach_supercombinators(&mut self) { - let mut combinators = Combinators::new(); - - for (def_name, def) in self.defs.iter_mut() { - if self.entrypoint.as_ref().is_some_and(|m| m == def_name) { - continue; - } - - let builtin = def.builtin; - let rule = def.rule_mut(); - rule.body.detach_combinators(def_name, builtin, &mut combinators); - } - - // Definitions are not inserted to the book as they are defined to appease the borrow checker. - // Since we are mut borrowing the rules we can't borrow the book to insert at the same time. - self.defs.extend(combinators); - } -} - -type Combinators = BTreeMap; - -#[derive(Debug)] -struct TermInfo<'d> { - // Number of times a Term has been detached from the current Term - counter: u32, - def_name: Name, - builtin: bool, - needed_names: HashSet, - combinators: &'d mut Combinators, -} - -impl<'d> TermInfo<'d> { - fn new(def_name: Name, builtin: bool, combinators: &'d mut Combinators) -> Self { - Self { counter: 0, def_name, builtin, needed_names: HashSet::new(), combinators } - } - fn request_name(&mut self, name: &Name) { - self.needed_names.insert(name.clone()); - } - - fn provide(&mut self, name: Option<&Name>) { - if let Some(name) = name { - self.needed_names.remove(name); - } - } - - fn has_no_free_vars(&self) -> bool { - self.needed_names.is_empty() - } - - fn replace_scope(&mut self, new_scope: HashSet) -> HashSet { - std::mem::replace(&mut self.needed_names, new_scope) - } - - fn merge_scope(&mut self, target: HashSet) { - self.needed_names.extend(target); - } - - fn detach_term(&mut self, term: &mut Term) { - let comb_name = Name::new(format!("{}$S{}", self.def_name, self.counter)); - self.counter += 1; - - let comb_var = Term::Ref { nam: comb_name.clone() }; - let extracted_term = std::mem::replace(term, comb_var); - - let rules = vec![Rule { body: extracted_term, pats: Vec::new() }]; - let rule = Definition { name: comb_name.clone(), rules, builtin: self.builtin }; - self.combinators.insert(comb_name, rule); - } -} - -#[derive(Debug)] -enum Detach { - /// Can be detached freely - Combinator, - /// Can not be detached - Unscoped { lams: HashSet, vars: HashSet }, - /// Should be detached to make the program not hang - Recursive, -} - -impl Detach { - fn can_detach(&self) -> bool { - !matches!(self, Detach::Unscoped { .. }) - } - - fn unscoped_lam(nam: Name) -> Self { - Detach::Unscoped { lams: [nam].into(), vars: Default::default() } - } - - fn unscoped_var(nam: Name) -> Self { - Detach::Unscoped { lams: Default::default(), vars: [nam].into() } - } -} - -impl BitAnd for Detach { - type Output = Detach; - - fn bitand(self, rhs: Self) -> Self::Output { - match (self, rhs) { - (Detach::Combinator, Detach::Combinator) => Detach::Combinator, - - (Detach::Combinator, unscoped @ Detach::Unscoped { .. }) => unscoped, - (unscoped @ Detach::Unscoped { .. }, Detach::Combinator) => unscoped, - - // Merge two unscoped terms into one, removing entries of each matching `Lam` and `Var`. - // If all unscoped pairs are found and removed this way, the term can be treated as a Combinator. - (Detach::Unscoped { mut lams, mut vars }, Detach::Unscoped { lams: rhs_lams, vars: rhs_vars }) => { - lams.extend(rhs_lams); - vars.extend(rhs_vars); - - let res_lams: HashSet<_> = lams.difference(&vars).cloned().collect(); - let res_vars: HashSet<_> = vars.difference(&lams).cloned().collect(); - - if res_lams.is_empty() && res_vars.is_empty() { - Detach::Combinator - } else { - Detach::Unscoped { lams: res_lams, vars: res_vars } - } - } - - (Detach::Combinator, Detach::Recursive) => Detach::Recursive, - (Detach::Recursive, Detach::Combinator) => Detach::Recursive, - (Detach::Recursive, Detach::Recursive) => Detach::Recursive, - - // TODO: Is this the best way to best deal with a term that is both unscoped and recursive? - (unscoped @ Detach::Unscoped { .. }, Detach::Recursive) => unscoped, - (Detach::Recursive, unscoped @ Detach::Unscoped { .. }) => unscoped, - } - } -} - -impl Term { - pub fn detach_combinators(&mut self, def_name: &Name, builtin: bool, combinators: &mut Combinators) { - fn go_lam(term: &mut Term, depth: usize, term_info: &mut TermInfo) -> Detach { - let parent_scope = term_info.replace_scope(HashSet::new()); - - let (nam, bod, unscoped): (Option<&Name>, &mut Term, bool) = match term { - Term::Lam { nam, bod, .. } => (nam.as_ref(), bod, false), - Term::Chn { nam, bod, .. } => (Some(nam), bod, true), - _ => unreachable!(), - }; - - let mut detach = go(bod, depth + 1, term_info); - - if unscoped { - detach = detach & Detach::unscoped_lam(nam.cloned().unwrap()); - } - - if !unscoped { - term_info.provide(nam); - } - - if detach.can_detach() && !term.is_id() && depth != 0 && term_info.has_no_free_vars() { - term_info.detach_term(term); - } - - term_info.merge_scope(parent_scope); - - detach - } - - fn go(term: &mut Term, depth: usize, term_info: &mut TermInfo) -> Detach { - match term { - Term::Lam { .. } => go_lam(term, depth, term_info), - Term::Chn { .. } => go_lam(term, depth, term_info), - - Term::App { fun, arg, .. } => { - let parent_scope = term_info.replace_scope(HashSet::new()); - - let fun_detach = go(fun, depth + 1, term_info); - let fun_scope = term_info.replace_scope(HashSet::new()); - - let arg_detach = go(arg, depth + 1, term_info); - let arg_scope = term_info.replace_scope(parent_scope); - - let detach = match fun_detach { - // If the fun scope is not empty, we know the recursive ref is not in a active position, - // Because if it was an application, it would be already extracted - // - // e.g.: {recursive_ref some_var} - Detach::Recursive if !fun_scope.is_empty() => arg_detach, - - Detach::Recursive if arg_scope.is_empty() && arg_detach.can_detach() => { - term_info.detach_term(term); - Detach::Combinator - } - - // If the only term that is possible to detach is just a Term::Ref, - // there is no benefit to it, so that case is skipped - Detach::Recursive if !matches!(fun, box Term::Ref { .. }) => { - term_info.detach_term(fun); - arg_detach - } - - _ => fun_detach & arg_detach, - }; - - term_info.merge_scope(fun_scope); - term_info.merge_scope(arg_scope); - - detach - } - - Term::Var { nam } => { - term_info.request_name(nam); - Detach::Combinator - } - - Term::Lnk { nam } => Detach::unscoped_var(nam.clone()), - - Term::Let { pat: Pattern::Var(nam), val, nxt } => { - let val_detach = go(val, depth + 1, term_info); - let nxt_detach = go(nxt, depth + 1, term_info); - term_info.provide(nam.as_ref()); - - val_detach & nxt_detach - } - Term::Dup { fst, snd, val, nxt, .. } - | Term::Let { pat: Pattern::Tup(box Pattern::Var(fst), box Pattern::Var(snd)), val, nxt } => { - let val_detach = go(val, depth + 1, term_info); - let nxt_detach = go(nxt, depth + 1, term_info); - - term_info.provide(fst.as_ref()); - term_info.provide(snd.as_ref()); - - val_detach & nxt_detach - } - Term::Mat { args, rules } => { - let mut detach = Detach::Combinator; - for arg in args { - detach = detach & go(arg, depth + 1, term_info); - } - let parent_scope = term_info.replace_scope(HashSet::new()); - - for rule in rules { - for pat in &rule.pats { - debug_assert!(pat.is_detached_num_match()); - } - - let arm_detach = match go(&mut rule.body, depth + 1, term_info) { - // If the recursive ref reached here, it is not in a active position - Detach::Recursive => Detach::Combinator, - detach => detach, - }; - - // It is expected that match arms were already linearized - debug_assert!(term_info.has_no_free_vars()); - - detach = detach & arm_detach; - } - - term_info.replace_scope(parent_scope); - detach - } - Term::Sup { fst, snd, .. } | Term::Tup { fst, snd } | Term::Opx { fst, snd, .. } => { - let fst_is_super = go(fst, depth + 1, term_info); - let snd_is_super = go(snd, depth + 1, term_info); - - fst_is_super & snd_is_super - } - Term::Ref { nam: def_name } if def_name == &term_info.def_name => Detach::Recursive, - Term::Let { .. } | Term::Lst { .. } => unreachable!(), - Term::Ref { .. } | Term::Num { .. } | Term::Str { .. } | Term::Era | Term::Err => Detach::Combinator, - } - } - - go(self, 0, &mut TermInfo::new(def_name.clone(), builtin, combinators)); - } - - // We don't want to detach id function, since that's not a net gain in performance or space - fn is_id(&self) -> bool { - match self { - Term::Lam { nam: Some(lam_nam), bod: box Term::Var { nam: var_nam }, .. } => lam_nam == var_nam, - _ => false, - } - } -} diff --git a/src/term/transform/float_combinators.rs b/src/term/transform/float_combinators.rs new file mode 100644 index 000000000..8bcac0718 --- /dev/null +++ b/src/term/transform/float_combinators.rs @@ -0,0 +1,297 @@ +use crate::term::{Book, Definition, Name, Rule, Term}; +use std::{ + collections::{BTreeMap, BTreeSet}, + ops::BitAnd, +}; + +/// Replaces closed Terms (i.e. without free variables) with a Ref to the extracted term +/// Precondition: Vars must have been sanitized +impl Book { + pub fn float_combinators(&mut self) { + let mut combinators = Combinators::new(); + + for (def_name, def) in self.defs.iter_mut() { + if self.entrypoint.as_ref().is_some_and(|m| m == def_name) { + continue; + } + + let builtin = def.builtin; + let rule = def.rule_mut(); + rule.body.float_combinators(def_name, builtin, &mut combinators); + } + + // Definitions are not inserted to the book as they are defined to appease the borrow checker. + // Since we are mut borrowing the rules we can't borrow the book to insert at the same time. + self.defs.extend(combinators); + } +} + +type Combinators = BTreeMap; + +#[derive(Debug)] +struct TermInfo<'d> { + // Number of times a Term has been detached from the current Term + counter: u32, + def_name: Name, + + builtin: bool, + needed_names: BTreeSet, + combinators: &'d mut Combinators, +} + +impl<'d> TermInfo<'d> { + fn new(def_name: Name, builtin: bool, combinators: &'d mut Combinators) -> Self { + Self { counter: 0, def_name, builtin, needed_names: BTreeSet::new(), combinators } + } + fn request_name(&mut self, name: &Name) { + self.needed_names.insert(name.clone()); + } + + fn provide(&mut self, name: Option<&Name>) { + if let Some(name) = name { + self.needed_names.remove(name); + } + } + + fn has_no_free_vars(&self) -> bool { + self.needed_names.is_empty() + } + + fn replace_scope(&mut self, new_scope: BTreeSet) -> BTreeSet { + std::mem::replace(&mut self.needed_names, new_scope) + } + + fn merge_scope(&mut self, target: BTreeSet) { + self.needed_names.extend(target); + } + + fn detach_term(&mut self, term: &mut Term) { + let comb_name = Name::new(format!("{}$S{}", self.def_name, self.counter)); + self.counter += 1; + + let comb_var = Term::Ref { nam: comb_name.clone() }; + let extracted_term = std::mem::replace(term, comb_var); + + let rules = vec![Rule { body: extracted_term, pats: Vec::new() }]; + let rule = Definition { name: comb_name.clone(), rules, builtin: self.builtin }; + self.combinators.insert(comb_name, rule); + } +} + +#[derive(Debug)] +enum Detach { + /// Can be detached freely + Combinator, + /// Can not be detached + Unscoped { lams: BTreeSet, vars: BTreeSet }, + /// Should be detached to make the program not hang + Recursive, +} + +impl Detach { + fn can_detach(&self) -> bool { + !matches!(self, Detach::Unscoped { .. }) + } + + fn unscoped_lam(nam: Name) -> Self { + Detach::Unscoped { lams: [nam].into(), vars: Default::default() } + } + + fn unscoped_var(nam: Name) -> Self { + Detach::Unscoped { lams: Default::default(), vars: [nam].into() } + } +} + +impl BitAnd for Detach { + type Output = Detach; + + fn bitand(self, rhs: Self) -> Self::Output { + match (self, rhs) { + (Detach::Combinator, Detach::Combinator) => Detach::Combinator, + + (Detach::Combinator, unscoped @ Detach::Unscoped { .. }) => unscoped, + (unscoped @ Detach::Unscoped { .. }, Detach::Combinator) => unscoped, + + // Merge two unscoped terms into one, removing entries of each matching `Lam` and `Var`. + // If all unscoped pairs are found and removed this way, the term can be treated as a Combinator. + (Detach::Unscoped { mut lams, mut vars }, Detach::Unscoped { lams: rhs_lams, vars: rhs_vars }) => { + lams.extend(rhs_lams); + vars.extend(rhs_vars); + + let res_lams: BTreeSet<_> = lams.difference(&vars).cloned().collect(); + let res_vars: BTreeSet<_> = vars.difference(&lams).cloned().collect(); + + if res_lams.is_empty() && res_vars.is_empty() { + Detach::Combinator + } else { + Detach::Unscoped { lams: res_lams, vars: res_vars } + } + } + + (Detach::Combinator, Detach::Recursive) => Detach::Recursive, + (Detach::Recursive, Detach::Combinator) => Detach::Recursive, + (Detach::Recursive, Detach::Recursive) => Detach::Recursive, + + // TODO: Is this the best way to best deal with a term that is both unscoped and recursive? + (unscoped @ Detach::Unscoped { .. }, Detach::Recursive) => unscoped, + (Detach::Recursive, unscoped @ Detach::Unscoped { .. }) => unscoped, + } + } +} + +impl Term { + pub fn float_combinators(&mut self, def_name: &Name, builtin: bool, combinators: &mut Combinators) { + self.go_float(0, &mut TermInfo::new(def_name.clone(), builtin, combinators)); + } + + fn go_float(&mut self, depth: usize, term_info: &mut TermInfo) -> Detach { + match self { + Term::Lam { .. } | Term::Chn { .. } if self.is_id() => Detach::Combinator, + + Term::Lam { .. } => self.float_lam(depth, term_info), + Term::Chn { .. } => self.float_lam(depth, term_info), + Term::App { .. } => self.float_app(depth, term_info), + Term::Mat { .. } => self.float_mat(depth, term_info), + + Term::Var { nam } => { + term_info.request_name(nam); + Detach::Combinator + } + + Term::Lnk { nam } => Detach::unscoped_var(nam.clone()), + + Term::Let { pat, val, nxt } => { + let val_detach = val.go_float(depth + 1, term_info); + let nxt_detach = nxt.go_float(depth + 1, term_info); + + for nam in pat.bind_or_eras() { + term_info.provide(nam.as_ref()); + } + + val_detach & nxt_detach + } + + Term::Dup { fst, snd, val, nxt, .. } => { + let val_detach = val.go_float(depth + 1, term_info); + let nxt_detach = nxt.go_float(depth + 1, term_info); + + term_info.provide(fst.as_ref()); + term_info.provide(snd.as_ref()); + + val_detach & nxt_detach + } + + Term::Sup { fst, snd, .. } | Term::Tup { fst, snd } | Term::Opx { fst, snd, .. } => { + let fst_is_super = fst.go_float(depth + 1, term_info); + let snd_is_super = snd.go_float(depth + 1, term_info); + + fst_is_super & snd_is_super + } + + Term::Ref { nam: def_name } if def_name == &term_info.def_name => Detach::Recursive, + + Term::Lst { .. } => unreachable!(), + Term::Ref { .. } | Term::Num { .. } | Term::Str { .. } | Term::Era | Term::Err => Detach::Combinator, + } + } + + fn float_lam(&mut self, depth: usize, term_info: &mut TermInfo) -> Detach { + let (nam, bod, unscoped): (Option<&Name>, &mut Term, bool) = match self { + Term::Lam { nam, bod, .. } => (nam.as_ref(), bod, false), + Term::Chn { nam, bod, .. } => (Some(nam), bod, true), + _ => unreachable!(), + }; + + let parent_scope = term_info.replace_scope(BTreeSet::new()); + + let mut detach = bod.go_float(depth + 1, term_info); + + if unscoped { + detach = detach & Detach::unscoped_lam(nam.cloned().unwrap()); + } else { + term_info.provide(nam); + } + + if depth != 0 && detach.can_detach() && term_info.has_no_free_vars() { + term_info.detach_term(self); + } + + term_info.merge_scope(parent_scope); + + detach + } + + fn float_app(&mut self, depth: usize, term_info: &mut TermInfo) -> Detach { + let Term::App { fun, arg, .. } = self else { unreachable!() }; + + let parent_scope = term_info.replace_scope(BTreeSet::new()); + + let fun_detach = fun.go_float(depth + 1, term_info); + let fun_scope = term_info.replace_scope(BTreeSet::new()); + + let arg_detach = arg.go_float(depth + 1, term_info); + let arg_scope = term_info.replace_scope(parent_scope); + + let detach = match fun_detach { + // If the fun scope is not empty, we know the recursive ref is not in a active position, + // Because if it was an application, it would be already extracted + // + // e.g.: {recursive_ref some_var} + Detach::Recursive if !fun_scope.is_empty() => arg_detach, + + Detach::Recursive if arg_scope.is_empty() && arg_detach.can_detach() => { + term_info.detach_term(self); + Detach::Combinator + } + + // If the only term that is possible to detach is just a Term::Ref, + // there is no benefit to it, so that case is skipped + Detach::Recursive if !matches!(fun, box Term::Ref { .. }) => { + term_info.detach_term(fun); + arg_detach + } + + _ => fun_detach & arg_detach, + }; + + term_info.merge_scope(fun_scope); + term_info.merge_scope(arg_scope); + + detach + } + + fn float_mat(&mut self, depth: usize, term_info: &mut TermInfo) -> Detach { + let Term::Mat { args, rules } = self else { unreachable!() }; + + let mut detach = Detach::Combinator; + for arg in args { + detach = detach & arg.go_float(depth + 1, term_info); + } + let parent_scope = term_info.replace_scope(BTreeSet::new()); + + for rule in rules.iter_mut() { + for pat in &rule.pats { + debug_assert!(pat.is_detached_num_match()); + } + + let arm_detach = match rule.body.go_float(depth + 1, term_info) { + // If the recursive ref reached here, it is not in a active position + Detach::Recursive => Detach::Combinator, + detach => detach, + }; + + detach = detach & arm_detach; + } + + term_info.merge_scope(parent_scope); + detach + } + + // We don't want to detach id function, since that's not a net gain in performance or space + fn is_id(&self) -> bool { + match self { + Term::Lam { nam: Some(lam_nam), bod: box Term::Var { nam: var_nam }, .. } => lam_nam == var_nam, + _ => false, + } + } +} diff --git a/src/term/transform/linearize_matches.rs b/src/term/transform/linearize_matches.rs index 031954dd2..0c606c334 100644 --- a/src/term/transform/linearize_matches.rs +++ b/src/term/transform/linearize_matches.rs @@ -3,18 +3,17 @@ use crate::{ diagnostics::Info, term::{check::type_check::infer_type, Constructors, Ctx, Name, Pattern, Term, Type}, }; -use indexmap::IndexSet; use itertools::Itertools; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; impl Ctx<'_> { /// Linearizes the variables between match cases, transforming them into combinators when possible. - pub fn linearize_simple_matches(&mut self) -> Result<(), Info> { + pub fn linearize_simple_matches(&mut self, lift_all_vars: bool) -> Result<(), Info> { self.info.start_pass(); for (def_name, def) in self.book.defs.iter_mut() { for rule in def.rules.iter_mut() { - let res = rule.body.linearize_simple_matches(&self.book.ctrs); + let res = rule.body.linearize_simple_matches(&self.book.ctrs, lift_all_vars); self.info.take_err(res, Some(def_name)); } } @@ -24,23 +23,23 @@ impl Ctx<'_> { } impl Term { - fn linearize_simple_matches(&mut self, ctrs: &Constructors) -> Result<(), MatchErr> { + fn linearize_simple_matches(&mut self, ctrs: &Constructors, lift_all_vars: bool) -> Result<(), MatchErr> { match self { Term::Mat { args: _, rules } => { for rule in rules.iter_mut() { - rule.body.linearize_simple_matches(ctrs).unwrap(); + rule.body.linearize_simple_matches(ctrs, lift_all_vars).unwrap(); } let matched_type = infer_type(rules.iter().map(|r| &r.pats[0]), ctrs)?; match matched_type { - Type::Num | Type::Tup | Type::Any => _ = linearize_match_free_vars(self), + Type::Num | Type::Tup | Type::Any => _ = lift_match_vars(self, lift_all_vars), Type::Adt(_) => { - linearize_match_free_vars(self); + lift_match_vars(self, lift_all_vars); } } } Term::Lam { bod, .. } | Term::Chn { bod, .. } => { - bod.linearize_simple_matches(ctrs)?; + bod.linearize_simple_matches(ctrs, lift_all_vars)?; } Term::Let { pat: Pattern::Var(..), val: fst, nxt: snd } @@ -49,8 +48,8 @@ impl Term { | Term::Sup { fst, snd, .. } | Term::Opx { fst, snd, .. } | Term::App { fun: fst, arg: snd, .. } => { - fst.linearize_simple_matches(ctrs)?; - snd.linearize_simple_matches(ctrs)?; + fst.linearize_simple_matches(ctrs, lift_all_vars)?; + snd.linearize_simple_matches(ctrs, lift_all_vars)?; } Term::Lst { .. } => unreachable!(), @@ -74,16 +73,33 @@ impl Term { /// Converts free vars inside the match arms into lambdas with applications to give them the external value. /// Makes the rules extractable and linear (no need for dups when variable used in both rules) -pub fn linearize_match_free_vars(match_term: &mut Term) -> &mut Term { +/// +/// If `lift_all_vars`, acts on all variables found in the arms, +/// Otherwise, only lift vars that are used on more than one arm. +/// +/// Obs: This does not interact unscoped variables +pub fn lift_match_vars(match_term: &mut Term, lift_all_vars: bool) -> &mut Term { let Term::Mat { args: _, rules } = match_term else { unreachable!() }; + + let free = rules.iter().flat_map(|rule| { + rule.body.free_vars().into_iter().filter(|(name, _)| !rule.pats.iter().any(|p| p.binds().contains(name))) + }); + // Collect the vars. // We need consistent iteration order. - let free_vars: BTreeSet = rules - .iter() - .flat_map(|r| { - r.body.free_vars().into_keys().filter(|k| !r.pats.iter().flat_map(|p| p.binds()).contains(k)) - }) - .collect(); + let free_vars: BTreeSet = if lift_all_vars { + free.map(|(name, _)| name).collect() + } else { + free + .fold(BTreeMap::new(), |mut acc, (name, count)| { + *acc.entry(name).or_insert(0) += count.min(1); + acc + }) + .into_iter() + .filter(|(_, count)| *count >= 2) + .map(|(name, _)| name) + .collect() + }; // Add lambdas to the arms for rule in rules { @@ -98,40 +114,6 @@ pub fn linearize_match_free_vars(match_term: &mut Term) -> &mut Term { get_match_reference(match_term) } -pub fn linearize_match_unscoped_vars(match_term: &mut Term) -> Result<&mut Term, MatchErr> { - let Term::Mat { args: _, rules } = match_term else { unreachable!() }; - // Collect the vars - let mut free_vars = IndexSet::new(); - for rule in rules.iter_mut() { - let (decls, uses) = rule.body.unscoped_vars(); - // Not allowed to declare unscoped var and not use it since we need to extract the match arm. - if let Some(var) = decls.difference(&uses).next() { - return Err(MatchErr::Linearize(Name::new(format!("λ${var}")))); - } - // Change unscoped var to normal scoped var if it references something outside this match arm. - let arm_free_vars = uses.difference(&decls); - for var in arm_free_vars.clone() { - rule.body.subst_unscoped(var, &Term::Var { nam: Name::new(format!("%match%unscoped%{var}")) }); - } - free_vars.extend(arm_free_vars.cloned()); - } - - // Add lambdas to the arms - for rule in rules { - let old_body = std::mem::take(&mut rule.body); - rule.body = free_vars - .iter() - .rev() - .fold(old_body, |body, var| Term::named_lam(Name::new(format!("%match%unscoped%{var}")), body)); - } - - // Add apps to the match - let old_match = std::mem::take(match_term); - *match_term = free_vars.into_iter().fold(old_match, |acc, nam| Term::call(acc, [Term::Lnk { nam }])); - - Ok(get_match_reference(match_term)) -} - /// Get a reference to the match again /// It returns a reference and not an owned value because we want /// to keep the new surrounding Apps but still modify the match further. diff --git a/src/term/transform/mod.rs b/src/term/transform/mod.rs index d087a0d6a..5a8693c66 100644 --- a/src/term/transform/mod.rs +++ b/src/term/transform/mod.rs @@ -2,10 +2,10 @@ pub mod definition_merge; pub mod definition_pruning; pub mod desugar_implicit_match_binds; pub mod desugar_let_destructors; -pub mod detach_supercombinators; pub mod encode_adts; pub mod encode_pattern_matching; pub mod eta_reduction; +pub mod float_combinators; pub mod inline; pub mod linearize_matches; pub mod linearize_vars; diff --git a/tests/golden_tests.rs b/tests/golden_tests.rs index 63a6b3368..6b6b28beb 100644 --- a/tests/golden_tests.rs +++ b/tests/golden_tests.rs @@ -201,7 +201,7 @@ fn simplify_matches() { ctx.check_ctrs_arities()?; ctx.check_unbound_vars()?; ctx.simplify_matches()?; - ctx.linearize_simple_matches()?; + ctx.linearize_simple_matches(true)?; ctx.check_unbound_vars()?; ctx.book.make_var_names_unique(); ctx.book.linearize_vars(); @@ -239,7 +239,7 @@ fn encode_pattern_match() { ctx.check_ctrs_arities()?; ctx.check_unbound_vars()?; ctx.simplify_matches()?; - ctx.linearize_simple_matches()?; + ctx.linearize_simple_matches(true)?; ctx.book.encode_simple_matches(adt_encoding); ctx.check_unbound_vars()?; ctx.book.make_var_names_unique(); diff --git a/tests/snapshots/encode_pattern_match__adt_tup_era.hvm.snap b/tests/snapshots/encode_pattern_match__adt_tup_era.hvm.snap index 721f284cf..9e2d9a0e9 100644 --- a/tests/snapshots/encode_pattern_match__adt_tup_era.hvm.snap +++ b/tests/snapshots/encode_pattern_match__adt_tup_era.hvm.snap @@ -15,5 +15,3 @@ Scott: (Main) = (Foo (Pair 1 5)) (Pair) = λa λb λc (c a b) - - diff --git a/tests/snapshots/encode_pattern_match__def_tups.hvm.snap b/tests/snapshots/encode_pattern_match__def_tups.hvm.snap index 71378a77f..3bee95847 100644 --- a/tests/snapshots/encode_pattern_match__def_tups.hvm.snap +++ b/tests/snapshots/encode_pattern_match__def_tups.hvm.snap @@ -11,5 +11,3 @@ Scott: (go) = λa let (b, c) = a; (let (d, e) = c; λf (let (g, h) = e; λi λj (let (k, l) = h; λm λn λo (+ (+ (+ (+ l k) o) n) m) i j g) f d) b) (main) = (go (1, (2, (3, (4, 5))))) - - diff --git a/tests/snapshots/encode_pattern_match__flatten_era_pat.hvm.snap b/tests/snapshots/encode_pattern_match__flatten_era_pat.hvm.snap index 4cb8d3345..db2ad4fa2 100644 --- a/tests/snapshots/encode_pattern_match__flatten_era_pat.hvm.snap +++ b/tests/snapshots/encode_pattern_match__flatten_era_pat.hvm.snap @@ -19,5 +19,3 @@ Scott: (Fn3) = λa λb (let (c, d) = a; λe (match c { 0: λ* λ* 0; +: λi λ* λ* i } d e) b) (main) = (Fn2 ((1, 2), (3, (4, (5, 6)))) 0) - - diff --git a/tests/snapshots/encode_pattern_match__match_bind.hvm.snap b/tests/snapshots/encode_pattern_match__match_bind.hvm.snap index 172e70721..f1b9170b8 100644 --- a/tests/snapshots/encode_pattern_match__match_bind.hvm.snap +++ b/tests/snapshots/encode_pattern_match__match_bind.hvm.snap @@ -11,5 +11,3 @@ Scott: (cheese) = let {a a_2} = (+ 2 3); (match a { 0: λ* 653323; +: λd λe (+ e d) } a_2) (main) = cheese - - diff --git a/tests/snapshots/encode_pattern_match__nested_let_tup.hvm.snap b/tests/snapshots/encode_pattern_match__nested_let_tup.hvm.snap index 872145b78..ce13f20ac 100644 --- a/tests/snapshots/encode_pattern_match__nested_let_tup.hvm.snap +++ b/tests/snapshots/encode_pattern_match__nested_let_tup.hvm.snap @@ -7,5 +7,3 @@ TaggedScott: Scott: (main) = let (k, l) = let (b, c) = (10, ((1, ((2, 3), 4)), 3)); (let (d, *) = c; λ* d b); (let (m, *) = l; λ* let (s, *) = m; s k) - - diff --git a/tests/snapshots/encode_pattern_match__ntup_sum.hvm.snap b/tests/snapshots/encode_pattern_match__ntup_sum.hvm.snap index cdb517685..657be29a2 100644 --- a/tests/snapshots/encode_pattern_match__ntup_sum.hvm.snap +++ b/tests/snapshots/encode_pattern_match__ntup_sum.hvm.snap @@ -11,5 +11,3 @@ Scott: (ntupSum) = λa let (b, c) = a; (let (d, e) = b; λf (let (g, h) = f; λi λj (let (k, l) = h; λm λn λo (+ m (+ n (+ o (+ k l)))) i j g) d e) c) (main) = (ntupSum ((1, 3), (3, (2, 1)))) - - diff --git a/tests/snapshots/run_file__print.hvm.snap b/tests/snapshots/run_file__print.hvm.snap new file mode 100644 index 000000000..4cdae60ab --- /dev/null +++ b/tests/snapshots/run_file__print.hvm.snap @@ -0,0 +1,5 @@ +--- +source: tests/golden_tests.rs +input_file: tests/golden_tests/run_file/print.hvm +--- +"goodbye world" diff --git a/tests/snapshots/run_lazy__adt_option_and.hvm.snap b/tests/snapshots/run_lazy__adt_option_and.hvm.snap index fafab6193..e86516c81 100644 --- a/tests/snapshots/run_lazy__adt_option_and.hvm.snap +++ b/tests/snapshots/run_lazy__adt_option_and.hvm.snap @@ -2,4 +2,4 @@ source: tests/golden_tests.rs input_file: tests/golden_tests/run_lazy/adt_option_and.hvm --- -λa match a { (Some b): λc (match c { (Some d): λe (Some (e, d)); (None): λ* None } b); (None): λ* None } +λa λb match a { (Some c): match b { (Some d): (Some (c, d)); (None): None }; (None): None }