From 91a956b581c6ebe9c002f572236c1515ceb7d3eb Mon Sep 17 00:00:00 2001 From: Robin Freyler Date: Fri, 24 Nov 2023 17:10:58 +0100 Subject: [PATCH] Fuse `i32.{and,or, xor}` + [`i32.eqz`] + `br_if` Wasm instructions (#796) * add i32.branch_{and,or,xor}[_imm] instructions There is no need for i64 counterparts since in Wasm only i32 types are used as conditional "bool" types. * add i32.branch_{nand, nor, xnor}[_imm] instructions We added these instructions to provide optimizations for encode_eqz. * rename new branch instructions * add fusion of i32.{and,or,xor} + i32.eqz * add forgotten i32.{and,or,xor}+i32.eqz+branch translations * add fuse benchmark to showcase perf gains * bump count_until limit to make it less noisy * fix bug in executor for new fuse instructions * add i32.{and,or,xor} + i32.eqz fusion tests * add i32.{and,or,xor} + i32.eqz + br_if fuse tests --- crates/wasmi/benches/benches.rs | 21 +- crates/wasmi/benches/wat/fuse.wat | 27 + .../src/engine/regmach/bytecode/construct.rs | 18 + .../wasmi/src/engine/regmach/bytecode/mod.rs | 77 ++- .../src/engine/regmach/executor/instrs.rs | 44 ++ .../engine/regmach/executor/instrs/binary.rs | 8 +- .../engine/regmach/executor/instrs/branch.rs | 17 +- .../src/engine/regmach/tests/op/cmp_br.rs | 86 +++ .../src/engine/regmach/tests/op/i32_eqz.rs | 69 +++ .../wasmi/src/engine/regmach/tests/op/mod.rs | 1 + .../regmach/translator/instr_encoder.rs | 95 +++- .../src/engine/regmach/translator/mod.rs | 524 +++++++++--------- .../engine/regmach/translator/result_mut.rs | 18 + .../src/engine/regmach/translator/visit.rs | 4 + .../regmach/translator/visit_register.rs | 24 +- 15 files changed, 757 insertions(+), 276 deletions(-) create mode 100644 crates/wasmi/benches/wat/fuse.wat create mode 100644 crates/wasmi/src/engine/regmach/tests/op/i32_eqz.rs diff --git a/crates/wasmi/benches/benches.rs b/crates/wasmi/benches/benches.rs index f3c462a882..ef24df2739 100644 --- a/crates/wasmi/benches/benches.rs +++ 
b/crates/wasmi/benches/benches.rs @@ -66,6 +66,7 @@ criterion_group! { bench_execute_recursive_scan, bench_execute_recursive_trap, bench_execute_host_calls, + bench_execute_fuse, bench_execute_fibonacci, bench_execute_recursive_is_even, bench_execute_memory_sum, @@ -421,7 +422,7 @@ fn bench_execute_regex_redux(c: &mut Criterion) { } fn bench_execute_count_until(c: &mut Criterion) { - const COUNT_UNTIL: i32 = 100_000; + const COUNT_UNTIL: i32 = 1_000_000; c.bench_function("execute/count_until", |b| { let (mut store, instance) = load_instance_from_wat(include_bytes!("wat/count_until.wat")); let count_until = instance @@ -887,6 +888,24 @@ fn bench_execute_host_calls(c: &mut Criterion) { }); } +fn bench_execute_fuse(c: &mut Criterion) { + let (mut store, instance) = load_instance_from_wat(include_bytes!("wat/fuse.wat")); + let mut bench_fuse = |bench_id: &str, func_name: &str, input: i32| { + c.bench_function(bench_id, |b| { + let fib = instance + .get_export(&store, func_name) + .and_then(Extern::into_func) + .unwrap() + .typed::<i32, i32>(&store) + .unwrap(); + b.iter(|| { + assert_eq!(fib.call(&mut store, input).unwrap(), input); + }); + }); + }; + bench_fuse("execute/fuse", "test", 1_000_000); +} + fn bench_execute_fibonacci(c: &mut Criterion) { const fn fib(n: i64) -> i64 { if n <= 1 { diff --git a/crates/wasmi/benches/wat/fuse.wat b/crates/wasmi/benches/wat/fuse.wat new file mode 100644 index 0000000000..a455e3a2d0 --- /dev/null +++ b/crates/wasmi/benches/wat/fuse.wat @@ -0,0 +1,27 @@ +(module + (func (export "test") (param $n i32) (result i32) + (local $i i32) + (loop $continue + ;; i += 1 + (local.set $i + (i32.add + (local.get $i) + (i32.const 1) + ) + ) + ;; if not((i >= n) and (i <= n)) then continue + ;; Note: The above is equal to: + ;; if i != n then continue + (br_if + $continue + (i32.eqz + (i32.and + (i32.ge_u (local.get $i) (local.get $n)) + (i32.le_u (local.get $i) (local.get $n)) + ) + ) + ) + ) + (return (local.get $i)) + ) +) diff --git 
a/crates/wasmi/src/engine/regmach/bytecode/construct.rs b/crates/wasmi/src/engine/regmach/bytecode/construct.rs index 5ba856b30f..3082c3d059 100644 --- a/crates/wasmi/src/engine/regmach/bytecode/construct.rs +++ b/crates/wasmi/src/engine/regmach/bytecode/construct.rs @@ -175,6 +175,12 @@ macro_rules! constructor_for_branch_binop { } } constructor_for_branch_binop! { + fn branch_i32_and() -> Self::BranchI32And; + fn branch_i32_or() -> Self::BranchI32Or; + fn branch_i32_xor() -> Self::BranchI32Xor; + fn branch_i32_and_eqz() -> Self::BranchI32AndEqz; + fn branch_i32_or_eqz() -> Self::BranchI32OrEqz; + fn branch_i32_xor_eqz() -> Self::BranchI32XorEqz; fn branch_i32_eq() -> Self::BranchI32Eq; fn branch_i32_ne() -> Self::BranchI32Ne; fn branch_i32_lt_s() -> Self::BranchI32LtS; @@ -225,6 +231,12 @@ macro_rules! constructor_for_branch_binop_imm { } } constructor_for_branch_binop_imm! { + fn branch_i32_and_imm(i32) -> Self::BranchI32AndImm; + fn branch_i32_or_imm(i32) -> Self::BranchI32OrImm; + fn branch_i32_xor_imm(i32) -> Self::BranchI32XorImm; + fn branch_i32_and_eqz_imm(i32) -> Self::BranchI32AndEqzImm; + fn branch_i32_or_eqz_imm(i32) -> Self::BranchI32OrEqzImm; + fn branch_i32_xor_eqz_imm(i32) -> Self::BranchI32XorEqzImm; fn branch_i32_eq_imm(i32) -> Self::BranchI32EqImm; fn branch_i32_ne_imm(i32) -> Self::BranchI32NeImm; fn branch_i32_lt_s_imm(i32) -> Self::BranchI32LtSImm; @@ -1555,18 +1567,24 @@ impl Instruction { // Integer Bitwise Logic fn i32_and(binary) -> Self::I32And; + fn i32_and_eqz(binary) -> Self::I32AndEqz; + fn i32_and_eqz_imm16(binary_i32imm16) -> Self::I32AndEqzImm16; fn i32_and_imm16(binary_i32imm16) -> Self::I32AndImm16; fn i64_and(binary) -> Self::I64And; fn i64_and_imm16(binary_i64imm16) -> Self::I64AndImm16; fn i32_or(binary) -> Self::I32Or; + fn i32_or_eqz(binary) -> Self::I32OrEqz; + fn i32_or_eqz_imm16(binary_i32imm16) -> Self::I32OrEqzImm16; fn i32_or_imm16(binary_i32imm16) -> Self::I32OrImm16; fn i64_or(binary) -> Self::I64Or; fn 
i64_or_imm16(binary_i64imm16) -> Self::I64OrImm16; fn i32_xor(binary) -> Self::I32Xor; + fn i32_xor_eqz(binary) -> Self::I32XorEqz; + fn i32_xor_eqz_imm16(binary_i32imm16) -> Self::I32XorEqzImm16; fn i32_xor_imm16(binary_i32imm16) -> Self::I32XorImm16; fn i64_xor(binary) -> Self::I64Xor; diff --git a/crates/wasmi/src/engine/regmach/bytecode/mod.rs b/crates/wasmi/src/engine/regmach/bytecode/mod.rs index 8ea97232b7..f7b09d7fb2 100644 --- a/crates/wasmi/src/engine/regmach/bytecode/mod.rs +++ b/crates/wasmi/src/engine/regmach/bytecode/mod.rs @@ -390,6 +390,56 @@ pub enum Instruction { offset: BranchOffset, }, + /// A fused [`Instruction::I32And`] and [`Instruction::BranchNez`] instruction. + BranchI32And(BranchBinOpInstr), + /// A fused [`Instruction::I32And`] and [`Instruction::BranchNez`] instruction. + /// + /// # Note + /// + /// Variant of [`Instruction::BranchI32And`] with 16-bit encoded constant `rhs`. + BranchI32AndImm(BranchBinOpInstrImm), + /// A fused [`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction. + BranchI32Or(BranchBinOpInstr), + /// A fused [`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction. + /// + /// # Note + /// + /// Variant of [`Instruction::BranchI32Or`] with 16-bit encoded constant `rhs`. + BranchI32OrImm(BranchBinOpInstrImm), + /// A fused [`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction. + BranchI32Xor(BranchBinOpInstr), + /// A fused [`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction. + /// + /// # Note + /// + /// Variant of [`Instruction::BranchI32Xor`] with 16-bit encoded constant `rhs`. + BranchI32XorImm(BranchBinOpInstrImm), + + /// A fused not-[`Instruction::I32And`] and [`Instruction::BranchNez`] instruction. + BranchI32AndEqz(BranchBinOpInstr), + /// A fused not-[`Instruction::I32And`] and [`Instruction::BranchNez`] instruction. + /// + /// # Note + /// + /// Variant of [`Instruction::BranchI32AndEqz`] with 16-bit encoded constant `rhs`. 
+ BranchI32AndEqzImm(BranchBinOpInstrImm), + /// A fused not-[`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction. + BranchI32OrEqz(BranchBinOpInstr), + /// A fused not-[`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction. + /// + /// # Note + /// + /// Variant of [`Instruction::BranchI32OrEqz`] with 16-bit encoded constant `rhs`. + BranchI32OrEqzImm(BranchBinOpInstrImm), + /// A fused not-[`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction. + BranchI32XorEqz(BranchBinOpInstr), + /// A fused not-[`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction. + /// + /// # Note + /// + /// Variant of [`Instruction::BranchI32XorEqz`] with 16-bit encoded constant `rhs`. + BranchI32XorEqzImm(BranchBinOpInstrImm), + /// A fused [`Instruction::I32Eq`] and [`Instruction::BranchNez`] instruction. BranchI32Eq(BranchBinOpInstr), /// A fused [`Instruction::I32Eq`] and [`Instruction::BranchNez`] instruction. @@ -2677,14 +2727,19 @@ pub enum Instruction { /// `i32` bitwise-and instruction: `r0 = r1 & r2` I32And(BinInstr), - /// `i64` bitwise-and instruction: `r0 = r1 & r2` - I64And(BinInstr), + /// Fused Wasm `i32.and` + `i32.eqz` [`Instruction`]. + I32AndEqz(BinInstr), + /// Fused Wasm `i32.and` + `i32.eqz` [`Instruction`] with 16-bit encoded immediate. + I32AndEqzImm16(BinInstrImm16), /// `i32` bitwise-and (small) immediate instruction: `r0 = r1 & c0` /// /// # Note /// /// Optimized variant of [`Instruction::I32And`] for 16-bit constant values. I32AndImm16(BinInstrImm16), + + /// `i64` bitwise-and instruction: `r0 = r1 & r2` + I64And(BinInstr), /// `i64` bitwise-and (small) immediate instruction: `r0 = r1 & c0` /// /// # Note @@ -2694,14 +2749,19 @@ pub enum Instruction { /// `i32` bitwise-or instruction: `r0 = r1 & r2` I32Or(BinInstr), - /// `i64` bitwise-or instruction: `r0 = r1 & r2` - I64Or(BinInstr), + /// Fused Wasm `i32.or` + `i32.eqz` [`Instruction`]. 
+ I32OrEqz(BinInstr), + /// Fused Wasm `i32.or` + `i32.eqz` [`Instruction`] with 16-bit encoded immediate. + I32OrEqzImm16(BinInstrImm16), /// `i32` bitwise-or (small) immediate instruction: `r0 = r1 & c0` /// /// # Note /// /// Optimized variant of [`Instruction::I32Or`] for 16-bit constant values. I32OrImm16(BinInstrImm16), + + /// `i64` bitwise-or instruction: `r0 = r1 & r2` + I64Or(BinInstr), /// `i64` bitwise-or (small) immediate instruction: `r0 = r1 & c0` /// /// # Note @@ -2711,14 +2771,19 @@ pub enum Instruction { /// `i32` bitwise-or instruction: `r0 = r1 ^ r2` I32Xor(BinInstr), - /// `i64` bitwise-or instruction: `r0 = r1 ^ r2` - I64Xor(BinInstr), + /// Fused Wasm `i32.xor` + `i32.eqz` [`Instruction`]. + I32XorEqz(BinInstr), + /// Fused Wasm `i32.xor` + `i32.eqz` [`Instruction`] with 16-bit encoded immediate. + I32XorEqzImm16(BinInstrImm16), /// `i32` bitwise-or (small) immediate instruction: `r0 = r1 ^ c0` /// /// # Note /// /// Optimized variant of [`Instruction::I32Xor`] for 16-bit constant values. 
I32XorImm16(BinInstrImm16), + + /// `i64` bitwise-or instruction: `r0 = r1 ^ r2` + I64Xor(BinInstr), /// `i64` bitwise-or (small) immediate instruction: `r0 = r1 ^ c0` /// /// # Note diff --git a/crates/wasmi/src/engine/regmach/executor/instrs.rs b/crates/wasmi/src/engine/regmach/executor/instrs.rs index eec6196df6..824d308858 100644 --- a/crates/wasmi/src/engine/regmach/executor/instrs.rs +++ b/crates/wasmi/src/engine/regmach/executor/instrs.rs @@ -254,6 +254,18 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> { Instr::BranchTable { index, len_targets } => { self.execute_branch_table(index, len_targets) } + Instr::BranchI32And(instr) => self.execute_branch_i32_and(instr), + Instr::BranchI32AndImm(instr) => self.execute_branch_i32_and_imm(instr), + Instr::BranchI32Or(instr) => self.execute_branch_i32_or(instr), + Instr::BranchI32OrImm(instr) => self.execute_branch_i32_or_imm(instr), + Instr::BranchI32Xor(instr) => self.execute_branch_i32_xor(instr), + Instr::BranchI32XorImm(instr) => self.execute_branch_i32_xor_imm(instr), + Instr::BranchI32AndEqz(instr) => self.execute_branch_i32_and_eqz(instr), + Instr::BranchI32AndEqzImm(instr) => self.execute_branch_i32_and_eqz_imm(instr), + Instr::BranchI32OrEqz(instr) => self.execute_branch_i32_or_eqz(instr), + Instr::BranchI32OrEqzImm(instr) => self.execute_branch_i32_or_eqz_imm(instr), + Instr::BranchI32XorEqz(instr) => self.execute_branch_i32_xor_eqz(instr), + Instr::BranchI32XorEqzImm(instr) => self.execute_branch_i32_xor_eqz_imm(instr), Instr::BranchI32Eq(instr) => self.execute_branch_i32_eq(instr), Instr::BranchI32EqImm(instr) => self.execute_branch_i32_eq_imm(instr), Instr::BranchI32Ne(instr) => self.execute_branch_i32_ne(instr), @@ -711,10 +723,16 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> { Instr::I32RemUImm16(instr) => self.execute_i32_rem_u_imm16(instr)?, Instr::I32RemUImm16Rev(instr) => self.execute_i32_rem_u_imm16_rev(instr)?, Instr::I32And(instr) => self.execute_i32_and(instr), + Instr::I32AndEqz(instr) => 
self.execute_i32_and_eqz(instr), + Instr::I32AndEqzImm16(instr) => self.execute_i32_and_eqz_imm16(instr), Instr::I32AndImm16(instr) => self.execute_i32_and_imm16(instr), Instr::I32Or(instr) => self.execute_i32_or(instr), + Instr::I32OrEqz(instr) => self.execute_i32_or_eqz(instr), + Instr::I32OrEqzImm16(instr) => self.execute_i32_or_eqz_imm16(instr), Instr::I32OrImm16(instr) => self.execute_i32_or_imm16(instr), Instr::I32Xor(instr) => self.execute_i32_xor(instr), + Instr::I32XorEqz(instr) => self.execute_i32_xor_eqz(instr), + Instr::I32XorEqzImm16(instr) => self.execute_i32_xor_eqz_imm16(instr), Instr::I32XorImm16(instr) => self.execute_i32_xor_imm16(instr), Instr::I32Shl(instr) => self.execute_i32_shl(instr), Instr::I32ShlImm(instr) => self.execute_i32_shl_imm(instr), @@ -1232,3 +1250,29 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> { self.next_instr(); } } + +/// Extension method for [`UntypedValue`] required by the [`Executor`]. +trait UntypedValueExt { + /// Executes a fused `i32.and` + `i32.eqz` instruction. + fn i32_and_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue; + + /// Executes a fused `i32.or` + `i32.eqz` instruction. + fn i32_or_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue; + + /// Executes a fused `i32.xor` + `i32.eqz` instruction. 
+ fn i32_xor_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue; +} + +impl UntypedValueExt for UntypedValue { + fn i32_and_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue { + (i32::from(UntypedValue::i32_and(x, y)) == 0).into() + } + + fn i32_or_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue { + (i32::from(UntypedValue::i32_or(x, y)) == 0).into() + } + + fn i32_xor_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue { + (i32::from(UntypedValue::i32_xor(x, y)) == 0).into() + } +} diff --git a/crates/wasmi/src/engine/regmach/executor/instrs/binary.rs b/crates/wasmi/src/engine/regmach/executor/instrs/binary.rs index d1695dd9c4..2131cdd6d6 100644 --- a/crates/wasmi/src/engine/regmach/executor/instrs/binary.rs +++ b/crates/wasmi/src/engine/regmach/executor/instrs/binary.rs @@ -1,4 +1,4 @@ -use super::Executor; +use super::{Executor, UntypedValueExt}; use crate::{ core::{TrapCode, UntypedValue}, engine::regmach::bytecode::{BinInstr, BinInstrImm16, CopysignImmInstr, Sign}, @@ -24,8 +24,11 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> { (Instruction::I32Sub, execute_i32_sub, UntypedValue::i32_sub), (Instruction::I32Mul, execute_i32_mul, UntypedValue::i32_mul), (Instruction::I32And, execute_i32_and, UntypedValue::i32_and), + (Instruction::I32AndEqz, execute_i32_and_eqz, UntypedValue::i32_and_eqz), (Instruction::I32Or, execute_i32_or, UntypedValue::i32_or), + (Instruction::I32OrEqz, execute_i32_or_eqz, UntypedValue::i32_or_eqz), (Instruction::I32Xor, execute_i32_xor, UntypedValue::i32_xor), + (Instruction::I32XorEqz, execute_i32_xor_eqz, UntypedValue::i32_xor_eqz), (Instruction::I64Add, execute_i64_add, UntypedValue::i64_add), (Instruction::I64Sub, execute_i64_sub, UntypedValue::i64_sub), @@ -81,8 +84,11 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> { (i32, Instruction::I32SubImm16, execute_i32_sub_imm16, UntypedValue::i32_sub), (i32, Instruction::I32MulImm16, execute_i32_mul_imm16, UntypedValue::i32_mul), (i32, Instruction::I32AndImm16, 
execute_i32_and_imm16, UntypedValue::i32_and), + (i32, Instruction::I32AndEqzImm16, execute_i32_and_eqz_imm16, UntypedValue::i32_and_eqz), (i32, Instruction::I32OrImm16, execute_i32_or_imm16, UntypedValue::i32_or), + (i32, Instruction::I32OrEqzImm16, execute_i32_or_eqz_imm16, UntypedValue::i32_or_eqz), (i32, Instruction::I32XorImm16, execute_i32_xor_imm16, UntypedValue::i32_xor), + (i32, Instruction::I32XorEqzImm16, execute_i32_xor_eqz_imm16, UntypedValue::i32_xor_eqz), (i64, Instruction::I64AddImm16, execute_i64_add_imm16, UntypedValue::i64_add), (i64, Instruction::I64SubImm16, execute_i64_sub_imm16, UntypedValue::i64_sub), diff --git a/crates/wasmi/src/engine/regmach/executor/instrs/branch.rs b/crates/wasmi/src/engine/regmach/executor/instrs/branch.rs index 039b0cbab5..e05f243162 100644 --- a/crates/wasmi/src/engine/regmach/executor/instrs/branch.rs +++ b/crates/wasmi/src/engine/regmach/executor/instrs/branch.rs @@ -1,11 +1,10 @@ -use wasmi_core::UntypedValue; - -use super::Executor; +use super::{Executor, UntypedValueExt}; use crate::engine::{ bytecode::BranchOffset, regmach::bytecode::{BranchBinOpInstr, BranchBinOpInstrImm, Const16, Const32, Register}, }; use core::cmp; +use wasmi_core::UntypedValue; #[cfg(doc)] use crate::engine::regmach::bytecode::Instruction; @@ -101,6 +100,12 @@ macro_rules! impl_execute_branch_binop { } } impl_execute_branch_binop! 
{ + (Instruction::BranchI32And, execute_branch_i32_and, UntypedValue::i32_and), + (Instruction::BranchI32Or, execute_branch_i32_or, UntypedValue::i32_or), + (Instruction::BranchI32Xor, execute_branch_i32_xor, UntypedValue::i32_xor), + (Instruction::BranchI32AndEqz, execute_branch_i32_and_eqz, UntypedValue::i32_and_eqz), + (Instruction::BranchI32OrEqz, execute_branch_i32_or_eqz, UntypedValue::i32_or_eqz), + (Instruction::BranchI32XorEqz, execute_branch_i32_xor_eqz, UntypedValue::i32_xor_eqz), (Instruction::BranchI32Eq, execute_branch_i32_eq, UntypedValue::i32_eq), (Instruction::BranchI32Ne, execute_branch_i32_ne, UntypedValue::i32_ne), (Instruction::BranchI32LtS, execute_branch_i32_lt_s, UntypedValue::i32_lt_s), @@ -152,6 +157,12 @@ macro_rules! impl_execute_branch_binop_imm { } } impl_execute_branch_binop_imm! { + (Instruction::BranchI32AndImm, execute_branch_i32_and_imm, UntypedValue::i32_and, i32), + (Instruction::BranchI32OrImm, execute_branch_i32_or_imm, UntypedValue::i32_or, i32), + (Instruction::BranchI32XorImm, execute_branch_i32_xor_imm, UntypedValue::i32_xor, i32), + (Instruction::BranchI32AndEqzImm, execute_branch_i32_and_eqz_imm, UntypedValue::i32_and_eqz, i32), + (Instruction::BranchI32OrEqzImm, execute_branch_i32_or_eqz_imm, UntypedValue::i32_or_eqz, i32), + (Instruction::BranchI32XorEqzImm, execute_branch_i32_xor_eqz_imm, UntypedValue::i32_xor_eqz, i32), (Instruction::BranchI32EqImm, execute_branch_i32_eq_imm, UntypedValue::i32_eq, i32), (Instruction::BranchI32NeImm, execute_branch_i32_ne_imm, UntypedValue::i32_ne, i32), (Instruction::BranchI32LtSImm, execute_branch_i32_lt_s_imm, UntypedValue::i32_lt_s, i32), diff --git a/crates/wasmi/src/engine/regmach/tests/op/cmp_br.rs b/crates/wasmi/src/engine/regmach/tests/op/cmp_br.rs index f408f197ca..f1bb88cfe6 100644 --- a/crates/wasmi/src/engine/regmach/tests/op/cmp_br.rs +++ b/crates/wasmi/src/engine/regmach/tests/op/cmp_br.rs @@ -42,6 +42,9 @@ fn loop_backward() { .run() } + test_for(ValueType::I32, "and", 
Instruction::branch_i32_and); + test_for(ValueType::I32, "or", Instruction::branch_i32_or); + test_for(ValueType::I32, "xor", Instruction::branch_i32_xor); test_for(ValueType::I32, "eq", Instruction::branch_i32_eq); test_for(ValueType::I32, "ne", Instruction::branch_i32_ne); test_for(ValueType::I32, "lt_s", Instruction::branch_i32_lt_s); @@ -117,6 +120,10 @@ fn loop_backward_imm() { ]) .run() } + + test_for::<i32>("and", 1, Instruction::branch_i32_and_imm); + test_for::<i32>("or", 1, Instruction::branch_i32_or_imm); + test_for::<i32>("xor", 1, Instruction::branch_i32_xor_imm); test_for::<i32>("eq", 1, Instruction::branch_i32_eq_imm); test_for::<i32>("ne", 1, Instruction::branch_i32_ne_imm); test_for::<i32>("lt_s", 1, Instruction::branch_i32_lt_s_imm); @@ -202,6 +209,9 @@ fn block_forward() { .run() } + test_for(ValueType::I32, "and", Instruction::branch_i32_and); + test_for(ValueType::I32, "or", Instruction::branch_i32_or); + test_for(ValueType::I32, "xor", Instruction::branch_i32_xor); test_for(ValueType::I32, "eq", Instruction::branch_i32_eq); test_for(ValueType::I32, "ne", Instruction::branch_i32_ne); test_for(ValueType::I32, "lt_s", Instruction::branch_i32_lt_s); @@ -279,6 +289,9 @@ fn block_forward_nop_copy() { .run() } + test_for(ValueType::I32, "and", Instruction::branch_i32_and); + test_for(ValueType::I32, "or", Instruction::branch_i32_or); + test_for(ValueType::I32, "xor", Instruction::branch_i32_xor); test_for(ValueType::I32, "eq", Instruction::branch_i32_eq); test_for(ValueType::I32, "ne", Instruction::branch_i32_ne); test_for(ValueType::I32, "lt_s", Instruction::branch_i32_lt_s); @@ -356,6 +369,9 @@ fn if_forward_multi_value() { .run() } + test_for(ValueType::I32, "and", Instruction::branch_i32_and_eqz); + test_for(ValueType::I32, "or", Instruction::branch_i32_or_eqz); + test_for(ValueType::I32, "xor", Instruction::branch_i32_xor_eqz); test_for(ValueType::I32, "eq", Instruction::branch_i32_ne); test_for(ValueType::I32, "ne", Instruction::branch_i32_eq); test_for(ValueType::I32, 
"lt_s", Instruction::branch_i32_ge_s); @@ -414,6 +430,9 @@ fn if_forward() { .run() } + test_for(ValueType::I32, "and", Instruction::branch_i32_and_eqz); + test_for(ValueType::I32, "or", Instruction::branch_i32_or_eqz); + test_for(ValueType::I32, "xor", Instruction::branch_i32_xor_eqz); test_for(ValueType::I32, "eq", Instruction::branch_i32_ne); test_for(ValueType::I32, "ne", Instruction::branch_i32_eq); test_for(ValueType::I32, "lt_s", Instruction::branch_i32_ge_s); @@ -436,3 +455,70 @@ fn if_forward() { test_for(ValueType::I64, "ge_s", Instruction::branch_i64_lt_s); test_for(ValueType::I64, "ge_u", Instruction::branch_i64_lt_u); } + +#[test] +#[cfg_attr(miri, ignore)] +fn block_i32_eqz_fuse() { + fn test_for(op: &str, expect_instr: fn(Register, Register, BranchOffset16) -> Instruction) { + let wasm = wat2wasm(&format!( + r" + (module + (func (param i32 i32) + (block + (local.get 0) + (local.get 1) + (i32.{op}) + (i32.eqz) + (br_if 0) + ) + ) + )", + )); + TranslationTest::new(wasm) + .expect_func_instrs([ + expect_instr( + Register::from_i16(0), + Register::from_i16(1), + BranchOffset16::from(1), + ), + Instruction::Return, + ]) + .run() + } + + test_for("and", Instruction::branch_i32_and_eqz); + test_for("or", Instruction::branch_i32_or_eqz); + test_for("xor", Instruction::branch_i32_xor_eqz); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn if_i32_eqz_fuse() { + fn test_for(op: &str, expect_instr: fn(Register, Register, BranchOffset16) -> Instruction) { + let wasm = wat2wasm(&format!( + r" + (module + (func (param i32 i32) + (if + (i32.eqz (i32.{op} (local.get 0) (local.get 1))) + (then) + ) + ) + )", + )); + TranslationTest::new(wasm) + .expect_func_instrs([ + expect_instr( + Register::from_i16(0), + Register::from_i16(1), + BranchOffset16::from(1), + ), + Instruction::Return, + ]) + .run() + } + + test_for("and", Instruction::branch_i32_and); + test_for("or", Instruction::branch_i32_or); + test_for("xor", Instruction::branch_i32_xor); +} diff --git 
a/crates/wasmi/src/engine/regmach/tests/op/i32_eqz.rs b/crates/wasmi/src/engine/regmach/tests/op/i32_eqz.rs new file mode 100644 index 0000000000..38d59c5693 --- /dev/null +++ b/crates/wasmi/src/engine/regmach/tests/op/i32_eqz.rs @@ -0,0 +1,69 @@ +use super::*; + +#[test] +#[cfg_attr(miri, ignore)] +fn binop_i32_eqz() { + fn test_for( + op: &str, + expect_instr: fn(result: Register, lhs: Register, rhs: Register) -> Instruction, + ) { + let wasm = wat2wasm(&format!( + r" + (module + (func (param i32 i32) (result i32) + (local.get 0) + (local.get 1) + (i32.{op}) + (i32.eqz) + ) + )", + )); + TranslationTest::new(wasm) + .expect_func_instrs([ + expect_instr( + Register::from_i16(2), + Register::from_i16(0), + Register::from_i16(1), + ), + Instruction::return_reg(2), + ]) + .run() + } + test_for("and", Instruction::i32_and_eqz); + test_for("or", Instruction::i32_or_eqz); + test_for("xor", Instruction::i32_xor_eqz); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn binop_imm_i32_eqz() { + fn test_for( + op: &str, + expect_instr: fn(result: Register, lhs: Register, rhs: Const16) -> Instruction, + ) { + let wasm = wat2wasm(&format!( + r" + (module + (func (param i32 i32) (result i32) + (local.get 0) + (i32.const 1) + (i32.{op}) + (i32.eqz) + ) + )", + )); + TranslationTest::new(wasm) + .expect_func_instrs([ + expect_instr( + Register::from_i16(2), + Register::from_i16(0), + Const16::from(1), + ), + Instruction::return_reg(2), + ]) + .run() + } + test_for("and", Instruction::i32_and_eqz_imm16); + test_for("or", Instruction::i32_or_eqz_imm16); + test_for("xor", Instruction::i32_xor_eqz_imm16); +} diff --git a/crates/wasmi/src/engine/regmach/tests/op/mod.rs b/crates/wasmi/src/engine/regmach/tests/op/mod.rs index 0eab96f2d2..2f4e5b87ce 100644 --- a/crates/wasmi/src/engine/regmach/tests/op/mod.rs +++ b/crates/wasmi/src/engine/regmach/tests/op/mod.rs @@ -8,6 +8,7 @@ mod cmp; mod cmp_br; mod global_get; mod global_set; +mod i32_eqz; mod if_; mod load; mod local_set; diff --git 
a/crates/wasmi/src/engine/regmach/translator/instr_encoder.rs b/crates/wasmi/src/engine/regmach/translator/instr_encoder.rs index 9acd5ef197..3c7f5f3ab9 100644 --- a/crates/wasmi/src/engine/regmach/translator/instr_encoder.rs +++ b/crates/wasmi/src/engine/regmach/translator/instr_encoder.rs @@ -777,6 +777,61 @@ impl InstrEncoder { Ok(()) } + /// Translates a Wasm `i32.eqz` instruction. + /// + /// Tries to fuse `i32.eqz` with a previous `i32.{and,or,xor}` instruction if possible. + /// Returns `true` if it was possible to fuse the `i32.eqz` instruction. + pub fn fuse_i32_eqz(&mut self, stack: &mut ValueStack) -> bool { + /// Fuse a `i32.{and,or,xor}` instruction with `i32.eqz`. + macro_rules! fuse { + ($instr:ident, $stack:ident, $make_fuse:expr) => {{ + if matches!( + $stack.get_register_space($instr.result), + RegisterSpace::Local + ) { + return false; + } + $make_fuse($instr.result, $instr.lhs, $instr.rhs) + }}; + } + + /// Fuse a `i32.{and,or,xor}` instruction with 16-bit encoded immediate parameter with `i32.eqz`. + macro_rules! fuse_imm16 { + ($instr:ident, $stack:ident, $make_fuse:expr) => {{ + if matches!( + $stack.get_register_space($instr.result), + RegisterSpace::Local + ) { + // Must not fuse instruction that store to local registers since + // this behavior is observable and would not be semantics preserving. 
+ return false; + } + $make_fuse($instr.result, $instr.reg_in, $instr.imm_in) + }}; + } + + let Some(last_instr) = self.last_instr else { + return false; + }; + let fused_instr = match self.instrs.get(last_instr) { + Instruction::I32And(instr) => fuse!(instr, stack, Instruction::i32_and_eqz), + Instruction::I32AndImm16(instr) => { + fuse_imm16!(instr, stack, Instruction::i32_and_eqz_imm16) + } + Instruction::I32Or(instr) => fuse!(instr, stack, Instruction::i32_or_eqz), + Instruction::I32OrImm16(instr) => { + fuse_imm16!(instr, stack, Instruction::i32_or_eqz_imm16) + } + Instruction::I32Xor(instr) => fuse!(instr, stack, Instruction::i32_xor_eqz), + Instruction::I32XorImm16(instr) => { + fuse_imm16!(instr, stack, Instruction::i32_xor_eqz_imm16) + } + _ => return false, + }; + _ = mem::replace(self.instrs.get_mut(last_instr), fused_instr); + true + } + /// Encodes a `branch_eqz` instruction and tries to fuse it with a previous comparison instruction. pub fn encode_branch_eqz( &mut self, @@ -875,6 +930,12 @@ impl InstrEncoder { } } } + I::I32And(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_and_eqz as _)?, + I::I32Or(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_or_eqz as _)?, + I::I32Xor(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_xor_eqz as _)?, + I::I32AndEqz(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_and as _)?, + I::I32OrEqz(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_or as _)?, + I::I32XorEqz(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_xor as _)?, I::I32Eq(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_ne as _)?, I::I32Ne(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_eq as _)?, I::I32LtS(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_ge_s as _)?, @@ -898,6 +959,12 @@ impl InstrEncoder { I::F32Eq(instr) => fuse(self, stack, last_instr, instr, label, 
I::branch_f32_ne as _)?, I::F32Ne(instr) => fuse(self, stack, last_instr, instr, label, I::branch_f32_eq as _)?, // Note: We cannot fuse cmp+branch for float comparison operators due to how NaN values are treated. + I::I32AndImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_and_eqz_imm as _)?, + I::I32OrImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_or_eqz_imm as _)?, + I::I32XorImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_xor_eqz_imm as _)?, + I::I32AndEqzImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_and_imm as _)?, + I::I32OrEqzImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_or_imm as _)?, + I::I32XorEqzImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_xor_imm as _)?, I::I32EqImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_ne_imm as _)?, I::I32NeImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_eq_imm as _)?, I::I32LtSImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_ge_s_imm as _)?, @@ -1025,6 +1092,12 @@ impl InstrEncoder { } } } + I::I32And(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_and as _)?, + I::I32Or(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_or as _)?, + I::I32Xor(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_xor as _)?, + I::I32AndEqz(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_and_eqz as _)?, + I::I32OrEqz(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_or_eqz as _)?, + I::I32XorEqz(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_xor_eqz as _)?, I::I32Eq(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_eq as _)?, I::I32Ne(instr) => fuse(self, stack, last_instr, instr, label, I::branch_i32_ne as _)?, I::I32LtS(instr) => fuse(self, stack, 
last_instr, instr, label, I::branch_i32_lt_s as _)?, @@ -1057,6 +1130,12 @@ impl InstrEncoder { I::F64Le(instr) => fuse(self, stack, last_instr, instr, label, I::branch_f64_le as _)?, I::F64Gt(instr) => fuse(self, stack, last_instr, instr, label, I::branch_f64_gt as _)?, I::F64Ge(instr) => fuse(self, stack, last_instr, instr, label, I::branch_f64_ge as _)?, + I::I32AndImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_and_imm as _)?, + I::I32OrImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_or_imm as _)?, + I::I32XorImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_xor_imm as _)?, + I::I32AndEqzImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_and_eqz_imm as _)?, + I::I32OrEqzImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_or_eqz_imm as _)?, + I::I32XorEqzImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_xor_eqz_imm as _)?, I::I32EqImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_eq_imm as _)?, I::I32NeImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_ne_imm as _)?, I::I32LtSImm16(instr) => fuse_imm(self, stack, last_instr, instr, label, I::branch_i32_lt_s_imm as _)?, @@ -1104,7 +1183,13 @@ impl Instruction { offset.init(new_offset); Ok(()) } - Instruction::BranchI32Eq(instr) + Instruction::BranchI32And(instr) + | Instruction::BranchI32Or(instr) + | Instruction::BranchI32Xor(instr) + | Instruction::BranchI32AndEqz(instr) + | Instruction::BranchI32OrEqz(instr) + | Instruction::BranchI32XorEqz(instr) + | Instruction::BranchI32Eq(instr) | Instruction::BranchI32Ne(instr) | Instruction::BranchI32LtS(instr) | Instruction::BranchI32LtU(instr) @@ -1136,7 +1221,13 @@ impl Instruction { | Instruction::BranchF64Le(instr) | Instruction::BranchF64Gt(instr) | Instruction::BranchF64Ge(instr) => instr.offset.init(new_offset), - 
Instruction::BranchI32EqImm(instr) + Instruction::BranchI32AndImm(instr) + | Instruction::BranchI32OrImm(instr) + | Instruction::BranchI32XorImm(instr) + | Instruction::BranchI32AndEqzImm(instr) + | Instruction::BranchI32OrEqzImm(instr) + | Instruction::BranchI32XorEqzImm(instr) + | Instruction::BranchI32EqImm(instr) | Instruction::BranchI32NeImm(instr) | Instruction::BranchI32LtSImm(instr) | Instruction::BranchI32LeSImm(instr) diff --git a/crates/wasmi/src/engine/regmach/translator/mod.rs b/crates/wasmi/src/engine/regmach/translator/mod.rs index e20c28580d..45d8474476 100644 --- a/crates/wasmi/src/engine/regmach/translator/mod.rs +++ b/crates/wasmi/src/engine/regmach/translator/mod.rs @@ -1745,287 +1745,291 @@ impl<'parser> FuncTranslator<'parser> { Ok(()) } }, - TypedProvider::Register(condition) => match (lhs, rhs) { - (TypedProvider::Register(lhs), TypedProvider::Register(rhs)) => { - if lhs == rhs { - // # Optimization - // - // Both `lhs` and `rhs` are equal registers - // and thus will always yield the same value. 
- self.alloc.stack.push_register(lhs)?; - return Ok(()); - } - let result = self.alloc.stack.push_dynamic()?; - self.alloc - .instr_encoder - .push_instr(Instruction::select(result, condition, lhs))?; - self.alloc - .instr_encoder - .append_instr(Instruction::Register(rhs))?; - Ok(()) - } - (TypedProvider::Register(lhs), TypedProvider::Const(rhs)) => { - if let Some(type_hint) = type_hint { - debug_assert_eq!(rhs.ty(), type_hint); - } - let result = self.alloc.stack.push_dynamic()?; - match rhs.ty() { - ValueType::I32 => encode_select_imm32( - self, - result, - condition, - lhs, - i32::from(rhs), - Instruction::select, - ), - ValueType::F32 => encode_select_imm32( - self, - result, - condition, - lhs, - f32::from(rhs), - Instruction::select, - ), - ValueType::I64 => encode_select_imm64( - self, - result, - condition, - lhs, - i64::from(rhs), - Instruction::select, - Instruction::i64const32, - ), - ValueType::F64 => encode_select_imm64( - self, - result, - condition, - lhs, - f64::from(rhs), - Instruction::select, - Instruction::f64const32, - ), - ValueType::FuncRef | ValueType::ExternRef => encode_select_imm( - self, - result, - condition, - lhs, - rhs, - Instruction::select, - ), - } - } - (TypedProvider::Const(lhs), TypedProvider::Register(rhs)) => { - if let Some(type_hint) = type_hint { - debug_assert_eq!(lhs.ty(), type_hint); - } - let result = self.alloc.stack.push_dynamic()?; - match lhs.ty() { - ValueType::I32 => encode_select_imm32( - self, - result, - condition, - rhs, - i32::from(lhs), - Instruction::select_rev, - ), - ValueType::F32 => encode_select_imm32( - self, - result, - condition, - rhs, - f32::from(lhs), - Instruction::select_rev, - ), - ValueType::I64 => encode_select_imm64( - self, - result, - condition, - rhs, - i64::from(lhs), - Instruction::select_rev, - Instruction::i64const32, - ), - ValueType::F64 => encode_select_imm64( - self, - result, - condition, - rhs, - f64::from(lhs), - Instruction::select_rev, - Instruction::f64const32, - ), - 
ValueType::FuncRef | ValueType::ExternRef => encode_select_imm( - self, - result, - condition, - rhs, - lhs, - Instruction::select_rev, - ), - } - } - (TypedProvider::Const(lhs), TypedProvider::Const(rhs)) => { - /// Convenience function to encode a `select` instruction. - /// - /// # Note - /// - /// Helper for `select` instructions where both - /// `lhs` and `rhs` are 32-bit constant values. - fn encode_select_imm32>( - this: &mut FuncTranslator<'_>, - result: Register, - condition: Register, - lhs: T, - rhs: T, - ) -> Result<(), TranslationError> { - this.alloc - .instr_encoder - .push_instr(Instruction::select_imm32(result, lhs))?; - this.alloc - .instr_encoder - .append_instr(Instruction::select_imm32(condition, rhs))?; - Ok(()) - } - - /// Convenience function to encode a `select` instruction. - /// - /// # Note - /// - /// Helper for `select` instructions where both - /// `lhs` and `rhs` are 64-bit constant values. - fn encode_select_imm64( - this: &mut FuncTranslator<'_>, - result: Register, - condition: Register, - lhs: T, - rhs: T, - make_instr: fn( - result_or_condition: Register, - lhs_or_rhs: Const32, - ) -> Instruction, - make_param: fn(Const32) -> Instruction, - ) -> Result<(), TranslationError> - where - T: Copy + Into, - Const32: TryFrom, - { - let lhs32 = >::try_from(lhs).ok(); - let rhs32 = >::try_from(rhs).ok(); - match (lhs32, rhs32) { - (Some(lhs), Some(rhs)) => { - this.alloc - .instr_encoder - .push_instr(make_instr(result, lhs))?; - this.alloc - .instr_encoder - .append_instr(make_instr(condition, rhs))?; - Ok(()) - } - (Some(lhs), None) => { - let rhs = this.alloc.stack.alloc_const(rhs)?; - this.alloc - .instr_encoder - .push_instr(Instruction::select_rev(result, condition, rhs))?; - this.alloc.instr_encoder.append_instr(make_param(lhs))?; - Ok(()) - } - (None, Some(rhs)) => { - let lhs = this.alloc.stack.alloc_const(lhs)?; - this.alloc - .instr_encoder - .push_instr(Instruction::select(result, condition, lhs))?; - 
this.alloc.instr_encoder.append_instr(make_param(rhs))?; - Ok(()) - } - (None, None) => encode_select_imm(this, result, condition, lhs, rhs), + TypedProvider::Register(condition) => { + match (lhs, rhs) { + (TypedProvider::Register(lhs), TypedProvider::Register(rhs)) => { + if lhs == rhs { + // # Optimization + // + // Both `lhs` and `rhs` are equal registers + // and thus will always yield the same value. + self.alloc.stack.push_register(lhs)?; + return Ok(()); } - } - - /// Convenience function to encode a `select` instruction. - /// - /// # Note - /// - /// Helper for `select` instructions where both `lhs` - /// and `rhs` are function local constant values. - fn encode_select_imm( - this: &mut FuncTranslator<'_>, - result: Register, - condition: Register, - lhs: T, - rhs: T, - ) -> Result<(), TranslationError> - where - T: Into, - { - let lhs = this.alloc.stack.alloc_const(lhs)?; - let rhs = this.alloc.stack.alloc_const(rhs)?; - this.alloc + let result = self.alloc.stack.push_dynamic()?; + self.alloc .instr_encoder .push_instr(Instruction::select(result, condition, lhs))?; - this.alloc + self.alloc .instr_encoder .append_instr(Instruction::Register(rhs))?; Ok(()) } - - debug_assert_eq!(lhs.ty(), rhs.ty()); - if let Some(type_hint) = type_hint { - debug_assert_eq!(lhs.ty(), type_hint); - } - if lhs == rhs { - // # Optimization - // - // Both `lhs` and `rhs` are equal constant values - // and thus will always yield the same value. 
- self.alloc.stack.push_const(lhs); - return Ok(()); - } - let result = self.alloc.stack.push_dynamic()?; - match lhs.ty() { - ValueType::I32 => { - encode_select_imm32( + (TypedProvider::Register(lhs), TypedProvider::Const(rhs)) => { + if let Some(type_hint) = type_hint { + debug_assert_eq!(rhs.ty(), type_hint); + } + let result = self.alloc.stack.push_dynamic()?; + match rhs.ty() { + ValueType::I32 => encode_select_imm32( self, result, condition, - i32::from(lhs), + lhs, i32::from(rhs), - )?; - Ok(()) + Instruction::select, + ), + ValueType::F32 => encode_select_imm32( + self, + result, + condition, + lhs, + f32::from(rhs), + Instruction::select, + ), + ValueType::I64 => encode_select_imm64( + self, + result, + condition, + lhs, + i64::from(rhs), + Instruction::select, + Instruction::i64const32, + ), + ValueType::F64 => encode_select_imm64( + self, + result, + condition, + lhs, + f64::from(rhs), + Instruction::select, + Instruction::f64const32, + ), + ValueType::FuncRef | ValueType::ExternRef => encode_select_imm( + self, + result, + condition, + lhs, + rhs, + Instruction::select, + ), + } + } + (TypedProvider::Const(lhs), TypedProvider::Register(rhs)) => { + if let Some(type_hint) = type_hint { + debug_assert_eq!(lhs.ty(), type_hint); } - ValueType::F32 => { - encode_select_imm32( + let result = self.alloc.stack.push_dynamic()?; + match lhs.ty() { + ValueType::I32 => encode_select_imm32( + self, + result, + condition, + rhs, + i32::from(lhs), + Instruction::select_rev, + ), + ValueType::F32 => encode_select_imm32( self, result, condition, + rhs, f32::from(lhs), - f32::from(rhs), - )?; + Instruction::select_rev, + ), + ValueType::I64 => encode_select_imm64( + self, + result, + condition, + rhs, + i64::from(lhs), + Instruction::select_rev, + Instruction::i64const32, + ), + ValueType::F64 => encode_select_imm64( + self, + result, + condition, + rhs, + f64::from(lhs), + Instruction::select_rev, + Instruction::f64const32, + ), + ValueType::FuncRef | 
ValueType::ExternRef => encode_select_imm( + self, + result, + condition, + rhs, + lhs, + Instruction::select_rev, + ), + } + } + (TypedProvider::Const(lhs), TypedProvider::Const(rhs)) => { + /// Convenience function to encode a `select` instruction. + /// + /// # Note + /// + /// Helper for `select` instructions where both + /// `lhs` and `rhs` are 32-bit constant values. + fn encode_select_imm32>( + this: &mut FuncTranslator<'_>, + result: Register, + condition: Register, + lhs: T, + rhs: T, + ) -> Result<(), TranslationError> { + this.alloc + .instr_encoder + .push_instr(Instruction::select_imm32(result, lhs))?; + this.alloc + .instr_encoder + .append_instr(Instruction::select_imm32(condition, rhs))?; + Ok(()) + } + + /// Convenience function to encode a `select` instruction. + /// + /// # Note + /// + /// Helper for `select` instructions where both + /// `lhs` and `rhs` are 64-bit constant values. + fn encode_select_imm64( + this: &mut FuncTranslator<'_>, + result: Register, + condition: Register, + lhs: T, + rhs: T, + make_instr: fn( + result_or_condition: Register, + lhs_or_rhs: Const32, + ) -> Instruction, + make_param: fn(Const32) -> Instruction, + ) -> Result<(), TranslationError> + where + T: Copy + Into, + Const32: TryFrom, + { + let lhs32 = >::try_from(lhs).ok(); + let rhs32 = >::try_from(rhs).ok(); + match (lhs32, rhs32) { + (Some(lhs), Some(rhs)) => { + this.alloc + .instr_encoder + .push_instr(make_instr(result, lhs))?; + this.alloc + .instr_encoder + .append_instr(make_instr(condition, rhs))?; + Ok(()) + } + (Some(lhs), None) => { + let rhs = this.alloc.stack.alloc_const(rhs)?; + this.alloc.instr_encoder.push_instr( + Instruction::select_rev(result, condition, rhs), + )?; + this.alloc.instr_encoder.append_instr(make_param(lhs))?; + Ok(()) + } + (None, Some(rhs)) => { + let lhs = this.alloc.stack.alloc_const(lhs)?; + this.alloc + .instr_encoder + .push_instr(Instruction::select(result, condition, lhs))?; + 
this.alloc.instr_encoder.append_instr(make_param(rhs))?; + Ok(()) + } + (None, None) => { + encode_select_imm(this, result, condition, lhs, rhs) + } + } + } + + /// Convenience function to encode a `select` instruction. + /// + /// # Note + /// + /// Helper for `select` instructions where both `lhs` + /// and `rhs` are function local constant values. + fn encode_select_imm( + this: &mut FuncTranslator<'_>, + result: Register, + condition: Register, + lhs: T, + rhs: T, + ) -> Result<(), TranslationError> + where + T: Into, + { + let lhs = this.alloc.stack.alloc_const(lhs)?; + let rhs = this.alloc.stack.alloc_const(rhs)?; + this.alloc + .instr_encoder + .push_instr(Instruction::select(result, condition, lhs))?; + this.alloc + .instr_encoder + .append_instr(Instruction::Register(rhs))?; Ok(()) } - ValueType::I64 => encode_select_imm64( - self, - result, - condition, - i64::from(lhs), - i64::from(rhs), - Instruction::select_i64imm32, - Instruction::i64const32, - ), - ValueType::F64 => encode_select_imm64( - self, - result, - condition, - f64::from(lhs), - f64::from(rhs), - Instruction::select_f64imm32, - Instruction::f64const32, - ), - ValueType::FuncRef | ValueType::ExternRef => { - encode_select_imm(self, result, condition, lhs, rhs) + + debug_assert_eq!(lhs.ty(), rhs.ty()); + if let Some(type_hint) = type_hint { + debug_assert_eq!(lhs.ty(), type_hint); + } + if lhs == rhs { + // # Optimization + // + // Both `lhs` and `rhs` are equal constant values + // and thus will always yield the same value. 
+ self.alloc.stack.push_const(lhs); + return Ok(()); + } + let result = self.alloc.stack.push_dynamic()?; + match lhs.ty() { + ValueType::I32 => { + encode_select_imm32( + self, + result, + condition, + i32::from(lhs), + i32::from(rhs), + )?; + Ok(()) + } + ValueType::F32 => { + encode_select_imm32( + self, + result, + condition, + f32::from(lhs), + f32::from(rhs), + )?; + Ok(()) + } + ValueType::I64 => encode_select_imm64( + self, + result, + condition, + i64::from(lhs), + i64::from(rhs), + Instruction::select_i64imm32, + Instruction::i64const32, + ), + ValueType::F64 => encode_select_imm64( + self, + result, + condition, + f64::from(lhs), + f64::from(rhs), + Instruction::select_f64imm32, + Instruction::f64const32, + ), + ValueType::FuncRef | ValueType::ExternRef => { + encode_select_imm(self, result, condition, lhs, rhs) + } } } } - }, + } } } diff --git a/crates/wasmi/src/engine/regmach/translator/result_mut.rs b/crates/wasmi/src/engine/regmach/translator/result_mut.rs index f3d4f9c525..4e4547958e 100644 --- a/crates/wasmi/src/engine/regmach/translator/result_mut.rs +++ b/crates/wasmi/src/engine/regmach/translator/result_mut.rs @@ -70,6 +70,18 @@ impl Instruction { Instruction::BranchEqz { .. } | Instruction::BranchNez { .. } | Instruction::BranchTable { .. 
} => None, + Instruction::BranchI32And(_) | + Instruction::BranchI32AndImm(_) | + Instruction::BranchI32Or(_) | + Instruction::BranchI32OrImm(_) | + Instruction::BranchI32Xor(_) | + Instruction::BranchI32XorImm(_) | + Instruction::BranchI32AndEqz(_) | + Instruction::BranchI32AndEqzImm(_) | + Instruction::BranchI32OrEqz(_) | + Instruction::BranchI32OrEqzImm(_) | + Instruction::BranchI32XorEqz(_) | + Instruction::BranchI32XorEqzImm(_) | Instruction::BranchI32Eq(_) | Instruction::BranchI32EqImm(_) | Instruction::BranchI32Ne(_) | @@ -396,15 +408,21 @@ impl Instruction { Instruction::I32RemUImm16Rev(instr) => instr.result_mut(), Instruction::I64RemUImm16Rev(instr) => instr.result_mut(), Instruction::I32And(instr) | + Instruction::I32AndEqz(instr) | Instruction::I64And(instr) => instr.result_mut(), + Instruction::I32AndEqzImm16(instr) | Instruction::I32AndImm16(instr) => instr.result_mut(), Instruction::I64AndImm16(instr) => instr.result_mut(), Instruction::I32Or(instr) | + Instruction::I32OrEqz(instr) | Instruction::I64Or(instr) => instr.result_mut(), + Instruction::I32OrEqzImm16(instr) | Instruction::I32OrImm16(instr) => instr.result_mut(), Instruction::I64OrImm16(instr) => instr.result_mut(), Instruction::I32Xor(instr) | + Instruction::I32XorEqz(instr) | Instruction::I64Xor(instr) => instr.result_mut(), + Instruction::I32XorEqzImm16(instr) | Instruction::I32XorImm16(instr) => instr.result_mut(), Instruction::I64XorImm16(instr) => instr.result_mut(), Instruction::I32Shl(instr) | diff --git a/crates/wasmi/src/engine/regmach/translator/visit.rs b/crates/wasmi/src/engine/regmach/translator/visit.rs index 0162f73d87..1c348ee11b 100644 --- a/crates/wasmi/src/engine/regmach/translator/visit.rs +++ b/crates/wasmi/src/engine/regmach/translator/visit.rs @@ -1200,6 +1200,10 @@ impl<'a> VisitOperator<'a> for FuncTranslator<'a> { fn visit_i32_eqz(&mut self) -> Self::Output { bail_unreachable!(self); + if self.alloc.instr_encoder.fuse_i32_eqz(&mut self.alloc.stack) { + // 
Optimization of `i32.eqz` was applied so we can bail out. + return Ok(()); + } // Push a zero on the value stack so we can translate `i32.eqz` as `i32.eq(x, 0)`. self.alloc.stack.push_const(0_i32); self.visit_i32_eq() diff --git a/crates/wasmi/src/engine/regmach/translator/visit_register.rs b/crates/wasmi/src/engine/regmach/translator/visit_register.rs index c8dd1ea489..731e7302d7 100644 --- a/crates/wasmi/src/engine/regmach/translator/visit_register.rs +++ b/crates/wasmi/src/engine/regmach/translator/visit_register.rs @@ -83,6 +83,18 @@ impl VisitInputRegisters for Instruction { Instruction::BranchNez { condition, .. } => f(condition), Instruction::BranchTable { index, .. } => f(index), + Instruction::BranchI32And(instr) => instr.visit_input_registers(f), + Instruction::BranchI32AndImm(instr) => instr.visit_input_registers(f), + Instruction::BranchI32Or(instr) => instr.visit_input_registers(f), + Instruction::BranchI32OrImm(instr) => instr.visit_input_registers(f), + Instruction::BranchI32Xor(instr) => instr.visit_input_registers(f), + Instruction::BranchI32XorImm(instr) => instr.visit_input_registers(f), + Instruction::BranchI32AndEqz(instr) => instr.visit_input_registers(f), + Instruction::BranchI32AndEqzImm(instr) => instr.visit_input_registers(f), + Instruction::BranchI32OrEqz(instr) => instr.visit_input_registers(f), + Instruction::BranchI32OrEqzImm(instr) => instr.visit_input_registers(f), + Instruction::BranchI32XorEqz(instr) => instr.visit_input_registers(f), + Instruction::BranchI32XorEqzImm(instr) => instr.visit_input_registers(f), Instruction::BranchI32Eq(instr) => instr.visit_input_registers(f), Instruction::BranchI32EqImm(instr) => instr.visit_input_registers(f), Instruction::BranchI32Ne(instr) => instr.visit_input_registers(f), @@ -425,16 +437,22 @@ impl VisitInputRegisters for Instruction { Instruction::I32RemUImm16Rev(instr) => instr.visit_input_registers(f), Instruction::I64RemUImm16Rev(instr) => instr.visit_input_registers(f), 
Instruction::I32And(instr) => instr.visit_input_registers(f), - Instruction::I64And(instr) => instr.visit_input_registers(f), + Instruction::I32AndEqz(instr) => instr.visit_input_registers(f), + Instruction::I32AndEqzImm16(instr) => instr.visit_input_registers(f), Instruction::I32AndImm16(instr) => instr.visit_input_registers(f), + Instruction::I64And(instr) => instr.visit_input_registers(f), Instruction::I64AndImm16(instr) => instr.visit_input_registers(f), Instruction::I32Or(instr) => instr.visit_input_registers(f), - Instruction::I64Or(instr) => instr.visit_input_registers(f), + Instruction::I32OrEqz(instr) => instr.visit_input_registers(f), + Instruction::I32OrEqzImm16(instr) => instr.visit_input_registers(f), Instruction::I32OrImm16(instr) => instr.visit_input_registers(f), + Instruction::I64Or(instr) => instr.visit_input_registers(f), Instruction::I64OrImm16(instr) => instr.visit_input_registers(f), Instruction::I32Xor(instr) => instr.visit_input_registers(f), - Instruction::I64Xor(instr) => instr.visit_input_registers(f), + Instruction::I32XorEqz(instr) => instr.visit_input_registers(f), + Instruction::I32XorEqzImm16(instr) => instr.visit_input_registers(f), Instruction::I32XorImm16(instr) => instr.visit_input_registers(f), + Instruction::I64Xor(instr) => instr.visit_input_registers(f), Instruction::I64XorImm16(instr) => instr.visit_input_registers(f), Instruction::I32Shl(instr) => instr.visit_input_registers(f), Instruction::I64Shl(instr) => instr.visit_input_registers(f),