Skip to content

Commit

Permalink
Fuse i32.{and,or, xor} + [i32.eqz] + br_if Wasm instructions (#796
Browse files Browse the repository at this point in the history
)

* add i32.branch_{and,or,xor}[_imm] instructions

There is no need for i64 counterparts since in Wasm only i32 types are used as conditional "bool" types.

* add i32.branch_{nand, nor, xnor}[_imm] instructions

We added these instructions to provide optimizations for encode_eqz.

* rename new branch instructions

* add fusion of i32.{and,or,xor} + i32.eqz

* add forgotten i32.{and,or,xor}+i32.eqz+branch translations

* add fuse benchmark to showcase perf gains

* bump count_until limit to make it less noisy

* fix bug in executor for new fuse instructions

* add i32.{and,or,xor} + i32.eqz fusion tests

* add i32.{and,or,xor} + i32.eqz + br_if fuse tests
  • Loading branch information
Robbepop authored Nov 24, 2023
1 parent ea3a29c commit 91a956b
Show file tree
Hide file tree
Showing 15 changed files with 757 additions and 276 deletions.
21 changes: 20 additions & 1 deletion crates/wasmi/benches/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ criterion_group! {
bench_execute_recursive_scan,
bench_execute_recursive_trap,
bench_execute_host_calls,
bench_execute_fuse,
bench_execute_fibonacci,
bench_execute_recursive_is_even,
bench_execute_memory_sum,
Expand Down Expand Up @@ -421,7 +422,7 @@ fn bench_execute_regex_redux(c: &mut Criterion) {
}

fn bench_execute_count_until(c: &mut Criterion) {
const COUNT_UNTIL: i32 = 100_000;
const COUNT_UNTIL: i32 = 1_000_000;
c.bench_function("execute/count_until", |b| {
let (mut store, instance) = load_instance_from_wat(include_bytes!("wat/count_until.wat"));
let count_until = instance
Expand Down Expand Up @@ -887,6 +888,24 @@ fn bench_execute_host_calls(c: &mut Criterion) {
});
}

/// Benchmarks the `test` export of `wat/fuse.wat`.
///
/// The exported function is called with a fixed input and every call is
/// checked to return that same input.
fn bench_execute_fuse(c: &mut Criterion) {
    /// Input handed to the exported function on every benchmark iteration.
    const INPUT: i32 = 1_000_000;

    let (mut store, instance) = load_instance_from_wat(include_bytes!("wat/fuse.wat"));
    let mut run_case = |bench_id: &str, func_name: &str, input: i32| {
        c.bench_function(bench_id, |bencher| {
            // Resolve the export once per benchmark, outside of the measured loop.
            let export = instance.get_export(&store, func_name);
            let func = export.and_then(Extern::into_func).unwrap();
            let typed = func.typed::<i32, i32>(&store).unwrap();
            bencher.iter(|| {
                assert_eq!(typed.call(&mut store, input).unwrap(), input);
            });
        });
    };
    run_case("execute/fuse", "test", INPUT);
}

fn bench_execute_fibonacci(c: &mut Criterion) {
const fn fib(n: i64) -> i64 {
if n <= 1 {
Expand Down
27 changes: 27 additions & 0 deletions crates/wasmi/benches/wat/fuse.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
;; Benchmark module exporting `test`: counts `$i` upwards in steps of 1 and
;; returns it once it equals the parameter `$n`.
(module
(func (export "test") (param $n i32) (result i32)
;; Loop counter; Wasm locals start out as zero.
(local $i i32)
(loop $continue
;; i += 1
(local.set $i
(i32.add
(local.get $i)
(i32.const 1)
)
)
;; if not((i >= n) and (i <= n)) then continue
;; Note: The above is equal to:
;; if i != n then continue
;; The condition is spelled as `i32.and` + `i32.eqz` + `br_if` instead of a
;; single `i32.ne`. Both comparisons are unsigned (`ge_u` / `le_u`).
;; The increment happens before the check, so for n == 0 the loop only exits
;; after `$i` has wrapped around to 0.
(br_if
$continue
(i32.eqz
(i32.and
(i32.ge_u (local.get $i) (local.get $n))
(i32.le_u (local.get $i) (local.get $n))
)
)
)
)
;; Reached once i == n: hand the counter back to the caller.
(return (local.get $i))
)
)
18 changes: 18 additions & 0 deletions crates/wasmi/src/engine/regmach/bytecode/construct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,12 @@ macro_rules! constructor_for_branch_binop {
}
}
constructor_for_branch_binop! {
fn branch_i32_and() -> Self::BranchI32And;
fn branch_i32_or() -> Self::BranchI32Or;
fn branch_i32_xor() -> Self::BranchI32Xor;
fn branch_i32_and_eqz() -> Self::BranchI32AndEqz;
fn branch_i32_or_eqz() -> Self::BranchI32OrEqz;
fn branch_i32_xor_eqz() -> Self::BranchI32XorEqz;
fn branch_i32_eq() -> Self::BranchI32Eq;
fn branch_i32_ne() -> Self::BranchI32Ne;
fn branch_i32_lt_s() -> Self::BranchI32LtS;
Expand Down Expand Up @@ -225,6 +231,12 @@ macro_rules! constructor_for_branch_binop_imm {
}
}
constructor_for_branch_binop_imm! {
fn branch_i32_and_imm(i32) -> Self::BranchI32AndImm;
fn branch_i32_or_imm(i32) -> Self::BranchI32OrImm;
fn branch_i32_xor_imm(i32) -> Self::BranchI32XorImm;
fn branch_i32_and_eqz_imm(i32) -> Self::BranchI32AndEqzImm;
fn branch_i32_or_eqz_imm(i32) -> Self::BranchI32OrEqzImm;
fn branch_i32_xor_eqz_imm(i32) -> Self::BranchI32XorEqzImm;
fn branch_i32_eq_imm(i32) -> Self::BranchI32EqImm;
fn branch_i32_ne_imm(i32) -> Self::BranchI32NeImm;
fn branch_i32_lt_s_imm(i32) -> Self::BranchI32LtSImm;
Expand Down Expand Up @@ -1555,18 +1567,24 @@ impl Instruction {
// Integer Bitwise Logic

fn i32_and(binary) -> Self::I32And;
fn i32_and_eqz(binary) -> Self::I32AndEqz;
fn i32_and_eqz_imm16(binary_i32imm16) -> Self::I32AndEqzImm16;
fn i32_and_imm16(binary_i32imm16) -> Self::I32AndImm16;

fn i64_and(binary) -> Self::I64And;
fn i64_and_imm16(binary_i64imm16) -> Self::I64AndImm16;

fn i32_or(binary) -> Self::I32Or;
fn i32_or_eqz(binary) -> Self::I32OrEqz;
fn i32_or_eqz_imm16(binary_i32imm16) -> Self::I32OrEqzImm16;
fn i32_or_imm16(binary_i32imm16) -> Self::I32OrImm16;

fn i64_or(binary) -> Self::I64Or;
fn i64_or_imm16(binary_i64imm16) -> Self::I64OrImm16;

fn i32_xor(binary) -> Self::I32Xor;
fn i32_xor_eqz(binary) -> Self::I32XorEqz;
fn i32_xor_eqz_imm16(binary_i32imm16) -> Self::I32XorEqzImm16;
fn i32_xor_imm16(binary_i32imm16) -> Self::I32XorImm16;

fn i64_xor(binary) -> Self::I64Xor;
Expand Down
77 changes: 71 additions & 6 deletions crates/wasmi/src/engine/regmach/bytecode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,56 @@ pub enum Instruction {
offset: BranchOffset,
},

/// A fused [`Instruction::I32And`] and [`Instruction::BranchNez`] instruction.
BranchI32And(BranchBinOpInstr),
/// A fused [`Instruction::I32And`] and [`Instruction::BranchNez`] instruction.
///
/// # Note
///
/// Variant of [`Instruction::BranchI32And`] with 16-bit encoded constant `rhs`.
BranchI32AndImm(BranchBinOpInstrImm<i32>),
/// A fused [`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction.
BranchI32Or(BranchBinOpInstr),
/// A fused [`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction.
///
/// # Note
///
/// Variant of [`Instruction::BranchI32Or`] with 16-bit encoded constant `rhs`.
BranchI32OrImm(BranchBinOpInstrImm<i32>),
/// A fused [`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction.
BranchI32Xor(BranchBinOpInstr),
/// A fused [`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction.
///
/// # Note
///
/// Variant of [`Instruction::BranchI32Xor`] with 16-bit encoded constant `rhs`.
BranchI32XorImm(BranchBinOpInstrImm<i32>),

/// A fused not-[`Instruction::I32And`] and [`Instruction::BranchNez`] instruction.
BranchI32AndEqz(BranchBinOpInstr),
/// A fused not-[`Instruction::I32And`] and [`Instruction::BranchNez`] instruction.
///
/// # Note
///
/// Variant of [`Instruction::BranchI32AndEqz`] with 16-bit encoded constant `rhs`.
BranchI32AndEqzImm(BranchBinOpInstrImm<i32>),
/// A fused not-[`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction.
BranchI32OrEqz(BranchBinOpInstr),
/// A fused not-[`Instruction::I32Or`] and [`Instruction::BranchNez`] instruction.
///
/// # Note
///
/// Variant of [`Instruction::BranchI32OrEqz`] with 16-bit encoded constant `rhs`.
BranchI32OrEqzImm(BranchBinOpInstrImm<i32>),
/// A fused not-[`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction.
BranchI32XorEqz(BranchBinOpInstr),
/// A fused not-[`Instruction::I32Xor`] and [`Instruction::BranchNez`] instruction.
///
/// # Note
///
/// Variant of [`Instruction::BranchI32XorEqz`] with 16-bit encoded constant `rhs`.
BranchI32XorEqzImm(BranchBinOpInstrImm<i32>),

/// A fused [`Instruction::I32Eq`] and [`Instruction::BranchNez`] instruction.
BranchI32Eq(BranchBinOpInstr),
/// A fused [`Instruction::I32Eq`] and [`Instruction::BranchNez`] instruction.
Expand Down Expand Up @@ -2677,14 +2727,19 @@ pub enum Instruction {

/// `i32` bitwise-and instruction: `r0 = r1 & r2`
I32And(BinInstr),
/// `i64` bitwise-and instruction: `r0 = r1 & r2`
I64And(BinInstr),
/// Fused Wasm `i32.and` + `i32.eqz` [`Instruction`].
I32AndEqz(BinInstr),
/// Fused Wasm `i32.and` + `i32.eqz` [`Instruction`] with 16-bit encoded immediate.
I32AndEqzImm16(BinInstrImm16<i32>),
/// `i32` bitwise-and (small) immediate instruction: `r0 = r1 & c0`
///
/// # Note
///
/// Optimized variant of [`Instruction::I32And`] for 16-bit constant values.
I32AndImm16(BinInstrImm16<i32>),

/// `i64` bitwise-and instruction: `r0 = r1 & r2`
I64And(BinInstr),
/// `i64` bitwise-and (small) immediate instruction: `r0 = r1 & c0`
///
/// # Note
Expand All @@ -2694,14 +2749,19 @@ pub enum Instruction {

/// `i32` bitwise-or instruction: `r0 = r1 | r2`
I32Or(BinInstr),
/// `i64` bitwise-or instruction: `r0 = r1 | r2`
I64Or(BinInstr),
/// Fused Wasm `i32.or` + `i32.eqz` [`Instruction`].
I32OrEqz(BinInstr),
/// Fused Wasm `i32.or` + `i32.eqz` [`Instruction`] with 16-bit encoded immediate.
I32OrEqzImm16(BinInstrImm16<i32>),
/// `i32` bitwise-or (small) immediate instruction: `r0 = r1 | c0`
///
/// # Note
///
/// Optimized variant of [`Instruction::I32Or`] for 16-bit constant values.
I32OrImm16(BinInstrImm16<i32>),

/// `i64` bitwise-or instruction: `r0 = r1 | r2`
I64Or(BinInstr),
/// `i64` bitwise-or (small) immediate instruction: `r0 = r1 | c0`
///
/// # Note
Expand All @@ -2711,14 +2771,19 @@ pub enum Instruction {

/// `i32` bitwise-xor instruction: `r0 = r1 ^ r2`
I32Xor(BinInstr),
/// `i64` bitwise-xor instruction: `r0 = r1 ^ r2`
I64Xor(BinInstr),
/// Fused Wasm `i32.xor` + `i32.eqz` [`Instruction`].
I32XorEqz(BinInstr),
/// Fused Wasm `i32.xor` + `i32.eqz` [`Instruction`] with 16-bit encoded immediate.
I32XorEqzImm16(BinInstrImm16<i32>),
/// `i32` bitwise-xor (small) immediate instruction: `r0 = r1 ^ c0`
///
/// # Note
///
/// Optimized variant of [`Instruction::I32Xor`] for 16-bit constant values.
I32XorImm16(BinInstrImm16<i32>),

/// `i64` bitwise-xor instruction: `r0 = r1 ^ r2`
I64Xor(BinInstr),
/// `i64` bitwise-xor (small) immediate instruction: `r0 = r1 ^ c0`
///
/// # Note
Expand Down
44 changes: 44 additions & 0 deletions crates/wasmi/src/engine/regmach/executor/instrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,18 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> {
Instr::BranchTable { index, len_targets } => {
self.execute_branch_table(index, len_targets)
}
Instr::BranchI32And(instr) => self.execute_branch_i32_and(instr),
Instr::BranchI32AndImm(instr) => self.execute_branch_i32_and_imm(instr),
Instr::BranchI32Or(instr) => self.execute_branch_i32_or(instr),
Instr::BranchI32OrImm(instr) => self.execute_branch_i32_or_imm(instr),
Instr::BranchI32Xor(instr) => self.execute_branch_i32_xor(instr),
Instr::BranchI32XorImm(instr) => self.execute_branch_i32_xor_imm(instr),
Instr::BranchI32AndEqz(instr) => self.execute_branch_i32_and_eqz(instr),
Instr::BranchI32AndEqzImm(instr) => self.execute_branch_i32_and_eqz_imm(instr),
Instr::BranchI32OrEqz(instr) => self.execute_branch_i32_or_eqz(instr),
Instr::BranchI32OrEqzImm(instr) => self.execute_branch_i32_or_eqz_imm(instr),
Instr::BranchI32XorEqz(instr) => self.execute_branch_i32_xor_eqz(instr),
Instr::BranchI32XorEqzImm(instr) => self.execute_branch_i32_xor_eqz_imm(instr),
Instr::BranchI32Eq(instr) => self.execute_branch_i32_eq(instr),
Instr::BranchI32EqImm(instr) => self.execute_branch_i32_eq_imm(instr),
Instr::BranchI32Ne(instr) => self.execute_branch_i32_ne(instr),
Expand Down Expand Up @@ -711,10 +723,16 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> {
Instr::I32RemUImm16(instr) => self.execute_i32_rem_u_imm16(instr)?,
Instr::I32RemUImm16Rev(instr) => self.execute_i32_rem_u_imm16_rev(instr)?,
Instr::I32And(instr) => self.execute_i32_and(instr),
Instr::I32AndEqz(instr) => self.execute_i32_and_eqz(instr),
Instr::I32AndEqzImm16(instr) => self.execute_i32_and_eqz_imm16(instr),
Instr::I32AndImm16(instr) => self.execute_i32_and_imm16(instr),
Instr::I32Or(instr) => self.execute_i32_or(instr),
Instr::I32OrEqz(instr) => self.execute_i32_or_eqz(instr),
Instr::I32OrEqzImm16(instr) => self.execute_i32_or_eqz_imm16(instr),
Instr::I32OrImm16(instr) => self.execute_i32_or_imm16(instr),
Instr::I32Xor(instr) => self.execute_i32_xor(instr),
Instr::I32XorEqz(instr) => self.execute_i32_xor_eqz(instr),
Instr::I32XorEqzImm16(instr) => self.execute_i32_xor_eqz_imm16(instr),
Instr::I32XorImm16(instr) => self.execute_i32_xor_imm16(instr),
Instr::I32Shl(instr) => self.execute_i32_shl(instr),
Instr::I32ShlImm(instr) => self.execute_i32_shl_imm(instr),
Expand Down Expand Up @@ -1232,3 +1250,29 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> {
self.next_instr();
}
}

/// Extension method for [`UntypedValue`] required by the [`Executor`].
/// Extension methods for [`UntypedValue`] required by the [`Executor`].
///
/// Each function is the fused form of an `i32` bitwise operation followed by
/// `i32.eqz`: the result of the bitwise operation is compared against zero and
/// the outcome of that comparison is returned as an [`UntypedValue`].
trait UntypedValueExt {
/// Executes a fused `i32.and` + `i32.eqz` instruction.
///
/// Yields the outcome of `(x & y) == 0` for the `i32` interpretation of the operands.
fn i32_and_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue;

/// Executes a fused `i32.or` + `i32.eqz` instruction.
///
/// Yields the outcome of `(x | y) == 0` for the `i32` interpretation of the operands.
fn i32_or_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue;

/// Executes a fused `i32.xor` + `i32.eqz` instruction.
///
/// Yields the outcome of `(x ^ y) == 0` for the `i32` interpretation of the operands.
fn i32_xor_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue;
}

impl UntypedValueExt for UntypedValue {
    /// Applies `i32.and` to both operands and reports whether the result is zero.
    fn i32_and_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue {
        let combined = i32::from(Self::i32_and(x, y));
        let is_zero = combined == 0;
        is_zero.into()
    }

    /// Applies `i32.or` to both operands and reports whether the result is zero.
    fn i32_or_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue {
        let combined = i32::from(Self::i32_or(x, y));
        let is_zero = combined == 0;
        is_zero.into()
    }

    /// Applies `i32.xor` to both operands and reports whether the result is zero.
    fn i32_xor_eqz(x: UntypedValue, y: UntypedValue) -> UntypedValue {
        let combined = i32::from(Self::i32_xor(x, y));
        let is_zero = combined == 0;
        is_zero.into()
    }
}
8 changes: 7 additions & 1 deletion crates/wasmi/src/engine/regmach/executor/instrs/binary.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::Executor;
use super::{Executor, UntypedValueExt};
use crate::{
core::{TrapCode, UntypedValue},
engine::regmach::bytecode::{BinInstr, BinInstrImm16, CopysignImmInstr, Sign},
Expand All @@ -24,8 +24,11 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> {
(Instruction::I32Sub, execute_i32_sub, UntypedValue::i32_sub),
(Instruction::I32Mul, execute_i32_mul, UntypedValue::i32_mul),
(Instruction::I32And, execute_i32_and, UntypedValue::i32_and),
(Instruction::I32AndEqz, execute_i32_and_eqz, UntypedValue::i32_and_eqz),
(Instruction::I32Or, execute_i32_or, UntypedValue::i32_or),
(Instruction::I32OrEqz, execute_i32_or_eqz, UntypedValue::i32_or_eqz),
(Instruction::I32Xor, execute_i32_xor, UntypedValue::i32_xor),
(Instruction::I32XorEqz, execute_i32_xor_eqz, UntypedValue::i32_xor_eqz),

(Instruction::I64Add, execute_i64_add, UntypedValue::i64_add),
(Instruction::I64Sub, execute_i64_sub, UntypedValue::i64_sub),
Expand Down Expand Up @@ -81,8 +84,11 @@ impl<'ctx, 'engine> Executor<'ctx, 'engine> {
(i32, Instruction::I32SubImm16, execute_i32_sub_imm16, UntypedValue::i32_sub),
(i32, Instruction::I32MulImm16, execute_i32_mul_imm16, UntypedValue::i32_mul),
(i32, Instruction::I32AndImm16, execute_i32_and_imm16, UntypedValue::i32_and),
(i32, Instruction::I32AndEqzImm16, execute_i32_and_eqz_imm16, UntypedValue::i32_and_eqz),
(i32, Instruction::I32OrImm16, execute_i32_or_imm16, UntypedValue::i32_or),
(i32, Instruction::I32OrEqzImm16, execute_i32_or_eqz_imm16, UntypedValue::i32_or_eqz),
(i32, Instruction::I32XorImm16, execute_i32_xor_imm16, UntypedValue::i32_xor),
(i32, Instruction::I32XorEqzImm16, execute_i32_xor_eqz_imm16, UntypedValue::i32_xor_eqz),

(i64, Instruction::I64AddImm16, execute_i64_add_imm16, UntypedValue::i64_add),
(i64, Instruction::I64SubImm16, execute_i64_sub_imm16, UntypedValue::i64_sub),
Expand Down
17 changes: 14 additions & 3 deletions crates/wasmi/src/engine/regmach/executor/instrs/branch.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use wasmi_core::UntypedValue;

use super::Executor;
use super::{Executor, UntypedValueExt};
use crate::engine::{
bytecode::BranchOffset,
regmach::bytecode::{BranchBinOpInstr, BranchBinOpInstrImm, Const16, Const32, Register},
};
use core::cmp;
use wasmi_core::UntypedValue;

#[cfg(doc)]
use crate::engine::regmach::bytecode::Instruction;
Expand Down Expand Up @@ -101,6 +100,12 @@ macro_rules! impl_execute_branch_binop {
}
}
impl_execute_branch_binop! {
(Instruction::BranchI32And, execute_branch_i32_and, UntypedValue::i32_and),
(Instruction::BranchI32Or, execute_branch_i32_or, UntypedValue::i32_or),
(Instruction::BranchI32Xor, execute_branch_i32_xor, UntypedValue::i32_xor),
(Instruction::BranchI32AndEqz, execute_branch_i32_and_eqz, UntypedValue::i32_and_eqz),
(Instruction::BranchI32OrEqz, execute_branch_i32_or_eqz, UntypedValue::i32_or_eqz),
(Instruction::BranchI32XorEqz, execute_branch_i32_xor_eqz, UntypedValue::i32_xor_eqz),
(Instruction::BranchI32Eq, execute_branch_i32_eq, UntypedValue::i32_eq),
(Instruction::BranchI32Ne, execute_branch_i32_ne, UntypedValue::i32_ne),
(Instruction::BranchI32LtS, execute_branch_i32_lt_s, UntypedValue::i32_lt_s),
Expand Down Expand Up @@ -152,6 +157,12 @@ macro_rules! impl_execute_branch_binop_imm {
}
}
impl_execute_branch_binop_imm! {
(Instruction::BranchI32AndImm, execute_branch_i32_and_imm, UntypedValue::i32_and, i32),
(Instruction::BranchI32OrImm, execute_branch_i32_or_imm, UntypedValue::i32_or, i32),
(Instruction::BranchI32XorImm, execute_branch_i32_xor_imm, UntypedValue::i32_xor, i32),
(Instruction::BranchI32AndEqzImm, execute_branch_i32_and_eqz_imm, UntypedValue::i32_and_eqz, i32),
(Instruction::BranchI32OrEqzImm, execute_branch_i32_or_eqz_imm, UntypedValue::i32_or_eqz, i32),
(Instruction::BranchI32XorEqzImm, execute_branch_i32_xor_eqz_imm, UntypedValue::i32_xor_eqz, i32),
(Instruction::BranchI32EqImm, execute_branch_i32_eq_imm, UntypedValue::i32_eq, i32),
(Instruction::BranchI32NeImm, execute_branch_i32_ne_imm, UntypedValue::i32_ne, i32),
(Instruction::BranchI32LtSImm, execute_branch_i32_lt_s_imm, UntypedValue::i32_lt_s, i32),
Expand Down
Loading

0 comments on commit 91a956b

Please sign in to comment.