Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: add two more precompiles to speed up Poseidon hash over the BN254 scalar field #3

Merged
merged 40 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d89f1d1
add bn254 scalar arith chip
kunxian-xia Apr 18, 2024
11bf5ad
refactor
kunxian-xia Apr 19, 2024
c926509
finish bn254-scalar-arith chip and add unit test
kunxian-xia Apr 23, 2024
30186b7
include entrypoint in unit test
kunxian-xia Apr 23, 2024
7bf8d1e
include bn254 scalar arith chip in riscv air
kunxian-xia Apr 24, 2024
52b5103
add selector
kunxian-xia Apr 24, 2024
3a6f21b
pass op through value not pointer and refine testcase
kunxian-xia Apr 24, 2024
a0afb44
add poseidon_bn254 example
kunxian-xia May 9, 2024
e769fb4
handle bn254 scalar add and mul separately
kunxian-xia May 16, 2024
89e1ec8
chore: update poseidon-base dep
kunxian-xia May 16, 2024
eb0fddd
reduce total cycles from 2M to 1.4M
kunxian-xia May 17, 2024
28b0bfb
reduce total cycles from 1.4M to 1M
kunxian-xia May 21, 2024
df2d65b
add log in runtime execution
kunxian-xia May 21, 2024
a293b1b
add mac
lightsing May 21, 2024
fb1c13f
add chip
lightsing May 22, 2024
15114bf
fix
lightsing May 22, 2024
c4e0ac3
fix & fmt
lightsing May 22, 2024
ee578f8
fix p3-baby-bear dep
lightsing May 22, 2024
6aaec72
remove add
lightsing May 22, 2024
6397a79
fix twirp & reqwest
lightsing May 22, 2024
c228ae3
add memcpy chip
lightsing Jun 6, 2024
9ee89cd
add eval_memory_access_slice
lightsing Jun 6, 2024
61b3431
update Cargo.lock
lightsing Jun 13, 2024
6ed60da
update Cargo.toml
lightsing Jun 13, 2024
6f5d81b
cleanup
lightsing Jun 13, 2024
a61929f
Merge remote-tracking branch 'scroll/main' into feat/poseidon_bn254
kunxian-xia Jun 13, 2024
7eb3efb
fix errors after merge upstream
kunxian-xia Jun 13, 2024
46cd5a9
Merge remote-tracking branch 'scroll/main' into feat/poseidon_bn254
kunxian-xia Jun 13, 2024
4da5a36
fix merge errors
kunxian-xia Jun 13, 2024
42be5d2
Merge remote-tracking branch 'scroll/feat/poseidon_bn254' into feat/m…
kunxian-xia Jun 13, 2024
e84c37c
Merge branch 'refs/heads/feat/poseidon_bn254' into feat/memcpy
lightsing Jun 14, 2024
003227d
Merge remote-tracking branch 'refs/remotes/scroll/feat/memcpy' into f…
lightsing Jun 14, 2024
8821135
fix upgrade
lightsing Jun 14, 2024
340d982
Merge remote-tracking branch 'scroll/dev' into feat/poseidon_bn254
kunxian-xia Jun 14, 2024
d455d69
self-hosted runner
kunxian-xia Jun 14, 2024
30d29bc
fix
kunxian-xia Jun 14, 2024
795eefc
Merge remote-tracking branch 'scroll/feat/poseidon_bn254' into feat/m…
kunxian-xia Jun 14, 2024
ec58b04
fix
kunxian-xia Jun 14, 2024
ad46cdf
update elf
kunxian-xia Jun 14, 2024
c7d0a5e
Merge pull request #2 from scroll-tech/feat/memcpy
kunxian-xia Jun 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ concurrency:
jobs:
test-x86:
name: Test (x86-64)
runs-on: runs-on,runner=self-hosted-linux-x64
runs-on: [self-hosted, Linux, X64]
env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
steps:
Expand Down Expand Up @@ -57,7 +57,7 @@ jobs:

test-arm:
name: Test (ARM)
runs-on: runs-on,runner=self-hosted-linux-arm64
runs-on: runs-on,runner=64cpu-linux-arm64
env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
steps:
Expand Down Expand Up @@ -88,7 +88,7 @@ jobs:

lint:
name: Formatting & Clippy
runs-on: runs-on,runner=self-hosted-linux-x64
runs-on: [self-hosted, Linux, X64]
env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
steps:
Expand Down Expand Up @@ -116,7 +116,7 @@ jobs:

examples:
name: Examples
runs-on: runs-on,runner=self-hosted-linux-x64
runs-on: [self-hosted, Linux, X64]
env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
steps:
Expand Down Expand Up @@ -145,7 +145,7 @@ jobs:

cli:
name: CLI
runs-on: runs-on,runner=self-hosted-linux-x64
runs-on: [self-hosted, Linux, X64]
env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
steps:
Expand Down
34 changes: 34 additions & 0 deletions core/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,10 @@ impl Runtime {
let value = (memory_read_value).to_le_bytes()[(addr % 4) as usize];
a = ((value as i8) as i32) as u32;
memory_store_value = Some(memory_read_value);
println!(
"[clk: {}, pc: 0x{:x}] LB: {:?} <- {:x}",
self.state.global_clk, self.state.pc, rd, a
);
self.rw(rd, a);
}
Opcode::LH => {
Expand All @@ -667,6 +671,10 @@ impl Runtime {
};
a = ((value as i16) as i32) as u32;
memory_store_value = Some(memory_read_value);
println!(
"[clk: {}, pc: 0x{:x}] LH: {:?} <- {:x}",
self.state.global_clk, self.state.pc, rd, a
);
self.rw(rd, a);
}
Opcode::LW => {
Expand All @@ -676,6 +684,10 @@ impl Runtime {
}
a = memory_read_value;
memory_store_value = Some(memory_read_value);
println!(
"[clk: {}, pc: 0x{:x}] LW: {:?} <- {}",
self.state.global_clk, self.state.pc, rd, a
);
self.rw(rd, a);
}
Opcode::LBU => {
Expand Down Expand Up @@ -711,6 +723,10 @@ impl Runtime {
_ => unreachable!(),
};
memory_store_value = Some(value);
println!(
"[clk: {}, pc: 0x{:x}] SB 0x{:x} <- 0x{:x}",
self.state.global_clk, pc, addr, value
);
self.mw_cpu(align(addr), value, MemoryAccessPosition::Memory);
}
Opcode::SH => {
Expand All @@ -724,6 +740,10 @@ impl Runtime {
_ => unreachable!(),
};
memory_store_value = Some(value);
println!(
"[clk: {}, pc: 0x{:x}] SH 0x{:x} <- 0x{:x}",
self.state.global_clk, pc, addr, value
);
self.mw_cpu(align(addr), value, MemoryAccessPosition::Memory);
}
Opcode::SW => {
Expand All @@ -733,6 +753,10 @@ impl Runtime {
}
let value = a;
memory_store_value = Some(value);
println!(
"[clk: {}, pc: 0x{:x}] SW 0x{:x} <- 0x{:x}",
self.state.global_clk, pc, addr, value
);
self.mw_cpu(align(addr), value, MemoryAccessPosition::Memory);
}

Expand Down Expand Up @@ -816,8 +840,18 @@ impl Runtime {
.or_insert(1);
}

let global_clk = self.state.global_clk;
let syscall_impl = self.get_syscall(syscall).cloned();
let mut precompile_rt = SyscallContext::new(self);

log::debug!(
"[clk: {}, pc: 0x{:x}] ecall syscall_id=0x{:x}, b: 0x{:x}, c: 0x{:x}",
global_clk,
pc,
syscall_id,
b,
c,
);
let (precompile_next_pc, precompile_cycles, returned_exit_code) =
if let Some(syscall_impl) = syscall_impl {
// Executing a syscall optionally returns a value to write to the t0 register.
Expand Down
44 changes: 44 additions & 0 deletions core/src/runtime/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ use crate::runtime::MemoryInitializeFinalizeEvent;
use crate::runtime::MemoryRecordEnum;
use crate::stark::MachineRecord;
use crate::syscall::precompiles::blake3::Blake3CompressInnerEvent;
use crate::syscall::precompiles::bn254_scalar::Bn254FieldArithEvent;
use crate::syscall::precompiles::edwards::EdDecompressEvent;
use crate::syscall::precompiles::keccak256::KeccakPermuteEvent;
use crate::syscall::precompiles::sha256::{ShaCompressEvent, ShaExtendEvent};
use crate::syscall::precompiles::uint256::Uint256MulEvent;
use crate::syscall::precompiles::ECDecompressEvent;
use crate::syscall::precompiles::{ECAddEvent, ECDoubleEvent};
use crate::syscall::MemCopyEvent;
use crate::utils::SP1CoreOpts;

/// A record of the execution of a program. Contains event data for everything that happened during
Expand Down Expand Up @@ -85,6 +87,8 @@ pub struct ExecutionRecord {

pub bn254_double_events: Vec<ECDoubleEvent>,

pub bn254_scalar_arith_events: Vec<Bn254FieldArithEvent>,

pub k256_decompress_events: Vec<ECDecompressEvent>,

pub blake3_compress_inner_events: Vec<Blake3CompressInnerEvent>,
Expand All @@ -101,6 +105,8 @@ pub struct ExecutionRecord {

pub bls12381_decompress_events: Vec<ECDecompressEvent>,

pub memcpy_events: HashMap<usize, Vec<MemCopyEvent>>,

/// The public values.
pub public_values: PublicValues<u32, u32>,
}
Expand All @@ -124,6 +130,8 @@ pub struct ShardingConfig {
pub bls12381_add_len: usize,
pub bls12381_double_len: usize,
pub uint256_mul_len: usize,
pub bn254_scalar_arith_len: usize,
pub memcpy_len: usize,
}

impl ShardingConfig {
Expand Down Expand Up @@ -154,6 +162,8 @@ impl Default for ShardingConfig {
bls12381_add_len: shard_size,
bls12381_double_len: shard_size,
uint256_mul_len: shard_size,
bn254_scalar_arith_len: shard_size,
memcpy_len: shard_size,
}
}
}
Expand Down Expand Up @@ -241,6 +251,16 @@ impl MachineRecord for ExecutionRecord {
"bls12381_decompress_events".to_string(),
self.bls12381_decompress_events.len(),
);

stats.insert(
"bn254_scalar_arith_events".to_string(),
self.bn254_scalar_arith_events.len(),
);

for (sz, events) in self.memcpy_events.iter() {
stats.insert(format!("memcpy{}_events", sz), events.len());
}

stats
}

Expand Down Expand Up @@ -282,6 +302,8 @@ impl MachineRecord for ExecutionRecord {
.append(&mut other.uint256_mul_events);
self.bls12381_decompress_events
.append(&mut other.bls12381_decompress_events);
self.bn254_scalar_arith_events
.append(&mut other.bn254_scalar_arith_events);

// Merge the byte lookups.
for (shard, events_map) in std::mem::take(&mut other.byte_lookups).into_iter() {
Expand Down Expand Up @@ -490,6 +512,28 @@ impl MachineRecord for ExecutionRecord {
.extend_from_slice(bls12381_double_chunk);
}

for (sz, events) in self.memcpy_events.iter_mut() {
for (memcpy_chunk, shard) in take(events)
.chunks_mut(config.memcpy_len)
.zip(shards.iter_mut())
{
if let Some(events) = shard.memcpy_events.get_mut(sz) {
events.extend_from_slice(memcpy_chunk);
} else {
shard.memcpy_events.insert(*sz, memcpy_chunk.to_vec());
}
}
}

for (bn254_scalar_arith_chunk, shard) in take(&mut self.bn254_scalar_arith_events)
.chunks_mut(config.bn254_scalar_arith_len)
.zip(shards.iter_mut())
{
shard
.bn254_scalar_arith_events
.extend_from_slice(bn254_scalar_arith_chunk);
}

// Put the precompile events in the first shard.
let first = shards.first_mut().unwrap();

Expand Down
46 changes: 44 additions & 2 deletions core/src/runtime/syscall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ use std::fmt;
use std::sync::Arc;

use strum_macros::EnumIter;
use typenum::{U16, U32, U64, U8};

use crate::runtime::{Register, Runtime};
use crate::stark::Blake3CompressInnerChip;
use crate::syscall::precompiles::bn254_scalar::{Bn254ScalarMacChip, Bn254ScalarMulChip};
use crate::syscall::precompiles::edwards::EdAddAssignChip;
use crate::syscall::precompiles::edwards::EdDecompressChip;
use crate::syscall::precompiles::keccak256::KeccakPermuteChip;
Expand All @@ -15,8 +17,9 @@ use crate::syscall::precompiles::weierstrass::WeierstrassAddAssignChip;
use crate::syscall::precompiles::weierstrass::WeierstrassDecompressChip;
use crate::syscall::precompiles::weierstrass::WeierstrassDoubleAssignChip;
use crate::syscall::{
SyscallCommit, SyscallCommitDeferred, SyscallEnterUnconstrained, SyscallExitUnconstrained,
SyscallHalt, SyscallHintLen, SyscallHintRead, SyscallVerifySP1Proof, SyscallWrite,
MemCopyChip, SyscallCommit, SyscallCommitDeferred, SyscallEnterUnconstrained,
SyscallExitUnconstrained, SyscallHalt, SyscallHintLen, SyscallHintRead, SyscallVerifySP1Proof,
SyscallWrite,
};
use crate::utils::ec::edwards::ed25519::{Ed25519, Ed25519Parameters};
use crate::utils::ec::weierstrass::bls12_381::Bls12381;
Expand Down Expand Up @@ -103,6 +106,18 @@ pub enum SyscallCode {

/// Executes the `BLS12381_DOUBLE` precompile.
BLS12381_DOUBLE = 0x00_00_01_1F,

/// Executes the `BN254_SCALAR_MUL` precompile.
BN254_SCALAR_MUL = 0x00_01_01_20,

/// Executes the `BN254_SCALAR_MAC` precompile.
BN254_SCALAR_MAC = 0x00_01_01_21,

/// Executes the `MEMCPY_32` precompile.
MEMCPY_32 = 0x00_00_01_30,

/// Executes the `MEMCPY_64` precompile.
MEMCPY_64 = 0x00_00_01_31,
}

impl SyscallCode {
Expand Down Expand Up @@ -133,6 +148,10 @@ impl SyscallCode {
0x00_00_00_F1 => SyscallCode::HINT_READ,
0x00_00_01_1D => SyscallCode::UINT256_MUL,
0x00_00_01_1C => SyscallCode::BLS12381_DECOMPRESS,
0x00_01_01_20 => SyscallCode::BN254_SCALAR_MUL,
0x00_01_01_21 => SyscallCode::BN254_SCALAR_MAC,
0x00_00_01_30 => SyscallCode::MEMCPY_32,
0x00_00_01_31 => SyscallCode::MEMCPY_64,
_ => panic!("invalid syscall number: {}", value),
}
}
Expand Down Expand Up @@ -346,6 +365,23 @@ pub fn default_syscall_map() -> HashMap<SyscallCode, Arc<dyn Syscall>> {
Arc::new(WeierstrassDecompressChip::<Bls12381>::new()),
);
syscall_map.insert(SyscallCode::UINT256_MUL, Arc::new(Uint256MulChip::new()));
syscall_map.insert(
SyscallCode::BN254_SCALAR_MUL,
Arc::new(Bn254ScalarMulChip::new()),
);
syscall_map.insert(
SyscallCode::BN254_SCALAR_MAC,
Arc::new(Bn254ScalarMacChip::new()),
);

syscall_map.insert(
SyscallCode::MEMCPY_32,
Arc::new(MemCopyChip::<U8, U32>::new()),
);
syscall_map.insert(
SyscallCode::MEMCPY_64,
Arc::new(MemCopyChip::<U16, U64>::new()),
);

syscall_map
}
Expand Down Expand Up @@ -443,6 +479,12 @@ mod tests {
SyscallCode::BLS12381_DECOMPRESS => {
assert_eq!(code as u32, sp1_zkvm::syscalls::BLS12381_DECOMPRESS)
}
SyscallCode::BN254_SCALAR_MUL => {
assert_eq!(code as u32, sp1_zkvm::syscalls::BN254_SCALAR_MUL)
}
SyscallCode::BN254_SCALAR_MAC => {
assert_eq!(code as u32, sp1_zkvm::syscalls::BN254_SCALAR_MAC)
}
}
}
}
Expand Down
7 changes: 7 additions & 0 deletions core/src/runtime/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@ impl Runtime {
self.register(Register::X18),
);

log::trace!(
"[clk: {}, pc: 0x{:x}] {:?}",
self.state.global_clk,
self.state.pc,
instruction,
);

if !self.unconstrained && self.state.global_clk % 10_000_000 == 0 {
log::info!(
"clk = {} pc = 0x{:x?}",
Expand Down
Loading
Loading