diff --git a/Cargo.lock b/Cargo.lock
index 770f7253f..03223ee5f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5790,6 +5790,8 @@ dependencies = [
  "plonky2",
  "plonky2_maybe_rayon 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "primitive-types 0.12.2",
+ "regex",
+ "rlp",
  "ruint",
  "serde",
  "serde_json",
diff --git a/evm_arithmetization/src/fixed_recursive_verifier.rs b/evm_arithmetization/src/fixed_recursive_verifier.rs
index 94a658685..3c1f0b1e1 100644
--- a/evm_arithmetization/src/fixed_recursive_verifier.rs
+++ b/evm_arithmetization/src/fixed_recursive_verifier.rs
@@ -37,7 +37,7 @@ use starky::stark::Stark;
 
 use crate::all_stark::{all_cross_table_lookups, AllStark, Table, NUM_TABLES};
 use crate::cpu::kernel::aggregator::KERNEL;
-use crate::generation::segments::{GenerationSegmentData, SegmentDataIterator, SegmentError};
+use crate::generation::segments::{GenerationSegmentData, SegmentDataIterator};
 use crate::generation::{GenerationInputs, TrimmedGenerationInputs};
 use crate::get_challenges::observe_public_values_target;
 use crate::proof::{
@@ -1889,8 +1889,7 @@ where
         let mut proofs = vec![];
 
         for segment_run in segment_iterator {
-            let (_, mut next_data) =
-                segment_run.map_err(|e: SegmentError| anyhow::format_err!(e))?;
+            let (_, mut next_data) = segment_run?;
             let proof = self.prove_segment(
                 all_stark,
                 config,
diff --git a/evm_arithmetization/src/generation/mod.rs b/evm_arithmetization/src/generation/mod.rs
index 9c7625d2b..9279e88d9 100644
--- a/evm_arithmetization/src/generation/mod.rs
+++ b/evm_arithmetization/src/generation/mod.rs
@@ -1,9 +1,10 @@
 use std::collections::HashMap;
+use std::fmt::Display;
 
 use anyhow::anyhow;
 use ethereum_types::{Address, BigEndianHash, H256, U256};
 use keccak_hash::keccak;
-use log::log_enabled;
+use log::error;
 use mpt_trie::partial_trie::{HashedPartialTrie, PartialTrie};
 use plonky2::field::extension::Extendable;
 use plonky2::field::polynomial::PolynomialValues;
@@ -51,6 +52,29 @@ pub const NUM_EXTRA_CYCLES_BEFORE: usize = 64;
 /// Memory values used to initialize `MemBefore`.
 pub type MemBeforeValues = Vec<(MemoryAddress, U256)>;
 
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ErrorWithTries<E> {
+    pub inner: E,
+    pub tries: Option<DebugOutputTries>,
+}
+impl<E: Display> Display for ErrorWithTries<E> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+impl<E: std::error::Error> std::error::Error for ErrorWithTries<E> {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        self.inner.source()
+    }
+}
+
+impl<E> ErrorWithTries<E> {
+    pub fn new(inner: E, tries: Option<DebugOutputTries>) -> Self {
+        Self { inner, tries }
+    }
+}
+
 /// Inputs needed for trace generation.
 #[derive(Clone, Debug, Deserialize, Serialize, Default)]
 #[serde(bound = "")]
@@ -234,6 +258,15 @@ impl GenerationInputs {
     }
 }
 
+/// Post transaction execution tries retrieved from the prover's memory.
+/// Used primarily for error debugging in case of a failed execution.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct DebugOutputTries {
+    pub state_trie: HashedPartialTrie,
+    pub transaction_trie: HashedPartialTrie,
+    pub receipt_trie: HashedPartialTrie,
+}
+
 fn apply_metadata_and_tries_memops<F: RichField + Extendable<D>, const D: usize>(
     state: &mut GenerationState<F>,
     inputs: &TrimmedGenerationInputs,
@@ -492,10 +525,8 @@ pub fn generate_traces<F: RichField + Extendable<D>, const D: usize>(
         "simulate CPU",
         simulate_cpu(&mut state, *max_cpu_len_log)
     );
-    if cpu_res.is_err() {
-        output_debug_tries(&state)?;
-        cpu_res?;
-    };
+
+    cpu_res?;
 
     let trace_lengths = state.traces.get_lengths();
 
@@ -595,59 +626,50 @@ fn simulate_cpu(
     Ok((final_registers, mem_after))
 }
 
-/// Outputs the tries that have been obtained post transaction execution, as
+/// Collects the tries that have been obtained post transaction execution, as
 /// they are represented in the prover's memory.
-/// This will do nothing if the CPU execution failed outside of the final trie
-/// root checks.
-pub(crate) fn output_debug_tries<F: RichField>(state: &GenerationState<F>) -> anyhow::Result<()> {
-    if !log_enabled!(log::Level::Debug) {
-        return Ok(());
-    }
-
-    // Retrieve previous PC (before jumping to KernelPanic), to see if we reached
-    // `perform_final_checks`. We will output debugging information on the final
-    // tries only if we got a root mismatch.
-    let previous_pc = state.get_registers().program_counter;
-
-    let label = KERNEL.offset_name(previous_pc);
-
-    if label.contains("check_state_trie")
-        || label.contains("check_txn_trie")
-        || label.contains("check_receipt_trie")
-    {
-        let state_trie_ptr = u256_to_usize(
-            state
-                .memory
-                .read_global_metadata(GlobalMetadata::StateTrieRoot),
-        )
-        .map_err(|_| anyhow!("State trie pointer is too large to fit in a usize."))?;
-        log::debug!(
-            "Computed state trie: {:?}",
-            get_state_trie::<HashedPartialTrie>(&state.memory, state_trie_ptr)
-        );
-
-        let txn_trie_ptr = u256_to_usize(
-            state
-                .memory
-                .read_global_metadata(GlobalMetadata::TransactionTrieRoot),
-        )
-        .map_err(|_| anyhow!("Transactions trie pointer is too large to fit in a usize."))?;
-        log::debug!(
-            "Computed transactions trie: {:?}",
-            get_txn_trie::<HashedPartialTrie>(&state.memory, txn_trie_ptr)
-        );
-
-        let receipt_trie_ptr = u256_to_usize(
-            state
-                .memory
-                .read_global_metadata(GlobalMetadata::ReceiptTrieRoot),
-        )
-        .map_err(|_| anyhow!("Receipts trie pointer is too large to fit in a usize."))?;
-        log::debug!(
-            "Computed receipts trie: {:?}",
-            get_receipt_trie::<HashedPartialTrie>(&state.memory, receipt_trie_ptr)
-        );
-    }
-
-    Ok(())
+pub(crate) fn collect_debug_tries<F: RichField>(
+    state: &GenerationState<F>,
+) -> Option<DebugOutputTries> {
+    let state_trie_ptr = u256_to_usize(
+        state
+            .memory
+            .read_global_metadata(GlobalMetadata::StateTrieRoot),
+    )
+    .inspect_err(|e| error!("failed to retrieve state trie pointer: {e:?}"))
+    .ok()?;
+
+    let state_trie = get_state_trie::<HashedPartialTrie>(&state.memory, state_trie_ptr)
+        .inspect_err(|e| error!("unable to retrieve state trie for debugging purposes: {e:?}"))
+        .ok()?;
+
+    let txn_trie_ptr = u256_to_usize(
+        state
+            .memory
+            .read_global_metadata(GlobalMetadata::TransactionTrieRoot),
+    )
+    .inspect_err(|e| error!("failed to retrieve transactions trie pointer: {e:?}"))
+    .ok()?;
+    let transaction_trie = get_txn_trie::<HashedPartialTrie>(&state.memory, txn_trie_ptr)
+        .inspect_err(|e| {
+            error!("unable to retrieve transaction trie for debugging purposes: {e:?}",)
+        })
+        .ok()?;
+
+    let receipt_trie_ptr = u256_to_usize(
+        state
+            .memory
+            .read_global_metadata(GlobalMetadata::ReceiptTrieRoot),
+    )
+    .inspect_err(|e| error!("failed to retrieve receipts trie pointer: {e:?}"))
+    .ok()?;
+    let receipt_trie =
+        get_receipt_trie::<HashedPartialTrie>(&state.memory, receipt_trie_ptr)
+            .inspect_err(|e| error!("unable to retrieve receipt trie for debugging purposes: {e:?}"))
+            .ok()?;
+
+    Some(DebugOutputTries {
+        state_trie,
+        transaction_trie,
+        receipt_trie,
+    })
 }
diff --git a/evm_arithmetization/src/generation/segments.rs b/evm_arithmetization/src/generation/segments.rs
index 51de9fa83..b3a129137 100644
--- a/evm_arithmetization/src/generation/segments.rs
+++ b/evm_arithmetization/src/generation/segments.rs
@@ -9,7 +9,7 @@ use super::TrimmedGenerationInputs;
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::{set_registers_and_run, ExtraSegmentData, Interpreter};
 use crate::generation::state::State;
-use crate::generation::{debug_inputs, GenerationInputs};
+use crate::generation::{collect_debug_tries, debug_inputs, ErrorWithTries, GenerationInputs};
 use crate::witness::memory::MemoryState;
 use crate::witness::state::RegistersState;
 
@@ -88,8 +88,10 @@ pub struct SegmentDataIterator {
 pub type SegmentRunResult = Option<Box<(GenerationSegmentData, Option<GenerationSegmentData>)>>;
 
 #[derive(thiserror::Error, Debug, Serialize, Deserialize)]
-#[error("{}", .0)]
-pub struct SegmentError(pub String);
+#[error("{}", .message)]
+pub struct SegmentError {
+    pub message: String,
+}
 
 impl<F: RichField> SegmentDataIterator<F> {
     pub fn new(inputs: &GenerationInputs, max_cpu_len_log: Option<usize>) -> Self {
@@ -113,7 +115,7 @@ impl<F: RichField> SegmentDataIterator<F> {
     fn generate_next_segment(
         &mut self,
         partial_segment_data: Option<GenerationSegmentData>,
-    ) -> Result<SegmentRunResult, SegmentError> {
+    ) -> Result<SegmentRunResult, ErrorWithTries<SegmentError>> {
         // Get the (partial) current segment data, if it is provided. Otherwise,
         // initialize it.
         let mut segment_data = if let Some(partial) = partial_segment_data {
@@ -133,8 +135,9 @@ impl<F: RichField> SegmentDataIterator<F> {
 
         // Run the interpreter to get `registers_after` and the partial data for the
         // next segment.
-        let run = set_registers_and_run(segment_data.registers_after, &mut self.interpreter);
-        if let Ok((updated_registers, mem_after)) = run {
+        let execution_result =
+            set_registers_and_run(segment_data.registers_after, &mut self.interpreter);
+        if let Ok((updated_registers, mem_after)) = execution_result {
             let partial_segment_data = Some(build_segment_data(
                 segment_index + 1,
                 Some(updated_registers),
@@ -157,21 +160,28 @@ impl<F: RichField> SegmentDataIterator<F> {
                     inputs.txn_number_before + inputs.txn_hashes.len()
                 ),
             };
-            let s = format!(
-                "Segment generation {:?} for block {:?} ({}) failed with error {:?}",
-                segment_index,
-                block,
-                txn_range,
-                run.unwrap_err()
-            );
-            Err(SegmentError(s))
+            // In case of an error, return the tries as part of the error for easier debugging.
+            Err(ErrorWithTries::new(
+                SegmentError {
+                    message: format!(
+                        "Segment generation {:?} for block:{} batch:{} tx_range:({}) failed with error {:?}",
+                        segment_index,
+                        block.low_u64(),
+                        segment_index,
+                        txn_range,
+                        execution_result.unwrap_err()
+                    ),
+                },
+                collect_debug_tries(self.interpreter.get_generation_state()),
+            ))
         }
     }
 }
 
 /// Returned type from a `SegmentDataIterator`, needed to prove all segments in
 /// a transaction batch.
-pub type AllData = Result<(TrimmedGenerationInputs, GenerationSegmentData), SegmentError>;
+pub type AllData =
+    Result<(TrimmedGenerationInputs, GenerationSegmentData), ErrorWithTries<SegmentError>>;
 
 impl<F: RichField> Iterator for SegmentDataIterator<F> {
     type Item = AllData;
diff --git a/evm_arithmetization/src/prover.rs b/evm_arithmetization/src/prover.rs
index 079fe3469..f97fc8b40 100644
--- a/evm_arithmetization/src/prover.rs
+++ b/evm_arithmetization/src/prover.rs
@@ -370,13 +370,10 @@ pub(crate) fn features_check(inputs: &TrimmedGenerationInputs)
 /// A utility module designed to test witness generation externally.
 pub mod testing {
     use super::*;
+    use crate::generation::ErrorWithTries;
     use crate::{
         cpu::kernel::interpreter::Interpreter,
-        generation::{
-            output_debug_tries,
-            segments::{SegmentDataIterator, SegmentError},
-            state::State,
-        },
+        generation::segments::{SegmentDataIterator, SegmentError},
     };
 
     /// Simulates the zkEVM CPU execution.
@@ -388,13 +385,7 @@ pub mod testing {
         let initial_offset = KERNEL.global_labels["init"];
         let mut interpreter: Interpreter<F> =
             Interpreter::new_with_generation_inputs(initial_offset, initial_stack, &inputs, None);
-        let result = interpreter.run();
-
-        if result.is_err() {
-            output_debug_tries(interpreter.get_generation_state())?;
-        }
-
-        result?;
+        interpreter.run()?;
 
         Ok(())
     }
@@ -415,8 +406,7 @@ pub mod testing {
         let mut proofs = vec![];
 
         for segment_run in segment_data_iterator {
-            let (_, mut next_data) =
-                segment_run.map_err(|e: SegmentError| anyhow::format_err!(e))?;
+            let (_, mut next_data) = segment_run?;
             let proof = prove(
                 all_stark,
                 config,
@@ -434,16 +424,14 @@ pub mod testing {
     pub fn simulate_execution_all_segments<F>(
         inputs: GenerationInputs,
         max_cpu_len_log: usize,
-    ) -> Result<()>
+    ) -> Result<(), ErrorWithTries<SegmentError>>
     where
        F: RichField,
     {
        features_check(&inputs.clone().trim());
 
        for segment in SegmentDataIterator::<F>::new(&inputs, Some(max_cpu_len_log)) {
-            if let Err(e) = segment {
-                return Err(anyhow::format_err!(e));
-            }
+            segment?;
        }
 
        Ok(())
diff --git a/evm_arithmetization/src/public_types.rs b/evm_arithmetization/src/public_types.rs
index 9d3de7196..0b917317d 100644
--- a/evm_arithmetization/src/public_types.rs
+++ b/evm_arithmetization/src/public_types.rs
@@ -39,7 +39,10 @@ pub type ProofWithPublicInputs =
 /// proofs.
 pub type PublicValues = crate::proof::PublicValues;
 
-pub type AllData = Result<(TrimmedGenerationInputs, GenerationSegmentData), SegmentError>;
+pub type AllData = Result<
+    (TrimmedGenerationInputs, GenerationSegmentData),
+    crate::generation::ErrorWithTries<SegmentError>,
+>;
 
 /// Returned type from the zkEVM STARK prover, before recursive verification.
 pub type AllProof = crate::proof::AllProof;
diff --git a/mpt_trie/src/debug_tools/diff.rs b/mpt_trie/src/debug_tools/diff.rs
index 12775b445..880984290 100644
--- a/mpt_trie/src/debug_tools/diff.rs
+++ b/mpt_trie/src/debug_tools/diff.rs
@@ -136,13 +136,15 @@ impl Display for DiffPoint {
 /// Meta information for a node in a trie.
 #[derive(Clone, Debug, Eq, Hash, PartialEq)]
 pub struct NodeInfo {
-    key: Nibbles,
-
+    /// Mpt trie node key.
+    pub key: Nibbles,
     /// The direct value associated with the node (only applicable to `Leaf` &
     /// `Branch` nodes).
-    value: Option<Vec<u8>>,
-    node_type: TrieNodeType,
-    hash: H256,
+    pub value: Option<Vec<u8>>,
+    /// Type of this node.
+    pub node_type: TrieNodeType,
+    /// Node hash.
+    pub hash: H256,
 }
 
 impl Display for NodeInfo {
diff --git a/trace_decoder/benches/block_processing.rs b/trace_decoder/benches/block_processing.rs
index 6f3319d94..adefdae3f 100644
--- a/trace_decoder/benches/block_processing.rs
+++ b/trace_decoder/benches/block_processing.rs
@@ -6,6 +6,7 @@
 //! for a total of 24,479,837 gas.
 
 use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
+use trace_decoder::observer::DummyObserver;
 use trace_decoder::{BlockTrace, OtherBlockData};
 
 #[derive(Clone, Debug, serde::Deserialize)]
@@ -33,7 +34,13 @@ fn criterion_benchmark(c: &mut Criterion) {
                     block_trace,
                     other_data,
                 }| {
-                    trace_decoder::entrypoint(block_trace, other_data, batch_size).unwrap()
+                    trace_decoder::entrypoint(
+                        block_trace,
+                        other_data,
+                        batch_size,
+                        &mut DummyObserver::new(),
+                    )
+                    .unwrap()
                 },
                 BatchSize::LargeInput,
             )
diff --git a/trace_decoder/src/core.rs b/trace_decoder/src/core.rs
index 959e03967..d96778dec 100644
--- a/trace_decoder/src/core.rs
+++ b/trace_decoder/src/core.rs
@@ -19,6 +19,7 @@ use mpt_trie::partial_trie::PartialTrie as _;
 use nunny::NonEmpty;
 use zk_evm_common::gwei_to_wei;
 
+use crate::observer::Observer;
 use crate::{
     typed_mpt::{ReceiptTrie, StateMpt, StateTrie, StorageTrie, TransactionTrie, TrieKey},
     BlockLevelData, BlockTrace, BlockTraceTriePreImages, CombinedPreImages, ContractCodeUsage,
@@ -31,6 +32,7 @@ pub fn entrypoint(
     trace: BlockTrace,
     other: OtherBlockData,
     batch_size_hint: usize,
+    observer: &mut impl Observer<StateMpt>,
 ) -> anyhow::Result<Vec<GenerationInputs>> {
     ensure!(batch_size_hint != 0);
 
@@ -67,6 +69,7 @@ pub fn entrypoint(
         &b_meta,
         ger_data,
         withdrawals,
+        observer,
     )?;
 
     let mut running_gas_used = 0;
@@ -261,7 +264,7 @@ struct Batch {
 /// [`evm_arithmetization::generation::TrieInputs`],
 /// generic over state trie representation.
 #[derive(Debug)]
-struct IntraBlockTries<StateTrieT> {
+pub struct IntraBlockTries<StateTrieT> {
     pub state: StateTrieT,
     pub storage: BTreeMap<H256, StorageTrie>,
     pub transaction: TransactionTrie,
@@ -269,6 +272,7 @@ struct IntraBlockTries<StateTrieT> {
 }
 
 /// Does the main work mentioned in the [module documentation](super).
+#[allow(clippy::too_many_arguments)]
 fn middle<StateTrieT: StateTrie + Clone>(
     // state at the beginning of the block
     mut state_trie: StateTrieT,
@@ -282,6 +286,8 @@ fn middle<StateTrieT: StateTrie + Clone>(
     ger_data: Option<(H256, H256)>,
     // added to final batch
     mut withdrawals: Vec<(Address, U256)>,
+    // called with the untrimmed tries after each batch
+    observer: &mut impl Observer<StateTrieT>,
 ) -> anyhow::Result<Vec<Batch<StateTrieT>>> {
     // Initialise the storage tries.
     for (haddr, acct) in state_trie.iter() {
@@ -306,7 +312,7 @@ fn middle<StateTrieT: StateTrie + Clone>(
     let mut txn_ix = 0; // incremented for non-dummy transactions
     let mut loop_ix = 0; // always incremented
     let loop_len = batches.iter().flatten().count();
-    for batch in batches {
+    for (batch_index, batch) in batches.into_iter().enumerate() {
         let batch_first_txn_ix = txn_ix; // GOTCHA: if there are no transactions in this batch
         let mut batch_gas_used = 0;
         let mut batch_byte_code = vec![];
@@ -543,6 +549,15 @@ fn middle<StateTrieT: StateTrie + Clone>(
                 receipts_root: receipt_trie.root(),
             },
         });
+
+        observer.collect_tries(
+            block.block_number,
+            batch_index,
+            &state_trie,
+            &storage_tries,
+            &transaction_trie,
+            &receipt_trie,
+        )
     } // batch in batches
 
     Ok(out)
diff --git a/trace_decoder/src/lib.rs b/trace_decoder/src/lib.rs
index 53db82a69..049472c40 100644
--- a/trace_decoder/src/lib.rs
+++ b/trace_decoder/src/lib.rs
@@ -69,6 +69,8 @@
 pub use core::entrypoint;
 
 mod core;
+/// Implementation of the observer for the trace decoder.
+pub mod observer;
 /// Like `#[serde(with = "hex")`, but tolerates and emits leading `0x` prefixes
 mod hex {
     use serde::{de::Error as _, Deserialize as _, Deserializer, Serializer};
diff --git a/trace_decoder/src/observer.rs b/trace_decoder/src/observer.rs
new file mode 100644
index 000000000..320019e55
--- /dev/null
+++ b/trace_decoder/src/observer.rs
@@ -0,0 +1,113 @@
+use std::collections::BTreeMap;
+use std::marker::PhantomData;
+
+use ethereum_types::{H256, U256};
+
+use crate::core::IntraBlockTries;
+use crate::typed_mpt::{ReceiptTrie, StorageTrie, TransactionTrie};
+
+/// Observer API for the trace decoder.
+/// The observer is used to collect various debugging and metadata info
+/// from the trace decoder run.
+pub trait Observer<StateTrieT> {
+    /// Collect tries after the transaction/batch execution.
+    ///
+    /// The arguments are passed one by one, by reference, because we
+    /// don't want to clone the tries in case they are not used by the
+    /// observer.
+    fn collect_tries(
+        &mut self,
+        block: U256,
+        batch: usize,
+        state_trie: &StateTrieT,
+        storage: &BTreeMap<H256, StorageTrie>,
+        transaction_trie: &TransactionTrie,
+        receipt_trie: &ReceiptTrie,
+    );
+}
+
+#[derive(Debug)]
+/// Data element collected by the tries observer. It contains the data
+/// gathered while the trace decoder processes the batches of a block;
+/// one element is stored per batch.
+pub struct TriesObserverElement<StateTrieT> {
+    /// Block where the tries are collected.
+    pub block: U256,
+    /// Tries were collected after the trace decoder processed batch `batch`.
+    pub batch: usize,
+    /// State, transaction, and receipt tries after the batch
+    /// execution (how the trace decoder sees them).
+    pub tries: IntraBlockTries<StateTrieT>,
+}
+
+/// Observer for collection of post-execution tries from the
+/// trace decoder run.
+#[derive(Debug)]
+pub struct TriesObserver<StateTrieT> {
+    /// Data collected in the observer pass.
+    pub data: Vec<TriesObserverElement<StateTrieT>>,
+}
+
+impl<StateTrieT> TriesObserver<StateTrieT> {
+    /// Creates a new tries-collecting observer.
+    pub fn new() -> Self {
+        TriesObserver::<StateTrieT> { data: Vec::new() }
+    }
+}
+
+impl<StateTrieT: Clone> Observer<StateTrieT> for TriesObserver<StateTrieT> {
+    fn collect_tries(
+        &mut self,
+        block: U256,
+        batch: usize,
+        state_trie: &StateTrieT,
+        storage: &BTreeMap<H256, StorageTrie>,
+        transaction_trie: &TransactionTrie,
+        receipt_trie: &ReceiptTrie,
+    ) {
+        self.data.push(TriesObserverElement {
+            block,
+            batch,
+            tries: IntraBlockTries {
+                state: state_trie.clone(),
+                storage: storage.clone(),
+                transaction: transaction_trie.clone(),
+                receipt: receipt_trie.clone(),
+            },
+        });
+    }
+}
+
+impl<StateTrieT> Default for TriesObserver<StateTrieT> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Dummy observer which does not collect any data.
+#[derive(Default, Debug)]
+pub struct DummyObserver<StateTrieT> {
+    phantom: PhantomData<StateTrieT>,
+}
+
+impl<StateTrieT> DummyObserver<StateTrieT> {
+    /// Create a new dummy observer.
+    pub fn new() -> Self {
+        DummyObserver::<StateTrieT> {
+            phantom: Default::default(),
+        }
+    }
+}
+
+impl<StateTrieT> Observer<StateTrieT> for DummyObserver<StateTrieT> {
+    fn collect_tries(
+        &mut self,
+        _block: U256,
+        _batch: usize,
+        _state_trie: &StateTrieT,
+        _storage: &BTreeMap<H256, StorageTrie>,
+        _transaction_trie: &TransactionTrie,
+        _receipt_trie: &ReceiptTrie,
+    ) {
+    }
+}
diff --git a/trace_decoder/src/typed_mpt.rs b/trace_decoder/src/typed_mpt.rs
index dc56d54a1..8baf3cf29 100644
--- a/trace_decoder/src/typed_mpt.rs
+++ b/trace_decoder/src/typed_mpt.rs
@@ -271,6 +271,7 @@ impl From for HashedPartialTrie {
     }
 }
 
+/// TODO(0xaatif): document this after refactoring is done https://github.com/0xPolygonZero/zk_evm/issues/275
 pub trait StateTrie {
     fn insert_by_address(
         &mut self,
diff --git a/trace_decoder/tests/consistent-with-header.rs b/trace_decoder/tests/consistent-with-header.rs
index f82027e82..609fd57bb 100644
--- a/trace_decoder/tests/consistent-with-header.rs
+++ b/trace_decoder/tests/consistent-with-header.rs
@@ -10,6 +10,7 @@ use common::{cases, Case};
 use itertools::Itertools;
 use libtest_mimic::{Arguments, Trial};
 use mpt_trie::partial_trie::PartialTrie as _;
+use trace_decoder::observer::DummyObserver;
 
 fn main() -> anyhow::Result<()> {
     let mut trials = vec![];
@@ -23,8 +24,13 @@ fn main() -> anyhow::Result<()> {
     } in cases()?
     {
         trials.push(Trial::test(format!("{name}@{batch_size}"), move || {
-            let gen_inputs = trace_decoder::entrypoint(trace, other.clone(), batch_size)
-                .map_err(|e| format!("{e:?}"))?; // get the full cause chain
+            let gen_inputs = trace_decoder::entrypoint(
+                trace,
+                other.clone(),
+                batch_size,
+                &mut DummyObserver::new(),
+            )
+            .map_err(|e| format!("{e:?}"))?; // get the full cause chain
             check!(gen_inputs.len() >= 2);
             check!(
                 Some(other.checkpoint_state_trie_root)
diff --git a/trace_decoder/tests/simulate-execution.rs b/trace_decoder/tests/simulate-execution.rs
index c4cbe53b2..d0476c2b7 100644
--- a/trace_decoder/tests/simulate-execution.rs
+++ b/trace_decoder/tests/simulate-execution.rs
@@ -8,6 +8,7 @@ use anyhow::Context as _;
 use common::{cases, Case};
 use libtest_mimic::{Arguments, Trial};
 use plonky2::field::goldilocks_field::GoldilocksField;
+use trace_decoder::observer::DummyObserver;
 
 fn main() -> anyhow::Result<()> {
     let mut trials = vec![];
@@ -19,9 +20,11 @@ fn main() -> anyhow::Result<()> {
         other,
     } in cases()?
     {
-        let gen_inputs = trace_decoder::entrypoint(trace, other, batch_size).context(
-            format!("error in `trace_decoder` for {name} at batch size {batch_size}"),
-        )?;
+        let gen_inputs =
+            trace_decoder::entrypoint(trace, other, batch_size, &mut DummyObserver::new())
+                .context(format!(
+                    "error in `trace_decoder` for {name} at batch size {batch_size}"
+                ))?;
         for (ix, gi) in gen_inputs.into_iter().enumerate() {
             trials.push(Trial::test(
                 format!("{name}@{batch_size}/{ix}"),
diff --git a/zero/Cargo.toml b/zero/Cargo.toml
index 7bc42709c..5ccb57c96 100644
--- a/zero/Cargo.toml
+++ b/zero/Cargo.toml
@@ -30,6 +30,8 @@ once_cell = { workspace = true }
 paladin-core = { workspace = true }
 plonky2 = { workspace = true }
 plonky2_maybe_rayon = { workspace = true }
+regex = "1.5.4"
+rlp = { workspace = true }
 ruint = { workspace = true, features = ["num-traits", "primitive-types"] }
 serde = { workspace = true }
 serde_json = { workspace = true }
diff --git a/zero/src/bin/rpc.rs b/zero/src/bin/rpc.rs
index 9c6baa535..d49cdde5c 100644
--- a/zero/src/bin/rpc.rs
+++ b/zero/src/bin/rpc.rs
@@ -8,6 +8,7 @@ use alloy::transports::Transport;
 use anyhow::anyhow;
 use clap::{Args, Parser, Subcommand, ValueHint};
 use futures::StreamExt;
+use trace_decoder::observer::DummyObserver;
 use tracing_subscriber::{prelude::*, EnvFilter};
 use url::Url;
 use zero::block_interval::BlockInterval;
@@ -170,6 +171,7 @@ impl Cli {
                     block_prover_input.block_trace,
                     block_prover_input.other_data,
                     batch_size,
+                    &mut DummyObserver::new(),
                 )?;
 
                 if let Some(index) = tx_info.transaction_index {
diff --git a/zero/src/bin/trie_diff.rs b/zero/src/bin/trie_diff.rs
new file mode 100644
index 000000000..d454fdda6
--- /dev/null
+++ b/zero/src/bin/trie_diff.rs
@@ -0,0 +1,167 @@
+//! This binary is a debugging tool that compares the trace decoder output
+//! tries with the tries left after kernel execution (state, transaction and
+//! receipt). As input, it takes a standard witness JSON file (the same as
+//! `leader` in stdio mode), and it runs the trace decoder and the prover's
+//! `test_only` mode block by block. On the first error, the trace decoder
+//! and prover tries are compared, and the details of the trie differences
+//! are printed.
+
+use std::io::Read;
+use std::iter::repeat;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use anyhow::Result;
+use clap::{Parser, ValueHint};
+use evm_arithmetization::generation::DebugOutputTries;
+use futures::{future, TryStreamExt};
+use paladin::directive::{Directive, IndexedStream};
+use paladin::runtime::Runtime;
+use regex::Regex;
+use trace_decoder::observer::TriesObserver;
+use tracing::{error, info};
+use zero::ops::register;
+use zero::prover::{cli::CliProverConfig, BlockProverInput, ProverConfig};
+
+#[derive(Parser)]
+#[command(version = zero::version(), propagate_version = true)]
+pub(crate) struct Cli {
+    #[clap(flatten)]
+    pub(crate) prover_config: CliProverConfig,
+
+    /// The previous proof output.
+    #[arg(long, short = 'f', value_hint = ValueHint::FilePath)]
+    previous_proof: Option<PathBuf>,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    zero::tracing::init();
+
+    let args = Cli::parse();
+
+    // Load the witness input from stdin.
+    let mut buffer = String::new();
+    std::io::stdin().read_to_string(&mut buffer)?;
+
+    // This is a debug run, so we always use in-memory execution.
+    let paladin_config = paladin::config::Config {
+        amqp_uri: None,
+        runtime: paladin::config::Runtime::InMemory,
+        ..Default::default()
+    };
+    let runtime = Arc::new(Runtime::from_config(&paladin_config, register()).await?);
+
+    // Tries are computed in the kernel, so there is no need to run proving;
+    // test_only mode is enough. We hardcode the prover arguments needed for trie diff.
+    let prover_config: Arc<ProverConfig> = Arc::new(ProverConfig {
+        test_only: true,
+        save_inputs_on_error: true,
+        save_tries_on_error: true,
+        ..args.prover_config.into()
+    });
+
+    let seg_ops = zero::ops::SegmentProofTestOnly {
+        save_inputs_on_error: prover_config.save_inputs_on_error,
+        save_tries_on_error: prover_config.save_tries_on_error,
+    };
+
+    let des = &mut serde_json::Deserializer::from_str(&buffer);
+    let block_prover_inputs = serde_path_to_error::deserialize::<_, Vec<BlockProverInput>>(des)?
+        .into_iter()
+        .collect::<Vec<BlockProverInput>>();
+
+    for block_prover_input in block_prover_inputs {
+        let mut observer = TriesObserver::new();
+        let block_number = block_prover_input
+            .other_data
+            .b_data
+            .b_meta
+            .block_number
+            .low_u64();
+        let block_generation_inputs = trace_decoder::entrypoint(
+            block_prover_input.block_trace.clone(),
+            block_prover_input.other_data.clone(),
+            prover_config.batch_size,
+            &mut observer,
+        )?;
+        info!(
+            "Number of collected batch tries for block {}: {}",
+            block_number,
+            observer.data.len()
+        );
+
+        info!("Running trie diff simulation for block {block_number} ...");
+        let simulation = Directive::map(
+            IndexedStream::from(
+                block_generation_inputs
+                    .clone()
+                    .into_iter()
+                    .enumerate()
+                    .zip(repeat(prover_config.max_cpu_len_log))
+                    .map(|((batch_index, inputs), max_cpu_len_log)| {
+                        (inputs, max_cpu_len_log, batch_index)
+                    }),
+            ),
+            &seg_ops,
+        );
+
+        if let Err(e2) = simulation
+            .run(&runtime)
+            .await
+            .inspect_err(|e1| {
+                error!("Failed to run simulation for block {block_number}, error: {e1}")
+            })?
+            .try_for_each(|_| future::ok(()))
+            .await
+        {
+            // Try to parse the block and batch index from the error message.
+            let error_message = e2.to_string();
+            let re = Regex::new(r"block:(\d+) batch:(\d+)")?;
+            if let Some(cap) = re.captures(&error_message) {
+                let block_number: u64 = cap[1].parse()?;
+                let batch_index: usize = cap[2].parse()?;
+
+                let prover_tries =
+                    zero::debug_utils::load_tries_from_disk(block_number, batch_index)?;
+
+                info!("Performing trie comparison for block {block_number} batch {batch_index}...");
+                zero::trie_diff::compare_tries(
+                    &block_prover_input,
+                    batch_index,
+                    &DebugOutputTries {
+                        state_trie: observer.data[prover_tries.batch_index]
+                            .tries
+                            .state
+                            .as_hashed_partial_trie()
+                            .clone(),
+                        transaction_trie: observer.data[prover_tries.batch_index]
+                            .tries
+                            .transaction
+                            .clone()
+                            .into(),
+                        receipt_trie: observer.data[prover_tries.batch_index]
+                            .tries
+                            .receipt
+                            .clone()
+                            .into(),
+                    },
+                    &prover_tries.tries,
+                )?;
+
+                info!("Trie comparison finished for block {block_number} batch {batch_index}");
+                return Ok(());
+            } else {
+                error!(
+                    "Failed to extract block and batch numbers from error message, could not run tries comparison: {}",
+                    error_message
+                );
+                return Err(e2);
+            }
+        }
+
+        info!("Trie diff for block {block_number} finished, no problems found.")
+    }
+
+    Ok(())
+}
diff --git a/zero/src/debug_utils.rs b/zero/src/debug_utils.rs
index f8cb53dd6..970c646c2 100644
--- a/zero/src/debug_utils.rs
+++ b/zero/src/debug_utils.rs
@@ -1,67 +1,13 @@
 use std::fs::{self, File};
-use std::io::{self, Write};
+use std::io::Write;
 use std::path::{Path, PathBuf};
 
-use serde::Serialize;
-use serde_json::Error as SerdeError;
-use thiserror::Error;
+use anyhow::Context;
+use evm_arithmetization::generation::DebugOutputTries;
+use serde::{Deserialize, Serialize};
 
 const DEBUG_FOLDER: &str = "./debug";
 
-/// Ensures that the specified directory exists on the filesystem.
-///
-/// This function checks if the directory at `folder_path` exists. If not, it
-/// attempts to create the directory. It returns an error if the path is not a
-/// directory or if there are issues accessing or creating the directory.
-///
-/// # Parameters
-/// * `folder_path` - A reference to a `Path` that specifies the directory to
-///   check or create.
-///
-/// # Returns
-/// * `Ok(())` - The directory exists or was successfully created.
-/// * `Err(io::Error)` - The path is not a directory, or there was a problem
-///   accessing or creating the directory.
-fn ensure_directory_exists(folder_path: &Path) -> io::Result<()> {
-    match fs::metadata(folder_path) {
-        Ok(metadata) => {
-            if metadata.is_dir() {
-                Ok(()) // The directory already exists
-            } else {
-                Err(io::Error::new(
-                    io::ErrorKind::AlreadyExists,
-                    "The path exists but is not a directory",
-                ))
-            }
-        }
-        Err(e) => {
-            if e.kind() == io::ErrorKind::NotFound {
-                // Directory does not exist, try to create it
-                fs::create_dir(folder_path)
-            } else {
-                // Re-throw the error if it's not a 'NotFound' error
-                Err(e)
-            }
-        }
-    }
-}
-
-/// An error type for save debug input information.
-#[derive(Error, Debug)]
-pub enum SaveInputError {
-    #[error("failed to create directory '{0}'")]
-    CreateDirectoryError(PathBuf, #[source] io::Error),
-
-    #[error("failed to create file '{0}'")]
-    CreateFileError(PathBuf, #[source] io::Error),
-
-    #[error("failed to serialize inputs")]
-    SerializationError(#[source] SerdeError),
-
-    #[error("failed to write to file '{0}'")]
-    WriteToFileError(PathBuf, #[source] io::Error),
-}
-
 /// Serializes a collection of inputs to a pretty-printed JSON format and saves
 /// them to a file.
 ///
@@ -76,27 +22,73 @@ pub enum SaveInputError {
 ///
 /// This function returns a `Result<(), std::io::Error>` indicating the
 /// operation's success or failure.
-pub fn save_inputs_to_disk<T: Serialize>(
-    file_name: String,
-    inputs: T,
-) -> Result<(), SaveInputError> {
+pub fn save_inputs_to_disk<T: Serialize>(file_name: String, inputs: T) -> anyhow::Result<()> {
     let debug_folder = Path::new(DEBUG_FOLDER);
-    let input_file_path = debug_folder.join(file_name);
 
-    // Ensure the DEBUG_FOLDER exists
-    ensure_directory_exists(debug_folder)
-        .map_err(|e| SaveInputError::CreateDirectoryError(debug_folder.to_path_buf(), e))?;
+    // Check if output directory exists, and create one if it doesn't.
+    if !debug_folder.exists() {
+        fs::create_dir(debug_folder)?;
+    }
 
-    let mut file = File::create(&input_file_path)
-        .map_err(|e| SaveInputError::CreateFileError(input_file_path.clone(), e))?;
+    let input_file_path = debug_folder.join(file_name);
+    let mut file = File::create(&input_file_path)?;
 
     // Serialize the entire collection to a pretty JSON string
-    let all_inputs_str =
-        serde_json::to_string_pretty(&inputs).map_err(SaveInputError::SerializationError)?;
+    let all_inputs_str = serde_json::to_string_pretty(&inputs)?;
 
     // Write the serialized data to the file
-    file.write_all(all_inputs_str.as_bytes())
-        .map_err(|e| SaveInputError::WriteToFileError(input_file_path, e))?;
+    file.write_all(all_inputs_str.as_bytes())?;
+
+    Ok(())
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ErrorTrieFile {
+    pub error: String,
+    pub block_number: u64,
+    pub batch_index: usize,
+    pub tries: DebugOutputTries,
+}
+
+pub fn generate_trie_debug_file_name(block_number: u64, batch_index: usize) -> String {
+    format!("b{}_batch{}_error_tries.data", block_number, batch_index)
+}
+
+pub fn save_tries_to_disk(
+    err: &str,
+    block_number: u64,
+    batch_index: usize,
+    tries: &DebugOutputTries,
+) -> anyhow::Result<()> {
+    let output_dir = PathBuf::from(DEBUG_FOLDER);
+
+    // Check if output directory exists, and create one if it doesn't.
+    if !output_dir.exists() {
+        fs::create_dir(output_dir.clone())?;
+    }
+    let mut tries_debug_file_path = output_dir;
+    tries_debug_file_path.push(generate_trie_debug_file_name(block_number, batch_index));
+
+    let simulation_error_str = serde_json::to_string(&ErrorTrieFile {
+        error: err.to_string(),
+        block_number,
+        batch_index,
+        tries: tries.clone(),
+    })
+    .context("unable to serialize simulation error to save tries")?;
+    fs::write(tries_debug_file_path, simulation_error_str)
+        .expect("unable to write simulation error to file");
 
     Ok(())
 }
+
+pub fn load_tries_from_disk(
+    block_number: u64,
+    batch_index: usize,
+) -> anyhow::Result<ErrorTrieFile> {
+    let mut tries_debug_file_path = PathBuf::from(DEBUG_FOLDER);
+    tries_debug_file_path.push(generate_trie_debug_file_name(block_number, batch_index));
+    let file = File::open(tries_debug_file_path)?;
+    let data: ErrorTrieFile = serde_json::from_reader(file)?;
+    Ok(data)
+}
diff --git a/zero/src/lib.rs b/zero/src/lib.rs
index 066ecc87e..c2ca63f6a 100644
--- a/zero/src/lib.rs
+++ b/zero/src/lib.rs
@@ -13,6 +13,7 @@ pub mod prover_state;
 pub mod provider;
 pub mod rpc;
 pub mod tracing;
+pub mod trie_diff;
 
 /// Size of the channel used to send block prover inputs to the per block
 /// proving task. If the proving task is slow and can not consume inputs fast
diff --git a/zero/src/ops.rs b/zero/src/ops.rs
index b08b5a925..cc180c7e5 100644
--- a/zero/src/ops.rs
+++ b/zero/src/ops.rs
@@ -2,6 +2,7 @@ zk_evm_common::check_chain_features!();
 
 use std::time::Instant;
 
+use anyhow::anyhow;
 use evm_arithmetization::fixed_recursive_verifier::ProverOutputData;
 use evm_arithmetization::{prover::testing::simulate_execution_all_segments, GenerationInputs};
 use evm_arithmetization::{Field, PublicValues, TrimmedGenerationInputs};
@@ -13,6 +14,7 @@ use serde::{Deserialize, Serialize};
 use tracing::error;
 use tracing::{event, info_span, Level};
 
+use crate::debug_utils::save_tries_to_disk;
 use crate::proof_types::{
     BatchAggregatableProof, GeneratedBlockProof, GeneratedSegmentAggProof, GeneratedTxnAggProof,
     SegmentAggregatableProof,
@@ -70,32 +72,58 @@ impl Operation for SegmentProof {
 #[derive(Deserialize, Serialize, RemoteExecute)]
 pub struct SegmentProofTestOnly {
     pub save_inputs_on_error: bool,
+    pub save_tries_on_error: bool,
 }
 
 impl Operation for SegmentProofTestOnly {
-    type Input = (GenerationInputs, usize);
+    // The input is a tuple of the batch generation inputs, max_cpu_len_log and
+    // batch index.
+    type Input = (GenerationInputs, usize, usize);
     type Output = ();
 
     fn execute(&self, inputs: Self::Input) -> Result<Self::Output> {
-        if self.save_inputs_on_error {
-            simulate_execution_all_segments::<Field>(inputs.0.clone(), inputs.1).map_err(|e| {
-                if let Err(write_err) = save_inputs_to_disk(
-                    format!(
-                        "b{}_txns_{}..{}_input.json",
-                        inputs.0.block_metadata.block_number,
-                        inputs.0.txn_number_before,
-                        inputs.0.txn_number_before + inputs.0.signed_txns.len(),
-                    ),
-                    inputs.0,
-                ) {
-                    error!("Failed to save txn proof input to disk: {:?}", write_err);
+        if self.save_inputs_on_error || self.save_tries_on_error {
+            simulate_execution_all_segments::<Field>(inputs.0.clone(), inputs.1).map_err(|err| {
+                let block_number = inputs.0.block_metadata.block_number.low_u64();
+                let batch_index = inputs.2;
+
+                let err = if self.save_tries_on_error {
+                    if let Some(ref tries) = err.tries {
+                        if let Err(write_err) =
+                            save_tries_to_disk(&err.to_string(), block_number, batch_index, tries)
+                        {
+                            error!("Failed to save tries to disk: {:?}", write_err);
+                        }
+                    }
+                    anyhow!(
+                        "block:{} batch:{} error: {}",
+                        block_number,
+                        batch_index,
+                        err.to_string()
+                    )
+                } else {
+                    err.into()
+                };
+
+                if self.save_inputs_on_error {
+                    if let Err(write_err) = save_inputs_to_disk(
+                        format!(
+                            "b{}_txns_{}..{}_input.json",
+                            block_number,
+                            inputs.0.txn_number_before,
+                            inputs.0.txn_number_before + inputs.0.signed_txns.len(),
+                        ),
+                        inputs.0,
+                    ) {
+                        error!("Failed to save txn proof input to disk: {:?}", write_err);
+                    }
                 }
-                FatalError::from_str(&e.to_string(), FatalStrategy::Terminate)
+                FatalError::from_anyhow(err, FatalStrategy::Terminate)
            })?
         } else {
             simulate_execution_all_segments::<Field>(inputs.0, inputs.1)
-                .map_err(|e| FatalError::from_str(&e.to_string(), FatalStrategy::Terminate))?;
+                .map_err(|err| FatalError::from_anyhow(err.into(), FatalStrategy::Terminate))?;
         }
 
         Ok(())
diff --git a/zero/src/prover.rs b/zero/src/prover.rs
index 1625752b9..665d4f828 100644
--- a/zero/src/prover.rs
+++ b/zero/src/prover.rs
@@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize};
 use tokio::io::AsyncWriteExt;
 use tokio::sync::mpsc::Receiver;
 use tokio::sync::{oneshot, Semaphore};
+use trace_decoder::observer::DummyObserver;
 use trace_decoder::{BlockTrace, OtherBlockData};
 use tracing::{error, info};
 
@@ -48,6 +49,7 @@ pub struct ProverConfig {
     pub keep_intermediate_proofs: bool,
     pub block_batch_size: usize,
     pub block_pool_size: usize,
+    pub save_tries_on_error: bool,
 }
 
 #[derive(Clone, Debug, Deserialize, Serialize)]
@@ -81,8 +83,12 @@ impl BlockProverInput {
 
         let block_number = self.get_block_number();
 
-        let block_generation_inputs =
-            trace_decoder::entrypoint(self.block_trace, self.other_data, batch_size)?;
+        let block_generation_inputs = trace_decoder::entrypoint(
+            self.block_trace,
+            self.other_data,
+            batch_size,
+            &mut DummyObserver::new(),
+        )?;
 
         // Create segment proof.
         let seg_prove_ops = ops::SegmentProof {
@@ -163,24 +169,34 @@ impl BlockProverInput {
             max_cpu_len_log,
             batch_size,
             save_inputs_on_error,
+            save_tries_on_error,
             ..
         } = *prover_config;
 
         let block_number = self.get_block_number();
 
         info!("Testing witness generation for block {block_number}.");
 
-        let block_generation_inputs =
-            trace_decoder::entrypoint(self.block_trace, self.other_data, batch_size)?;
+        let block_generation_inputs = trace_decoder::entrypoint(
+            self.block_trace,
+            self.other_data,
+            batch_size,
+            &mut DummyObserver::new(),
+        )?;
 
         let seg_ops = ops::SegmentProofTestOnly {
             save_inputs_on_error,
+            save_tries_on_error,
         };
 
         let simulation = Directive::map(
             IndexedStream::from(
                 block_generation_inputs
                     .into_iter()
-                    .zip(repeat(max_cpu_len_log)),
+                    .enumerate()
+                    .zip(repeat(max_cpu_len_log))
+                    .map(|((batch_index, txn_batch), max_cpu_len_log)| {
+                        (txn_batch, max_cpu_len_log, batch_index)
+                    }),
             ),
             &seg_ops,
         );
diff --git a/zero/src/prover/cli.rs b/zero/src/prover/cli.rs
index a6cdaebf9..87e79bc65 100644
--- a/zero/src/prover/cli.rs
+++ b/zero/src/prover/cli.rs
@@ -60,6 +60,7 @@ impl From<CliProverConfig> for super::ProverConfig {
             keep_intermediate_proofs: cli.keep_intermediate_proofs,
             block_batch_size: cli.block_batch_size,
             block_pool_size: cli.block_pool_size,
+            save_tries_on_error: false,
         }
     }
 }
diff --git a/zero/src/trie_diff/mod.rs b/zero/src/trie_diff/mod.rs
new file mode 100644
index 000000000..e154d16e1
--- /dev/null
+++ b/zero/src/trie_diff/mod.rs
@@ -0,0 +1,111 @@
+use evm_arithmetization::generation::mpt::{AccountRlp, LegacyReceiptRlp};
+use evm_arithmetization::generation::DebugOutputTries;
+use mpt_trie::debug_tools::diff::create_diff_between_tries;
+use mpt_trie::utils::TrieNodeType;
+use tracing::info;
+
+use crate::prover::BlockProverInput;
+
+pub fn compare_tries(
+    block_prover_input: &BlockProverInput,
+    batch_index: usize,
+    left: &DebugOutputTries,
+    right: &DebugOutputTries,
+) -> anyhow::Result<()> {
+    let block_number = block_prover_input
+        .other_data
+        .b_data
+        .b_meta
+        .block_number
+        .low_u64();
+    let state_trie_diff = create_diff_between_tries(&left.state_trie, &right.state_trie);
+    if let Some(ref state_trie_diff_point) = state_trie_diff.latest_diff_res {
+        if state_trie_diff_point.a_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref td_account_value) = state_trie_diff_point.a_info.value {
+                let td_account_data = rlp::decode::<AccountRlp>(td_account_value)?;
+                info!("Trace decoder state trie block {block_number} batch {batch_index} account address hash: {} account data: {:#?}",
+                    state_trie_diff_point.a_info.key, td_account_data);
+            } else {
+                info!("Trace decoder state trie block {block_number} batch {batch_index}, skip account printout as diff is not at the leaf node level.");
+            }
+        }
+        if state_trie_diff_point.b_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref prover_account_value) = state_trie_diff_point.b_info.value {
+                let prover_account_data = rlp::decode::<AccountRlp>(prover_account_value)?;
+                info!("Prover state trie block {block_number} batch {batch_index} account address hash: {} account data: {:#?}",
+                    state_trie_diff_point.b_info.key, prover_account_data);
+            } else {
+                info!("Prover state trie block {block_number} batch {batch_index}, skip account printout as diff is not at the leaf node level.");
+            }
+        }
+
+        info!(
+            "State trie block {block_number} batch {batch_index} diff: {:#?}",
+            state_trie_diff_point
+        );
+    } else {
+        info!("State trie for block {block_number} batch {batch_index} matches.");
+    }
+
+    let transaction_trie_diff =
+        create_diff_between_tries(&left.transaction_trie, &right.transaction_trie);
+    if let Some(ref transaction_trie_diff_point) = transaction_trie_diff.latest_diff_res {
+        if transaction_trie_diff_point.a_info.node_type == TrieNodeType::Leaf {
+            let tx_index =
+                rlp::decode::<usize>(transaction_trie_diff_point.a_info.key.as_byte_slice())?;
+            info!("Trace decoder transaction trie block {block_number} batch {batch_index} transaction index {tx_index} rlp bytecode: {:?}",
+                transaction_trie_diff_point.a_info.value.as_ref().map(hex::encode));
+        } else {
+            info!("Trace decoder transaction trie block {block_number} batch {batch_index}, skip tx printout as diff is not at the leaf node level.");
+        }
+        if transaction_trie_diff_point.b_info.node_type == TrieNodeType::Leaf {
+            let tx_index =
+                rlp::decode::<usize>(transaction_trie_diff_point.b_info.key.as_byte_slice())?;
+            info!("Prover transaction trie block {block_number} batch {batch_index} transaction index {tx_index} rlp bytecode: {:?}",
+                transaction_trie_diff_point.b_info.value.as_ref().map(hex::encode));
+        } else {
+            info!("Prover transaction trie block {block_number} batch {batch_index}, skip tx printout as diff is not at the leaf node level.");
+        }
+
+        info!(
+            "Transactions trie block {block_number} batch {batch_index} diff: {:#?}",
+            transaction_trie_diff_point
+        );
+    } else {
+        info!("Transaction trie for block {block_number} batch {batch_index} matches.");
+    }
+
+    let receipt_trie_diff = create_diff_between_tries(&left.receipt_trie, &right.receipt_trie);
+    if let Some(ref receipt_trie_diff_point) = receipt_trie_diff.latest_diff_res {
+        if receipt_trie_diff_point.a_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref td_receipt_value) = receipt_trie_diff_point.a_info.value {
+                let tx_index =
+                    rlp::decode::<usize>(receipt_trie_diff_point.a_info.key.as_byte_slice())?;
+                let td_receipt_data = rlp::decode::<LegacyReceiptRlp>(td_receipt_value)?;
+                info!("Trace decoder receipt trie block {block_number} batch {batch_index} output tx index: {tx_index} receipt data: {:#?}", td_receipt_data);
+            } else {
+                info!("Trace decoder receipt trie block {block_number} batch {batch_index}, skip printout as diff is not at the leaf node level.");
+            }
+        }
+
+        if receipt_trie_diff_point.b_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref prover_receipt_value) = receipt_trie_diff_point.b_info.value {
+                let tx_index =
tx_index = + rlp::decode::(receipt_trie_diff_point.b_info.key.as_byte_slice())?; + let prover_receipt_data = rlp::decode::(prover_receipt_value)?; + info!("Prover receipt trie block {block_number} batch {batch_index} output tx index: {tx_index} receipt data: {:#?}", prover_receipt_data); + } else { + info!("Prover receipt trie block {block_number} batch {batch_index}, skip receipt printout as diff is not at the leaf node level."); + } + } + + println!( + "Receipt trie block {block_number} batch {batch_index} diff: {:#?}", + receipt_trie_diff + ); + } else { + println!("Receipt trie block {block_number} batch {batch_index} matches."); + } + + Ok(()) +}