diff --git a/Cargo.lock b/Cargo.lock
index 770f7253f..03223ee5f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5790,6 +5790,8 @@ dependencies = [
  "plonky2",
  "plonky2_maybe_rayon 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "primitive-types 0.12.2",
+ "regex",
+ "rlp",
  "ruint",
  "serde",
  "serde_json",
diff --git a/evm_arithmetization/src/fixed_recursive_verifier.rs b/evm_arithmetization/src/fixed_recursive_verifier.rs
index 94a658685..3c1f0b1e1 100644
--- a/evm_arithmetization/src/fixed_recursive_verifier.rs
+++ b/evm_arithmetization/src/fixed_recursive_verifier.rs
@@ -37,7 +37,7 @@ use starky::stark::Stark;
 
 use crate::all_stark::{all_cross_table_lookups, AllStark, Table, NUM_TABLES};
 use crate::cpu::kernel::aggregator::KERNEL;
-use crate::generation::segments::{GenerationSegmentData, SegmentDataIterator, SegmentError};
+use crate::generation::segments::{GenerationSegmentData, SegmentDataIterator};
 use crate::generation::{GenerationInputs, TrimmedGenerationInputs};
 use crate::get_challenges::observe_public_values_target;
 use crate::proof::{
@@ -1889,8 +1889,7 @@ where
         let mut proofs = vec![];
 
         for segment_run in segment_iterator {
-            let (_, mut next_data) =
-                segment_run.map_err(|e: SegmentError| anyhow::format_err!(e))?;
+            let (_, mut next_data) = segment_run?;
             let proof = self.prove_segment(
                 all_stark,
                 config,
diff --git a/evm_arithmetization/src/generation/mod.rs b/evm_arithmetization/src/generation/mod.rs
index 9c7625d2b..9279e88d9 100644
--- a/evm_arithmetization/src/generation/mod.rs
+++ b/evm_arithmetization/src/generation/mod.rs
@@ -1,9 +1,10 @@
 use std::collections::HashMap;
+use std::fmt::Display;
 
 use anyhow::anyhow;
 use ethereum_types::{Address, BigEndianHash, H256, U256};
 use keccak_hash::keccak;
-use log::log_enabled;
+use log::error;
 use mpt_trie::partial_trie::{HashedPartialTrie, PartialTrie};
 use plonky2::field::extension::Extendable;
 use plonky2::field::polynomial::PolynomialValues;
@@ -51,6 +52,29 @@ pub const NUM_EXTRA_CYCLES_BEFORE: usize = 64;
 /// Memory values used to initialize `MemBefore`.
 pub type MemBeforeValues = Vec<(MemoryAddress, U256)>;
 
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ErrorWithTries<E> {
+    pub inner: E,
+    pub tries: Option<DebugOutputTries>,
+}
+impl<E: Display> Display for ErrorWithTries<E> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+impl<E: std::error::Error> std::error::Error for ErrorWithTries<E> {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        self.inner.source()
+    }
+}
+
+impl<E> ErrorWithTries<E> {
+    pub fn new(inner: E, tries: Option<DebugOutputTries>) -> Self {
+        Self { inner, tries }
+    }
+}
+
 /// Inputs needed for trace generation.
 #[derive(Clone, Debug, Deserialize, Serialize, Default)]
 #[serde(bound = "")]
@@ -234,6 +258,15 @@ impl GenerationInputs {
     }
 }
 
+/// Post transaction execution tries retrieved from the prover's memory.
+/// Used primarily for error debugging in case of a failed execution.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct DebugOutputTries {
+    pub state_trie: HashedPartialTrie,
+    pub transaction_trie: HashedPartialTrie,
+    pub receipt_trie: HashedPartialTrie,
+}
+
 fn apply_metadata_and_tries_memops<F: RichField + Extendable<D>, const D: usize>(
     state: &mut GenerationState<F>,
     inputs: &TrimmedGenerationInputs,
@@ -492,10 +525,8 @@ pub fn generate_traces<F: RichField + Extendable<D>, const D: usize>(
         "simulate CPU",
         simulate_cpu(&mut state, *max_cpu_len_log)
     );
-    if cpu_res.is_err() {
-        output_debug_tries(&state)?;
-        cpu_res?;
-    };
+
+    cpu_res?;
 
     let trace_lengths = state.traces.get_lengths();
 
@@ -595,59 +626,50 @@ fn simulate_cpu(
     Ok((final_registers, mem_after))
 }
 
-/// Outputs the tries that have been obtained post transaction execution, as
+/// Collects the tries that have been obtained post transaction execution, as
 /// they are represented in the prover's memory.
-/// This will do nothing if the CPU execution failed outside of the final trie
-/// root checks.
-pub(crate) fn output_debug_tries<F: RichField>(state: &GenerationState<F>) -> anyhow::Result<()> {
-    if !log_enabled!(log::Level::Debug) {
-        return Ok(());
-    }
-
-    // Retrieve previous PC (before jumping to KernelPanic), to see if we reached
-    // `perform_final_checks`. We will output debugging information on the final
-    // tries only if we got a root mismatch.
-    let previous_pc = state.get_registers().program_counter;
-
-    let label = KERNEL.offset_name(previous_pc);
-
-    if label.contains("check_state_trie")
-        || label.contains("check_txn_trie")
-        || label.contains("check_receipt_trie")
-    {
-        let state_trie_ptr = u256_to_usize(
-            state
-                .memory
-                .read_global_metadata(GlobalMetadata::StateTrieRoot),
-        )
-        .map_err(|_| anyhow!("State trie pointer is too large to fit in a usize."))?;
-        log::debug!(
-            "Computed state trie: {:?}",
-            get_state_trie::<HashedPartialTrie>(&state.memory, state_trie_ptr)
-        );
-
-        let txn_trie_ptr = u256_to_usize(
-            state
-                .memory
-                .read_global_metadata(GlobalMetadata::TransactionTrieRoot),
-        )
-        .map_err(|_| anyhow!("Transactions trie pointer is too large to fit in a usize."))?;
-        log::debug!(
-            "Computed transactions trie: {:?}",
-            get_txn_trie::<HashedPartialTrie>(&state.memory, txn_trie_ptr)
-        );
-
-        let receipt_trie_ptr = u256_to_usize(
-            state
-                .memory
-                .read_global_metadata(GlobalMetadata::ReceiptTrieRoot),
-        )
-        .map_err(|_| anyhow!("Receipts trie pointer is too large to fit in a usize."))?;
-        log::debug!(
-            "Computed receipts trie: {:?}",
-            get_receipt_trie::<HashedPartialTrie>(&state.memory, receipt_trie_ptr)
-        );
-    }
-
-    Ok(())
+pub(crate) fn collect_debug_tries<F: RichField>(
+    state: &GenerationState<F>,
+) -> Option<DebugOutputTries> {
+    let state_trie_ptr = u256_to_usize(
+        state
+            .memory
+            .read_global_metadata(GlobalMetadata::StateTrieRoot),
+    )
+    .inspect_err(|e| error!("failed to retrieve state trie pointer: {e:?}"))
+    .ok()?;
+
+    let state_trie = get_state_trie::<HashedPartialTrie>(&state.memory, state_trie_ptr)
+        .inspect_err(|e| error!("unable to retrieve state trie for debugging purposes: {e:?}"))
+        .ok()?;
+
+    let txn_trie_ptr = u256_to_usize(
+        state
+            .memory
+            .read_global_metadata(GlobalMetadata::TransactionTrieRoot),
+    )
+    .inspect_err(|e| error!("failed to retrieve transactions trie pointer: {e:?}"))
+    .ok()?;
+    let transaction_trie = get_txn_trie::<HashedPartialTrie>(&state.memory, txn_trie_ptr)
+        .inspect_err(|e| {
+            error!("unable to retrieve transaction trie for debugging purposes: {e:?}",)
+        })
+        .ok()?;
+
+    let receipt_trie_ptr = u256_to_usize(
+        state
+            .memory
+            .read_global_metadata(GlobalMetadata::ReceiptTrieRoot),
+    )
+    .inspect_err(|e| error!("failed to retrieve receipts trie pointer: {e:?}"))
+    .ok()?;
+    let receipt_trie =
+        get_receipt_trie::<HashedPartialTrie>(&state.memory, receipt_trie_ptr)
+            .inspect_err(|e| error!("unable to retrieve receipt trie for debugging purposes: {e:?}"))
+            .ok()?;
+
+    Some(DebugOutputTries {
+        state_trie,
+        transaction_trie,
+        receipt_trie,
+    })
 }
diff --git a/evm_arithmetization/src/generation/segments.rs b/evm_arithmetization/src/generation/segments.rs
index 51de9fa83..b3a129137 100644
--- a/evm_arithmetization/src/generation/segments.rs
+++ b/evm_arithmetization/src/generation/segments.rs
@@ -9,7 +9,7 @@ use super::TrimmedGenerationInputs;
 use crate::cpu::kernel::aggregator::KERNEL;
 use crate::cpu::kernel::interpreter::{set_registers_and_run, ExtraSegmentData, Interpreter};
 use crate::generation::state::State;
-use crate::generation::{debug_inputs, GenerationInputs};
+use crate::generation::{collect_debug_tries, debug_inputs, ErrorWithTries, GenerationInputs};
 use crate::witness::memory::MemoryState;
 use crate::witness::state::RegistersState;
 
@@ -88,8 +88,10 @@ pub struct SegmentDataIterator {
 pub type SegmentRunResult = Option<Box<(GenerationSegmentData, Option<GenerationSegmentData>)>>;
 
 #[derive(thiserror::Error, Debug, Serialize, Deserialize)]
-#[error("{}", .0)]
-pub struct SegmentError(pub String);
+#[error("{}", .message)]
+pub struct SegmentError {
+    pub message: String,
+}
 
 impl<F: RichField> SegmentDataIterator<F> {
     pub fn new(inputs: &GenerationInputs, max_cpu_len_log: Option<usize>) -> Self {
@@ -113,7 +115,7 @@ impl<F: RichField> SegmentDataIterator<F> {
     fn generate_next_segment(
         &mut self,
         partial_segment_data: Option<GenerationSegmentData>,
-    ) -> Result<SegmentRunResult, SegmentError> {
+    ) -> Result<SegmentRunResult, ErrorWithTries<SegmentError>> {
         // Get the (partial) current segment data, if it is provided. Otherwise,
         // initialize it.
         let mut segment_data = if let Some(partial) = partial_segment_data {
@@ -133,8 +135,9 @@ impl<F: RichField> SegmentDataIterator<F> {
 
         // Run the interpreter to get `registers_after` and the partial data for the
         // next segment.
-        let run = set_registers_and_run(segment_data.registers_after, &mut self.interpreter);
-        if let Ok((updated_registers, mem_after)) = run {
+        let execution_result =
+            set_registers_and_run(segment_data.registers_after, &mut self.interpreter);
+        if let Ok((updated_registers, mem_after)) = execution_result {
             let partial_segment_data = Some(build_segment_data(
                 segment_index + 1,
                 Some(updated_registers),
@@ -157,21 +160,28 @@ impl<F: RichField> SegmentDataIterator<F> {
                     inputs.txn_number_before + inputs.txn_hashes.len()
                 ),
             };
-            let s = format!(
-                "Segment generation {:?} for block {:?} ({}) failed with error {:?}",
-                segment_index,
-                block,
-                txn_range,
-                run.unwrap_err()
-            );
-            Err(SegmentError(s))
+            // In case of an error, return the tries as part of the error for easier debugging.
+            Err(ErrorWithTries::new(
+                SegmentError {
+                    message: format!(
+                        "Segment generation {:?} for block:{} batch:{} tx_range:({}) failed with error {:?}",
+                        segment_index,
+                        block.low_u64(),
+                        segment_index,
+                        txn_range,
+                        execution_result.unwrap_err()
+                    ),
+                },
+                collect_debug_tries(self.interpreter.get_generation_state()),
+            ))
         }
     }
 }
 
 /// Returned type from a `SegmentDataIterator`, needed to prove all segments in
 /// a transaction batch.
-pub type AllData = Result<(TrimmedGenerationInputs, GenerationSegmentData), SegmentError>;
+pub type AllData =
+    Result<(TrimmedGenerationInputs, GenerationSegmentData), ErrorWithTries<SegmentError>>;
 
 impl<F: RichField> Iterator for SegmentDataIterator<F> {
     type Item = AllData;
diff --git a/evm_arithmetization/src/prover.rs b/evm_arithmetization/src/prover.rs
index 079fe3469..f97fc8b40 100644
--- a/evm_arithmetization/src/prover.rs
+++ b/evm_arithmetization/src/prover.rs
@@ -370,13 +370,10 @@ pub(crate) fn features_check(inputs: &TrimmedGenerationInputs)
 /// A utility module designed to test witness generation externally.
 pub mod testing {
     use super::*;
+    use crate::generation::ErrorWithTries;
     use crate::{
         cpu::kernel::interpreter::Interpreter,
-        generation::{
-            output_debug_tries,
-            segments::{SegmentDataIterator, SegmentError},
-            state::State,
-        },
+        generation::segments::{SegmentDataIterator, SegmentError},
     };
 
     /// Simulates the zkEVM CPU execution.
@@ -388,13 +385,7 @@ pub mod testing {
         let initial_offset = KERNEL.global_labels["init"];
         let mut interpreter: Interpreter<F> =
             Interpreter::new_with_generation_inputs(initial_offset, initial_stack, &inputs, None);
-        let result = interpreter.run();
-
-        if result.is_err() {
-            output_debug_tries(interpreter.get_generation_state())?;
-        }
-
-        result?;
+        interpreter.run()?;
 
         Ok(())
     }
@@ -415,8 +406,7 @@ pub mod testing {
         let mut proofs = vec![];
 
         for segment_run in segment_data_iterator {
-            let (_, mut next_data) =
-                segment_run.map_err(|e: SegmentError| anyhow::format_err!(e))?;
+            let (_, mut next_data) = segment_run?;
             let proof = prove(
                 all_stark,
                 config,
@@ -434,16 +424,14 @@ pub mod testing {
     pub fn simulate_execution_all_segments<F>(
         inputs: GenerationInputs,
         max_cpu_len_log: usize,
-    ) -> Result<()>
+    ) -> Result<(), ErrorWithTries<SegmentError>>
     where
        F: RichField,
     {
        features_check(&inputs.clone().trim());
 
        for segment in SegmentDataIterator::<F>::new(&inputs, Some(max_cpu_len_log)) {
-            if let Err(e) = segment {
-                return Err(anyhow::format_err!(e));
-            }
+            segment?;
        }
 
        Ok(())
diff --git a/evm_arithmetization/src/public_types.rs b/evm_arithmetization/src/public_types.rs
index 9d3de7196..0b917317d 100644
--- a/evm_arithmetization/src/public_types.rs
+++ b/evm_arithmetization/src/public_types.rs
@@ -39,7 +39,10 @@ pub type ProofWithPublicInputs =
 /// proofs.
 pub type PublicValues = crate::proof::PublicValues;
 
-pub type AllData = Result<(TrimmedGenerationInputs, GenerationSegmentData), SegmentError>;
+pub type AllData = Result<
+    (TrimmedGenerationInputs, GenerationSegmentData),
+    crate::generation::ErrorWithTries<SegmentError>,
+>;
 
 /// Returned type from the zkEVM STARK prover, before recursive verification.
 pub type AllProof = crate::proof::AllProof;
diff --git a/mpt_trie/src/debug_tools/diff.rs b/mpt_trie/src/debug_tools/diff.rs
index 12775b445..880984290 100644
--- a/mpt_trie/src/debug_tools/diff.rs
+++ b/mpt_trie/src/debug_tools/diff.rs
@@ -136,13 +136,15 @@ impl Display for DiffPoint {
 /// Meta information for a node in a trie.
 #[derive(Clone, Debug, Eq, Hash, PartialEq)]
 pub struct NodeInfo {
-    key: Nibbles,
-
+    /// Mpt trie node key.
+    pub key: Nibbles,
     /// The direct value associated with the node (only applicable to `Leaf` &
     /// `Branch` nodes).
-    value: Option<Vec<u8>>,
-    node_type: TrieNodeType,
-    hash: H256,
+    pub value: Option<Vec<u8>>,
+    /// Type of this node.
+    pub node_type: TrieNodeType,
+    /// Node hash.
+    pub hash: H256,
 }
 
 impl Display for NodeInfo {
diff --git a/trace_decoder/benches/block_processing.rs b/trace_decoder/benches/block_processing.rs
index 6f3319d94..adefdae3f 100644
--- a/trace_decoder/benches/block_processing.rs
+++ b/trace_decoder/benches/block_processing.rs
@@ -6,6 +6,7 @@
 //! for a total of 24,479,837 gas.
 
 use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
+use trace_decoder::observer::DummyObserver;
 use trace_decoder::{BlockTrace, OtherBlockData};
 
 #[derive(Clone, Debug, serde::Deserialize)]
@@ -33,7 +34,13 @@ fn criterion_benchmark(c: &mut Criterion) {
                     block_trace,
                     other_data,
                 }| {
-                    trace_decoder::entrypoint(block_trace, other_data, batch_size).unwrap()
+                    trace_decoder::entrypoint(
+                        block_trace,
+                        other_data,
+                        batch_size,
+                        &mut DummyObserver::new(),
+                    )
+                    .unwrap()
                 },
                 BatchSize::LargeInput,
             )
diff --git a/trace_decoder/src/core.rs b/trace_decoder/src/core.rs
index 959e03967..d96778dec 100644
--- a/trace_decoder/src/core.rs
+++ b/trace_decoder/src/core.rs
@@ -19,6 +19,7 @@ use mpt_trie::partial_trie::PartialTrie as _;
 use nunny::NonEmpty;
 use zk_evm_common::gwei_to_wei;
 
+use crate::observer::Observer;
 use crate::{
     typed_mpt::{ReceiptTrie, StateMpt, StateTrie, StorageTrie, TransactionTrie, TrieKey},
     BlockLevelData, BlockTrace, BlockTraceTriePreImages, CombinedPreImages, ContractCodeUsage,
@@ -31,6 +32,7 @@ pub fn entrypoint(
     trace: BlockTrace,
     other: OtherBlockData,
     batch_size_hint: usize,
+    observer: &mut impl Observer<StateMpt>,
 ) -> anyhow::Result<Vec<GenerationInputs>> {
     ensure!(batch_size_hint != 0);
 
@@ -67,6 +69,7 @@ pub fn entrypoint(
         &b_meta,
         ger_data,
         withdrawals,
+        observer,
     )?;
 
     let mut running_gas_used = 0;
@@ -261,7 +264,7 @@ struct Batch {
 /// [`evm_arithmetization::generation::TrieInputs`],
 /// generic over state trie representation.
 #[derive(Debug)]
-struct IntraBlockTries<StateTrieT> {
+pub struct IntraBlockTries<StateTrieT> {
     pub state: StateTrieT,
     pub storage: BTreeMap<H256, StorageTrie>,
     pub transaction: TransactionTrie,
@@ -269,6 +272,7 @@ struct IntraBlockTries<StateTrieT> {
 }
 
 /// Does the main work mentioned in the [module documentation](super).
+#[allow(clippy::too_many_arguments)]
 fn middle<StateTrieT: StateTrie + Clone>(
     // state at the beginning of the block
     mut state_trie: StateTrieT,
@@ -282,6 +286,8 @@ fn middle<StateTrieT: StateTrie + Clone>(
     ger_data: Option<(H256, H256)>,
     // added to final batch
     mut withdrawals: Vec<(Address, U256)>,
+    // called with the untrimmed tries after each batch
+    observer: &mut impl Observer<StateTrieT>,
 ) -> anyhow::Result<Vec<Batch<StateTrieT>>> {
     // Initialise the storage tries.
     for (haddr, acct) in state_trie.iter() {
@@ -306,7 +312,7 @@ fn middle<StateTrieT: StateTrie + Clone>(
     let mut txn_ix = 0; // incremented for non-dummy transactions
     let mut loop_ix = 0; // always incremented
     let loop_len = batches.iter().flatten().count();
-    for batch in batches {
+    for (batch_index, batch) in batches.into_iter().enumerate() {
         let batch_first_txn_ix = txn_ix; // GOTCHA: if there are no transactions in this batch
         let mut batch_gas_used = 0;
         let mut batch_byte_code = vec![];
@@ -543,6 +549,15 @@ fn middle<StateTrieT: StateTrie + Clone>(
                 receipts_root: receipt_trie.root(),
             },
         });
+
+        observer.collect_tries(
+            block.block_number,
+            batch_index,
+            &state_trie,
+            &storage_tries,
+            &transaction_trie,
+            &receipt_trie,
+        )
     } // batch in batches
 
     Ok(out)
diff --git a/trace_decoder/src/lib.rs b/trace_decoder/src/lib.rs
index 53db82a69..049472c40 100644
--- a/trace_decoder/src/lib.rs
+++ b/trace_decoder/src/lib.rs
@@ -69,6 +69,8 @@
 pub use core::entrypoint;
 
 mod core;
+/// Implementation of the observer for the trace decoder.
+pub mod observer;
 /// Like `#[serde(with = "hex")`, but tolerates and emits leading `0x` prefixes
 mod hex {
     use serde::{de::Error as _, Deserialize as _, Deserializer, Serializer};
diff --git a/trace_decoder/src/observer.rs b/trace_decoder/src/observer.rs
new file mode 100644
index 000000000..320019e55
--- /dev/null
+++ b/trace_decoder/src/observer.rs
@@ -0,0 +1,113 @@
+use std::collections::BTreeMap;
+use std::marker::PhantomData;
+
+use ethereum_types::{H256, U256};
+
+use crate::core::IntraBlockTries;
+use crate::typed_mpt::{ReceiptTrie, StorageTrie, TransactionTrie};
+
+/// Observer API for the trace decoder.
+/// The observer is used to collect various debugging and metadata info
+/// from the trace decoder run.
+pub trait Observer<StateTrieT> {
+    /// Collect tries after the transaction/batch execution.
+    ///
+    /// The arguments are passed one by one, by reference, because we
+    /// don't want to clone the tries in case they are not used by the
+    /// observer.
+    fn collect_tries(
+        &mut self,
+        block: U256,
+        batch: usize,
+        state_trie: &StateTrieT,
+        storage: &BTreeMap<H256, StorageTrie>,
+        transaction_trie: &TransactionTrie,
+        receipt_trie: &ReceiptTrie,
+    );
+}
+
+#[derive(Debug)]
+/// Data element collected by the tries observer. It contains the data
+/// gathered while the trace decoder processes the batches of a block;
+/// one element is stored per batch.
+pub struct TriesObserverElement<StateTrieT> {
+    /// Block where the tries are collected.
+    pub block: U256,
+    /// Tries were collected after the trace decoder processed batch `batch`.
+    pub batch: usize,
+    /// State, transaction, and receipt tries after the batch
+    /// execution (how the trace decoder sees them).
+    pub tries: IntraBlockTries<StateTrieT>,
+}
+
+/// Observer for collection of post-execution tries from the
+/// trace decoder run.
+#[derive(Debug)]
+pub struct TriesObserver<StateTrieT> {
+    /// Data collected in the observer pass.
+    pub data: Vec<TriesObserverElement<StateTrieT>>,
+}
+
+impl<StateTrieT> TriesObserver<StateTrieT> {
+    /// Creates a new tries-collecting observer.
+    pub fn new() -> Self {
+        TriesObserver::<StateTrieT> { data: Vec::new() }
+    }
+}
+
+impl<StateTrieT: Clone> Observer<StateTrieT> for TriesObserver<StateTrieT> {
+    fn collect_tries(
+        &mut self,
+        block: U256,
+        batch: usize,
+        state_trie: &StateTrieT,
+        storage: &BTreeMap<H256, StorageTrie>,
+        transaction_trie: &TransactionTrie,
+        receipt_trie: &ReceiptTrie,
+    ) {
+        self.data.push(TriesObserverElement {
+            block,
+            batch,
+            tries: IntraBlockTries {
+                state: state_trie.clone(),
+                storage: storage.clone(),
+                transaction: transaction_trie.clone(),
+                receipt: receipt_trie.clone(),
+            },
+        });
+    }
+}
+
+impl<StateTrieT> Default for TriesObserver<StateTrieT> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Dummy observer which does not collect any data.
+#[derive(Default, Debug)]
+pub struct DummyObserver<StateTrieT> {
+    phantom: PhantomData<StateTrieT>,
+}
+
+impl<StateTrieT> DummyObserver<StateTrieT> {
+    /// Create a new dummy observer.
+    pub fn new() -> Self {
+        DummyObserver::<StateTrieT> {
+            phantom: Default::default(),
+        }
+    }
+}
+
+impl<StateTrieT> Observer<StateTrieT> for DummyObserver<StateTrieT> {
+    fn collect_tries(
+        &mut self,
+        _block: U256,
+        _batch: usize,
+        _state_trie: &StateTrieT,
+        _storage: &BTreeMap<H256, StorageTrie>,
+        _transaction_trie: &TransactionTrie,
+        _receipt_trie: &ReceiptTrie,
+    ) {
+    }
+}
diff --git a/trace_decoder/src/typed_mpt.rs b/trace_decoder/src/typed_mpt.rs
index dc56d54a1..8baf3cf29 100644
--- a/trace_decoder/src/typed_mpt.rs
+++ b/trace_decoder/src/typed_mpt.rs
@@ -271,6 +271,7 @@ impl From for HashedPartialTrie {
     }
 }
 
+/// TODO(0xaatif): document this after refactoring is done https://github.com/0xPolygonZero/zk_evm/issues/275
 pub trait StateTrie {
     fn insert_by_address(
         &mut self,
diff --git a/trace_decoder/tests/consistent-with-header.rs b/trace_decoder/tests/consistent-with-header.rs
index f82027e82..609fd57bb 100644
--- a/trace_decoder/tests/consistent-with-header.rs
+++ b/trace_decoder/tests/consistent-with-header.rs
@@ -10,6 +10,7 @@ use common::{cases, Case};
 use itertools::Itertools;
 use libtest_mimic::{Arguments, Trial};
 use mpt_trie::partial_trie::PartialTrie as _;
+use trace_decoder::observer::DummyObserver;
 
 fn main() -> anyhow::Result<()> {
     let mut trials = vec![];
@@ -23,8 +24,13 @@ fn main() -> anyhow::Result<()> {
     } in cases()?
     {
         trials.push(Trial::test(format!("{name}@{batch_size}"), move || {
-            let gen_inputs = trace_decoder::entrypoint(trace, other.clone(), batch_size)
-                .map_err(|e| format!("{e:?}"))?; // get the full cause chain
+            let gen_inputs = trace_decoder::entrypoint(
+                trace,
+                other.clone(),
+                batch_size,
+                &mut DummyObserver::new(),
+            )
+            .map_err(|e| format!("{e:?}"))?; // get the full cause chain
             check!(gen_inputs.len() >= 2);
             check!(
                 Some(other.checkpoint_state_trie_root)
diff --git a/trace_decoder/tests/simulate-execution.rs b/trace_decoder/tests/simulate-execution.rs
index c4cbe53b2..d0476c2b7 100644
--- a/trace_decoder/tests/simulate-execution.rs
+++ b/trace_decoder/tests/simulate-execution.rs
@@ -8,6 +8,7 @@ use anyhow::Context as _;
 use common::{cases, Case};
 use libtest_mimic::{Arguments, Trial};
 use plonky2::field::goldilocks_field::GoldilocksField;
+use trace_decoder::observer::DummyObserver;
 
 fn main() -> anyhow::Result<()> {
     let mut trials = vec![];
@@ -19,9 +20,11 @@ fn main() -> anyhow::Result<()> {
         other,
     } in cases()?
     {
-        let gen_inputs = trace_decoder::entrypoint(trace, other, batch_size).context(
-            format!("error in `trace_decoder` for {name} at batch size {batch_size}"),
-        )?;
+        let gen_inputs =
+            trace_decoder::entrypoint(trace, other, batch_size, &mut DummyObserver::new())
+                .context(format!(
+                    "error in `trace_decoder` for {name} at batch size {batch_size}"
+                ))?;
         for (ix, gi) in gen_inputs.into_iter().enumerate() {
             trials.push(Trial::test(
                 format!("{name}@{batch_size}/{ix}"),
diff --git a/zero/Cargo.toml b/zero/Cargo.toml
index 7bc42709c..5ccb57c96 100644
--- a/zero/Cargo.toml
+++ b/zero/Cargo.toml
@@ -30,6 +30,8 @@ once_cell = { workspace = true }
 paladin-core = { workspace = true }
 plonky2 = { workspace = true }
 plonky2_maybe_rayon = { workspace = true }
+regex = "1.5.4"
+rlp = { workspace = true }
 ruint = { workspace = true, features = ["num-traits", "primitive-types"] }
 serde = { workspace = true }
 serde_json = { workspace = true }
diff --git a/zero/src/bin/rpc.rs b/zero/src/bin/rpc.rs
index 9c6baa535..d49cdde5c 100644
--- a/zero/src/bin/rpc.rs
+++ b/zero/src/bin/rpc.rs
@@ -8,6 +8,7 @@ use alloy::transports::Transport;
 use anyhow::anyhow;
 use clap::{Args, Parser, Subcommand, ValueHint};
 use futures::StreamExt;
+use trace_decoder::observer::DummyObserver;
 use tracing_subscriber::{prelude::*, EnvFilter};
 use url::Url;
 use zero::block_interval::BlockInterval;
@@ -170,6 +171,7 @@ impl Cli {
                     block_prover_input.block_trace,
                     block_prover_input.other_data,
                     batch_size,
+                    &mut DummyObserver::new(),
                 )?;
 
                 if let Some(index) = tx_info.transaction_index {
diff --git a/zero/src/bin/trie_diff.rs b/zero/src/bin/trie_diff.rs
new file mode 100644
index 000000000..d454fdda6
--- /dev/null
+++ b/zero/src/bin/trie_diff.rs
@@ -0,0 +1,167 @@
+//! This binary is a debugging tool that compares the trace decoder output
+//! tries with the tries left after kernel execution (state, transaction and
+//! receipt). As input, it takes a standard witness JSON file (the same as
+//! `leader` in stdio mode), and it runs the trace decoder and the prover's
+//! `test_only` mode block by block. On the first error, the trace decoder
+//! and prover tries are compared, and the details of the trie differences
+//! are printed.
+
+use std::io::Read;
+use std::iter::repeat;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use anyhow::Result;
+use clap::{Parser, ValueHint};
+use evm_arithmetization::generation::DebugOutputTries;
+use futures::{future, TryStreamExt};
+use paladin::directive::{Directive, IndexedStream};
+use paladin::runtime::Runtime;
+use regex::Regex;
+use trace_decoder::observer::TriesObserver;
+use tracing::{error, info};
+use zero::ops::register;
+use zero::prover::{cli::CliProverConfig, BlockProverInput, ProverConfig};
+
+#[derive(Parser)]
+#[command(version = zero::version(), propagate_version = true)]
+pub(crate) struct Cli {
+    #[clap(flatten)]
+    pub(crate) prover_config: CliProverConfig,
+
+    /// The previous proof output.
+    #[arg(long, short = 'f', value_hint = ValueHint::FilePath)]
+    previous_proof: Option<PathBuf>,
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    zero::tracing::init();
+
+    let args = Cli::parse();
+
+    // Load the witness input from stdin.
+    let mut buffer = String::new();
+    std::io::stdin().read_to_string(&mut buffer)?;
+
+    // This is a debug run, so we always use in-memory execution.
+    let paladin_config = paladin::config::Config {
+        amqp_uri: None,
+        runtime: paladin::config::Runtime::InMemory,
+        ..Default::default()
+    };
+    let runtime = Arc::new(Runtime::from_config(&paladin_config, register()).await?);
+
+    // Tries are computed in the kernel, so there is no need to run proving;
+    // test_only mode is enough. We hardcode the prover arguments needed for trie diff.
+    let prover_config: Arc<ProverConfig> = Arc::new(ProverConfig {
+        test_only: true,
+        save_inputs_on_error: true,
+        save_tries_on_error: true,
+        ..args.prover_config.into()
+    });
+
+    let seg_ops = zero::ops::SegmentProofTestOnly {
+        save_inputs_on_error: prover_config.save_inputs_on_error,
+        save_tries_on_error: prover_config.save_tries_on_error,
+    };
+
+    let des = &mut serde_json::Deserializer::from_str(&buffer);
+    let block_prover_inputs = serde_path_to_error::deserialize::<_, Vec<BlockProverInput>>(des)?
+        .into_iter()
+        .collect::<Vec<BlockProverInput>>();
+
+    for block_prover_input in block_prover_inputs {
+        let mut observer = TriesObserver::new();
+        let block_number = block_prover_input
+            .other_data
+            .b_data
+            .b_meta
+            .block_number
+            .low_u64();
+        let block_generation_inputs = trace_decoder::entrypoint(
+            block_prover_input.block_trace.clone(),
+            block_prover_input.other_data.clone(),
+            prover_config.batch_size,
+            &mut observer,
+        )?;
+        info!(
+            "Number of collected batch tries for block {}: {}",
+            block_number,
+            observer.data.len()
+        );
+
+        info!("Running trie diff simulation for block {block_number} ...");
+        let simulation = Directive::map(
+            IndexedStream::from(
+                block_generation_inputs
+                    .clone()
+                    .into_iter()
+                    .enumerate()
+                    .zip(repeat(prover_config.max_cpu_len_log))
+                    .map(|((batch_index, inputs), max_cpu_len_log)| {
+                        (inputs, max_cpu_len_log, batch_index)
+                    }),
+            ),
+            &seg_ops,
+        );
+
+        if let Err(e2) = simulation
+            .run(&runtime)
+            .await
+            .inspect_err(|e1| {
+                error!("Failed to run simulation for block {block_number}, error: {e1}")
+            })?
+            .try_for_each(|_| future::ok(()))
+            .await
+        {
+            // Try to parse the block and batch index from the error message.
+            let error_message = e2.to_string();
+            let re = Regex::new(r"block:(\d+) batch:(\d+)")?;
+            if let Some(cap) = re.captures(&error_message) {
+                let block_number: u64 = cap[1].parse()?;
+                let batch_index: usize = cap[2].parse()?;
+
+                let prover_tries =
+                    zero::debug_utils::load_tries_from_disk(block_number, batch_index)?;
+
+                info!("Performing trie comparison for block {block_number} batch {batch_index}...");
+                zero::trie_diff::compare_tries(
+                    &block_prover_input,
+                    batch_index,
+                    &DebugOutputTries {
+                        state_trie: observer.data[prover_tries.batch_index]
+                            .tries
+                            .state
+                            .as_hashed_partial_trie()
+                            .clone(),
+                        transaction_trie: observer.data[prover_tries.batch_index]
+                            .tries
+                            .transaction
+                            .clone()
+                            .into(),
+                        receipt_trie: observer.data[prover_tries.batch_index]
+                            .tries
+                            .receipt
+                            .clone()
+                            .into(),
+                    },
+                    &prover_tries.tries,
+                )?;
+
+                info!("Trie comparison finished for block {block_number} batch {batch_index}");
+                return Ok(());
+            } else {
+                error!(
+                    "Failed to extract block and batch numbers from error message, could not run tries comparison: {}",
+                    error_message
+                );
+                return Err(e2);
+            }
+        }
+
+        info!("Trie diff for block {block_number} finished, no problems found.")
+    }
+
+    Ok(())
+}
diff --git a/zero/src/debug_utils.rs b/zero/src/debug_utils.rs
index f8cb53dd6..970c646c2 100644
--- a/zero/src/debug_utils.rs
+++ b/zero/src/debug_utils.rs
@@ -1,67 +1,13 @@
 use std::fs::{self, File};
-use std::io::{self, Write};
+use std::io::Write;
 use std::path::{Path, PathBuf};
 
-use serde::Serialize;
-use serde_json::Error as SerdeError;
-use thiserror::Error;
+use anyhow::Context;
+use evm_arithmetization::generation::DebugOutputTries;
+use serde::{Deserialize, Serialize};
 
 const DEBUG_FOLDER: &str = "./debug";
 
-/// Ensures that the specified directory exists on the filesystem.
-///
-/// This function checks if the directory at `folder_path` exists. If not, it
-/// attempts to create the directory. It returns an error if the path is not a
-/// directory or if there are issues accessing or creating the directory.
-///
-/// # Parameters
-/// * `folder_path` - A reference to a `Path` that specifies the directory to
-///   check or create.
-///
-/// # Returns
-/// * `Ok(())` - The directory exists or was successfully created.
-/// * `Err(io::Error)` - The path is not a directory, or there was a problem
-///   accessing or creating the directory.
-fn ensure_directory_exists(folder_path: &Path) -> io::Result<()> {
-    match fs::metadata(folder_path) {
-        Ok(metadata) => {
-            if metadata.is_dir() {
-                Ok(()) // The directory already exists
-            } else {
-                Err(io::Error::new(
-                    io::ErrorKind::AlreadyExists,
-                    "The path exists but is not a directory",
-                ))
-            }
-        }
-        Err(e) => {
-            if e.kind() == io::ErrorKind::NotFound {
-                // Directory does not exist, try to create it
-                fs::create_dir(folder_path)
-            } else {
-                // Re-throw the error if it's not a 'NotFound' error
-                Err(e)
-            }
-        }
-    }
-}
-
-/// An error type for save debug input information.
-#[derive(Error, Debug)]
-pub enum SaveInputError {
-    #[error("failed to create directory '{0}'")]
-    CreateDirectoryError(PathBuf, #[source] io::Error),
-
-    #[error("failed to create file '{0}'")]
-    CreateFileError(PathBuf, #[source] io::Error),
-
-    #[error("failed to serialize inputs")]
-    SerializationError(#[source] SerdeError),
-
-    #[error("failed to write to file '{0}'")]
-    WriteToFileError(PathBuf, #[source] io::Error),
-}
-
 /// Serializes a collection of inputs to a pretty-printed JSON format and saves
 /// them to a file.
 ///
@@ -76,27 +22,73 @@ pub enum SaveInputError {
 ///
 /// This function returns a `Result<(), std::io::Error>` indicating the
 /// operation's success or failure.
-pub fn save_inputs_to_disk<T: Serialize>(
-    file_name: String,
-    inputs: T,
-) -> Result<(), SaveInputError> {
+pub fn save_inputs_to_disk<T: Serialize>(file_name: String, inputs: T) -> anyhow::Result<()> {
     let debug_folder = Path::new(DEBUG_FOLDER);
-    let input_file_path = debug_folder.join(file_name);
 
-    // Ensure the DEBUG_FOLDER exists
-    ensure_directory_exists(debug_folder)
-        .map_err(|e| SaveInputError::CreateDirectoryError(debug_folder.to_path_buf(), e))?;
+    // Check if output directory exists, and create one if it doesn't.
+    if !debug_folder.exists() {
+        fs::create_dir(debug_folder)?;
+    }
 
-    let mut file = File::create(&input_file_path)
-        .map_err(|e| SaveInputError::CreateFileError(input_file_path.clone(), e))?;
+    let input_file_path = debug_folder.join(file_name);
+    let mut file = File::create(&input_file_path)?;
 
     // Serialize the entire collection to a pretty JSON string
-    let all_inputs_str =
-        serde_json::to_string_pretty(&inputs).map_err(SaveInputError::SerializationError)?;
+    let all_inputs_str = serde_json::to_string_pretty(&inputs)?;
 
     // Write the serialized data to the file
-    file.write_all(all_inputs_str.as_bytes())
-        .map_err(|e| SaveInputError::WriteToFileError(input_file_path, e))?;
+    file.write_all(all_inputs_str.as_bytes())?;
+
+    Ok(())
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ErrorTrieFile {
+    pub error: String,
+    pub block_number: u64,
+    pub batch_index: usize,
+    pub tries: DebugOutputTries,
+}
+
+pub fn generate_trie_debug_file_name(block_number: u64, batch_index: usize) -> String {
+    format!("b{}_batch{}_error_tries.data", block_number, batch_index)
+}
+
+pub fn save_tries_to_disk(
+    err: &str,
+    block_number: u64,
+    batch_index: usize,
+    tries: &DebugOutputTries,
+) -> anyhow::Result<()> {
+    let output_dir = PathBuf::from(DEBUG_FOLDER);
+
+    // Check if output directory exists, and create one if it doesn't.
+    if !output_dir.exists() {
+        fs::create_dir(output_dir.clone())?;
+    }
+    let mut tries_debug_file_path = output_dir;
+    tries_debug_file_path.push(generate_trie_debug_file_name(block_number, batch_index));
+
+    let simulation_error_str = serde_json::to_string(&ErrorTrieFile {
+        error: err.to_string(),
+        block_number,
+        batch_index,
+        tries: tries.clone(),
+    })
+    .context("unable to serialize simulation error to save tries")?;
+    fs::write(tries_debug_file_path, simulation_error_str)
+        .expect("unable to write simulation error to file");
 
     Ok(())
 }
+
+pub fn load_tries_from_disk(
+    block_number: u64,
+    batch_index: usize,
+) -> anyhow::Result<ErrorTrieFile> {
+    let mut tries_debug_file_path = PathBuf::from(DEBUG_FOLDER);
+    tries_debug_file_path.push(generate_trie_debug_file_name(block_number, batch_index));
+    let file = File::open(tries_debug_file_path)?;
+    let data: ErrorTrieFile = serde_json::from_reader(file)?;
+    Ok(data)
+}
diff --git a/zero/src/lib.rs b/zero/src/lib.rs
index 066ecc87e..c2ca63f6a 100644
--- a/zero/src/lib.rs
+++ b/zero/src/lib.rs
@@ -13,6 +13,7 @@ pub mod prover_state;
 pub mod provider;
 pub mod rpc;
 pub mod tracing;
+pub mod trie_diff;
 
 /// Size of the channel used to send block prover inputs to the per block
 /// proving task. If the proving task is slow and can not consume inputs fast
diff --git a/zero/src/ops.rs b/zero/src/ops.rs
index b08b5a925..cc180c7e5 100644
--- a/zero/src/ops.rs
+++ b/zero/src/ops.rs
@@ -2,6 +2,7 @@ zk_evm_common::check_chain_features!();
 
 use std::time::Instant;
 
+use anyhow::anyhow;
 use evm_arithmetization::fixed_recursive_verifier::ProverOutputData;
 use evm_arithmetization::{prover::testing::simulate_execution_all_segments, GenerationInputs};
 use evm_arithmetization::{Field, PublicValues, TrimmedGenerationInputs};
@@ -13,6 +14,7 @@ use serde::{Deserialize, Serialize};
 use tracing::error;
 use tracing::{event, info_span, Level};
 
+use crate::debug_utils::save_tries_to_disk;
 use crate::proof_types::{
     BatchAggregatableProof, GeneratedBlockProof, GeneratedSegmentAggProof, GeneratedTxnAggProof,
     SegmentAggregatableProof,
@@ -70,32 +72,58 @@ impl Operation for SegmentProof {
 #[derive(Deserialize, Serialize, RemoteExecute)]
 pub struct SegmentProofTestOnly {
     pub save_inputs_on_error: bool,
+    pub save_tries_on_error: bool,
 }
 
 impl Operation for SegmentProofTestOnly {
-    type Input = (GenerationInputs, usize);
+    // The input is a tuple of the batch generation inputs, max_cpu_len_log and
+    // batch index.
+    type Input = (GenerationInputs, usize, usize);
     type Output = ();
 
     fn execute(&self, inputs: Self::Input) -> Result<Self::Output> {
-        if self.save_inputs_on_error {
-            simulate_execution_all_segments::<Field>(inputs.0.clone(), inputs.1).map_err(|e| {
-                if let Err(write_err) = save_inputs_to_disk(
-                    format!(
-                        "b{}_txns_{}..{}_input.json",
-                        inputs.0.block_metadata.block_number,
-                        inputs.0.txn_number_before,
-                        inputs.0.txn_number_before + inputs.0.signed_txns.len(),
-                    ),
-                    inputs.0,
-                ) {
-                    error!("Failed to save txn proof input to disk: {:?}", write_err);
+        if self.save_inputs_on_error || self.save_tries_on_error {
+            simulate_execution_all_segments::<Field>(inputs.0.clone(), inputs.1).map_err(|err| {
+                let block_number = inputs.0.block_metadata.block_number.low_u64();
+                let batch_index = inputs.2;
+
+                let err = if self.save_tries_on_error {
+                    if let Some(ref tries) = err.tries {
+                        if let Err(write_err) =
+                            save_tries_to_disk(&err.to_string(), block_number, batch_index, tries)
+                        {
+                            error!("Failed to save tries to disk: {:?}", write_err);
+                        }
+                    }
+                    anyhow!(
+                        "block:{} batch:{} error: {}",
+                        block_number,
+                        batch_index,
+                        err.to_string()
+                    )
+                } else {
+                    err.into()
+                };
+
+                if self.save_inputs_on_error {
+                    if let Err(write_err) = save_inputs_to_disk(
+                        format!(
+                            "b{}_txns_{}..{}_input.json",
+                            block_number,
+                            inputs.0.txn_number_before,
+                            inputs.0.txn_number_before + inputs.0.signed_txns.len(),
+                        ),
+                        inputs.0,
+                    ) {
+                        error!("Failed to save txn proof input to disk: {:?}", write_err);
+                    }
                 }
-                FatalError::from_str(&e.to_string(), FatalStrategy::Terminate)
+                FatalError::from_anyhow(err, FatalStrategy::Terminate)
            })?
         } else {
             simulate_execution_all_segments::<Field>(inputs.0, inputs.1)
-                .map_err(|e| FatalError::from_str(&e.to_string(), FatalStrategy::Terminate))?;
+                .map_err(|err| FatalError::from_anyhow(err.into(), FatalStrategy::Terminate))?;
         }
 
         Ok(())
diff --git a/zero/src/prover.rs b/zero/src/prover.rs
index 1625752b9..665d4f828 100644
--- a/zero/src/prover.rs
+++ b/zero/src/prover.rs
@@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize};
 use tokio::io::AsyncWriteExt;
 use tokio::sync::mpsc::Receiver;
 use tokio::sync::{oneshot, Semaphore};
+use trace_decoder::observer::DummyObserver;
 use trace_decoder::{BlockTrace, OtherBlockData};
 use tracing::{error, info};
 
@@ -48,6 +49,7 @@ pub struct ProverConfig {
     pub keep_intermediate_proofs: bool,
     pub block_batch_size: usize,
     pub block_pool_size: usize,
+    pub save_tries_on_error: bool,
 }
 
 #[derive(Clone, Debug, Deserialize, Serialize)]
@@ -81,8 +83,12 @@ impl BlockProverInput {
 
         let block_number = self.get_block_number();
 
-        let block_generation_inputs =
-            trace_decoder::entrypoint(self.block_trace, self.other_data, batch_size)?;
+        let block_generation_inputs = trace_decoder::entrypoint(
+            self.block_trace,
+            self.other_data,
+            batch_size,
+            &mut DummyObserver::new(),
+        )?;
 
         // Create segment proof.
         let seg_prove_ops = ops::SegmentProof {
@@ -163,24 +169,34 @@ impl BlockProverInput {
             max_cpu_len_log,
             batch_size,
             save_inputs_on_error,
+            save_tries_on_error,
             ..
         } = *prover_config;
 
         let block_number = self.get_block_number();
 
         info!("Testing witness generation for block {block_number}.");
 
-        let block_generation_inputs =
-            trace_decoder::entrypoint(self.block_trace, self.other_data, batch_size)?;
+        let block_generation_inputs = trace_decoder::entrypoint(
+            self.block_trace,
+            self.other_data,
+            batch_size,
+            &mut DummyObserver::new(),
+        )?;
 
         let seg_ops = ops::SegmentProofTestOnly {
             save_inputs_on_error,
+            save_tries_on_error,
         };
 
         let simulation = Directive::map(
             IndexedStream::from(
                 block_generation_inputs
                     .into_iter()
-                    .zip(repeat(max_cpu_len_log)),
+                    .enumerate()
+                    .zip(repeat(max_cpu_len_log))
+                    .map(|((batch_index, txn_batch), max_cpu_len_log)| {
+                        (txn_batch, max_cpu_len_log, batch_index)
+                    }),
             ),
             &seg_ops,
         );
diff --git a/zero/src/prover/cli.rs b/zero/src/prover/cli.rs
index a6cdaebf9..87e79bc65 100644
--- a/zero/src/prover/cli.rs
+++ b/zero/src/prover/cli.rs
@@ -60,6 +60,7 @@ impl From<CliProverConfig> for super::ProverConfig {
             keep_intermediate_proofs: cli.keep_intermediate_proofs,
             block_batch_size: cli.block_batch_size,
             block_pool_size: cli.block_pool_size,
+            save_tries_on_error: false,
         }
     }
 }
diff --git a/zero/src/trie_diff/mod.rs b/zero/src/trie_diff/mod.rs
new file mode 100644
index 000000000..e154d16e1
--- /dev/null
+++ b/zero/src/trie_diff/mod.rs
@@ -0,0 +1,111 @@
+use evm_arithmetization::generation::mpt::{AccountRlp, LegacyReceiptRlp};
+use evm_arithmetization::generation::DebugOutputTries;
+use mpt_trie::debug_tools::diff::create_diff_between_tries;
+use mpt_trie::utils::TrieNodeType;
+use tracing::info;
+
+use crate::prover::BlockProverInput;
+
+pub fn compare_tries(
+    block_prover_input: &BlockProverInput,
+    batch_index: usize,
+    left: &DebugOutputTries,
+    right: &DebugOutputTries,
+) -> anyhow::Result<()> {
+    let block_number = block_prover_input
+        .other_data
+        .b_data
+        .b_meta
+        .block_number
+        .low_u64();
+    let state_trie_diff = create_diff_between_tries(&left.state_trie, &right.state_trie);
+    if let Some(ref state_trie_diff_point) = state_trie_diff.latest_diff_res {
+        if state_trie_diff_point.a_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref td_account_value) = state_trie_diff_point.a_info.value {
+                let td_account_data = rlp::decode::<AccountRlp>(td_account_value)?;
+                info!("Trace decoder state trie block {block_number} batch {batch_index} account address hash: {} account data: {:#?}",
+                    state_trie_diff_point.a_info.key, td_account_data);
+            } else {
+                info!("Trace decoder state trie block {block_number} batch {batch_index}, skip account printout as diff is not at the leaf node level.");
+            }
+        }
+        if state_trie_diff_point.b_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref prover_account_value) = state_trie_diff_point.b_info.value {
+                let prover_account_data = rlp::decode::<AccountRlp>(prover_account_value)?;
+                info!("Prover state trie block {block_number} batch {batch_index} account address hash: {} account data: {:#?}",
+                    state_trie_diff_point.b_info.key, prover_account_data);
+            } else {
+                info!("Prover state trie block {block_number} batch {batch_index}, skip account printout as diff is not at the leaf node level.");
+            }
+        }
+
+        info!(
+            "State trie block {block_number} batch {batch_index} diff: {:#?}",
+            state_trie_diff_point
+        );
+    } else {
+        info!("State trie for block {block_number} batch {batch_index} matches.");
+    }
+
+    let transaction_trie_diff =
+        create_diff_between_tries(&left.transaction_trie, &right.transaction_trie);
+    if let Some(ref transaction_trie_diff_point) = transaction_trie_diff.latest_diff_res {
+        if transaction_trie_diff_point.a_info.node_type == TrieNodeType::Leaf {
+            let tx_index =
+                rlp::decode::<usize>(transaction_trie_diff_point.a_info.key.as_byte_slice())?;
+            info!("Trace decoder transaction trie block {block_number} batch {batch_index} transaction index {tx_index} rlp bytecode: {:?}",
+                transaction_trie_diff_point.a_info.value.as_ref().map(hex::encode));
+        } else {
+            info!("Trace decoder transaction trie block {block_number} batch {batch_index}, skip tx printout as diff is not at the leaf node level.");
+        }
+        if transaction_trie_diff_point.b_info.node_type == TrieNodeType::Leaf {
+            let tx_index =
+                rlp::decode::<usize>(transaction_trie_diff_point.b_info.key.as_byte_slice())?;
+            info!("Prover transaction trie block {block_number} batch {batch_index} transaction index {tx_index} rlp bytecode: {:?}",
+                transaction_trie_diff_point.b_info.value.as_ref().map(hex::encode));
+        } else {
+            info!("Prover transaction trie block {block_number} batch {batch_index}, skip tx printout as diff is not at the leaf node level.");
+        }
+
+        info!(
+            "Transactions trie block {block_number} batch {batch_index} diff: {:#?}",
+            transaction_trie_diff_point
+        );
+    } else {
+        info!("Transaction trie for block {block_number} batch {batch_index} matches.");
+    }
+
+    let receipt_trie_diff = create_diff_between_tries(&left.receipt_trie, &right.receipt_trie);
+    if let Some(ref receipt_trie_diff_point) = receipt_trie_diff.latest_diff_res {
+        if receipt_trie_diff_point.a_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref td_receipt_value) = receipt_trie_diff_point.a_info.value {
+                let tx_index =
+                    rlp::decode::<usize>(receipt_trie_diff_point.a_info.key.as_byte_slice())?;
+                let td_receipt_data = rlp::decode::<LegacyReceiptRlp>(td_receipt_value)?;
+                info!("Trace decoder receipt trie block {block_number} batch {batch_index} output tx index: {tx_index} receipt data: {:#?}", td_receipt_data);
+            } else {
+                info!("Trace decoder receipt trie block {block_number} batch {batch_index}, skip printout as diff is not at the leaf node level.");
+            }
+        }
+
+        if receipt_trie_diff_point.b_info.node_type == TrieNodeType::Leaf {
+            if let Some(ref prover_receipt_value) = receipt_trie_diff_point.b_info.value {
+                let tx_index =
tx_index = + rlp::decode::(receipt_trie_diff_point.b_info.key.as_byte_slice())?; + let prover_receipt_data = rlp::decode::(prover_receipt_value)?; + info!("Prover receipt trie block {block_number} batch {batch_index} output tx index: {tx_index} receipt data: {:#?}", prover_receipt_data); + } else { + info!("Prover receipt trie block {block_number} batch {batch_index}, skip receipt printout as diff is not at the leaf node level."); + } + } + + println!( + "Receipt trie block {block_number} batch {batch_index} diff: {:#?}", + receipt_trie_diff + ); + } else { + println!("Receipt trie block {block_number} batch {batch_index} matches."); + } + + Ok(()) +}