Commit
Merge pull request #331 from ralexstokes/blobber
add utilities for blobbing
ralexstokes authored Mar 17, 2024
2 parents bba88f6 + 9f15d40 commit becc1be
Showing 21 changed files with 4,608 additions and 12 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -41,6 +41,6 @@ syn = { version = "1.0.98", features = [
] }
prettyplease = { version = "0.1.10" }
quote = { version = "1.0.18" }
clap = { version = "4.4.6", features = ["derive"] }
clap = { version = "4.5.3", features = ["derive"] }
convert_case = "0.6.0"
walkdir = "2.3.3"
walkdir = "2.3.3"
2 changes: 2 additions & 0 deletions ethereum-consensus/Cargo.toml
@@ -28,6 +28,7 @@ ec = [
"ctr",
"base64",
"unicode-normalization",
"bitvec",
]

[dependencies]
@@ -70,6 +71,7 @@ aes = { version = "0.8.3", optional = true }
ctr = { version = "0.9.2", optional = true }
base64 = { version = "0.21.4", optional = true }
unicode-normalization = { version = "0.1.22", optional = true }
bitvec = { version = "1.0.1", optional = true }

[dev-dependencies]
serde_with = "1.13.0"
52 changes: 52 additions & 0 deletions ethereum-consensus/src/bin/ec/README.md
@@ -0,0 +1,52 @@
# `ec`

A set of utilities for Ethereum consensus.

## Blobs

Facilities for mapping an arbitrary stream of data into blobs and back.

To produce blobs from some data, do something like this:

```bash
$ cat some-data | ec blobs encode > encoded-blobs.json
```

This produces a JSON array of complete blobs on STDOUT.
The blobs are sequenced in the same order as the incoming data.
If you change the order of the blobs, the recovered data will not be the same.

To bundle the blobs for inclusion in an EIP-4844 transaction (computing the required commitments and proofs along the way):

```bash
$ cat encoded-blobs.json | ec blobs bundle > blobs-bundle.json
```

The blobs, commitments, and proofs are required to make a valid 4844 transaction to submit to an execution node.
This utility does not currently construct the 4844 transaction itself; refer to something like [alloy](https://github.com/alloy-rs/alloy) for that.

To recover some data from a set of blobs (e.g. from the chain), assemble them into a JSON array and provide it as input on STDIN:

```bash
$ cat blobs.json | ec blobs decode > some-data
```

Note that the blobs must be kept in the order originally produced by `ec blobs encode` in order to recover the same data stream that was initially provided.

### Framing

The `blobs` command supports several framing modes for placing arbitrary data into blobs and recovering it later.

Supported modes:

* `raw`
* `sized`

The `raw` mode writes the provided data directly into blobs. Note that, given the fixed size of blobs, padding bytes may be added to the end of the stream, and there is no way to tell from the blob data alone where the original data ended. There is no (local) limit to the amount of data that can be {en,de}coded to/from blobs this way.

The `sized` mode adds a header to the payload data so that this utility can read back exactly the number of bytes originally written when decoding.
Refer to the documentation in the `framing` module for details of the header and payload encoding.
The `sized` mode provides no other sequencing data, so users must take care when ordering blobs if the order is meaningful (e.g. when decoding).
If the target data fits within the maximum number of blobs per block, a user can simply use this tool (keeping blobs in the same order at each step) and rely on the Ethereum protocol's transaction nonce and blob index as sequencing data.
If the target data exceeds the maximum number of blobs per block, the user will either need to manually place blobs so that the blob order respects the (nonce, blob index) order, or devise some other sequencing scheme (e.g. the payload data can include in-band sequencing information).
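
As a rough illustration, the `sized` header mirrors `framing::sized_header` introduced in this change: one version byte followed by the payload length as a big-endian `u32`. A minimal sketch:

```rust
// Sketch of the `sized` framing header (5 bytes): version byte + big-endian u32 payload length.
fn sized_header(payload_len: u32) -> [u8; 5] {
    let mut header = [0u8; 5];
    header[0] = 0; // SIZED_FRAMING_VERSION
    header[1..].copy_from_slice(&payload_len.to_be_bytes());
    header
}
```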
46 changes: 46 additions & 0 deletions ethereum-consensus/src/bin/ec/blobs/bundler.rs
@@ -0,0 +1,46 @@
use crate::blobs::{Blob, Error};
use ethereum_consensus::{
deneb::{self, polynomial_commitments as spec, presets::TRUSTED_SETUP_JSON},
Error as ConsensusError,
};
use std::io::Read;

type BlobsBundle = deneb::mainnet::BlobsBundle;
type Commitment = spec::KzgCommitment;
type Proof = spec::KzgProof;
type CommitmentAndProof = (Commitment, Proof);

pub fn commit_and_prove_blob(
blob: &Blob,
kzg_settings: &spec::KzgSettings,
) -> Result<CommitmentAndProof, ConsensusError> {
let commitment = spec::blob_to_kzg_commitment(blob, kzg_settings)?;
let proof = spec::compute_blob_kzg_proof(blob, &commitment, kzg_settings)?;
Ok((commitment, proof))
}

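// Computes a KZG commitment and proof for each blob, verifies the whole batch, and assembles
// the result into a `BlobsBundle`.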
pub fn bundle(blobs: Vec<Blob>, kzg_settings: &spec::KzgSettings) -> Result<BlobsBundle, Error> {
let commitments_and_proofs = blobs
.iter()
.map(|blob| commit_and_prove_blob(blob, kzg_settings))
.collect::<Result<Vec<CommitmentAndProof>, ConsensusError>>()?;
let (commitments, proofs) = commitments_and_proofs.into_iter().unzip();
let blobs_bundle = BlobsBundle { commitments, proofs, blobs };

spec::verify_blob_kzg_proof_batch(
&blobs_bundle.blobs,
&blobs_bundle.commitments,
&blobs_bundle.proofs,
kzg_settings,
)
.map_err(ConsensusError::from)?;

Ok(blobs_bundle)
}

// Assumes a `serde_json`-encoded `Vec<Blob>` on `reader` and uses the mainnet trusted setup.
pub fn from_reader(reader: impl Read) -> Result<BlobsBundle, Error> {
let kzg_settings = spec::kzg_settings_from_json(TRUSTED_SETUP_JSON)?;
let blobs: Vec<Blob> = serde_json::from_reader(reader)?;
bundle(blobs, &kzg_settings)
}
66 changes: 66 additions & 0 deletions ethereum-consensus/src/bin/ec/blobs/command.rs
@@ -0,0 +1,66 @@
use crate::blobs::{bundler, decode, encode, framing::Mode as Framing};
use clap::{Args, Subcommand, ValueEnum};
use std::io;

#[derive(Debug, Subcommand)]
enum Commands {
Encode {
#[arg(value_enum, default_value_t)]
framing: FramingArg,
},
Decode {
#[arg(value_enum, default_value_t)]
framing: FramingArg,
},
Bundle,
}

#[derive(Debug, Clone, Default, ValueEnum)]
enum FramingArg {
Raw,
#[default]
Sized,
}

impl From<FramingArg> for Framing {
fn from(value: FramingArg) -> Self {
match value {
FramingArg::Raw => Framing::Raw,
FramingArg::Sized => Framing::Sized,
}
}
}

#[derive(Debug, Args)]
#[clap(about = "utilities for blobspace")]
pub struct Command {
#[clap(subcommand)]
command: Commands,
}

impl Command {
pub fn execute(self) -> eyre::Result<()> {
match self.command {
Commands::Encode { framing } => {
let stdin = io::stdin().lock();
let blobs = encode::from_reader(stdin, framing.into())?;
let result = serde_json::to_string_pretty(&blobs)?;
println!("{}", result);
Ok(())
}
Commands::Decode { framing } => {
let stdin = io::stdin().lock();
let stdout = io::stdout().lock();
decode::to_writer_from_json(stdin, stdout, framing.into())?;
Ok(())
}
Commands::Bundle => {
let stdin = io::stdin().lock();
let blobs_bundle = bundler::from_reader(stdin)?;
let result = serde_json::to_string_pretty(&blobs_bundle)?;
println!("{}", result);
Ok(())
}
}
}
}
49 changes: 49 additions & 0 deletions ethereum-consensus/src/bin/ec/blobs/decode.rs
@@ -0,0 +1,49 @@
use crate::blobs::{
framing::{payload_from_sized, Mode as Framing},
Blob, Error, BITS_PER_FIELD_ELEMENT, BYTES_PER_BLOB, BYTES_PER_FIELD_ELEMENT,
};
use bitvec::prelude::*;
use std::io::{Read, Write};

const BITS_PER_SERIALIZED_FIELD_ELEMENT: usize = 8 * BYTES_PER_FIELD_ELEMENT;

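// Recovers the packed byte stream from `blobs` by reading the usable bits of each serialized
// field element; the inverse of packing, modulo any trailing zero padding.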
pub fn unpack_from_blobs(blobs: &[Blob]) -> Result<Vec<u8>, Error> {
let mut stream = vec![0u8; blobs.len() * BYTES_PER_BLOB];
let stream_bits = stream.view_bits_mut::<Msb0>();

let mut i = 0;
for blob in blobs {
let blob_bits = blob.as_ref().view_bits::<Msb0>();
// chunks of serialized field element bits
let mut chunks = blob_bits.chunks_exact(BITS_PER_SERIALIZED_FIELD_ELEMENT);
for chunk in chunks.by_ref() {
// the first two bits are unusable given the big-endian field element encoding
let src = &chunk[2..];
stream_bits[i * BITS_PER_FIELD_ELEMENT..(i + 1) * BITS_PER_FIELD_ELEMENT]
.copy_from_bitslice(src);
i += 1;
}

let remainder = chunks.remainder();
debug_assert!(remainder.is_empty());
}

Ok(stream)
}

// Expects a `Vec<Blob>` with `serde_json` encoding read from `reader`.
// Writes recovered byte stream to `writer`.
pub fn to_writer_from_json(
reader: impl Read,
mut writer: impl Write,
framing: Framing,
) -> Result<(), Error> {
let blobs: Vec<Blob> = serde_json::from_reader(reader)?;
let result = unpack_from_blobs(&blobs)?;
let result = match framing {
Framing::Raw => &result,
Framing::Sized => payload_from_sized(&result)?,
};
writer.write_all(result)?;
Ok(())
}
77 changes: 77 additions & 0 deletions ethereum-consensus/src/bin/ec/blobs/encode.rs
@@ -0,0 +1,77 @@
use crate::{
blobs::{
framing::{sized_header, Mode as Framing},
Blob, Error, BITS_PER_FIELD_ELEMENT, BYTES_PER_BLOB, BYTES_PER_FIELD_ELEMENT,
},
bls::MODULUS,
};
use bitvec::prelude::*;
use ethereum_consensus::deneb::presets::mainnet::MAX_BLOBS_PER_BLOCK;
use ruint::aliases::U256;
use std::io::Read;

type BitSlice = bitvec::slice::BitSlice<u8, Msb0>;

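// Builds a `BYTES_PER_FIELD_ELEMENT`-byte field element from at most `BITS_PER_FIELD_ELEMENT`
// bits of payload data, zeroing the top two bits and checking that the big-endian value is
// below `MODULUS`.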
fn field_element_from_bits(src: &BitSlice) -> Result<Vec<u8>, Error> {
let mut field_element = vec![0u8; BYTES_PER_FIELD_ELEMENT];
// the first two bits are unusable given the big-endian field element encoding
let dst = &mut field_element.view_bits_mut()[2..2 + src.len()];
dst.copy_from_bitslice(src);

let x = U256::from_be_slice(&field_element);
if x < MODULUS {
Ok(field_element)
} else {
Err(Error::InvalidFieldElement)
}
}

// Pack a buffer of an arbitrary number of bytes into a series of `Blob`s.
pub fn pack_into_blobs(buffer: &[u8]) -> Result<Vec<Blob>, Error> {
let mut blobs = Vec::with_capacity(MAX_BLOBS_PER_BLOCK);
let bits = BitSlice::from_slice(buffer);
let mut blob_buffer = Vec::with_capacity(BYTES_PER_BLOB);
let mut chunks = bits.chunks_exact(BITS_PER_FIELD_ELEMENT);
for src in chunks.by_ref() {
if blob_buffer.len() == BYTES_PER_BLOB {
let blob = Blob::try_from(blob_buffer.as_ref()).expect("is the right size");
blobs.push(blob);
blob_buffer.clear();
}
let mut field_element = field_element_from_bits(src)?;
blob_buffer.append(&mut field_element);
}

// ensure we have only packed complete field elements so far
assert!(blob_buffer.len() % BYTES_PER_FIELD_ELEMENT == 0);

let remainder = chunks.remainder();
if !remainder.is_empty() {
let mut field_element = field_element_from_bits(remainder)?;
blob_buffer.append(&mut field_element);
}

blob_buffer.resize(BYTES_PER_BLOB, 0);
let blob = Blob::try_from(blob_buffer.as_ref()).expect("is the right size");
blobs.push(blob);

Ok(blobs)
}

// Packs bytes read from `reader` into a sequence of `Blob`s according to `framing`.
// Data is tightly packed into field elements.
pub fn from_reader(mut reader: impl Read, framing: Framing) -> Result<Vec<Blob>, Error> {
let mut buffer = Vec::with_capacity(BYTES_PER_BLOB * MAX_BLOBS_PER_BLOCK);
reader.read_to_end(&mut buffer).expect("can read data");
let prepared_buffer = match framing {
Framing::Raw => buffer,
Framing::Sized => {
let header = sized_header(buffer.len())?;
let mut framed_buffer = Vec::with_capacity(header.len() + buffer.len());
framed_buffer.extend_from_slice(&header);
framed_buffer.append(&mut buffer);
framed_buffer
}
};
pack_into_blobs(&prepared_buffer)
}
45 changes: 45 additions & 0 deletions ethereum-consensus/src/bin/ec/blobs/framing.rs
@@ -0,0 +1,45 @@
use crate::blobs::Error;

pub const SIZED_FRAMING_VERSION: u8 = 0;
pub const HEADER_SIZE: usize = 5;

/// A `Mode` to indicate how the target data should be packed into blob data.
pub enum Mode {
/// No framing, data is written/read directly from the blob data
Raw,
/// The size of a "payload" is written in-band to the blob data.
/// Supports "lossless" {de,}serialization even if the payload data is not
/// a multiple of the blob size.
Sized,
}

// Returns the header bytes that should prepend the target data in `Sized` framing mode.
// The header consists of one version byte, then a `u32` integer in big-endian encoding containing
// the size of the trailing data.
pub fn sized_header(data_byte_length: usize) -> Result<[u8; HEADER_SIZE], Error> {
let mut header = [0u8; HEADER_SIZE];
header[0] = SIZED_FRAMING_VERSION;
let size = u32::try_from(data_byte_length).map_err(|_| Error::InvalidPayloadSize)?;
header[1..].copy_from_slice(&size.to_be_bytes());
Ok(header)
}

// Attempts to parse a `stream` of bytes assuming they were written to blobs with the `Sized`
// framing mode.
pub fn payload_from_sized(stream: &[u8]) -> Result<&[u8], Error> {
if stream.len() < HEADER_SIZE {
return Err(Error::ExpectedHeaderForSizedFraming)
}

let (header, payload) = stream.split_at(HEADER_SIZE);

if header[0] != SIZED_FRAMING_VERSION {
return Err(Error::UnsupportedSizedFramingVersion)
}
let size = u32::from_be_bytes(header[1..5].try_into().expect("correct size bytes")) as usize;
if size > payload.len() {
return Err(Error::InvalidPayloadSize)
}

Ok(&payload[..size])
}
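
Taken together, the `encode`, `framing`, and `decode` modules above compose roughly as in the following sketch (a hypothetical helper, not part of this change, assuming the modules and the blobs `Error` type are in scope):

```rust
// Hypothetical round trip through the `sized` framing, composed from the functions above.
fn sized_roundtrip(data: &[u8]) -> Result<Vec<u8>, Error> {
    // Prepend the 5-byte sized header, then pack the framed bytes into blobs.
    let header = framing::sized_header(data.len())?;
    let mut framed = Vec::with_capacity(header.len() + data.len());
    framed.extend_from_slice(&header);
    framed.extend_from_slice(data);
    let blobs = encode::pack_into_blobs(&framed)?;

    // Unpack the (zero-padded) byte stream and use the header to strip the padding.
    let stream = decode::unpack_from_blobs(&blobs)?;
    let payload = framing::payload_from_sized(&stream)?;
    Ok(payload.to_vec())
}
```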