Skip to content

Commit

Permalink
feat: fflonk gpu implementation (#26)
Browse files Browse the repository at this point in the history
# What ❔

Implements the fflonk protocol for CUDA.

## Why ❔


## Checklist


- [ ] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [ ] Tests for the changes have been added / updated.
- [ ] Documentation comments have been added / updated.
- [ ] Code has been formatted via `zk fmt` and `zk lint`.

## Prerequisites
- [PR
matter-labs/zksync-crypto#11](matter-labs/zksync-crypto#11)
- [PR #38 ]

---------

Co-authored-by: Michael Carilli <[email protected]>
Co-authored-by: zksync-era-bot <[email protected]>
Co-authored-by: zksync-era-bot <[email protected]>
Co-authored-by: Robert Remen <[email protected]>
Co-authored-by: Daniyar Itegulov <[email protected]>
  • Loading branch information
6 people authored Oct 31, 2024
1 parent 7648ac5 commit 9d11084
Show file tree
Hide file tree
Showing 66 changed files with 127,321 additions and 105 deletions.
19 changes: 11 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[workspace]
members = [
"crates/*"
]
members = ["crates/*"]
exclude = ["crates/proof-compression"]
resolver = "2"

[workspace.package]
Expand All @@ -27,13 +26,17 @@ gpu-ffi-bindings-generator = { version = "=0.152.0", path = "crates/gpu-ffi", pa
gpu-prover = { version = "=0.152.0", path = "crates/gpu-prover", package = "zksync-gpu-prover" }
shivini = { version = "=0.152.0", path = "crates/shivini" }
wrapper-prover = { version = "=0.152.0", path = "crates/wrapper-prover", package = "zksync-wrapper-prover" }
# fflonk = { version = "=0.152.0", path = "crates/fflonk", package = "fflonk-gpu" }
# proof-compression = { version = "=0.150.1", path = "crates/proof-compression", package = "proof-compression" }

# These dependencies should be shared by all the crates.
boojum = "=0.30.5"
circuit_definitions = "=0.150.10"
franklin-crypto = "=0.30.5"
snark_wrapper = "=0.30.5"
zkevm_test_harness = "=0.150.10"
circuit_definitions = { version = "=0.150.11" }
zkevm_test_harness = { version = "=0.150.11" }
boojum = "=0.30.6"
franklin-crypto = "=0.30.6"
rescue_poseidon = "=0.30.6"
snark_wrapper = "=0.30.6"
fflonk-cpu = {package = "fflonk", version = "=0.30.6"}

[profile.release]
debug = "line-tables-only"
1 change: 1 addition & 0 deletions crates/fflonk/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/
28 changes: 28 additions & 0 deletions crates/fflonk/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[package]
edition.workspace = true
authors.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
repository.workspace = true
version.workspace = true
name = "fflonk-cuda"
description = "CUDA implementation of the fflonk prover"
exclude = ["/data"]

[dependencies]
fflonk-cpu = {workspace = true}
circuit_definitions.workspace = true
gpu-ffi.workspace = true
rand = "0.4"
derivative = "2.2"
byteorder = "1"
bincode = "1.3"
serde = { version = "1", features = ["derive", "rc"] }
serde_json = "1"
serde_derive = "1"

[features]
default = ["sanity"]
sanity = []
9 changes: 9 additions & 0 deletions crates/fflonk/src/allocator/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use super::*;
mod pinned;
pub use pinned::*;

mod pool;
pub use pool::*;

mod static_device;
pub use static_device::*;
44 changes: 44 additions & 0 deletions crates/fflonk/src/allocator/pinned.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use super::*;

// Both the assembly and the device setup are able to keep data in pinned memory:
// - the assembly uses it for the variables (7487741), state and setup columns
// - the device setup uses it for variable indexes and gate selectors
//
// NOTE(review): nothing in this file reads or writes this static yet; it looks like a
// placeholder for a process-wide `GlobalHost` instance — confirm before relying on it.
static mut _STATIC_HOST_ALLOC: Option<GlobalHost> = None;

/// Field-less handle for host allocations.
///
/// Its `Allocator` implementation forwards to `host_allocate` / `host_dealloc`, so the value
/// itself carries no state and is free to clone or default-construct.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct GlobalHost;

impl GlobalHost {
    /// Placeholder constructor for the pinned host buffer.
    ///
    /// Only the byte budget (index columns plus variables) is worked out so far; the function
    /// then reaches `todo!()`, so every call currently panics instead of returning a value.
    pub fn init(domain_size: usize) -> CudaResult<Self> {
        const BYTES_PER_INDEX: usize = 8;
        const BYTES_PER_VARIABLE: usize = 32;
        const NUM_COLS: usize = 3;
        // No variables are accounted for yet, so their share of the budget is zero.
        const NUM_VARIABLES: usize = 0;

        let index_bytes = BYTES_PER_INDEX * NUM_COLS * domain_size;
        let variable_bytes = BYTES_PER_VARIABLE * NUM_VARIABLES;
        let _total_size_in_bytes = index_bytes + variable_bytes;

        todo!()
    }
}

/// Marker trait for allocators usable on the host side.
///
/// It adds no methods of its own: it only bundles `Allocator` with
/// `Default + Clone + Send + Sync + 'static` so generic code can name one bound.
pub trait HostAllocator: Allocator + Default + Clone + Send + Sync + 'static {}

/// Routes `Allocator` requests through `host_allocate` / `host_dealloc`.
///
/// NOTE(review): only `layout.size()` is forwarded; `layout.align()` is never inspected, so this
/// relies on `host_allocate` returning memory aligned strictly enough for every caller — confirm.
unsafe impl Allocator for GlobalHost {
    /// Requests `layout.size()` bytes from `host_allocate`.
    ///
    /// # Errors
    /// Returns `AllocError` when `host_allocate` fails or hands back a null pointer.
    fn allocate(
        &self,
        layout: std::alloc::Layout,
    ) -> Result<std::ptr::NonNull<[u8]>, std::alloc::AllocError> {
        let size = layout.size();
        let raw = host_allocate(size).map_err(|_| std::alloc::AllocError)?;
        // A null result must surface as an allocation failure: wrapping it with
        // `NonNull::new_unchecked` would be undefined behaviour.
        let base = std::ptr::NonNull::new(raw as *mut u8).ok_or(std::alloc::AllocError)?;
        Ok(std::ptr::NonNull::slice_from_raw_parts(base, size))
    }

    /// Hands the block back to `host_dealloc`; the layout is not needed for that call.
    ///
    /// # Panics
    /// Panics if `host_dealloc` reports an error, because this signature has no way to
    /// return one.
    unsafe fn deallocate(&self, ptr: std::ptr::NonNull<u8>, _layout: std::alloc::Layout) {
        host_dealloc(ptr.as_ptr().cast()).expect("deallocate static buffer")
    }
}

// Two allocators are accepted wherever a `HostAllocator` is required: `GlobalHost`
// (which goes through `host_allocate` / `host_dealloc`) and the standard `Global` allocator.
impl HostAllocator for GlobalHost {}
impl HostAllocator for std::alloc::Global {}
76 changes: 76 additions & 0 deletions crates/fflonk/src/allocator/pool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use super::*;

/// Allocator interface with two families of entry points: plain ones that take only a
/// layout, and `_async` ones that additionally take a memory pool and/or a stream.
///
/// Allocation methods return the block as a `NonNull<[u8]>` wrapped in `CudaResult`;
/// the deallocation methods return nothing, so implementors cannot report failures to
/// the caller through the return value.
pub trait DeviceAllocator: Default {
/// Allocates a block for `layout` without an explicit pool or stream.
fn allocate(&self, layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>>;
/// Same as `allocate`; going by the name the block is expected to be zero-filled
/// (NOTE(review): confirm that implementors actually guarantee this).
fn allocate_zeroed(&self, layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>>;
/// Releases a block; presumably `ptr`/`layout` must come from `allocate` or
/// `allocate_zeroed` on the same allocator — TODO confirm.
fn deallocate(&self, ptr: std::ptr::NonNull<u8>, layout: std::alloc::Layout);
/// Allocates a block for `layout` from `pool`, associated with `stream`.
fn allocate_async(
&self,
layout: std::alloc::Layout,
pool: bc_mem_pool,
stream: bc_stream,
) -> CudaResult<std::ptr::NonNull<[u8]>>;
/// Releases a block on `stream`; note that no pool is passed here.
fn deallocate_async(
&self,
ptr: std::ptr::NonNull<u8>,
layout: std::alloc::Layout,
stream: bc_stream,
);
/// Pool/stream counterpart of `allocate_zeroed`.
fn allocate_zeroed_async(
&self,
layout: std::alloc::Layout,
pool: bc_mem_pool,
stream: bc_stream,
) -> CudaResult<std::ptr::NonNull<[u8]>>;
}

/// Field-less `DeviceAllocator` that serves only the `_async` (pool/stream) methods.
///
/// The plain `allocate` / `deallocate` entry points are `unimplemented!`, and
/// `allocate_zeroed` forwards to `allocate`, so all three panic when called.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct PoolAllocator;

/// `DeviceAllocator` built on `allocate_async_on`, `allocate_zeroed_async_on` and
/// `dealloc_async`. Only the pool/stream methods do real work; the plain ones panic.
impl DeviceAllocator for PoolAllocator {
    /// Unsupported: always panics through `unimplemented!`.
    fn allocate(&self, _layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>> {
        unimplemented!("Pool allocator can't do static allocation/deallocation")
    }

    /// Delegates to `allocate` and therefore panics as well; no zeroing happens here.
    fn allocate_zeroed(&self, layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>> {
        self.allocate(layout)
    }

    /// Unsupported: always panics through `unimplemented!`.
    fn deallocate(&self, _ptr: std::ptr::NonNull<u8>, _layout: std::alloc::Layout) {
        unimplemented!("Pool allocator can't do static allocation/deallocation")
    }

    /// Obtains `layout.size()` bytes via `allocate_async_on(size, pool, stream)` and returns
    /// them as a byte slice pointer of that length. Errors from the helper are passed through.
    fn allocate_async(
        &self,
        layout: std::alloc::Layout,
        pool: bc_mem_pool,
        stream: bc_stream,
    ) -> CudaResult<std::ptr::NonNull<[u8]>> {
        let size = layout.size();
        let raw = allocate_async_on(size, pool, stream)?;
        // SAFETY: assumes `allocate_async_on` never yields a null pointer on success —
        // TODO confirm; a null here would be undefined behaviour.
        let base = unsafe { std::ptr::NonNull::new_unchecked(raw as *mut u8) };
        Ok(std::ptr::NonNull::slice_from_raw_parts(base, size))
    }

    /// Same as `allocate_async`, but goes through `allocate_zeroed_async_on`.
    fn allocate_zeroed_async(
        &self,
        layout: std::alloc::Layout,
        pool: bc_mem_pool,
        stream: bc_stream,
    ) -> CudaResult<std::ptr::NonNull<[u8]>> {
        let size = layout.size();
        let raw = allocate_zeroed_async_on(size, pool, stream)?;
        // SAFETY: assumes `allocate_zeroed_async_on` never yields a null pointer on success —
        // TODO confirm; a null here would be undefined behaviour.
        let base = unsafe { std::ptr::NonNull::new_unchecked(raw as *mut u8) };
        Ok(std::ptr::NonNull::slice_from_raw_parts(base, size))
    }

    /// Passes the raw pointer and `stream` to `dealloc_async`; the layout is ignored.
    ///
    /// Panics with "deallocate" if the helper reports an error, since the signature
    /// cannot return one.
    fn deallocate_async(
        &self,
        ptr: std::ptr::NonNull<u8>,
        _layout: std::alloc::Layout,
        stream: bc_stream,
    ) {
        let raw = ptr.as_ptr().cast();
        dealloc_async(raw, stream).expect("deallocate")
    }
}

// SAFETY: `PoolAllocator` is a field-less unit struct, so there is no state that could be
// raced on or moved between threads; these impls only make the guarantee explicit.
unsafe impl Send for PoolAllocator {}
unsafe impl Sync for PoolAllocator {}
Loading

0 comments on commit 9d11084

Please sign in to comment.