Skip to content

Commit

Permalink
[TEMP] Add SequenceExecutor
Browse files Browse the repository at this point in the history
google#1295

modules/zstd: Add SequenceExecutor

This commit adds SequenceExecutor block, that is currently capable of
handling only Literal Copy commands.

Internal-tag: [#53241]
Signed-off-by: Robert Winkler <[email protected]>

examples/ram: Export internal RAM API to other modules

This commit marks the SimultaneousReadWriteBehavior enum and the
num_partitions function as public, allowing the creation of
simpler tests that interact with RAM models.

Internal-tag: [#53241]
Signed-off-by: Robert Winkler <[email protected]>

modules/zstd: Add consts and types used by SequenceExecutor to common

Internal-tag: [#53241]
Signed-off-by: Robert Winkler <[email protected]>

modules/zstd: Add RAM printer debugging block

This commit adds a RAM printer block useful for debugging the
SequenceExecutor.

Internal-tag: [#53241]
Signed-off-by: Robert Winkler <[email protected]>

modules/zstd/common: Specify decoder output format

Internal-tag: [#52954]
Signed-off-by: Pawel Czarnecki <[email protected]>

modules/zstd/sequence_executor: fix codegen

Internal-tag: [#52954]
Signed-off-by: Pawel Czarnecki <[email protected]>
  • Loading branch information
rw1nkler authored and lpawelcz committed Feb 21, 2024
1 parent a20d74e commit 4213725
Show file tree
Hide file tree
Showing 5 changed files with 1,063 additions and 2 deletions.
4 changes: 2 additions & 2 deletions xls/examples/ram.x
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pub fn ReadWordReq<NUM_PARTITIONS:u32, ADDR_WIDTH:u32>(addr:uN[ADDR_WIDTH]) ->
}

// Behavior of reads and writes to the same address in the same "tick".
enum SimultaneousReadWriteBehavior : u2 {
pub enum SimultaneousReadWriteBehavior : u2 {
// The read shows the contents at the address before the write.
READ_BEFORE_WRITE = 0,
// The read shows the contents at the address after the write.
Expand Down Expand Up @@ -160,7 +160,7 @@ fn write_word_test() {

// Function to compute num partitions (e.g. mask width) for a data_width-wide
// word divided into word_partition_size-chunks.
fn num_partitions(word_partition_size: u32, data_width: u32) -> u32 {
pub fn num_partitions(word_partition_size: u32, data_width: u32) -> u32 {
match word_partition_size {
u32:0 => u32:0,
_ => (word_partition_size + data_width - u32:1) / word_partition_size,
Expand Down
119 changes: 119 additions & 0 deletions xls/modules/zstd/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ load("@rules_hdl//verilog:providers.bzl", "verilog_library")
load(
"//xls/build_rules:xls_build_defs.bzl",
"xls_benchmark_ir",
"xls_benchmark_verilog",
"xls_dslx_ir",
"xls_dslx_library",
"xls_dslx_test",
"xls_dslx_verilog",
"xls_ir_opt_ir",
)

package(
Expand Down Expand Up @@ -585,3 +588,119 @@ place_and_route(
synthesized_rtl = ":block_dec_synth_asap7",
target_die_utilization_percentage = "10",
)

# DSLX library with the RAM-printer debugging proc (ram_printer.x).
# Depends on the RAM models from //xls/examples.
xls_dslx_library(
    name = "ram_printer_dslx",
    srcs = ["ram_printer.x"],
    deps = [
        "//xls/examples:ram_dslx",
    ],
)

# Runs the DSLX test procs defined in ram_printer.x.
xls_dslx_test(
    name = "ram_printer_dslx_test",
    library = ":ram_printer_dslx",
)

# DSLX library with the SequenceExecutor proc.
xls_dslx_library(
    name = "sequence_executor_dslx",
    srcs = [
        "sequence_executor.x",
    ],
    deps = [
        ":common_dslx",
        ":ram_printer_dslx",
        "//xls/examples:ram_dslx",
    ],
)

# Runs the DSLX tests in sequence_executor.x. Comparison of interpreter
# results against the converted IR is disabled ("compare": "none").
xls_dslx_test(
    name = "sequence_executor_dslx_test",
    dslx_test_args = {
        "compare": "none",
    },
    library = ":sequence_executor_dslx",
)

# Generates Verilog for the SequenceExecutor proc using the pipeline
# generator with the asap7 delay model.
xls_dslx_verilog(
    name = "sequence_executor_verilog",
    codegen_args = {
        "module_name": "sequence_executor",
        "generator": "pipeline",
        "delay_model": "asap7",
        # Map each of the 7 RAM channel triples (req/resp/wr_comp) onto a
        # 1RW RAM configuration with a read latency of 5 cycles.
        "ram_configurations": ",".join([
            "{ram_name}:1RW:{req}:{resp}:{wr_comp}:{latency}".format(
                latency = 5,
                ram_name = "ram{}".format(num),
                req = "sequence_executor__req_m{}_s".format(num),
                resp = "sequence_executor__resp_m{}_r".format(num),
                wr_comp = "sequence_executor__wr_comp_m{}_r".format(num),
            )
            for num in range(7)
        ]),
        "pipeline_stages": "6",
        "reset": "rst",
        "reset_data_path": "true",
        "reset_active_low": "false",
        "reset_asynchronous": "true",
        "flop_inputs": "false",
        "flop_single_value_channels": "false",
        "flop_outputs": "false",
        "worst_case_throughput": "1",
        "use_system_verilog": "false",
    },
    dslx_top = "SequenceExecutor",
    library = ":sequence_executor_dslx",
    opt_ir_args = {
        # Inline all spawned procs before codegen.
        "inline_procs": "true",
    },
    verilog_file = "sequence_executor.v",
)

# Benchmarks the optimized IR produced by :sequence_executor_verilog,
# using the same pipeline depth and delay model as codegen.
xls_benchmark_ir(
    name = "sequence_executor_ir_benchmark",
    src = ":sequence_executor_verilog.opt.ir",
    benchmark_ir_args = {
        "pipeline_stages": "6",
        "delay_model": "asap7",
    },
)

# Benchmarks the generated Verilog.
xls_benchmark_verilog(
    name = "sequence_executor_verilog_benchmark",
    verilog_target = "sequence_executor_verilog",
)

# Verilog library wrapping the generated netlist for synthesis.
verilog_library(
    name = "sequence_executor_lib",
    srcs = [
        ":sequence_executor.v",
    ],
)

# Synthesizes the generated Verilog with the ASAP7 7.5-track rev28 RVT
# standard-cell library.
synthesize_rtl(
    name = "sequence_executor_asap7",
    standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt",
    top_module = "sequence_executor",
    deps = [
        ":sequence_executor_lib",
    ],
)

# Reports synthesis results (area/timing) for the synthesized design.
benchmark_synth(
    name = "sequence_executor_benchmark_synth",
    synth_target = ":sequence_executor_asap7",
)

# Place-and-route run for the synthesized netlist; detailed routing is
# skipped and die dimensions are left to the utilization target.
place_and_route(
    name = "sequence_executor_place_and_route",
    clock_period = "750",
    core_padding_microns = 2,
    min_pin_distance = "0.5",
    placement_density = "0.30",
    skip_detailed_routing = True,
    synthesized_rtl = ":sequence_executor_asap7",
    #die_height_microns = 120,
    #die_width_microns = 120,
    target_die_utilization_percentage = "5",
)
12 changes: 12 additions & 0 deletions xls/modules/zstd/common.x
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@ pub const DATA_WIDTH = u32:64;
pub const MAX_ID = u32::MAX;
pub const SYMBOL_WIDTH = u32:8;
pub const BLOCK_SIZE_WIDTH = u32:21;
// Size of the history buffer, in kilobytes.
pub const HISTORY_BUFFER_SIZE_KB = u32:64;
// Bit widths of the `Offset` and `Length` types declared below.
pub const OFFSET_WIDTH = u32:22;
pub const LENGTH_WIDTH = u32:22;

pub type BlockData = bits[DATA_WIDTH];
pub type BlockPacketLength = u32;
pub type BlockSize = bits[BLOCK_SIZE_WIDTH];
// Packet payload: literal data or a match offset (see SequenceExecutorPacket).
pub type CopyOrMatchContent = BlockData;
pub type CopyOrMatchLength = u64;
pub type Offset = bits[OFFSET_WIDTH];
pub type Length = bits[LENGTH_WIDTH];

pub enum BlockType : u2 {
RAW = 0,
Expand Down Expand Up @@ -54,3 +59,10 @@ pub struct SequenceExecutorPacket {
content: CopyOrMatchContent, // Literal data or match offset
last: bool, // Last packet in frame
}

// Defines output format of the ZSTD Decoder.
pub struct ZstdDecodedPacket {
    data: BlockData,            // Decoded data, DATA_WIDTH bits wide
    length: BlockPacketLength,  // valid bits in data
    last: bool,                 // Last decoded packet in frame
}
137 changes: 137 additions & 0 deletions xls/modules/zstd/ram_printer.x
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import std;
import xls.examples.ram;

// State of the RamPrinter proc: IDLE while waiting for a print request on
// `print_r`, BUSY while iterating over the RAM address range.
enum RamPrinterStatus : u2 {
    IDLE = 0,
    BUSY = 1,
}

struct RamPrinterState<ADDR_WIDTH: u32> { status: RamPrinterStatus, addr: bits[ADDR_WIDTH] }

// Debug proc that dumps the contents of NUM_MEMORIES parallel RAMs.
//
// After receiving () on `print_r`, it walks addresses 0..SIZE one per
// activation: it issues a read request to every RAM at the current address,
// collects the responses into a row, and emits the row via trace_fmt!.
// After printing the last address it sends () on `finish_s` and returns
// to IDLE.
proc RamPrinter<DATA_WIDTH: u32, SIZE: u32, NUM_PARTITIONS: u32, ADDR_WIDTH: u32, NUM_MEMORIES: u32>
{
    print_r: chan<()> in;    // print-request trigger
    finish_s: chan<()> out;  // notification that the full dump is done
    req_s: chan<ram::RWRamReq<ADDR_WIDTH, DATA_WIDTH, NUM_PARTITIONS>>[NUM_MEMORIES] out;
    resp_r: chan<ram::RWRamResp<DATA_WIDTH>>[NUM_MEMORIES] in;

    config(
        print_r: chan<()> in,
        finish_s: chan<()> out,
        req_s: chan<ram::RWRamReq<ADDR_WIDTH, DATA_WIDTH, NUM_PARTITIONS>>[NUM_MEMORIES] out,
        resp_r: chan<ram::RWRamResp<DATA_WIDTH>>[NUM_MEMORIES] in
    ) { (print_r, finish_s, req_s, resp_r) }

    init {
        RamPrinterState {
            status: RamPrinterStatus::IDLE,
            addr: bits[ADDR_WIDTH]:0
        }
    }

    next(tok: token, state: RamPrinterState) {

        // Block on a new print request only when idle; while BUSY keep
        // iterating through the address range without waiting.
        let is_idle = state.status == RamPrinterStatus::IDLE;
        let (tok, _) = recv_if(tok, print_r, is_idle, ());

        // Read the word at state.addr from every memory and gather the
        // responses into `row` (one element per memory).
        let (tok, row) = for (i, (tok, row)): (u32, (token, bits[DATA_WIDTH][NUM_MEMORIES])) in
            range(u32:0, NUM_MEMORIES) {
            let tok = send(
                tok, req_s[i],
                ram::RWRamReq {
                    addr: state.addr,
                    data: bits[DATA_WIDTH]:0,  // ignored for reads (we == false)
                    write_mask: (),
                    read_mask: (),
                    we: false,
                    re: true
                });
            let (tok, resp) = recv(tok, resp_r[i]);
            let row = update(row, i, resp.data);
            (tok, row)
        }((tok, bits[DATA_WIDTH][NUM_MEMORIES]:[bits[DATA_WIDTH]:0, ...]));

        let is_start = state.addr == bits[ADDR_WIDTH]:0;
        let is_last = state.addr == (SIZE - u32:1) as bits[ADDR_WIDTH];

        // Print a header line before the first row of the dump.
        if is_start { trace_fmt!(" ========= RAM content ========= ", ); } else { };

        trace_fmt!(" {}:\t{:x} ", state.addr, row);
        // Notify the requester once the whole address range has been printed.
        let tok = send_if(tok, finish_s, is_last, ());

        // Advance to the next address, or reset to IDLE after the last one.
        if is_last {
            RamPrinterState {
                addr: bits[ADDR_WIDTH]:0,
                status: RamPrinterStatus::IDLE
            }
        } else {
            RamPrinterState {
                addr: state.addr + bits[ADDR_WIDTH]:1,
                status: RamPrinterStatus::BUSY
            }
        }
    }
}

// Test configuration: 8 RAM models, each holding 10 words of 8 bits, with
// word_partition_size == 0 (ram::num_partitions then yields 0 partitions).
const TEST_NUM_MEMORIES = u32:8;
const TEST_SIZE = u32:10;
const TEST_DATA_WIDTH = u32:8;
const TEST_WORD_PARTITION_SIZE = u32:0;
const TEST_NUM_PARTITIONS = ram::num_partitions(TEST_WORD_PARTITION_SIZE, TEST_DATA_WIDTH);
const TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE;
const TEST_ADDR_WIDTH = std::clog2(TEST_SIZE);

// Smoke test: spawns eight 2RW RAM models and a RamPrinter connected to
// their port-0 request/response channels, then triggers one full print
// pass and waits for completion. The port-1 channels and the write-
// completion channels are created only to satisfy the RAM model's
// interface and are otherwise unused — no data is written, so the dump
// shows the RAMs' initial contents.
#[test_proc]
proc RamPrinterTest {
    terminator: chan<bool> out;
    req0_s: chan<ram::RWRamReq<TEST_ADDR_WIDTH, TEST_DATA_WIDTH, TEST_NUM_PARTITIONS>>[TEST_NUM_MEMORIES] out;
    resp0_r: chan<ram::RWRamResp<TEST_DATA_WIDTH>>[TEST_NUM_MEMORIES] in;
    wr_comp0_r: chan<()>[TEST_NUM_MEMORIES] in;
    req1_s: chan<ram::RWRamReq<TEST_ADDR_WIDTH, TEST_DATA_WIDTH, TEST_NUM_PARTITIONS>>[TEST_NUM_MEMORIES] out;
    resp1_r: chan<ram::RWRamResp<TEST_DATA_WIDTH>>[TEST_NUM_MEMORIES] in;
    wr_comp1_r: chan<()>[TEST_NUM_MEMORIES] in;
    print_s: chan<()> out;
    finish_r: chan<()> in;

    config(terminator: chan<bool> out) {
        let (req0_s, req0_r) = chan<ram::RWRamReq<TEST_ADDR_WIDTH, TEST_DATA_WIDTH, TEST_NUM_PARTITIONS>>[TEST_NUM_MEMORIES];
        let (resp0_s, resp0_r) = chan<ram::RWRamResp<TEST_DATA_WIDTH>>[TEST_NUM_MEMORIES];
        let (wr_comp0_s, wr_comp0_r) = chan<()>[TEST_NUM_MEMORIES];
        let (req1_s, req1_r) = chan<ram::RWRamReq<TEST_ADDR_WIDTH, TEST_DATA_WIDTH, TEST_NUM_PARTITIONS>>[TEST_NUM_MEMORIES];
        let (resp1_s, resp1_r) = chan<ram::RWRamResp<TEST_DATA_WIDTH>>[TEST_NUM_MEMORIES];
        let (wr_comp1_s, wr_comp1_r) = chan<()>[TEST_NUM_MEMORIES];

        let (print_s, print_r) = chan<()>;
        let (finish_s, finish_r) = chan<()>;

        // One dual-port RAM model per memory; both ports are wired up even
        // though the printer only drives port 0.
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[0], resp0_s[0], wr_comp0_s[0], req1_r[0], resp1_s[0], wr_comp1_s[0]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[1], resp0_s[1], wr_comp0_s[1], req1_r[1], resp1_s[1], wr_comp1_s[1]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[2], resp0_s[2], wr_comp0_s[2], req1_r[2], resp1_s[2], wr_comp1_s[2]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[3], resp0_s[3], wr_comp0_s[3], req1_r[3], resp1_s[3], wr_comp1_s[3]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[4], resp0_s[4], wr_comp0_s[4], req1_r[4], resp1_s[4], wr_comp1_s[4]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[5], resp0_s[5], wr_comp0_s[5], req1_r[5], resp1_s[5], wr_comp1_s[5]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[6], resp0_s[6], wr_comp0_s[6], req1_r[6], resp1_s[6], wr_comp1_s[6]);
        spawn ram::RamModel2RW<TEST_DATA_WIDTH, TEST_SIZE, TEST_WORD_PARTITION_SIZE, TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR>
            (req0_r[7], resp0_s[7], wr_comp0_s[7], req1_r[7], resp1_s[7], wr_comp1_s[7]);

        // The printer reads all memories through their port-0 channels.
        spawn RamPrinter<TEST_DATA_WIDTH, TEST_SIZE, TEST_NUM_PARTITIONS, TEST_ADDR_WIDTH, TEST_NUM_MEMORIES>
            (print_r, finish_s, req0_s, resp0_r);

        (terminator, req0_s, resp0_r, wr_comp0_r, req1_s, resp1_r, wr_comp1_r, print_s, finish_r)
    }

    init { }

    next(tok: token, state: ()) {
        // Request a dump, wait until the printer reports completion, then
        // terminate the test.
        let tok = send(tok, print_s, ());
        let (tok, _) = recv(tok, finish_r);
        let tok = send(tok, terminator, true);
    }
}
Loading

0 comments on commit 4213725

Please sign in to comment.