Skip to content

Commit

Permalink
Add self-invalidation coherence
Browse files Browse the repository at this point in the history
added files required for compilation

Added target to test litmus tests

Per byte dirty bit added to std dcache and tested

basic support for dual core instantiation

Some automation added to the multi-core testing process

minor changes

temporary ci modifications for working without sudo permissions

branch prova

prova modified

Multi core instantiation made generic

Transition between WAIT_CRITICAL_WORD and WAIT_TAG removed if there is a flush - feature tested

Changed repo to the master branch and added masks for reservation at cacheline granularity because bursts are not supported

Fixed the never-return problem for non-boot cores and modified dt for 2 cores

Added master branch of common_cells and compilation of new file in Makefile

Added transition between FLUSHING and FLUSHING to avoid multiple flushes during atomics

Unused code removed and code commented

Added support to use the master branch of the axi_riscv_atomics repository

Added support for multiple ariane instances for fpga synthesis

Increased stack for big applications and reduced number of harts

Peripherals configured to use multiple cores

List of unsolved issues encountered during the master's thesis

Co-authored-by: msc22h2 <[email protected]>
Signed-off-by: Nils Wistoff <[email protected]>
  • Loading branch information
niwis and Michelangelo98 committed Nov 1, 2024
1 parent 5dfb893 commit 75986cc
Show file tree
Hide file tree
Showing 8 changed files with 285 additions and 28 deletions.
8 changes: 4 additions & 4 deletions core/cache_subsystem/cache_ctrl.sv
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,11 @@ module cache_ctrl

// set the correct byte enable
be_o.data[cl_offset>>3+:CVA6Cfg.XLEN/8] = mem_req_q.be;
data_o.data[cl_offset+:CVA6Cfg.XLEN] = mem_req_q.wdata;
data_o.tag = mem_req_d.tag;
data_o.data[cl_offset+:CVA6Cfg.XLEN] = mem_req_q.wdata;
data_o.tag = mem_req_d.tag;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
data_o.dirty[cl_offset>>3+:CVA6Cfg.XLEN/8] = 1'b1;
data_o.valid = 1'b1;

// got a grant ~> this is finished now
if (gnt_i) begin
Expand Down
11 changes: 6 additions & 5 deletions core/cache_subsystem/miss_handler.sv
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ module miss_handler
automatic logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] evict_way, valid_way;

for (int unsigned i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin
evict_way[i] = data_i[i].valid & data_i[i].dirty;
evict_way[i] = data_i[i].valid & (|data_i[i].dirty);
valid_way[i] = data_i[i].valid;
end
// ----------------------
Expand Down Expand Up @@ -287,10 +287,11 @@ module miss_handler
lfsr_enable = 1'b1;
evict_way_d = lfsr_oh;
// do we need to write back the cache line?
if (data_i[lfsr_bin].dirty) begin
if (|data_i[lfsr_bin].dirty) begin
state_d = WB_CACHELINE_MISS;
evict_cl_d.tag = data_i[lfsr_bin].tag;
evict_cl_d.data = data_i[lfsr_bin].data;
evict_cl_d.dirty = data_i[lfsr_bin].dirty;
cnt_d = mshr_q.addr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0];
// no - we can request a cache line now
end else state_d = REQ_CACHELINE;
Expand Down Expand Up @@ -328,7 +329,7 @@ module miss_handler
data_o.tag = mshr_q.addr[CVA6Cfg.DCACHE_TAG_WIDTH+CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_INDEX_WIDTH];
data_o.data = data_miss_fsm;
data_o.valid = 1'b1;
data_o.dirty = 1'b0;
data_o.dirty = '0;

// is this a write?
if (mshr_q.we) begin
Expand All @@ -338,7 +339,7 @@ module miss_handler
if (mshr_q.be[i]) data_o.data[(cl_offset+i*8)+:8] = mshr_q.wdata[i];
end
// it's immediately dirty if we write
data_o.dirty = 1'b1;
data_o.dirty[cl_offset>>3+:8] = mshr_q.be;
end
// reset MSHR
mshr_d.valid = 1'b0;
Expand All @@ -359,7 +360,7 @@ module miss_handler
cnt_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH],
{{CVA6Cfg.DCACHE_OFFSET_WIDTH} {1'b0}}
};
req_fsm_miss_be = '1;
req_fsm_miss_be = evict_cl_q.dirty;
req_fsm_miss_we = 1'b1;
req_fsm_miss_wdata = evict_cl_q.data;

Expand Down
40 changes: 27 additions & 13 deletions core/cache_subsystem/std_nbdcache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,19 @@ module std_nbdcache
localparam DCACHE_DIRTY_WIDTH = CVA6Cfg.DCACHE_SET_ASSOC * 2;

localparam type cache_line_t = struct packed {
logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [CVA6Cfg.DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic dirty; // state array
logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [CVA6Cfg.DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic [(CVA6Cfg.DCACHE_LINE_WIDTH-1+7)/8-1:0] dirty; // state array
};
typedef struct packed {
logic [CVA6Cfg.DCACHE_LINE_WIDTH/8-1:0] dirty;
logic valid;
} vldrty_t;
localparam type cl_be_t = struct packed {
logic [(CVA6Cfg.DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array
logic [(CVA6Cfg.DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array
logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
vldrty_t [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
};

// -------------------------------
Expand Down Expand Up @@ -107,6 +111,7 @@ module std_nbdcache
cache_line_t wdata_ram;
cache_line_t [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
vldrty_t [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] be_valid_dirty_ram;

// Busy signals
logic miss_handler_busy;
Expand Down Expand Up @@ -245,19 +250,28 @@ module std_nbdcache

// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
// you can use it here to save the extra 17x overhead introduced by this workaround.
logic [(CVA6Cfg.DCACHE_LINE_WIDTH+8)*CVA6Cfg.DCACHE_SET_ASSOC-1:0] dirty_wdata, dirty_rdata;

for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
for (genvar j = 0; j < CVA6Cfg.DCACHE_LINE_WIDTH / 8; j++) begin
// dirty bits assignment
assign dirty_wdata[(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i+8*j] = wdata_ram.dirty[j];
assign rdata_ram[i].dirty[j] = dirty_rdata[(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i+8*j];
end
// valid bit assignment
assign dirty_wdata[CVA6Cfg.DCACHE_LINE_WIDTH+(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[CVA6Cfg.DCACHE_LINE_WIDTH+(CVA6Cfg.DCACHE_LINE_WIDTH+8)*i];
end

// be construction for valid_dirty_sram
for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin
assign be_valid_dirty_ram[i*(CVA6Cfg.DCACHE_LINE_WIDTH/8+1)+:(CVA6Cfg.DCACHE_LINE_WIDTH/8+1)] = {be_ram.vldrty[i], be_ram.data} & {(CVA6Cfg.DCACHE_LINE_WIDTH/8+1){be_ram.vldrty[i]}};
end

sram #(
.USER_WIDTH(1),
.DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
.DATA_WIDTH((CVA6Cfg.DCACHE_LINE_WIDTH + 8) * CVA6Cfg.DCACHE_SET_ASSOC),
.NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS)
) valid_dirty_sram (
.clk_i (clk_i),
Expand All @@ -267,7 +281,7 @@ module std_nbdcache
.addr_i (addr_ram[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]),
.wuser_i('0),
.wdata_i(dirty_wdata),
.be_i (be_ram.vldrty),
.be_i (be_valid_dirty_ram),
.ruser_o(),
.rdata_o(dirty_rdata)
);
Expand Down
2 changes: 1 addition & 1 deletion corev_apu/tb/common/tb_dcache_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ package tb_pkg;
parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...

// tb_readport sequences
typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ } seq_t;
typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ, SET_SEQ, CONST_SEQ, HALF_SEQ } seq_t;

typedef enum logic [1:0] { OTHER, BYPASS, CACHED } port_type_t;

Expand Down
16 changes: 12 additions & 4 deletions corev_apu/tb/common/tb_writeport.sv
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
input logic rst_ni,

// to testbench master
input logic half_i,
input logic [1:0] max_size_i,
ref string test_name_i,
input logic [6:0] req_rate_i,
input seq_t seq_type_i,
Expand Down Expand Up @@ -66,13 +68,13 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
automatic logic [CVA6Cfg.XLEN/8-1:0] be;
automatic logic [1:0] size;

void'(randomize(size) with {size <= $clog2(CVA6Cfg.XLEN/8);});
void'(randomize(size) with {size >= 2'b00; size <= max_size_i; size <= $clog2(CVA6Cfg.XLEN/8);});
// align to size, set correct byte enables
be = '0;
unique case(size)
2'b00: be[paddr[2:0] +: 1] = '1;
2'b01: be[paddr[2:1]<<1 +: 2] = '1;
2'b10: be[paddr[2:2]<<2 +: 4] = '1;
2'b00: be[int'(paddr[2:0]) +: 1] = '1;
2'b01: be[int'(paddr[2:1]<<1) +: 2] = '1;
2'b10: be[int'(paddr[2:2]<<2) +: 4] = '1;
2'b11: be = '1;
default: ;
endcase
Expand Down Expand Up @@ -112,6 +114,7 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
dut_req_port_o.data_req = 1'b1;
// generate random address
void'(randomize(paddr) with {paddr >= 0; paddr < (MemWords<<$clog2(CVA6Cfg.XLEN/8));});
if (seq_type_i == HALF_SEQ) paddr[int'(max_size_i)] = half_i;
applyRandData();
`APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
end
Expand Down Expand Up @@ -281,6 +284,11 @@ program tb_writeport import tb_pkg::*; import ariane_pkg::*; #(
$display("%s> start random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genRandReq();
end
HALF_SEQ: begin
$display("%s> start half random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
$display("%s> half = %b and max size = %b", PortName, half_i, max_size_i);
genRandReq();
end
LINEAR_SEQ: begin
$display("%s> start linear sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genSeqWrite();
Expand Down
2 changes: 1 addition & 1 deletion corev_apu/tb/tb_wb_dcache/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ src := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-)
compile_flag += +cover+i_dut -incr -64 -nologo -svinputport=compat -override_timescale 1ns/1ps -suppress 2583 -suppress 13262 -suppress 2986 +cover
sim_opts += -64 -coverage -classdebug -voptargs="+acc"
questa_version ?= ${QUESTASIM_VERSION}
incdir += ../common/ ../../../vendor/pulp-platform/axi/include/
incdir += ../common/ ../../../vendor/pulp-platform/axi/include/ ../../../vendor/pulp-platform/common_cells/include/

# Iterate over all include directories and write them with +incdir+ prefixed
# +incdir+ works for Verilator and QuestaSim
Expand Down
Loading

0 comments on commit 75986cc

Please sign in to comment.