From 2eeadb43d983aa50ebc8cef66842d775c47cbdfa Mon Sep 17 00:00:00 2001 From: Chris Keilbart Date: Tue, 10 Sep 2024 15:13:31 -0700 Subject: [PATCH 1/4] Preliminary OS support --- .gitignore | 2 + LICENSE | 0 README.md | 10 +- core/common_components/byte_en_bram.sv | 0 core/common_components/cva5_fifo.sv | 12 +- core/common_components/cycler.sv | 0 core/common_components/lfsr.sv | 8 +- core/common_components/one_hot_mux.sv | 69 ++ core/common_components/one_hot_to_integer.sv | 0 .../{ => ram}/dual_port_bram.sv | 0 .../{ => ram}/lutram_1w_1r.sv | 0 .../{ => ram}/lutram_1w_mr.sv | 0 core/common_components/ram/sdp_ram.sv | 86 ++ core/common_components/ram/sdp_ram_padded.sv | 87 ++ core/common_components/toggle_memory.sv | 4 - core/common_components/toggle_memory_set.sv | 8 +- .../intel/intel_byte_enable_ram.sv | 0 .../xilinx/cva5_wrapper_xilinx.sv | 1 - .../xilinx/xilinx_byte_enable_ram.sv | 0 core/cva5.sv | 145 ++-- core/decode_and_issue.sv | 52 +- core/execution_units/alu_unit.sv | 0 core/execution_units/barrel_shifter.sv | 0 core/execution_units/branch_unit.sv | 15 +- core/execution_units/csr_unit.sv | 821 ++++++++++++------ core/execution_units/div_unit.sv | 2 +- core/execution_units/gc_unit.sv | 325 ++++--- .../load_store_unit/addr_hash.sv | 2 +- .../load_store_unit/amo_alu.sv | 56 +- .../load_store_unit/amo_unit.sv | 123 +++ .../execution_units/load_store_unit/dcache.sv | 549 ++++++------ .../load_store_unit/dcache_tag_banks.sv | 114 --- .../load_store_unit/load_store_queue.sv | 92 +- .../load_store_unit/load_store_unit.sv | 367 ++++++-- .../load_store_unit/store_queue.sv | 21 +- core/execution_units/mul_unit.sv | 0 core/fetch_stage/branch_predictor.sv | 107 ++- core/fetch_stage/fetch.sv | 77 +- core/fetch_stage/icache.sv | 64 +- core/fetch_stage/icache_tag_banks.sv | 42 +- core/fetch_stage/ras.sv | 2 +- .../instruction_metadata_and_id_management.sv | 53 +- core/l1_arbiter.sv | 0 core/memory_sub_units/avalon_master.sv | 183 ++-- core/memory_sub_units/axi_master.sv 
| 165 ++-- core/memory_sub_units/local_mem_sub_unit.sv | 67 +- core/memory_sub_units/wishbone_master.sv | 168 +++- core/mmu/dtlb.sv | 333 +++++++ core/mmu/itlb.sv | 294 +++++++ core/{ => mmu}/mmu.sv | 66 +- core/mmu/perms_check.sv | 58 ++ core/register_file.sv | 3 +- core/register_free_list.sv | 2 +- core/renamer.sv | 8 +- core/tlb_lut_ram.sv | 170 ---- core/types_and_interfaces/csr_types.sv | 82 +- core/types_and_interfaces/cva5_config.sv | 76 +- core/types_and_interfaces/cva5_types.sv | 54 +- .../external_interfaces.sv | 19 +- .../internal_interfaces.sv | 92 +- core/types_and_interfaces/opcodes.sv | 4 +- core/types_and_interfaces/riscv_types.sv | 85 +- debug_module/debug_module.sv | 2 +- examples/litex/l1_to_wishbone.sv | 2 +- examples/litex/litex_wrapper.sv | 269 ++---- examples/nexys/nexys_config.sv | 34 +- examples/nexys/nexys_sim.sv | 62 +- examples/nexys/nexys_wrapper.sv | 1 - examples/zedboard/README.md | 0 examples/zedboard/arm.tcl | 0 examples/zedboard/cva5.png | Bin examples/zedboard/cva5_small.png | Bin examples/zedboard/cva5_wrapper.sv | 4 +- .../zedboard/simulator_output_example.png | Bin examples/zedboard/system.png | Bin examples/zedboard/system_periperhals.tcl | 0 formal/interfaces/axi4_basic_props.sv | 0 formal/models/cva5_fbm.sv | 0 formal/models/cva5_formal_wrapper.sv | 0 l2_arbiter/axi_to_arb.sv | 1 - l2_arbiter/l2_arbiter.sv | 0 l2_arbiter/l2_config_and_types.sv | 0 l2_arbiter/l2_fifo.sv | 0 l2_arbiter/l2_interfaces.sv | 0 l2_arbiter/l2_reservation_logic.sv | 0 l2_arbiter/l2_round_robin.sv | 0 test_benches/axi_mem_sim.sv | 0 test_benches/cva5_tb.sv | 0 test_benches/cva5_tb.wcfg | 0 test_benches/sim_mem.sv | 0 test_benches/unit_test_benches/alu_unit_tb.sv | 0 test_benches/unit_test_benches/div_unit_tb.sv | 0 test_benches/unit_test_benches/mul_unit_tb.sv | 0 test_benches/verilator/CVA5Tracer.cc | 4 +- test_benches/verilator/CVA5Tracer.h | 4 +- test_benches/verilator/cva5_sim.cc | 2 +- test_benches/verilator/cva5_sim.sv | 2 +- tools/compile_order 
| 17 +- 98 files changed, 3695 insertions(+), 1954 deletions(-) create mode 100644 .gitignore mode change 100755 => 100644 LICENSE mode change 100755 => 100644 README.md mode change 100755 => 100644 core/common_components/byte_en_bram.sv mode change 100755 => 100644 core/common_components/cva5_fifo.sv mode change 100755 => 100644 core/common_components/cycler.sv create mode 100644 core/common_components/one_hot_mux.sv mode change 100755 => 100644 core/common_components/one_hot_to_integer.sv rename core/common_components/{ => ram}/dual_port_bram.sv (100%) rename core/common_components/{ => ram}/lutram_1w_1r.sv (100%) rename core/common_components/{ => ram}/lutram_1w_mr.sv (100%) create mode 100644 core/common_components/ram/sdp_ram.sv create mode 100644 core/common_components/ram/sdp_ram_padded.sv mode change 100755 => 100644 core/common_components/vendor_support/intel/intel_byte_enable_ram.sv mode change 100755 => 100644 core/common_components/vendor_support/xilinx/xilinx_byte_enable_ram.sv mode change 100755 => 100644 core/cva5.sv mode change 100755 => 100644 core/decode_and_issue.sv mode change 100755 => 100644 core/execution_units/alu_unit.sv mode change 100755 => 100644 core/execution_units/barrel_shifter.sv mode change 100755 => 100644 core/execution_units/branch_unit.sv mode change 100755 => 100644 core/execution_units/csr_unit.sv mode change 100755 => 100644 core/execution_units/div_unit.sv mode change 100755 => 100644 core/execution_units/load_store_unit/amo_alu.sv create mode 100644 core/execution_units/load_store_unit/amo_unit.sv delete mode 100644 core/execution_units/load_store_unit/dcache_tag_banks.sv mode change 100755 => 100644 core/execution_units/load_store_unit/load_store_unit.sv mode change 100755 => 100644 core/execution_units/mul_unit.sv mode change 100755 => 100644 core/fetch_stage/branch_predictor.sv mode change 100755 => 100644 core/fetch_stage/icache.sv mode change 100755 => 100644 core/fetch_stage/icache_tag_banks.sv mode change 100755 => 
100644 core/fetch_stage/ras.sv mode change 100755 => 100644 core/l1_arbiter.sv mode change 100755 => 100644 core/memory_sub_units/axi_master.sv mode change 100755 => 100644 core/memory_sub_units/local_mem_sub_unit.sv create mode 100644 core/mmu/dtlb.sv create mode 100644 core/mmu/itlb.sv rename core/{ => mmu}/mmu.sv (79%) mode change 100755 => 100644 create mode 100644 core/mmu/perms_check.sv mode change 100755 => 100644 core/register_file.sv delete mode 100755 core/tlb_lut_ram.sv mode change 100755 => 100644 core/types_and_interfaces/cva5_config.sv mode change 100755 => 100644 core/types_and_interfaces/cva5_types.sv mode change 100755 => 100644 core/types_and_interfaces/internal_interfaces.sv mode change 100755 => 100644 examples/zedboard/README.md mode change 100755 => 100644 examples/zedboard/arm.tcl mode change 100755 => 100644 examples/zedboard/cva5.png mode change 100755 => 100644 examples/zedboard/cva5_small.png mode change 100755 => 100644 examples/zedboard/cva5_wrapper.sv mode change 100755 => 100644 examples/zedboard/simulator_output_example.png mode change 100755 => 100644 examples/zedboard/system.png mode change 100755 => 100644 examples/zedboard/system_periperhals.tcl mode change 100755 => 100644 formal/interfaces/axi4_basic_props.sv mode change 100755 => 100644 formal/models/cva5_fbm.sv mode change 100755 => 100644 formal/models/cva5_formal_wrapper.sv mode change 100755 => 100644 l2_arbiter/axi_to_arb.sv mode change 100755 => 100644 l2_arbiter/l2_arbiter.sv mode change 100755 => 100644 l2_arbiter/l2_config_and_types.sv mode change 100755 => 100644 l2_arbiter/l2_fifo.sv mode change 100755 => 100644 l2_arbiter/l2_interfaces.sv mode change 100755 => 100644 l2_arbiter/l2_reservation_logic.sv mode change 100755 => 100644 l2_arbiter/l2_round_robin.sv mode change 100755 => 100644 test_benches/axi_mem_sim.sv mode change 100755 => 100644 test_benches/cva5_tb.sv mode change 100755 => 100644 test_benches/cva5_tb.wcfg mode change 100755 => 100644 
test_benches/sim_mem.sv mode change 100755 => 100644 test_benches/unit_test_benches/alu_unit_tb.sv mode change 100755 => 100644 test_benches/unit_test_benches/div_unit_tb.sv mode change 100755 => 100644 test_benches/unit_test_benches/mul_unit_tb.sv diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..7232d25b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +test_benches/verilator/build + diff --git a/LICENSE b/LICENSE old mode 100755 new mode 100644 diff --git a/README.md b/README.md old mode 100755 new mode 100644 index d447f68c..b896e48a --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ # CVA5 - -CVA5 is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide and Double-precision Floating-Point extensions (RV32IMD). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable. +CVA5 is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide, Atomic, and Floating-Point extensions (RV32IMAFD). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable. The CVA5 is derived from the Taiga Project from Simon Fraser University. @@ -16,7 +15,6 @@ For up-to-date documentation, as well as an automated build environment setup, r ## License - CVA5 is licensed under the Solderpad License, Version 2.1 ( http://solderpad.org/licenses/SHL-2.1/ ). Solderpad is an extension of the Apache License, and many contributions to CVA5 were made under Apache Version 2.0 ( https://www.apache.org/licenses/LICENSE-2.0 ) @@ -25,10 +23,14 @@ A zedboard configuration is provided under the examples directory along with too ## Publications +C. Keilbart, Y. Gao, M. Chua, E. Matthews, S. J. Wilton, and L. Shannon, “Designing an IEEE-Compliant FPU that Supports Configurable Precision for Soft Processors,” ACM Trans. Reconfigurable Technol. Syst., vol. 17, no. 2, Apr. 2024. 
+doi: [https://doi.org/10.1145/3650036](https://doi.org/10.1145/3650036) + E. Matthews, A. Lu, Z. Fang and L. Shannon, "Rethinking Integer Divider Design for FPGA-Based Soft-Processors," 2019 IEEE 27th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), San Diego, CA, USA, 2019, pp. 289-297. doi: [https://doi.org/10.1109/FCCM.2019.00046](https://doi.org/10.1109/FCCM.2019.00046) E. Matthews, Z. Aguila and L. Shannon, "Evaluating the Performance Efficiency of a Soft-Processor, Variable-Length, Parallel-Execution-Unit Architecture for FPGAs Using the RISC-V ISA," 2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), Boulder, CO, 2018, pp. 1-8. doi: [https://doi.org/10.1109/FCCM.2018.00010](https://doi.org/10.1109/FCCM.2018.00010) -E. Matthews and L. Shannon, "TAIGA: A new RISC-V soft-processor framework enabling high performance CPU architectural features," 2017 27th International Conference on Field Programmable Logic and Applications (FPL), Ghent, Belgium, 2017. [https://doi.org/10.23919/FPL.2017.8056766](https://doi.org/10.23919/FPL.2017.8056766) +E. Matthews and L. Shannon, "TAIGA: A new RISC-V soft-processor framework enabling high performance CPU architectural features," 2017 27th International Conference on Field Programmable Logic and Applications (FPL), Ghent, Belgium, 2017. 
+doi: [https://doi.org/10.23919/FPL.2017.8056766](https://doi.org/10.23919/FPL.2017.8056766) diff --git a/core/common_components/byte_en_bram.sv b/core/common_components/byte_en_bram.sv old mode 100755 new mode 100644 diff --git a/core/common_components/cva5_fifo.sv b/core/common_components/cva5_fifo.sv old mode 100755 new mode 100644 index f163faec..0dad17b7 --- a/core/common_components/cva5_fifo.sv +++ b/core/common_components/cva5_fifo.sv @@ -27,10 +27,6 @@ */ module cva5_fifo - import cva5_config::*; - import riscv_types::*; - import cva5_types::*; - #( parameter type DATA_TYPE = logic, parameter FIFO_DEPTH = 4 @@ -49,8 +45,10 @@ module cva5_fifo always_ff @ (posedge clk) begin if (rst) fifo.valid <= 0; - else - fifo.valid <= fifo.push | (fifo.valid & ~fifo.pop); + else if (fifo.push & ~fifo.pop) + fifo.valid <= 1; + else if (fifo.pop & ~fifo.push) + fifo.valid <= 0; end assign fifo.full = fifo.valid; @@ -134,6 +132,6 @@ module cva5_fifo fifo_potenial_push_overflow_assertion: assert property (@(posedge clk) disable iff (rst) fifo.potential_push |-> (~fifo.full | fifo.pop)) else $error("potential push overflow"); fifo_underflow_assertion: - assert property (@(posedge clk) disable iff (rst) fifo.pop |-> fifo.valid) else $error("underflow"); + assert property (@(posedge clk) disable iff (rst) fifo.pop |-> (fifo.valid | fifo.push)) else $error("underflow"); endmodule diff --git a/core/common_components/cycler.sv b/core/common_components/cycler.sv old mode 100755 new mode 100644 diff --git a/core/common_components/lfsr.sv b/core/common_components/lfsr.sv index 5cfbf484..39bc68d5 100644 --- a/core/common_components/lfsr.sv +++ b/core/common_components/lfsr.sv @@ -68,7 +68,7 @@ module lfsr logic feedback; //////////////////////////////////////////////////// //Implementation - generate if (WIDTH == 2) begin : gen_width_two + generate if (WIDTH <= 2) begin : gen_width_one_or_two assign feedback = ~value[WIDTH-1]; end else begin : gen_width_three_plus @@ -84,8 +84,10 @@ 
module lfsr always_ff @ (posedge clk) begin if (NEEDS_RESET & rst) value <= '0; - else if (en) - value <= {value[WIDTH-2:0], feedback}; + else if (en) begin + value <= value << 1; + value[0] <= feedback; + end end endmodule diff --git a/core/common_components/one_hot_mux.sv b/core/common_components/one_hot_mux.sv new file mode 100644 index 00000000..83c9a7fe --- /dev/null +++ b/core/common_components/one_hot_mux.sv @@ -0,0 +1,69 @@ +/* + * Copyright © 2024 Chris Keilbart + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. 
+ * + * Author(s): + * Chris Keilbart + */ + + +module one_hot_mux + #( + parameter OPTIONS = 5, + parameter type DATA_TYPE = logic + ) + ( + //Only used for assertions + input logic clk, + input logic rst, + + input logic[OPTIONS-1:0] one_hot, + input DATA_TYPE[OPTIONS-1:0] choices, + output DATA_TYPE sel + ); + + //Casting to eliminate warnings + typedef logic[$bits(DATA_TYPE)-1:0] casted_t; + casted_t[OPTIONS-1:0] choices_casted; + casted_t sel_casted; + + //////////////////////////////////////////////////// + //Implementation + //Cheaper than converting ohot -> int and indexing + always_comb begin + for (int i = 0; i < OPTIONS; i++) + choices_casted[i] = casted_t'(choices[i]); + sel = DATA_TYPE'(sel_casted); + end + + generate if (OPTIONS == 1) begin : gen_no_mux + assign sel_casted = choices_casted[0]; + end else begin : gen_mux + always_comb begin + sel_casted = '0; + for (int i = 0; i < OPTIONS; i++) + if (one_hot[i]) sel_casted |= choices_casted[i]; + end + end endgenerate + + //////////////////////////////////////////////////// + //Assertions + ohot_assertion: + assert property (@(posedge clk) disable iff (rst) $onehot0(one_hot)) + else $error("Selection mux not one hot"); + +endmodule diff --git a/core/common_components/one_hot_to_integer.sv b/core/common_components/one_hot_to_integer.sv old mode 100755 new mode 100644 diff --git a/core/common_components/dual_port_bram.sv b/core/common_components/ram/dual_port_bram.sv similarity index 100% rename from core/common_components/dual_port_bram.sv rename to core/common_components/ram/dual_port_bram.sv diff --git a/core/common_components/lutram_1w_1r.sv b/core/common_components/ram/lutram_1w_1r.sv similarity index 100% rename from core/common_components/lutram_1w_1r.sv rename to core/common_components/ram/lutram_1w_1r.sv diff --git a/core/common_components/lutram_1w_mr.sv b/core/common_components/ram/lutram_1w_mr.sv similarity index 100% rename from core/common_components/lutram_1w_mr.sv rename to 
core/common_components/ram/lutram_1w_mr.sv diff --git a/core/common_components/ram/sdp_ram.sv b/core/common_components/ram/sdp_ram.sv new file mode 100644 index 00000000..5f1d2bae --- /dev/null +++ b/core/common_components/ram/sdp_ram.sv @@ -0,0 +1,86 @@ +/* + * Copyright © 2024 Chris Keilbart, Lesley Shannon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. 
+ * + * Author(s): + * Chris Keilbart + */ + +module sdp_ram + + #( + parameter ADDR_WIDTH = 10, + parameter NUM_COL = 4, //Number of independently writeable components + parameter COL_WIDTH = 16, //Width the "byte" enable controls + parameter DATA_WIDTH = COL_WIDTH*NUM_COL, //Do not set this to anything else + parameter PIPELINE_DEPTH = 1, //Depth of the output pipeline, is latency in clock cycles + parameter CASCADE_DEPTH = 4 //Maximum depth of the memory block cascade + ) + ( + input logic clk, + //Port A + input logic a_en, + input logic[NUM_COL-1:0] a_wbe, + input logic[DATA_WIDTH-1:0] a_wdata, + input logic[ADDR_WIDTH-1:0] a_addr, + + //Port B + input logic b_en, + input logic[ADDR_WIDTH-1:0] b_addr, + output logic[DATA_WIDTH-1:0] b_rdata + ); + + (* cascade_height = CASCADE_DEPTH, ramstyle = "no_rw_check" *) //Higher depths use less resources but are slower + logic[DATA_WIDTH-1:0] mem[(1< 0) begin : gen_b_pipeline + logic[DATA_WIDTH-1:0] b_data_pipeline[PIPELINE_DEPTH-1:0]; + logic[PIPELINE_DEPTH-1:0] b_en_pipeline; + + always_ff @(posedge clk) begin + for (int i = 0; i < PIPELINE_DEPTH; i++) begin + b_en_pipeline[i] <= i == 0 ? b_en : b_en_pipeline[i-1]; + if (b_en_pipeline[i]) + b_data_pipeline[i] <= i == 0 ? b_ram_output : b_data_pipeline[i-1]; + end + end + assign b_rdata = b_data_pipeline[PIPELINE_DEPTH-1]; + end + else begin : gen_b_transparent_output + assign b_rdata = b_ram_output; + end endgenerate + +endmodule diff --git a/core/common_components/ram/sdp_ram_padded.sv b/core/common_components/ram/sdp_ram_padded.sv new file mode 100644 index 00000000..ee847a72 --- /dev/null +++ b/core/common_components/ram/sdp_ram_padded.sv @@ -0,0 +1,87 @@ +/* + * Copyright © 2024 Chris Keilbart, Lesley Shannon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. + * + * Author(s): + * Chris Keilbart + */ + +module sdp_ram_padded + + #( + parameter ADDR_WIDTH = 10, + parameter NUM_COL = 4, //Number of independently writeable components + parameter COL_WIDTH = 16, //Width the "byte" enable controls + parameter DATA_WIDTH = COL_WIDTH*NUM_COL, //Do not set this to anything else + parameter PIPELINE_DEPTH = 1, //Depth of the output pipeline, is latency in clock cycles + parameter CASCADE_DEPTH = 4 //Maximum depth of the memory block cascade + ) + ( + input logic clk, + //Port A + input logic a_en, + input logic[NUM_COL-1:0] a_wbe, + input logic[DATA_WIDTH-1:0] a_wdata, + input logic[ADDR_WIDTH-1:0] a_addr, + + //Port B + input logic b_en, + input logic[ADDR_WIDTH-1:0] b_addr, + output logic[DATA_WIDTH-1:0] b_rdata + ); + + //Pad columns to the nearest multiple of 8 or 9 to allow the use of the byte enable + //This results in a more compact BRAM encoding + localparam PAD_WIDTH8 = (8 - (COL_WIDTH % 8)) % 8; + localparam PAD_WIDTH9 = (9 - (COL_WIDTH % 9)) % 9; + localparam PAD_WIDTH = PAD_WIDTH8 <= PAD_WIDTH9 ? 
PAD_WIDTH8 : PAD_WIDTH9; + localparam PADDED_WIDTH = COL_WIDTH + PAD_WIDTH; + localparam TOTAL_WIDTH = NUM_COL * PADDED_WIDTH; + + generate if (PAD_WIDTH == 0 || NUM_COL == 1) begin : gen_no_padding + sdp_ram #( + .ADDR_WIDTH(ADDR_WIDTH), + .NUM_COL(NUM_COL), + .COL_WIDTH(COL_WIDTH), + .PIPELINE_DEPTH(PIPELINE_DEPTH), + .CASCADE_DEPTH(CASCADE_DEPTH) + ) mem (.*); + end else begin : gen_padded + logic[TOTAL_WIDTH-1:0] a_padded; + logic[TOTAL_WIDTH-1:0] b_padded; + + always_comb begin + a_padded = 'x; + for (int i = 0; i < NUM_COL; i++) begin + a_padded[i*PADDED_WIDTH+:COL_WIDTH] = a_wdata[i*COL_WIDTH+:COL_WIDTH]; + b_rdata[i*COL_WIDTH+:COL_WIDTH] = b_padded[i*PADDED_WIDTH+:COL_WIDTH]; + end + end + + sdp_ram #( + .ADDR_WIDTH(ADDR_WIDTH), + .NUM_COL(NUM_COL), + .COL_WIDTH(PADDED_WIDTH), + .PIPELINE_DEPTH(PIPELINE_DEPTH), + .CASCADE_DEPTH(CASCADE_DEPTH) + ) mem ( + .a_wdata(a_padded), + .b_rdata(b_padded), + .*); + end endgenerate + +endmodule diff --git a/core/common_components/toggle_memory.sv b/core/common_components/toggle_memory.sv index bf20db99..ff66bd99 100644 --- a/core/common_components/toggle_memory.sv +++ b/core/common_components/toggle_memory.sv @@ -22,16 +22,12 @@ module toggle_memory - import cva5_config::*; - import cva5_types::*; - # ( parameter DEPTH = 8, parameter NUM_READ_PORTS = 2 ) ( input logic clk, - input logic rst, input logic toggle, input logic [$clog2(DEPTH)-1:0] toggle_id, diff --git a/core/common_components/toggle_memory_set.sv b/core/common_components/toggle_memory_set.sv index 30299011..2a02eb51 100644 --- a/core/common_components/toggle_memory_set.sv +++ b/core/common_components/toggle_memory_set.sv @@ -22,9 +22,6 @@ module toggle_memory_set - import cva5_config::*; - import cva5_types::*; - # ( parameter DEPTH = 64, parameter NUM_WRITE_PORTS = 3, @@ -32,7 +29,6 @@ module toggle_memory_set ) ( input logic clk, - input logic rst, input logic init_clear, input logic toggle [NUM_WRITE_PORTS], @@ -53,7 +49,7 @@ module toggle_memory_set 
//counter for indexing through memories for post-reset clearing/initialization lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0)) lfsr_counter ( - .clk (clk), .rst (rst), + .clk (clk), .rst (1'b0), .en(init_clear), .value(clear_index) ); @@ -76,7 +72,7 @@ module toggle_memory_set for (j = 0; j < NUM_WRITE_PORTS+1; j++) begin : write_port_gen toggle_memory #(.DEPTH(DEPTH), .NUM_READ_PORTS(NUM_READ_PORTS+1)) mem ( - .clk (clk), .rst (rst), + .clk (clk), .toggle(_toggle[j]), .toggle_id(_toggle_addr[j]), .read_id(_read_addr), diff --git a/core/common_components/vendor_support/intel/intel_byte_enable_ram.sv b/core/common_components/vendor_support/intel/intel_byte_enable_ram.sv old mode 100755 new mode 100644 diff --git a/core/common_components/vendor_support/xilinx/cva5_wrapper_xilinx.sv b/core/common_components/vendor_support/xilinx/cva5_wrapper_xilinx.sv index d61f4d9d..ac4891ec 100644 --- a/core/common_components/vendor_support/xilinx/cva5_wrapper_xilinx.sv +++ b/core/common_components/vendor_support/xilinx/cva5_wrapper_xilinx.sv @@ -131,4 +131,3 @@ module cva5_wrapper_xilinx cva5 cpu(.*); endmodule - diff --git a/core/common_components/vendor_support/xilinx/xilinx_byte_enable_ram.sv b/core/common_components/vendor_support/xilinx/xilinx_byte_enable_ram.sv old mode 100755 new mode 100644 diff --git a/core/cva5.sv b/core/cva5.sv old mode 100755 new mode 100644 index 0eb090eb..7bb8bee6 --- a/core/cva5.sv +++ b/core/cva5.sv @@ -29,6 +29,7 @@ module cva5 import riscv_types::*; import cva5_types::*; import fpu_types::*; + import csr_types::*; #( parameter cpu_config_t CONFIG = EXAMPLE_CONFIG @@ -48,6 +49,7 @@ module cva5 l2_requester_interface.master l2, + input logic [63:0] mtime, input interrupt_t s_interrupt, input interrupt_t m_interrupt ); @@ -90,7 +92,8 @@ module cva5 tlb_interface itlb(); tlb_interface dtlb(); - logic tlb_on; + logic instruction_translation_on; + logic data_translation_on; logic [ASIDLEN-1:0] asid; //Instruction ID/Metadata @@ -112,7 +115,6 @@ module 
cva5 logic decode_uses_rd; logic fp_decode_uses_rd; rs_addr_t decode_rd_addr; - exception_sources_t decode_exception_unit; logic decode_is_store; phys_addr_t decode_phys_rd_addr; phys_addr_t fp_decode_phys_rd_addr; @@ -127,7 +129,6 @@ module cva5 retire_packet_t fp_wb_retire; retire_packet_t store_retire; id_t retire_ids [RETIRE_PORTS]; - id_t retire_ids_next [RETIRE_PORTS]; logic retire_port_valid [RETIRE_PORTS]; logic [LOG2_RETIRE_PORTS : 0] retire_count; //Writeback @@ -138,29 +139,33 @@ module cva5 phys_addr_t wb_phys_addr [CONFIG.NUM_WB_GROUPS]; phys_addr_t fp_wb_phys_addr [2]; logic [4:0] fflag_wmask; - //Exception - logic [31:0] oldest_pc; renamer_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS), .READ_PORTS(REGFILE_READ_PORTS)) decode_rename_interface (); renamer_interface #(.NUM_WB_GROUPS(2), .READ_PORTS(3)) fp_decode_rename_interface (); //Global Control exception_interface exception [NUM_EXCEPTION_SOURCES](); - logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit; gc_outputs_t gc; + tlb_packet_t sfence; load_store_status_t load_store_status; logic [LOG2_MAX_IDS:0] post_issue_count; - logic [1:0] current_privilege; logic mret; logic sret; - logic [31:0] epc; - logic [31:0] exception_target_pc; - + logic csr_frontend_flush; logic interrupt_taken; logic interrupt_pending; - logic processing_csr; + //CSR broadcast info + logic [1:0] current_privilege; + logic tvm; + logic tsr; + envcfg_t menvcfg; + envcfg_t senvcfg; + logic [31:0] mepc; + logic [31:0] sepc; + logic [31:0] exception_target_pc; + //Decode Unit and Fetch Unit logic issue_stage_ready; @@ -176,11 +181,12 @@ module cva5 //////////////////////////////////////////////////// //Implementation + //////////////////////////////////////////////////// // Memory Interface - generate if (CONFIG.INCLUDE_S_MODE || CONFIG.INCLUDE_ICACHE || CONFIG.INCLUDE_DCACHE) begin : gen_l1_arbiter + generate if (CONFIG.MODES == MSU || CONFIG.INCLUDE_ICACHE || CONFIG.INCLUDE_DCACHE) begin : gen_l1_arbiter 
l1_arbiter #(.CONFIG(CONFIG)) arb( .clk (clk), @@ -217,7 +223,6 @@ module cva5 .decode_rd_addr (decode_rd_addr), .decode_phys_rd_addr (decode_phys_rd_addr), .fp_decode_phys_rd_addr (fp_decode_phys_rd_addr), - .decode_exception_unit (decode_exception_unit), .decode_is_store (decode_is_store), .issue (issue), .instruction_issued (instruction_issued), @@ -231,12 +236,9 @@ module cva5 .fp_wb_retire (fp_wb_retire), .store_retire (store_retire), .retire_ids (retire_ids), - .retire_ids_next (retire_ids_next), .retire_port_valid(retire_port_valid), .retire_count (retire_count), - .post_issue_count(post_issue_count), - .oldest_pc (oldest_pc), - .current_exception_unit (current_exception_unit) + .post_issue_count(post_issue_count) ); //////////////////////////////////////////////////// @@ -263,8 +265,7 @@ module cva5 .icache_on ('1), .tlb (itlb), .l1_request (l1_request[L1_ICACHE_ID]), - .l1_response (l1_response[L1_ICACHE_ID]), - .exception (1'b0) + .l1_response (l1_response[L1_ICACHE_ID]) ); branch_predictor #(.CONFIG(CONFIG)) @@ -285,19 +286,19 @@ module cva5 .ras (ras) ); - generate if (CONFIG.INCLUDE_S_MODE) begin : gen_itlb_immu - - tlb_lut_ram #(.WAYS(CONFIG.ITLB.WAYS), .DEPTH(CONFIG.ITLB.DEPTH)) - i_tlb ( - .clk (clk), - .rst (rst), - .gc (gc), - .abort_request (gc.fetch_flush | early_branch_flush), - .asid (asid), - .tlb (itlb), - .mmu (immu) - ); + itlb #(.WAYS(CONFIG.ITLB.WAYS), .DEPTH(CONFIG.ITLB.DEPTH)) + i_tlb ( + .clk (clk), + .rst (rst), + .translation_on (instruction_translation_on), + .sfence (sfence), + .abort_request (gc.fetch_flush | early_branch_flush), + .asid (asid), + .tlb (itlb), + .mmu (immu) + ); + generate if (CONFIG.MODES == MSU) begin : gen_immu mmu i_mmu ( .clk (clk), .rst (rst), @@ -308,11 +309,6 @@ module cva5 ); end - else begin - assign itlb.ready = 1; - assign itlb.done = itlb.new_request; - assign itlb.physical_address = itlb.virtual_address; - end endgenerate //////////////////////////////////////////////////// @@ -348,7 +344,6 @@ 
module cva5 .decode_uses_rd (decode_uses_rd), .fp_decode_uses_rd (fp_decode_uses_rd), .decode_rd_addr (decode_rd_addr), - .decode_exception_unit (decode_exception_unit), .decode_phys_rd_addr (decode_phys_rd_addr), .fp_decode_phys_rd_addr (fp_decode_phys_rd_addr), .decode_phys_rs_addr (decode_phys_rs_addr), @@ -455,8 +450,7 @@ module cva5 .issue (unit_issue[LS_ID]), .dcache_on (1'b1), .clear_reservation (1'b0), - .tlb (dtlb), - .tlb_on (tlb_on), + .tlb (dtlb), .l1_request (l1_request[L1_DCACHE_ID]), .l1_response (l1_response[L1_DCACHE_ID]), .sc_complete (sc_complete), @@ -465,8 +459,12 @@ module cva5 .m_avalon (m_avalon), .dwishbone (dwishbone), .data_bram (data_bram), + .current_privilege (current_privilege), + .menvcfg (menvcfg), + .senvcfg (senvcfg), .wb_packet (wb_packet), .fp_wb_packet (fp_wb_packet), + .retire_id (retire_ids[0]), .store_retire (store_retire), .exception (exception[LS_EXCEPTION]), .load_store_status(load_store_status), @@ -474,18 +472,18 @@ module cva5 .fp_wb (fp_unit_wb[0]) ); - generate if (CONFIG.INCLUDE_S_MODE) begin : gen_dtlb_dmmu - tlb_lut_ram #(.WAYS(CONFIG.DTLB.WAYS), .DEPTH(CONFIG.DTLB.DEPTH)) - d_tlb ( - .clk (clk), - .rst (rst), - .gc (gc), - .abort_request (1'b0), - .asid (asid), - .tlb (dtlb), - .mmu (dmmu) - ); + dtlb #(.WAYS(CONFIG.DTLB.WAYS), .DEPTH(CONFIG.DTLB.DEPTH)) + d_tlb ( + .clk (clk), + .rst (rst), + .translation_on (data_translation_on), + .sfence (sfence), + .asid (asid), + .tlb (dtlb), + .mmu (dmmu) + ); + generate if (CONFIG.MODES == MSU) begin : gen_dmmu mmu d_mmu ( .clk (clk), .rst (rst), @@ -495,11 +493,6 @@ module cva5 .l1_response (l1_response[L1_DMMU_ID]) ); end - else begin - assign dtlb.ready = 1; - assign dtlb.done = dtlb.new_request; - assign dtlb.physical_address = dtlb.virtual_address; - end endgenerate generate if (CONFIG.INCLUDE_UNIT.CSR) begin : gen_csrs @@ -515,25 +508,32 @@ module cva5 .uses_rs (unit_uses_rs[CSR_ID]), .uses_rd (unit_uses_rd[CSR_ID]), .rf (rf_issue.data), + .instruction_issued 
(instruction_issued), + .fp_instruction_issued_with_rd (fp_instruction_issued_with_rd), .issue (unit_issue[CSR_ID]), .wb (unit_wb[CSR_ID]), .current_privilege(current_privilege), + .menvcfg(menvcfg), + .senvcfg(senvcfg), .fflag_wmask (fflag_wmask), .dyn_rm (dyn_rm), .interrupt_taken(interrupt_taken), .interrupt_pending(interrupt_pending), - .processing_csr(processing_csr), - .tlb_on(tlb_on), + .csr_frontend_flush(csr_frontend_flush), + .instruction_translation_on(instruction_translation_on), + .data_translation_on(data_translation_on), .asid(asid), .immu(immu), .dmmu(dmmu), - .exception(gc.exception), + .exception_pkt(gc.exception), .exception_target_pc (exception_target_pc), .mret(mret), .sret(sret), - .epc(epc), + .mepc(mepc), + .sepc(sepc), + .exception(exception[CSR_EXCEPTION]), .retire_ids(retire_ids), - .retire_count (retire_count), + .mtime(mtime), .s_interrupt(s_interrupt), .m_interrupt(m_interrupt) ); @@ -546,27 +546,30 @@ module cva5 .decode_stage (decode), .issue_stage (issue), .issue_stage_ready (issue_stage_ready), - .unit_needed (unit_needed[IEC_ID]), - .uses_rs (unit_uses_rs[IEC_ID]), - .uses_rd (unit_uses_rd[IEC_ID]), + .unit_needed (unit_needed[GC_ID]), + .uses_rs (unit_uses_rs[GC_ID]), + .uses_rd (unit_uses_rd[GC_ID]), + .instruction_issued (instruction_issued), .constant_alu (constant_alu), .rf (rf_issue.data), - .issue (unit_issue[IEC_ID]), + .issue (unit_issue[GC_ID]), .branch_flush (branch_flush), + .local_gc_exception (exception[GC_EXCEPTION]), .exception (exception), .exception_target_pc (exception_target_pc), - .current_exception_unit (current_exception_unit), + .csr_frontend_flush (csr_frontend_flush), + .current_privilege (current_privilege), + .tvm (tvm), + .tsr (tsr), .gc (gc), - .oldest_pc (oldest_pc), + .sfence (sfence), .mret(mret), .sret(sret), - .epc(epc), - .retire_ids_next (retire_ids_next), + .mepc(mepc), + .sepc(sepc), .interrupt_taken(interrupt_taken), .interrupt_pending(interrupt_pending), - .processing_csr(processing_csr), - 
.load_store_status(load_store_status), - .post_issue_count (post_issue_count) + .load_store_status(load_store_status) ); generate if (CONFIG.INCLUDE_UNIT.MUL) begin : gen_mul diff --git a/core/decode_and_issue.sv b/core/decode_and_issue.sv old mode 100755 new mode 100644 index 4576977a..4f87e320 --- a/core/decode_and_issue.sv +++ b/core/decode_and_issue.sv @@ -40,7 +40,6 @@ module decode_and_issue input logic pc_id_available, input decode_packet_t decode, output logic decode_advance, - output exception_sources_t decode_exception_unit, //Renamer renamer_interface.decode renamer, @@ -190,6 +189,10 @@ module decode_and_issue //////////////////////////////////////////////////// //Issue always_ff @(posedge clk) begin + if (instruction_issued) begin + issue.pc_r <= issue.pc; + issue.instruction_r <= issue.instruction; + end if (issue_stage_ready) begin issue.pc <= decode.pc; issue.instruction <= decode.instruction; @@ -208,7 +211,6 @@ module decode_and_issue fp_issue_rd_wb_group <= fp_decode_wb_group; issue.is_multicycle <= ~unit_needed[ALU_ID]; issue.id <= decode.id; - issue.exception_unit <= decode_exception_unit; issue_uses_rs <= decode_uses_rs; fp_issue_uses_rs <= fp_decode_uses_rs; issue.uses_rd <= decode_uses_rd; @@ -276,29 +278,23 @@ module decode_and_issue //////////////////////////////////////////////////// //Illegal Instruction check - generate if (CONFIG.INCLUDE_M_MODE) begin : gen_decode_exceptions + generate if (CONFIG.MODES != BARE) begin : gen_decode_exceptions logic new_exception; exception_code_t ecode; exception_code_t ecall_code; + logic [31:0] tval; //ECALL and EBREAK captured here, but seperated out when ecode is set assign illegal_instruction_pattern = ~|unit_needed; - //TODO: Consider ways of parameterizing so that any exception generating unit - //can be automatically added to this expression - always_comb begin - unique case (1'b1) - unit_needed[LS_ID] : decode_exception_unit = LS_EXCEPTION; - unit_needed[BR_ID] : decode_exception_unit = 
BR_EXCEPTION; - default : decode_exception_unit = PRE_ISSUE_EXCEPTION; - endcase - if (~decode.fetch_metadata.ok) - decode_exception_unit = PRE_ISSUE_EXCEPTION; - end - //////////////////////////////////////////////////// //ECALL/EBREAK //The type of call instruction is depedent on the current privilege level + logic is_ecall; + logic is_ebreak; + assign is_ecall = decode.instruction inside {ECALL}; + assign is_ebreak = decode.instruction inside {EBREAK}; + always_comb begin case (current_privilege) USER_PRIVILEGE : ecall_code = ECALL_U; @@ -311,10 +307,16 @@ module decode_and_issue always_ff @(posedge clk) begin if (issue_stage_ready) begin ecode <= - decode.instruction inside {ECALL} ? ecall_code : - decode.instruction inside {EBREAK} ? BREAK : + is_ecall ? ecall_code : + is_ebreak ? BREAK : illegal_instruction_pattern ? ILLEGAL_INST : decode.fetch_metadata.error_code; //(~decode.fetch_metadata.ok) + if (~decode.fetch_metadata.ok | is_ebreak) + tval <= decode.pc; + else if (is_ecall) + tval <= '0; + else + tval <= decode.instruction; end end @@ -327,22 +329,20 @@ module decode_and_issue pre_issue_exception_pending <= illegal_instruction_pattern | (~decode.fetch_metadata.ok); end - assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush | exception.valid); + assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush) & ~exception.valid; always_ff @(posedge clk) begin if (rst) exception.valid <= 0; else - exception.valid <= (exception.valid | new_exception) & ~exception.ack; + exception.valid <= new_exception; end - always_ff @(posedge clk) begin - if (new_exception) begin - exception.code <= ecode; - exception.tval <= issue.instruction; - exception.id <= issue.id; - end - end + assign exception.possible = 0; //Not needed because occurs before issue + assign exception.code = ecode; + assign exception.tval = tval; + assign exception.pc = issue.pc; + assign 
exception.discard = 0; end endgenerate //////////////////////////////////////////////////// diff --git a/core/execution_units/alu_unit.sv b/core/execution_units/alu_unit.sv old mode 100755 new mode 100644 diff --git a/core/execution_units/barrel_shifter.sv b/core/execution_units/barrel_shifter.sv old mode 100755 new mode 100644 diff --git a/core/execution_units/branch_unit.sv b/core/execution_units/branch_unit.sv old mode 100755 new mode 100644 index b51a2a09..b8166997 --- a/core/execution_units/branch_unit.sv +++ b/core/execution_units/branch_unit.sv @@ -65,7 +65,6 @@ module branch_unit logic [31:0] new_pc; logic [31:0] new_pc_ex; - logic [31:0] pc_ex; logic instruction_is_completing; logic branch_complete; @@ -200,7 +199,7 @@ module branch_unit //////////////////////////////////////////////////// //Exception support - generate if (CONFIG.INCLUDE_M_MODE) begin : gen_branch_exception + generate if (CONFIG.MODES != BARE) begin : gen_branch_exception logic new_exception; assign new_exception = new_pc[1] & branch_taken & issue.new_request; @@ -208,15 +207,14 @@ module branch_unit if (rst) exception.valid <= 0; else - exception.valid <= (exception.valid & ~exception.ack) | new_exception; + exception.valid <= new_exception; end - always_ff @(posedge clk) begin - if (issue.new_request) - exception.id <= issue.id; - end + assign exception.possible = 0; //Not needed because branch_flush suppresses issue assign exception.code = INST_ADDR_MISSALIGNED; assign exception.tval = new_pc_ex; + assign exception.pc = issue_stage.pc_r; + assign exception.discard = 0; end endgenerate @@ -228,13 +226,12 @@ module branch_unit if (issue.possible_issue) begin is_return_ex <= is_return; is_call_ex <= is_call; - pc_ex <= issue_stage.pc; end end assign br_results.id = id_ex; assign br_results.valid = instruction_is_completing; - assign br_results.pc = pc_ex; + assign br_results.pc = issue_stage.pc_r; assign br_results.target_pc = new_pc_ex; assign br_results.branch_taken = branch_taken_ex; 
assign br_results.is_branch = ~jal_or_jalr_ex; diff --git a/core/execution_units/csr_unit.sv b/core/execution_units/csr_unit.sv old mode 100755 new mode 100644 index fe4b7d2c..349b5dfc --- a/core/execution_units/csr_unit.sv +++ b/core/execution_units/csr_unit.sv @@ -45,6 +45,8 @@ module csr_unit input logic issue_stage_ready, input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS], input logic [31:0] rf [REGFILE_READ_PORTS], + input logic instruction_issued, + input logic fp_instruction_issued_with_rd, //Unit Interfaces unit_issue_interface.unit issue, @@ -52,6 +54,8 @@ module csr_unit //Privilege output logic [1:0] current_privilege, + output envcfg_t menvcfg, + output envcfg_t senvcfg, //FP input logic [4:0] fflag_wmask, //Always valid @@ -60,10 +64,11 @@ module csr_unit //GC input logic interrupt_taken, output logic interrupt_pending, - output logic processing_csr, + output logic csr_frontend_flush, //TLB and MMU - output logic tlb_on, + output logic instruction_translation_on, + output logic data_translation_on, output logic [ASIDLEN-1:0] asid, //MMUs @@ -71,19 +76,23 @@ module csr_unit mmu_interface.csr dmmu, //CSR exception interface - input exception_packet_t exception, + input exception_packet_t exception_pkt, output logic [31:0] exception_target_pc, //exception return input logic mret, input logic sret, - output logic [31:0] epc, + output logic [31:0] mepc, + output logic [31:0] sepc, + + //Exception generation + exception_interface.unit exception, //Retire input id_t retire_ids [RETIRE_PORTS], - input logic [LOG2_RETIRE_PORTS : 0] retire_count, //External + input logic [63:0] mtime, input interrupt_t s_interrupt, input interrupt_t m_interrupt ); @@ -120,6 +129,7 @@ module csr_unit logic swrite; logic mwrite; logic [255:0] sub_write_en; + logic illegal_instruction; logic [31:0] selected_csr; logic [31:0] selected_csr_r; @@ -127,6 +137,10 @@ module csr_unit logic [31:0] updated_csr; logic [31:0] next_csr; + logic exception_delegated; + logic interrupt_delegated; 
+ logic [ECODE_W-1:0] interrupt_cause_r; + function logic mwrite_en (input csr_addr_t addr); return mwrite & sub_write_en[addr.sub_addr]; endfunction @@ -138,7 +152,7 @@ module csr_unit //Legalization Functions function logic [31:0] init_medeleg_mask(); init_medeleg_mask = 0; - if (CONFIG.INCLUDE_S_MODE) begin + if (CONFIG.MODES == MSU) begin init_medeleg_mask[INST_ADDR_MISSALIGNED] = 1; init_medeleg_mask[INST_ACCESS_FAULT] = 1; init_medeleg_mask[ILLEGAL_INST] = 1; @@ -148,48 +162,13 @@ module csr_unit init_medeleg_mask[STORE_AMO_ADDR_MISSALIGNED] = 1; init_medeleg_mask[STORE_AMO_FAULT] = 1; init_medeleg_mask[ECALL_U] = 1; + init_medeleg_mask[ECALL_S] = 1; init_medeleg_mask[INST_PAGE_FAULT] = 1; init_medeleg_mask[LOAD_PAGE_FAULT] = 1; init_medeleg_mask[STORE_OR_AMO_PAGE_FAULT] = 1; end endfunction - function logic [31:0] init_mideleg_mask(); - init_mideleg_mask = 0; - if (CONFIG.INCLUDE_S_MODE) begin - init_mideleg_mask[S_SOFTWARE_INTERRUPT] = CONFIG.INCLUDE_S_MODE; - init_mideleg_mask[S_TIMER_INTERRUPT] = CONFIG.INCLUDE_S_MODE; - init_mideleg_mask[S_EXTERNAL_INTERRUPT] = CONFIG.INCLUDE_S_MODE; - end - endfunction - - function logic [2**ECODE_W-1:0] init_exception_masking_rom(); - init_exception_masking_rom = '{default: 0}; - init_exception_masking_rom[INST_ADDR_MISSALIGNED] = 1; - init_exception_masking_rom[INST_ACCESS_FAULT] = CONFIG.INCLUDE_S_MODE; - init_exception_masking_rom[ILLEGAL_INST] = 1; - init_exception_masking_rom[BREAK] = 1; - init_exception_masking_rom[LOAD_ADDR_MISSALIGNED] = 1; - init_exception_masking_rom[LOAD_FAULT] = CONFIG.INCLUDE_S_MODE; - init_exception_masking_rom[STORE_AMO_ADDR_MISSALIGNED] = 1; - init_exception_masking_rom[STORE_AMO_FAULT] = CONFIG.INCLUDE_S_MODE; - init_exception_masking_rom[ECALL_U] = CONFIG.INCLUDE_S_MODE; - init_exception_masking_rom[ECALL_S] = CONFIG.INCLUDE_S_MODE; - init_exception_masking_rom[ECALL_M] = 1; - init_exception_masking_rom[INST_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE; - 
init_exception_masking_rom[LOAD_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE; - init_exception_masking_rom[STORE_OR_AMO_PAGE_FAULT] = CONFIG.INCLUDE_S_MODE; - endfunction - - function logic [2**ECODE_W-1:0] init_interrupt_masking_rom(); - init_interrupt_masking_rom = '{default: 0}; - init_interrupt_masking_rom[S_SOFTWARE_INTERRUPT] = CONFIG.INCLUDE_S_MODE; - init_interrupt_masking_rom[M_SOFTWARE_INTERRUPT] = 1; - init_interrupt_masking_rom[S_TIMER_INTERRUPT] = CONFIG.INCLUDE_S_MODE; - init_interrupt_masking_rom[M_TIMER_INTERRUPT] = 1; - init_interrupt_masking_rom[S_EXTERNAL_INTERRUPT] = CONFIG.INCLUDE_S_MODE; - init_interrupt_masking_rom[M_EXTERNAL_INTERRUPT] = 1; - endfunction //////////////////////////////////////////////////// //Implementation @@ -208,11 +187,9 @@ module csr_unit addr : issue_stage.instruction[31:20], op : issue_stage.fn3[1:0], data : issue_stage.fn3[2] ? {27'b0, issue_rs_addr[RS1]} : rf[RS1], - reads : ~((issue_stage.fn3[1:0] == CSR_RW) && (issue_stage.rd_addr == 0)), - writes : ~((issue_stage.fn3[1:0] == CSR_RC) && (issue_rs_addr[RS1] == 0)) + reads : ~((issue_stage.fn3[1:0] == CSR_RW) & (issue_stage.rd_addr == 0)), + writes : ~((issue_stage.fn3[1:0] != CSR_RW) & (issue_rs_addr[RS1] == 0)) }; - - assign processing_csr = busy | issue.new_request; assign issue.ready = ~busy; @@ -238,6 +215,7 @@ module csr_unit //Waits until CSR instruction is the oldest issued instruction assign commit = (retire_ids[0] == wb.id) & busy & (~commit_in_progress); + //////////////////////////////////////////////////// //Output @@ -259,8 +237,8 @@ module csr_unit //Shared logic always_ff @(posedge clk) begin sub_write_en <= (1 << csr_inputs_r.addr.sub_addr); - mwrite <= CONFIG.INCLUDE_M_MODE && commit && (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY && csr_inputs_r.addr.privilege == MACHINE_PRIVILEGE); - swrite <= CONFIG.INCLUDE_S_MODE && commit && (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY && csr_inputs_r.addr.privilege == SUPERVISOR_PRIVILEGE); + mwrite <= CONFIG.MODES != 
BARE & commit & (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY & csr_inputs_r.addr.privilege == MACHINE_PRIVILEGE) & ~illegal_instruction; + swrite <= CONFIG.MODES == MSU & commit & (csr_inputs_r.addr.rw_bits != CSR_READ_ONLY & csr_inputs_r.addr.privilege == SUPERVISOR_PRIVILEGE) & ~illegal_instruction; end always_comb begin @@ -291,9 +269,9 @@ module csr_unit mxlen:1, A:(CONFIG.INCLUDE_AMO), I:1, - M:(CONFIG.INCLUDE_UNIT.MUL && CONFIG.INCLUDE_UNIT.DIV), - S:(CONFIG.INCLUDE_S_MODE), - U:(CONFIG.INCLUDE_U_MODE), + M:(CONFIG.INCLUDE_UNIT.MUL & CONFIG.INCLUDE_UNIT.DIV), + S:(CONFIG.MODES == MSU), + U:(CONFIG.MODES inside {MU, MSU}), F:(CONFIG.INCLUDE_UNIT.FPU), D:(CONFIG.INCLUDE_UNIT.FPU) }; @@ -301,66 +279,86 @@ module csr_unit //////////////////////////////////////////////////// //Machine Version Registers localparam logic [31:0] mvendorid = 0; - localparam logic [31:0] marchid = 0; + localparam logic [31:0] marchid = 0; //TODO: register an ID with RISC-V localparam logic [31:0] mimpid = CONFIG.CSRS.MACHINE_IMPLEMENTATION_ID; localparam logic [31:0] mhartid = CONFIG.CSRS.CPU_ID; + localparam logic [31:0] mconfigptr = CONFIG.CSRS.MCONFIGPTR; //////////////////////////////////////////////////// - //MSTATUS + //Constants localparam logic [31:0] mstatush = 0; //Always little endian + localparam logic [31:0] medelegh = 0; //Not used + localparam logic [31:0] mstateen0 = 0; //Behaviour defined but not relevant + localparam logic [31:0] mstateen1 = 0; //Behaviour not yet defined + localparam logic [31:0] mstateen2 = 0; //Behaviour not yet defined + localparam logic [31:0] mstateen3 = 0; //Behaviour not yet defined + localparam logic [31:0] mstateen1h = 0; //Behaviour not yet defined + localparam logic [31:0] mstateen2h = 0; //Behaviour not yet defined + localparam logic [31:0] mstateen3h = 0; //Behaviour not yet defined //////////////////////////////////////////////////// //Non-Constant Registers mstatus_t mstatus; logic[31:0] mtvec; logic[31:0] medeleg; - logic[31:0] 
mideleg; - logic[31:0] mepc; - mip_t mip, mip_new; + mideleg_t mideleg; + mip_t mip; mie_t mie; - mcause_t mcause; + cause_t mcause; logic[31:0] mtval; logic[31:0] mscratch; + mcounter_t mcounteren; + mcounter_t mcountinhibit; + envcfgh_t menvcfgh; + mstateen0h_t mstateen0h; //Virtualization support: TSR, TW, TVM unused - //Extension context status: SD, FS, XS unused + //Extension context status: XS unused localparam mstatus_t mstatus_mask = '{ default:0, - mprv:(CONFIG.INCLUDE_U_MODE | CONFIG.INCLUDE_S_MODE), - mxr:(CONFIG.INCLUDE_S_MODE), - sum:(CONFIG.INCLUDE_U_MODE & CONFIG.INCLUDE_S_MODE), + mprv:(CONFIG.MODES inside {MU, MSU}), + mxr:(CONFIG.MODES == MSU), + sum:(CONFIG.MODES == MSU), mpp:'1, - spp:(CONFIG.INCLUDE_S_MODE), + spp:(CONFIG.MODES == MSU), mpie:1, - spie:(CONFIG.INCLUDE_S_MODE), + spie:(CONFIG.MODES == MSU), mie:1, - sie:(CONFIG.INCLUDE_S_MODE) + sie:(CONFIG.MODES == MSU), + sd:(CONFIG.INCLUDE_UNIT.FPU), + fs:{2{CONFIG.INCLUDE_UNIT.FPU}} }; - localparam mstatus_t sstatus_mask = '{default:0, mxr:1, sum:1, spp:1, spie:1, sie:1}; + localparam mstatus_t sstatus_mask = '{default:0, mxr:1, sum:1, spp:1, spie:1, sie:1, sd:(CONFIG.INCLUDE_UNIT.FPU), fs:{2{CONFIG.INCLUDE_UNIT.FPU}}}; + logic stip_stimecmp; - localparam mip_t sip_mask = '{default:0, seip:CONFIG.INCLUDE_S_MODE, stip:CONFIG.INCLUDE_S_MODE, ssip:CONFIG.INCLUDE_S_MODE}; - localparam mie_t sie_mask = '{default:0, seie:CONFIG.INCLUDE_S_MODE, stie:CONFIG.INCLUDE_S_MODE, ssie:CONFIG.INCLUDE_S_MODE}; + localparam mie_t sie_mask = '{default:0, seie:CONFIG.MODES == MSU, stie:CONFIG.MODES == MSU, ssie:CONFIG.MODES == MSU}; + localparam mip_t sip_mask = '{default:0, seip:CONFIG.MODES == MSU, stip:CONFIG.MODES == MSU, ssip:CONFIG.MODES == MSU}; -generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode +generate if (CONFIG.MODES != BARE) begin : gen_csr_m_mode mstatus_t mstatus_new; mstatus_t mstatus_write_mask; - logic [ECODE_W-1:0] interrupt_cause_r; + logic[4:0] fflag_wmask_r; //Used for updating 
mstatus, registered for frequency reasons + + always_ff @(posedge clk) begin + if (rst) + fflag_wmask_r <= '0; + else if (CONFIG.INCLUDE_UNIT.FPU) + fflag_wmask_r <= fflag_wmask; + end //Interrupt and Exception Delegation //Can delegate to supervisor if currently in supervisor or user modes logic can_delegate; - logic exception_delegated; - logic interrupt_delegated; - assign can_delegate = CONFIG.INCLUDE_S_MODE & privilege_level inside {SUPERVISOR_PRIVILEGE, USER_PRIVILEGE}; - assign exception_delegated = can_delegate & exception.valid & medeleg[exception.code]; + assign can_delegate = CONFIG.MODES == MSU & privilege_level inside {SUPERVISOR_PRIVILEGE, USER_PRIVILEGE}; + assign exception_delegated = can_delegate & exception_pkt.valid & medeleg[exception_pkt.code]; assign interrupt_delegated = can_delegate & interrupt_taken & mideleg[interrupt_cause_r]; one_hot_to_integer #(6) mstatus_case_one_hot ( - .one_hot ({sret, mret, exception.valid, interrupt_taken, (mwrite_en(MSTATUS) | swrite_en(SSTATUS)), 1'b0}), + .one_hot ({sret, mret, exception_pkt.valid, interrupt_taken, (mwrite_en(MSTATUS) | swrite_en(SSTATUS)), 1'b0}), .int_out (mstatus_case) ); @@ -388,11 +386,20 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode always_comb begin mstatus_new = mstatus; case (mstatus_case) inside - MSTATUS_WRITE : mstatus_new = (mstatus & ~mstatus_write_mask) | (updated_csr & mstatus_write_mask); + MSTATUS_WRITE : begin + mstatus_new = (mstatus & ~mstatus_write_mask) | (updated_csr & mstatus_write_mask); + //Cannot write invalid privilege + if (CONFIG.MODES == M) + mstatus_new.mpp = MACHINE_PRIVILEGE; + else if (CONFIG.MODES == MU & ^mstatus_new.mpp) + mstatus_new.mpp = MACHINE_PRIVILEGE; + else if (CONFIG.MODES == MSU & mstatus_new.mpp == RESERVED_PRIVILEGE) + mstatus_new.mpp = MACHINE_PRIVILEGE; + end MSTATUS_MRET : begin mstatus_new.mie = mstatus.mpie; mstatus_new.mpie = 1; - mstatus_new.mpp = CONFIG.INCLUDE_U_MODE ? 
USER_PRIVILEGE : MACHINE_PRIVILEGE; + mstatus_new.mpp = CONFIG.MODES inside {MU, MSU} ? USER_PRIVILEGE : MACHINE_PRIVILEGE; if (mstatus.mpp != MACHINE_PRIVILEGE) mstatus_new.mprv = 0; end @@ -404,7 +411,7 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode end MSTATUS_INTERRUPT, MSTATUS_EXCEPTION : begin if (next_privilege_level == SUPERVISOR_PRIVILEGE) begin - mstatus_new.spie = (privilege_level == SUPERVISOR_PRIVILEGE) ? mstatus.sie : 0; + mstatus_new.spie = mstatus.sie; mstatus_new.sie = 0; mstatus_new.spp = privilege_level[0]; //one if from supervisor-mode, zero if from user-mode end @@ -416,11 +423,27 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode end default : mstatus_new = mstatus; endcase + + //Overwrites writes to fs and sd from above + if (CONFIG.INCLUDE_UNIT.FPU) begin + if (fp_instruction_issued_with_rd | |fflag_wmask_r | (commit & csr_inputs_r.addr inside {FFLAGS, FRM, FCSR})) begin + mstatus_new.fs = 2'b11; + mstatus_new.sd = 1'b1; + end + else if (mwrite_en(MSTATUS) | swrite_en(SSTATUS)) begin + mstatus_new.fs = |updated_csr[14:13] ? 
updated_csr[14:13] : mstatus.fs; //Cannot disable by writing 00 + mstatus_new.sd = &updated_csr[14:13]; + end + else begin + mstatus_new.fs = mstatus.fs; + mstatus_new.sd = mstatus.sd; + end + end end always_ff @(posedge clk) begin if (rst) - mstatus <= '{default:0, mpp:MACHINE_PRIVILEGE}; + mstatus <= '{default:0, mpp:MACHINE_PRIVILEGE, fs:{1'b0, CONFIG.INCLUDE_UNIT.FPU}}; else mstatus <= mstatus_new; end @@ -428,13 +451,13 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode //////////////////////////////////////////////////// //MTVEC //No vectored mode, mode hard-coded to zero - initial mtvec[31:2] = CONFIG.CSRS.RESET_MTVEC[31:2]; always_ff @(posedge clk) begin mtvec[1:0] <= '0; - if (CONFIG.CSRS.NON_STANDARD_OPTIONS.MTVEC_WRITEABLE & mwrite_en(MTVEC)) + if (rst) + mtvec[31:2] <= CONFIG.CSRS.RESET_TVEC[31:2]; + else if (mwrite_en(MTVEC)) mtvec[31:2] <= updated_csr[31:2]; end - assign exception_target_pc = mtvec; //////////////////////////////////////////////////// //MEDELEG @@ -442,49 +465,103 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode always_ff @(posedge clk) begin if (rst) medeleg <= '0; - else if (mwrite_en(MEDELEG) & CONFIG.INCLUDE_S_MODE) - medeleg <= (updated_csr & medeleg_mask); + else if (mwrite_en(MEDELEG) & CONFIG.MODES == MSU) + medeleg <= updated_csr & medeleg_mask; end //////////////////////////////////////////////////// //MIDELEG - localparam logic [31:0] mideleg_mask = init_mideleg_mask(); + localparam mideleg_t mideleg_mask = '{default:0, ssid:CONFIG.MODES == MSU, stid:CONFIG.MODES == MSU, seid:CONFIG.MODES == MSU}; always_ff @(posedge clk) begin if (rst) mideleg <= '0; - else if (mwrite_en(MIDELEG) & CONFIG.INCLUDE_S_MODE) - mideleg <= (updated_csr & mideleg_mask); + else if (mwrite_en(MIDELEG) & CONFIG.MODES == MSU) + mideleg <= updated_csr & mideleg_mask; end //////////////////////////////////////////////////// //MIP - localparam mip_t mip_mask = '{default:0, meip:1, seip:CONFIG.INCLUDE_S_MODE, mtip:1, 
stip:CONFIG.INCLUDE_S_MODE, msip:1, ssip:CONFIG.INCLUDE_S_MODE}; - localparam mip_t mip_w_mask = '{default:0, seip:CONFIG.INCLUDE_S_MODE, stip:CONFIG.INCLUDE_S_MODE, ssip:CONFIG.INCLUDE_S_MODE}; + //Bits tracked separately + logic meip; + logic mtip; + logic msip; + + //SIP is part of MIP + logic seip; + logic stip; + logic ssip; + + assign mip = '{ + meip: meip, + mtip: mtip, + msip: msip, + seip: CONFIG.MODES == MSU & seip, + stip: CONFIG.MODES == MSU & stip, + ssip: CONFIG.MODES == MSU & ssip, + default:0 + }; - always_comb begin - mip_new = '0; - mip_new.ssip = s_interrupt.software; - mip_new.stip = s_interrupt.timer; - mip_new.seip = s_interrupt.external; + always_ff @(posedge clk) begin + meip <= m_interrupt.external; + mtip <= m_interrupt.timer; + msip <= m_interrupt.software; + end - mip_new.msip = m_interrupt.software; - mip_new.mtip = m_interrupt.timer; - mip_new.meip = m_interrupt.external; +if (CONFIG.MODES == MSU) begin : gen_supervisor_interrupts + logic seip_r; + logic seip_external; + logic seip_next; + mip_t seip_next_casted; + + //SEIP depends on an external and writable signal + assign seip_next_casted = mip_t'(csr_inputs_r.data); + assign seip = seip_r | seip_external; - mip_new &= mip_mask; + always_ff @(posedge clk) begin + seip_external <= s_interrupt.external; + case (csr_inputs_r.op) + CSR_RW : seip_next <= seip_next_casted.seip; + CSR_RS : seip_next <= seip_r | seip_next_casted.seip; + CSR_RC : seip_next <= seip_r & ~seip_next_casted.seip; + default : seip_next <= seip_next_casted.seip; + endcase end - + + //STIP and SSIP can be set externally or locally + mip_t next_csr_mip_casted; + assign next_csr_mip_casted = mip_t'(next_csr); + always_ff @(posedge clk) begin - if (rst) - mip <= 0; - else if (mwrite_en(MIP) | (|mip_new)) - mip <= (updated_csr & mip_w_mask) | mip_new; + if (rst) begin + seip_r <= 0; + stip <= 0; + ssip <= 0; + end + else begin + //SEIP + if (mwrite_en(MIP)) + seip_r <= seip_next; + + //STIP + if 
(CONFIG.CSRS.INCLUDE_SSTC & menvcfgh.stce) + stip <= stip_stimecmp; + else if (s_interrupt.timer) + stip <= 1; + else if (mwrite_en(MIP)) + stip <= next_csr_mip_casted.stip; + + //SSIP + if (s_interrupt.software) + ssip <= 1; + else if (mwrite_en(MIP) | (swrite_en(SIP) & mideleg.ssid)) + ssip <= next_csr_mip_casted.ssip; + end end - assign interrupt_pending = |(mip & mie) & mstatus.mie; +end //////////////////////////////////////////////////// //MIE - localparam mie_t mie_mask = '{default:0, meie:1, seie:CONFIG.INCLUDE_S_MODE, mtie:1, stie:CONFIG.INCLUDE_S_MODE, msie:1, ssie:CONFIG.INCLUDE_S_MODE}; + localparam mie_t mie_mask = '{default:0, meie:1, seie:CONFIG.MODES == MSU, mtie:1, stie:CONFIG.MODES == MSU, msie:1, ssie:CONFIG.MODES == MSU}; always_ff @(posedge clk) begin if (rst) mie <= '0; @@ -492,34 +569,32 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode mie <= updated_csr & (swrite ? sie_mask : mie_mask); end + always_comb begin + interrupt_pending = 0; + //M interrupts + if (privilege_level != MACHINE_PRIVILEGE | mstatus.mie) + interrupt_pending |= |(mip & mie & ~mideleg); + //S interrupts + if (CONFIG.MODES == MSU & ((privilege_level == SUPERVISOR_PRIVILEGE & mstatus.sie) | privilege_level == USER_PRIVILEGE)) + interrupt_pending |= |(sip & sie); + end + //////////////////////////////////////////////////// //MEPC //Can be software written, written on exception with //exception causing PC. Lower two bits tied to zero. always_ff @(posedge clk) begin mepc[1:0] <= '0; - if (mwrite_en(MEPC) | exception.valid | interrupt_taken) - mepc[31:2] <= (exception.valid | interrupt_taken) ? exception.pc[31:2] : updated_csr[31:2]; + if (rst) + mepc[31:2] <= '0; + else if (mwrite_en(MEPC) | (exception_pkt.valid & ~exception_delegated) | (interrupt_taken & ~interrupt_delegated)) + mepc[31:2] <= (exception_pkt.valid | interrupt_taken) ? 
exception_pkt.pc[31:2] : updated_csr[31:2]; end - assign epc = mepc; - //////////////////////////////////////////////////// //MCAUSE - //As the exception and interrupts codes are sparsely populated, - //to ensure that only legal values are written, a ROM lookup - //is used to validate the CSR write operation - localparam logic [2**ECODE_W-1:0] M_EXCEPTION_MASKING_ROM = init_exception_masking_rom(); - localparam logic [2**ECODE_W-1:0] M_INTERRUPT_MASKING_ROM = init_interrupt_masking_rom(); - - logic mcause_write_valid; - always_comb begin - if (updated_csr[31]) //interrupt - mcause_write_valid = M_INTERRUPT_MASKING_ROM[updated_csr[ECODE_W-1:0]]; - else - mcause_write_valid = M_EXCEPTION_MASKING_ROM[updated_csr[ECODE_W-1:0]]; - end - + //Can be software written, written on exception or + //interrupt with specific code mip_t mip_cause; logic [5:0] mip_priority_vector; logic [2:0] mip_cause_sel; @@ -543,73 +618,119 @@ generate if (CONFIG.INCLUDE_M_MODE) begin : gen_csr_m_mode end always_ff @(posedge clk) begin - mcause.zeroes <= '0; + mcause.zeros <= '0; if (rst) begin mcause.is_interrupt <= 0; - mcause.code <= 0; + mcause.code <= '0; end - else if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MCAUSE & ((mcause_write_valid & mwrite_en(MCAUSE)) | exception.valid | interrupt_taken)) begin + else if ((mwrite_en(MCAUSE) | (exception_pkt.valid & ~exception_delegated) | (interrupt_taken & ~interrupt_delegated))) begin mcause.is_interrupt <= interrupt_taken | (mwrite_en(MCAUSE) & updated_csr[31]); - mcause.code <= interrupt_taken ? interrupt_cause_r : exception.valid ? exception.code : updated_csr[ECODE_W-1:0]; + mcause.code <= interrupt_taken ? interrupt_cause_r : exception_pkt.valid ? exception_pkt.code : updated_csr[ECODE_W-1:0]; end end //////////////////////////////////////////////////// //MTVAL always_ff @(posedge clk) begin - if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MTVAL & (mwrite_en(MTVAL) | exception.valid)) - mtval <= exception.valid ? 
exception.tval : updated_csr; + if (rst) + mtval <= '0; + else if (mwrite_en(MTVAL) | (exception_pkt.valid & ~exception_delegated)) + mtval <= exception_pkt.valid ? exception_pkt.tval : updated_csr; end //////////////////////////////////////////////////// //MSCRATCH always_ff @(posedge clk) begin - if (CONFIG.CSRS.NON_STANDARD_OPTIONS.INCLUDE_MSCRATCH & mwrite_en(MSCRATCH)) + if (rst) + mscratch <= '0; + else if (mwrite_en(MSCRATCH)) mscratch <= updated_csr; end -end -endgenerate - - //////////////////////////////////////////////////// - //END OF MACHINE REGS //////////////////////////////////////////////////// + //MCOUNTINHIBIT + localparam mcounter_t mcountinhibit_mask = '{default:0, cy:1, ir:1}; + always_ff @(posedge clk) begin + if (rst) + mcountinhibit <= '0; + else if (mwrite_en(MCOUNTINHIBIT) & CONFIG.MODES == MSU) + mcountinhibit <= updated_csr & mcountinhibit_mask; + end + //////////////////////////////////////////////////// + //MCOUNTEREN + localparam mcounter_t mcounteren_mask = '{default:0, cy:1, tm:1, ir:1}; + always_ff @(posedge clk) begin + if (rst) + mcounteren <= '0; + else if (mwrite_en(MCOUNTEREN) & CONFIG.MODES inside {MU, MSU}) + mcounteren <= updated_csr & mcounteren_mask; + end + //////////////////////////////////////////////////// + //MENVCFG + localparam envcfg_t menvcfg_mask = '{default:0, fiom: 1, cbie:{2{CONFIG.INCLUDE_CBO}}, cbcfe:CONFIG.INCLUDE_CBO}; + always_ff @(posedge clk) begin + if (rst) + menvcfg <= '0; + else if (mwrite_en(MENVCFG) & CONFIG.MODES inside {MU, MSU}) + menvcfg <= updated_csr & menvcfg_mask; + end + //////////////////////////////////////////////////// + //MENVCFGH + localparam envcfgh_t menvcfgh_mask = '{default:0, stce:CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC}; + always_ff @(posedge clk) begin + if (rst) + menvcfgh <= '0; + else if (mwrite_en(MENVCFGH) & CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC) + menvcfgh <= updated_csr & menvcfgh_mask; + end + //////////////////////////////////////////////////// +
//MSTATEEN0H + localparam mstateen0h_t mstateen0h_mask = '{default:0, se0:CONFIG.MODES == MSU, envcfg:CONFIG.MODES != M}; + always_ff @(posedge clk) begin + if (rst) + mstateen0h <= '0; + else if (mwrite_en(MSTATEEN0H) & CONFIG.MODES != M) + mstateen0h <= updated_csr & mstateen0h_mask; + end +end +endgenerate - - - - + //////////////////////////////////////////////////// + //END OF MACHINE REGS + //////////////////////////////////////////////////// //////////////////////////////////////////////////// //BEGIN OF SUPERVISOR REGS //////////////////////////////////////////////////// - logic[31:0] sepc; - - logic[31:0] stime; - logic[31:0] stimecmp; - - logic[31:0] scause; + cause_t scause; logic[31:0] stval; - logic[31:0] sstatus; logic[31:0] stvec; - satp_t satp; - logic[31:0] sscratch; + logic[31:0] scounteren; + logic[31:0] stimecmp; + logic[31:0] stimecmph; + mip_t sip; + logic[31:0] sie; + localparam logic[31:0] sstateen0 = 0; //The defined behaviour is not used + localparam logic[31:0] sstateen1 = 0; + localparam logic[31:0] sstateen2 = 0; + localparam logic[31:0] sstateen3 = 0; //TLB status --- used to mux physical/virtual address - assign tlb_on = CONFIG.INCLUDE_S_MODE & satp.mode; + assign instruction_translation_on = CONFIG.MODES == MSU & satp.mode & privilege_level != MACHINE_PRIVILEGE; + assign data_translation_on = CONFIG.MODES == MSU & satp.mode & (privilege_level != MACHINE_PRIVILEGE | (mstatus.mprv & mstatus.mpp != MACHINE_PRIVILEGE)); assign asid = satp.asid; //****************** -generate if (CONFIG.INCLUDE_S_MODE) begin : gen_csr_s_mode +generate if (CONFIG.MODES == MSU) begin : gen_csr_s_mode //////////////////////////////////////////////////// //MMU interface assign immu.mxr = mstatus.mxr; @@ -617,39 +738,120 @@ generate if (CONFIG.INCLUDE_S_MODE) begin : gen_csr_s_mode assign immu.sum = mstatus.sum; assign dmmu.sum = mstatus.sum; assign immu.privilege = privilege_level; - assign dmmu.privilege = mstatus.mprv ? 
mstatus.mpp : privilege_level; + assign dmmu.privilege = mstatus.mprv ? privilege_t'(mstatus.mpp) : privilege_level; assign immu.satp_ppn = satp.ppn; assign dmmu.satp_ppn = satp.ppn; //////////////////////////////////////////////////// + //////////////////////////////////////////////////// + //SEPC + always_ff @(posedge clk) begin + sepc[1:0] <= '0; + if (rst) + sepc[31:2] <= '0; + else if (swrite_en(SEPC) | (exception_pkt.valid & exception_delegated) | (interrupt_taken & interrupt_delegated)) + sepc[31:2] <= (exception_pkt.valid | interrupt_taken) ? exception_pkt.pc[31:2] : updated_csr[31:2]; + end + + //////////////////////////////////////////////////// + //SCAUSE + always_ff @(posedge clk) begin + scause.zeros <= '0; + if (rst) begin + scause.is_interrupt <= 0; + scause.code <= '0; + end + else if ((swrite_en(SCAUSE) | (exception_pkt.valid & exception_delegated) | (interrupt_taken & interrupt_delegated))) begin + scause.is_interrupt <= interrupt_taken | (swrite_en(SCAUSE) & updated_csr[31]); + scause.code <= interrupt_taken ? interrupt_cause_r : exception_pkt.valid ? exception_pkt.code : updated_csr[ECODE_W-1:0]; + end + end + //////////////////////////////////////////////////// //STVEC - logic [31:0] stvec_mask = '1; always_ff @(posedge clk) begin + stvec[1:0] <= '0; if (rst) - stvec <= {CONFIG.CSRS.RESET_VEC[31:2], 2'b00}; + stvec[31:2] <= CONFIG.CSRS.RESET_TVEC[31:2]; else if (swrite_en(STVEC)) - stvec <= (updated_csr & stvec_mask); + stvec[31:2] <= updated_csr[31:2]; + end + + //////////////////////////////////////////////////// + //STVAL + always_ff @(posedge clk) begin + if (rst) + stval <= '0; + else if (swrite_en(STVAL) | (exception_pkt.valid & exception_delegated)) + stval <= exception_pkt.valid ? 
exception_pkt.tval : updated_csr; end //////////////////////////////////////////////////// //SATP - logic[31:0] satp_mask; - assign satp_mask = '1; always_ff @(posedge clk) begin if (rst) satp <= 0; else if (swrite_en(SATP)) - satp <= (updated_csr & satp_mask); + satp <= updated_csr; + end + + //////////////////////////////////////////////////// + //SCOUNTEREN + always_ff @(posedge clk) begin + if (rst) + scounteren <= 0; + else if (swrite_en(SCOUNTEREN)) + scounteren <= updated_csr; end //////////////////////////////////////////////////// //SSCRATCH always_ff @(posedge clk) begin - if (swrite_en(SSCRATCH)) + if (rst) + sscratch <= '0; + else if (swrite_en(SSCRATCH)) sscratch <= updated_csr; end + //////////////////////////////////////////////////// + //SENVCFG + localparam envcfg_t senvcfg_mask = '{default:0, fiom: 1, cbie:{2{CONFIG.INCLUDE_CBO}}, cbcfe:CONFIG.INCLUDE_CBO}; + always_ff @(posedge clk) begin + if (rst) + senvcfg <= '0; + else if (swrite_en(SENVCFG) & CONFIG.MODES == MSU) + senvcfg <= updated_csr & senvcfg_mask; + end + + //////////////////////////////////////////////////// + //STIMECMP + always_ff @(posedge clk) begin + if (rst) begin + stimecmp <= '0; + stimecmph <= '0; + end + else begin + if (swrite_en(STIMECMP) & CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC) + stimecmp <= updated_csr; + if (swrite_en(STIMECMPH) & CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC) + stimecmph <= updated_csr; + end + end + + assign stip_stimecmp = mtime >= {stimecmph, stimecmp}; + + //////////////////////////////////////////////////// + //SIP + assign sip = mip & mideleg; + + //////////////////////////////////////////////////// + //SIE + assign sie = mie & sie_mask; + + //////////////////////////////////////////////////// + //SSTATUS + assign sstatus = mstatus & sstatus_mask; + end endgenerate @@ -661,23 +863,20 @@ endgenerate //////////////////////////////////////////////////// //Timers and Counters //Register increment for instructions completed - //Increments 
suppressed on writes to these registers - localparam COUNTER_W = CONFIG.CSRS.NON_STANDARD_OPTIONS.COUNTER_W; - localparam MCYCLE_WRITEABLE = CONFIG.CSRS.NON_STANDARD_OPTIONS.MCYCLE_WRITEABLE; - localparam MINSTR_WRITEABLE = CONFIG.CSRS.NON_STANDARD_OPTIONS.MINSTR_WRITEABLE; + //Can be inhibited by mcountinhibit + localparam COUNTER_W = 64; logic[COUNTER_W-1:0] mcycle; - logic[COUNTER_W-1:0] mtime; - logic[COUNTER_W-1:0] minst_ret; + logic[COUNTER_W-1:0] minstret; logic[COUNTER_W-1:0] mcycle_input_next; - logic[COUNTER_W-1:0] minst_ret_input_next; - logic[LOG2_RETIRE_PORTS:0] minst_ret_inc; logic mcycle_inc; + logic pending_inst; + logic increment_minstret; - assign mcycle_input_next[31:0] = (MCYCLE_WRITEABLE & mwrite_en(MCYCLE)) ? updated_csr : mcycle[31:0]; - assign mcycle_input_next[COUNTER_W-1:32] = (MCYCLE_WRITEABLE & mwrite_en(MCYCLE)) ? updated_csr[COUNTER_W-33:0] : mcycle[COUNTER_W-1:32]; - assign mcycle_inc = ~(MCYCLE_WRITEABLE & (mwrite_en(MCYCLE) | mwrite_en(MCYCLEH))); + assign mcycle_input_next[31:0] = mwrite_en(MCYCLE) ? updated_csr : mcycle[31:0]; + assign mcycle_input_next[COUNTER_W-1:32] = mwrite_en(MCYCLEH) ? updated_csr[COUNTER_W-33:0] : mcycle[COUNTER_W-1:32]; + assign mcycle_inc = (CONFIG.MODES != BARE | CONFIG.CSRS.INCLUDE_ZICNTR) & ~((mwrite_en(MCYCLE) | mwrite_en(MCYCLEH))) & ~mcountinhibit.cy; always_ff @(posedge clk) begin if (rst) @@ -686,15 +885,25 @@ endgenerate mcycle <= mcycle_input_next + COUNTER_W'(mcycle_inc); end - assign minst_ret_input_next[31:0] = (MINSTR_WRITEABLE & mwrite_en(MINSTRET)) ? updated_csr : minst_ret[31:0]; - assign minst_ret_input_next[COUNTER_W-1:32] = (MINSTR_WRITEABLE & mwrite_en(MINSTRET)) ? updated_csr[COUNTER_W-33:0] : minst_ret[COUNTER_W-1:32]; - assign minst_ret_inc = (MINSTR_WRITEABLE & (mwrite_en(MINSTRET) | mwrite_en(MINSTRETH))) ? '0 : retire_count; - + + //Branch and pre issue exceptions retire the pending instruction + assign increment_minstret = pending_inst & (exception_pkt.valid ? 
exception_pkt.source[BR_EXCEPTION] | exception_pkt.source[PRE_ISSUE_EXCEPTION] : ~exception_pkt.possible); always_ff @(posedge clk) begin if (rst) - minst_ret <= 0; - else - minst_ret <= minst_ret_input_next + COUNTER_W'(minst_ret_inc); + pending_inst <= 0; + else begin + if (instruction_issued & ~mcountinhibit.ir) + pending_inst <= 1; + else if (mwrite_en(MINSTRET) | mwrite_en(MINSTRETH) | (~exception_pkt.possible | ~exception_pkt.valid)) + pending_inst <= 0; + end + end + + always_ff @(posedge clk) begin + if (rst) + minstret <= 0; + else if ((CONFIG.MODES != BARE | CONFIG.CSRS.INCLUDE_ZICNTR) & increment_minstret) + minstret <= minstret + 1; end //////////////////////////////////////////////////// @@ -707,23 +916,6 @@ endgenerate assign dyn_rm = frm; generate if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_csr_fp - typedef enum logic[1:0] { - WRITE_NONE = 2'b00, - WRITE_FFLAGS = 2'b01, - WRITE_FRM = 2'b10, - WRITE_BOTH = 2'b11 - } fcsr_write_t; - fcsr_write_t fcsr_write_type; - - always_comb begin - case (csr_inputs_r.addr) inside - FFLAGS : fcsr_write_type = WRITE_FFLAGS; - FRM : fcsr_write_type = WRITE_FRM; - FCSR : fcsr_write_type = WRITE_BOTH; - default : fcsr_write_type = WRITE_NONE; - endcase - end - //Older versions of the spec mandated an illegal instruction exception if an instruction //with the dynamic rounding mode was issued and the frm register contained an invalid //rounding mode. 
This has since been changed to "reserved" behaviour, meaning we do not @@ -735,108 +927,205 @@ generate if (CONFIG.INCLUDE_UNIT.FPU) begin : gen_csr_fp fflags <= '0; end else begin - //Explicit writes - if (commit) begin - case (fcsr_write_type) - WRITE_FFLAGS : fflags <= next_csr[4:0]; - WRITE_FRM : frm <= next_csr[2:0]; - WRITE_BOTH : {frm, fflags} <= next_csr[7:0]; - default; - endcase - end - else //Implicit writes (can never overlap explicit writes) - fflags <= fflags | fflag_wmask; + //Explicit writes commit earlier than regular CSR writes because they are required by FP instructions + case ({commit, csr_inputs_r.addr}) + {1'b1, FFLAGS} : fflags <= next_csr[4:0]; + {1'b1, FRM} : frm <= next_csr[2:0]; + {1'b1, FCSR} : {frm, fflags} <= next_csr[7:0]; + default : fflags <= fflags | fflag_wmask; //Implicit writes (can never overlap explicit writes) + endcase end end - end endgenerate + //////////////////////////////////////////////////// - //CSR mux - logic [31:0] read_mask; + //GC Connections + logic will_flush; + always_ff @(posedge clk) begin + if (issue.new_request) + will_flush <= CONFIG.MODES == MSU & csr_inputs.writes & csr_inputs.addr inside {SATP, MSTATUS, SSTATUS}; + csr_frontend_flush <= commit & will_flush; + end + + assign exception_target_pc = exception_delegated | interrupt_delegated ? stvec : mtvec; + + + //////////////////////////////////////////////////// + //Exceptions + //Illegal instruction on wrong addresses, privilege + //issues, and writing read only registers +generate if (CONFIG.MODES != BARE) begin : gen_csr_exceptions + logic legal_access; + always_comb begin - case (csr_inputs_r.addr) inside - SSTATUS : read_mask = CONFIG.INCLUDE_S_MODE ? sstatus_mask : '1; - SIE : read_mask = CONFIG.INCLUDE_S_MODE ? sie_mask : '1; - SIP : read_mask = CONFIG.INCLUDE_S_MODE ? 
sip_mask : '1; - default : read_mask = '1; + case (csr_inputs.addr) inside + FFLAGS, FRM, FCSR : legal_access = CONFIG.INCLUDE_UNIT.FPU; //FPU always accessible if present + MVENDORID, MARCHID, MIMPID, MHARTID, MCONFIGPTR : legal_access = privilege_level == MACHINE_PRIVILEGE & ~csr_inputs.writes; //Read only + MSTATUS, MISA, MIE, MTVEC, MSTATUSH, MSCRATCH, MEPC, MCAUSE, MTVAL, MIP, MCYCLE, MINSTRET, [MHPMCOUNTER3:MHPMCOUNTER31], MCYCLEH, MINSTRETH, [MHPMCOUNTER3H:MHPMCOUNTER31H], MCOUNTINHIBIT, [MHPMEVENT3:MHPMEVENT31], [MHPMEVENT3H:MHPMEVENT31H] : legal_access = privilege_level == MACHINE_PRIVILEGE; //Read write + MEDELEG, MIDELEG, MEDELEGH : legal_access = CONFIG.MODES == MSU & privilege_level == MACHINE_PRIVILEGE; //Read write, needs supervisor + [MSTATEEN0:MSTATEEN3], [MSTATEEN0H:MSTATEEN3H] : legal_access = CONFIG.CSRS.INCLUDE_SMSTATEEN & privilege_level == MACHINE_PRIVILEGE; //Read write, needs extension + MCOUNTEREN, MENVCFG, MENVCFGH : legal_access = CONFIG.MODES inside {MU, MSU} & privilege_level == MACHINE_PRIVILEGE; //Read write, needs user + SSTATUS, SIE, STVEC, SCOUNTEREN, SSCRATCH, SEPC, SCAUSE, STVAL, SIP : legal_access = CONFIG.MODES == MSU & privilege_level inside {MACHINE_PRIVILEGE, SUPERVISOR_PRIVILEGE}; //Read write (SENVCFG is not listed here so that its mstateen0h check below is reachable) + SATP : legal_access = CONFIG.MODES == MSU & ((privilege_level == MACHINE_PRIVILEGE) | (privilege_level == SUPERVISOR_PRIVILEGE & ~mstatus.tvm)); //Read write, not TVM + SENVCFG : legal_access = CONFIG.MODES == MSU & ((privilege_level == MACHINE_PRIVILEGE) | (privilege_level == SUPERVISOR_PRIVILEGE & (~CONFIG.CSRS.INCLUDE_SMSTATEEN | mstateen0h.envcfg))); //Read write, depends on mstateen0h + SSTATEEN0 : legal_access = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SMSTATEEN & ((privilege_level == MACHINE_PRIVILEGE) | (privilege_level == SUPERVISOR_PRIVILEGE & mstateen0h.se0)); //Read write, needs extension and mstateen0h + [SSTATEEN1:SSTATEEN3] : legal_access = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SMSTATEEN & 
privilege_level inside {MACHINE_PRIVILEGE, SUPERVISOR_PRIVILEGE}; //Read write, needs extension + CYCLE, TIME, INSTRET, CYCLEH, TIMEH, INSTRETH : begin //Read only, depends on m/scounteren and extension + legal_access = CONFIG.CSRS.INCLUDE_ZICNTR & ~csr_inputs.writes; + if (privilege_level != MACHINE_PRIVILEGE) + legal_access &= mcounteren[csr_inputs.addr[4:0]]; + if (CONFIG.MODES == MSU & privilege_level == USER_PRIVILEGE) + legal_access &= scounteren[csr_inputs.addr[4:0]]; + end + [HPMCOUNTER3:HPMCOUNTER31], [HPMCOUNTER3H:HPMCOUNTER31H] : begin //Read only, depends on m/scounteren and extension + legal_access = CONFIG.CSRS.INCLUDE_ZIHPM & ~csr_inputs.writes; + if (privilege_level != MACHINE_PRIVILEGE) + legal_access &= mcounteren[csr_inputs.addr[4:0]]; + if (CONFIG.MODES == MSU & privilege_level == USER_PRIVILEGE) + legal_access &= scounteren[csr_inputs.addr[4:0]]; + end + STIMECMP, STIMECMPH : legal_access = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC & ((privilege_level == MACHINE_PRIVILEGE) | (privilege_level == SUPERVISOR_PRIVILEGE & mcounteren.tm & menvcfgh.stce)); //Read write, depends on TM + STCE + default: legal_access = 0; endcase end + + + always_ff @(posedge clk) begin + if (rst) + illegal_instruction <= 0; + else if (issue.new_request) + illegal_instruction <= ~legal_access; + end + + always_ff @(posedge clk) begin + if (rst) + exception.valid <= 0; + else + exception.valid <= commit & illegal_instruction; + end + + assign exception.code = ILLEGAL_INST; + assign exception.pc = issue_stage.pc_r; + assign exception.tval = issue_stage.instruction_r; + assign exception.discard = |issue_stage.instruction_r[11:7]; //Only discard if rd != x0 + +end +endgenerate + + //Interrupts need to be immediately evaluated following MRET/SRET or writing to a CSR that + //controls interrupts. 
MRET/SRET flush the fetch pipeline so nothing needs to be done, + //but we must stall for 1 cycle after writing certain CSRs to ensure pending_interrupt + //can be raised and detected before another instruction is issued + logic stall_for_interrupt; + always_ff @(posedge clk) begin + stall_for_interrupt <= wb.done & wb.ack & csr_inputs_r.writes & (mwrite_en(MIP) | mwrite_en(MIE) | mwrite_en(MSTATUS) | mwrite_en(MIDELEG) | swrite_en(SIP) | swrite_en(SIE) | swrite_en(SSTATUS)); + end + + assign exception.possible = busy | exception.valid | stall_for_interrupt; //Block future instructions + + //////////////////////////////////////////////////// + //CSR mux always_comb begin case (csr_inputs_r.addr) inside + //Floating point + FFLAGS : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {27'b0, fflags} : '0; + FRM : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {29'b0, frm} : '0; + FCSR : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {24'b0, frm, fflags} : '0; + //Machine info - MISA : selected_csr = CONFIG.INCLUDE_M_MODE ? misa : '0; - MVENDORID : selected_csr = CONFIG.INCLUDE_M_MODE ? mvendorid : '0; - MARCHID : selected_csr = CONFIG.INCLUDE_M_MODE ? marchid : '0; - MIMPID : selected_csr = CONFIG.INCLUDE_M_MODE ? mimpid : '0; - MHARTID : selected_csr = CONFIG.INCLUDE_M_MODE ? mhartid : '0; + MVENDORID : selected_csr = CONFIG.MODES != BARE ? mvendorid : '0; + MARCHID : selected_csr = CONFIG.MODES != BARE ? marchid : '0; + MIMPID : selected_csr = CONFIG.MODES != BARE ? mimpid : '0; + MHARTID : selected_csr = CONFIG.MODES != BARE ? mhartid : '0; + MCONFIGPTR : selected_csr = CONFIG.MODES != BARE ? mconfigptr : '0; //Machine trap setup - MSTATUS : selected_csr = CONFIG.INCLUDE_M_MODE ? mstatus : '0; - MEDELEG : selected_csr = CONFIG.INCLUDE_M_MODE ? medeleg : '0; - MIDELEG : selected_csr = CONFIG.INCLUDE_M_MODE ? mideleg : '0; - MIE : selected_csr = CONFIG.INCLUDE_M_MODE ? mie : '0; - MTVEC : selected_csr = CONFIG.INCLUDE_M_MODE ? 
mtvec : '0; - MCOUNTEREN : selected_csr = '0; + MSTATUS : selected_csr = CONFIG.MODES != BARE ? mstatus : '0; + MISA : selected_csr = CONFIG.MODES != BARE ? misa : '0; + MEDELEG : selected_csr = CONFIG.MODES == MSU ? medeleg : '0; + MIDELEG : selected_csr = CONFIG.MODES == MSU ? mideleg : '0; + MIE : selected_csr = CONFIG.MODES != BARE ? mie : '0; + MTVEC : selected_csr = CONFIG.MODES != BARE ? mtvec : '0; + MCOUNTEREN : selected_csr = CONFIG.MODES inside {MU, MSU} ? mcounteren : '0; + MSTATUSH : selected_csr = CONFIG.MODES != BARE ? mstatush : '0; + MEDELEGH : selected_csr = CONFIG.MODES == MSU ? medelegh : '0; //Machine trap handling - MSCRATCH : selected_csr = CONFIG.INCLUDE_M_MODE ? mscratch : '0; - MEPC : selected_csr = CONFIG.INCLUDE_M_MODE ? mepc : '0; - MCAUSE : selected_csr = CONFIG.INCLUDE_M_MODE ? mcause : '0; - MTVAL : selected_csr = CONFIG.INCLUDE_M_MODE ? mtval : '0; - MIP : selected_csr = CONFIG.INCLUDE_M_MODE ? mip : '0; - //Machine Memory Protection - [12'h3EF : 12'h3A0] : selected_csr = '0; + MSCRATCH : selected_csr = CONFIG.MODES != BARE ? mscratch : '0; + MEPC : selected_csr = CONFIG.MODES != BARE ? mepc : '0; + MCAUSE : selected_csr = CONFIG.MODES != BARE ? mcause : '0; + MTVAL : selected_csr = CONFIG.MODES != BARE ? mtval : '0; + MIP : selected_csr = CONFIG.MODES != BARE ? mip : '0; + //Machine configuration + MENVCFG : selected_csr = CONFIG.MODES inside {MU, MSU} ? menvcfg : '0; + MENVCFGH : selected_csr = CONFIG.MODES inside {MU, MSU} ? menvcfgh : '0; + //No PMP + //MHPM COUNTER //Machine Timers and Counters - MCYCLE : selected_csr = CONFIG.INCLUDE_M_MODE ? mcycle[31:0] : '0; - MINSTRET : selected_csr = CONFIG.INCLUDE_M_MODE ? minst_ret[31:0] : '0; - [12'hB03 : 12'hB1F] : selected_csr = '0; - MCYCLEH : selected_csr = CONFIG.INCLUDE_M_MODE ? 32'(mcycle[COUNTER_W-1:32]) : '0; - MINSTRETH : selected_csr = CONFIG.INCLUDE_M_MODE ? 
32'(minst_ret[COUNTER_W-1:32]) : '0; - [12'hB83 : 12'hB9F] : selected_csr = '0; + MCYCLE : selected_csr = CONFIG.MODES != BARE ? mcycle[31:0] : '0; + MINSTRET : selected_csr = CONFIG.MODES != BARE ? minstret[31:0] : '0; + [MHPMCOUNTER3 : MHPMCOUNTER31] : selected_csr = '0; + MCYCLEH : selected_csr = CONFIG.MODES != BARE ? 32'(mcycle[COUNTER_W-1:32]) : '0; + MINSTRETH : selected_csr = CONFIG.MODES != BARE ? 32'(minstret[COUNTER_W-1:32]) : '0; + [MHPMCOUNTER3H : MHPMCOUNTER31H] : selected_csr = '0; //Machine Counter Setup - [12'h320 : 12'h33F] : selected_csr = '0; - + MCOUNTINHIBIT : selected_csr = CONFIG.MODES != BARE ? mcountinhibit : '0; + [MHPMEVENT3 : MHPMEVENT31] : selected_csr = '0; + [MHPMEVENT3H : MHPMEVENT31H] : selected_csr = '0; + //Machine state enable + MSTATEEN0 : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen0 : '0; + MSTATEEN1 : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen1 : '0; + MSTATEEN2 : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen2 : '0; + MSTATEEN3 : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen3 : '0; + MSTATEEN0H : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen0h : '0; + MSTATEEN1H : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen1h : '0; + MSTATEEN2H : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen2h : '0; + MSTATEEN3H : selected_csr = CONFIG.MODES != BARE & CONFIG.CSRS.INCLUDE_SMSTATEEN ? mstateen3h : '0; + + //Supervisor regs //Supervisor Trap Setup - SSTATUS : selected_csr = CONFIG.INCLUDE_S_MODE ? mstatus : '0; - SEDELEG : selected_csr = '0; //No user-level interrupts/exception handling - SIDELEG : selected_csr = '0; - SIE : selected_csr = CONFIG.INCLUDE_S_MODE ? mie : '0; - STVEC : selected_csr = CONFIG.INCLUDE_S_MODE ? stvec : '0; + SSTATUS : selected_csr = CONFIG.MODES == MSU ? 
sstatus : '0; + SIE : selected_csr = CONFIG.MODES == MSU ? sie : '0; + STVEC : selected_csr = CONFIG.MODES == MSU ? stvec : '0; SCOUNTEREN : selected_csr = '0; + //Supervisor configuration + SENVCFG : selected_csr = CONFIG.MODES == MSU ? senvcfg : '0; //Supervisor trap handling - SSCRATCH : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0; - SEPC : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0; - SCAUSE : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0; - STVAL : selected_csr = CONFIG.INCLUDE_S_MODE ? sscratch : '0; - SIP : selected_csr = CONFIG.INCLUDE_S_MODE ? mip : '0; - //Supervisor Protection and Translation - SATP : selected_csr = CONFIG.INCLUDE_S_MODE ? satp : '0; - - //User status - //Floating point - FFLAGS : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {27'b0, fflags} : '0; - FRM : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {29'b0, frm} : '0; - FCSR : selected_csr = CONFIG.INCLUDE_UNIT.FPU ? {24'b0, frm, fflags} : '0; - //User Counter Timers - CYCLE : selected_csr = mcycle[31:0]; - TIME : selected_csr = mcycle[31:0]; - INSTRET : selected_csr = minst_ret[31:0]; - [12'hC03 : 12'hC1F] : selected_csr = '0; - CYCLEH : selected_csr = 32'(mcycle[COUNTER_W-1:32]); - TIMEH : selected_csr = 32'(mcycle[COUNTER_W-1:32]); - INSTRETH : selected_csr = 32'(minst_ret[COUNTER_W-1:32]); - [12'hC83 : 12'hC9F] : selected_csr = '0; + SSCRATCH : selected_csr = CONFIG.MODES == MSU ? sscratch : '0; + SEPC : selected_csr = CONFIG.MODES == MSU ? sepc : '0; + SCAUSE : selected_csr = CONFIG.MODES == MSU ? scause : '0; + STVAL : selected_csr = CONFIG.MODES == MSU ? stval : '0; + SIP : selected_csr = CONFIG.MODES == MSU ? sip : '0; + STIMECMP : selected_csr = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC ? stimecmp : '0; + STIMECMPH : selected_csr = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SSTC ? stimecmph : '0; + //Supervisor address translation and protection + SATP : selected_csr = CONFIG.MODES == MSU ? 
satp : '0; + //Supervisor state enable + SSTATEEN0 : selected_csr = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SMSTATEEN ? sstateen0 : '0; + SSTATEEN1 : selected_csr = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SMSTATEEN ? sstateen1 : '0; + SSTATEEN2 : selected_csr = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SMSTATEEN ? sstateen2 : '0; + SSTATEEN3 : selected_csr = CONFIG.MODES == MSU & CONFIG.CSRS.INCLUDE_SMSTATEEN ? sstateen3 : '0; + + //Timers and counters + CYCLE : selected_csr = CONFIG.CSRS.INCLUDE_ZICNTR ? mcycle[31:0] : '0; + TIME : selected_csr = CONFIG.CSRS.INCLUDE_ZICNTR ? mtime[31:0] : '0; + INSTRET : selected_csr = CONFIG.CSRS.INCLUDE_ZICNTR ? minstret[31:0] : '0; + [HPMCOUNTER3 : HPMCOUNTER31] : selected_csr = '0; + CYCLEH : selected_csr = CONFIG.CSRS.INCLUDE_ZICNTR ? 32'(mcycle[COUNTER_W-1:32]) : '0; + TIMEH : selected_csr = CONFIG.CSRS.INCLUDE_ZICNTR ? mtime[63:32] : '0; + INSTRETH : selected_csr = CONFIG.CSRS.INCLUDE_ZICNTR ? 32'(minstret[COUNTER_W-1:32]) : '0; + [HPMCOUNTER3H : HPMCOUNTER31H] : selected_csr = '0; default : selected_csr = '0; endcase end always_ff @(posedge clk) begin if (commit) - selected_csr_r <= selected_csr & read_mask; + selected_csr_r <= selected_csr; end //////////////////////////////////////////////////// //Assertions mstatus_update_assertion: - assert property (@(posedge clk) disable iff (rst) $onehot0({mret,sret,interrupt_taken, exception.valid,(mwrite_en(MSTATUS) | swrite_en(SSTATUS))})) else $error("multiple write to mstatus"); + assert property (@(posedge clk) disable iff (rst) $onehot0({mret,sret,interrupt_taken, exception_pkt.valid,(mwrite_en(MSTATUS) | swrite_en(SSTATUS))})) else $error("multiple write to mstatus"); endmodule diff --git a/core/execution_units/div_unit.sv b/core/execution_units/div_unit.sv old mode 100755 new mode 100644 index e10bd817..f2dfdcbd --- a/core/execution_units/div_unit.sv +++ b/core/execution_units/div_unit.sv @@ -129,7 +129,7 @@ module div_unit set_clr_reg_with_rst #(.SET_OVER_CLR(1), 
.WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m ( .clk, .rst, .set(issue.new_request & ~((issue_stage.rd_addr == issue_rs_addr[RS1]) | (issue_stage.rd_addr == issue_rs_addr[RS2]))), - .clr((instruction_issued_with_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted + .clr((instruction_issued_with_rd & div_rs_overwrite) | gc.init_clear), //No instructions will be issued while gc.init_clear is asserted .result(prev_div_result_valid) ); diff --git a/core/execution_units/gc_unit.sv b/core/execution_units/gc_unit.sv index 359eaf6a..ac6865f8 100644 --- a/core/execution_units/gc_unit.sv +++ b/core/execution_units/gc_unit.sv @@ -44,6 +44,7 @@ module gc_unit input issue_packet_t issue_stage, input logic issue_stage_ready, + input logic instruction_issued, input logic [31:0] constant_alu, input logic [31:0] rf [REGFILE_READ_PORTS], @@ -52,39 +53,38 @@ module gc_unit //Branch miss predict input logic branch_flush, - //exception_interface.unit pre_issue_exception, - //Exception + exception_interface.unit local_gc_exception, exception_interface.econtrol exception [NUM_EXCEPTION_SOURCES], input logic [31:0] exception_target_pc, - input logic [31:0] oldest_pc, output logic mret, output logic sret, - input logic [31:0] epc, - - //Retire - input id_t retire_ids_next [RETIRE_PORTS], - input logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit, + input logic [31:0] mepc, + input logic [31:0] sepc, //CSR Interrupts input logic interrupt_pending, output logic interrupt_taken, - input logic processing_csr, + //CSR signals + input logic csr_frontend_flush, + input logic [1:0] current_privilege, + input logic tvm, + input logic tsr, //Output controls output gc_outputs_t gc, + output tlb_packet_t sfence, //Ordering support - input load_store_status_t load_store_status, - input logic [LOG2_MAX_IDS:0] post_issue_count + input load_store_status_t load_store_status ); //Largest depth for TLBs localparam int 
TLB_CLEAR_DEPTH = (CONFIG.DTLB.DEPTH > CONFIG.ITLB.DEPTH) ? CONFIG.DTLB.DEPTH : CONFIG.ITLB.DEPTH; //For general reset clear, greater of TLB depth or id-flight memory blocks (MAX_IDS) - localparam int INIT_CLEAR_DEPTH = CONFIG.INCLUDE_S_MODE ? (TLB_CLEAR_DEPTH > 64 ? TLB_CLEAR_DEPTH : 64) : 64; + localparam int INIT_CLEAR_DEPTH = CONFIG.MODES == MSU ? (TLB_CLEAR_DEPTH > 64 ? TLB_CLEAR_DEPTH : 64) : 64; //////////////////////////////////////////////////// //Overview @@ -119,120 +119,157 @@ module gc_unit //LS exceptions (miss-aligned, TLB and MMU) (issue stage) //fetch flush, take exception. If execute or later exception occurs first, exception is overridden common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode - - typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD} gc_state; + typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, WAIT_INTERRUPT, PRE_ISSUE_FLUSH, WAIT_WRITE} gc_state; gc_state state; gc_state next_state; logic init_clear_done; logic tlb_clear_done; - logic post_issue_idle; - logic ifence_in_progress; - logic ret_in_progress; - //GC registered global outputs logic gc_init_clear; logic gc_fetch_hold; logic gc_issue_hold; + logic gc_rename_revert; logic gc_fetch_flush; - logic gc_writeback_supress; - logic gc_retire_hold; + logic gc_fetch_ifence; logic gc_tlb_flush; - logic gc_sq_flush; logic gc_pc_override; logic [31:0] gc_pc; - typedef struct packed{ - logic [31:0] pc_p4; - logic is_ifence; - logic is_mret; - logic is_sret; - } gc_inputs_t; + logic possible_exception; - gc_inputs_t gc_inputs; - gc_inputs_t gc_inputs_r; //////////////////////////////////////////////////// //Implementation //////////////////////////////////////////////////// //Decode + logic [31:0] pc_p4; logic is_ifence; + logic is_sfence; + logic trivial_sfence; + logic asid_sfence; logic is_mret; logic is_sret; + logic 
is_wfi; assign instruction = decode_stage.instruction; assign unit_needed = - (CONFIG.INCLUDE_M_MODE & decode_stage.instruction inside {MRET}) | - (CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SRET, SFENCE_VMA}) | - (CONFIG.INCLUDE_IFENCE & decode_stage.instruction inside {FENCE_I}); + (CONFIG.MODES != BARE & instruction inside {MRET, WFI}) | + (CONFIG.MODES == MSU & instruction inside {SRET, SFENCE_VMA}) | + (CONFIG.INCLUDE_IFENCE & instruction inside {FENCE_I}); always_comb begin uses_rs = '0; - uses_rs[RS1] = CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SFENCE_VMA}; + uses_rs[RS1] = CONFIG.MODES == MSU & instruction inside {SFENCE_VMA}; + uses_rs[RS2] = CONFIG.MODES == MSU & instruction inside {SFENCE_VMA}; uses_rd = 0; end always_ff @(posedge clk) begin if (issue_stage_ready) begin - is_ifence = (instruction.upper_opcode == FENCE_T) & CONFIG.INCLUDE_IFENCE; - is_mret = (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == MRET_imm) & CONFIG.INCLUDE_M_MODE; - is_sret = (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == SRET_imm) & CONFIG.INCLUDE_S_MODE; + is_ifence <= CONFIG.INCLUDE_IFENCE & instruction.upper_opcode[2]; + is_sfence <= CONFIG.MODES == MSU & ~instruction.upper_opcode[2] & instruction.fn7[0]; + trivial_sfence <= |instruction.rs1_addr; + asid_sfence <= |instruction.rs2_addr; + is_wfi <= CONFIG.MODES != BARE & ~instruction.upper_opcode[2] & ~instruction.fn7[0] & ~instruction.rs2_addr[1]; + //Ret instructions need exact decoding + is_mret <= CONFIG.MODES != BARE & instruction inside {MRET}; + is_sret <= CONFIG.MODES == MSU & instruction inside {SRET}; end end - assign gc_inputs.pc_p4 = constant_alu; - assign gc_inputs.is_ifence = is_ifence; - assign gc_inputs.is_mret = is_mret; - assign gc_inputs.is_sret = is_sret; - //////////////////////////////////////////////////// //Issue + logic is_ifence_r; + logic is_sfence_r; + logic is_sret_r; + logic trivial_sfence_r; + logic 
asid_sfence_r; + logic [31:0] sfence_addr_r; + logic [ASIDLEN-1:0] asid_r; + logic new_exception; //Input registering always_ff @(posedge clk) begin - if (issue.new_request) - gc_inputs_r <= gc_inputs; + if (rst) begin + is_ifence_r <= 0; + is_sfence_r <= 0; + mret <= 0; + sret <= 0; + end + else begin + is_ifence_r <= issue.new_request & is_ifence & ~new_exception; + is_sfence_r <= issue.new_request & is_sfence & ~new_exception; + mret <= issue.new_request & is_mret & ~new_exception; + sret <= issue.new_request & is_sret & ~new_exception; + end end - //ret always_ff @(posedge clk) begin - if (rst) - ret_in_progress <= 0; - else - ret_in_progress <= (ret_in_progress & ~(next_state == PRE_ISSUE_FLUSH)) | (issue.new_request & (gc_inputs.is_mret | gc_inputs.is_sret)); + if (issue.new_request) begin + trivial_sfence_r <= trivial_sfence; + asid_sfence_r <= asid_sfence; + sfence_addr_r <= rf[RS1]; + asid_r <= rf[RS2][ASIDLEN-1:0]; + end + if (rst) begin + trivial_sfence_r <= 0; + asid_sfence_r <= 0; + end end - //ifence - always_ff @(posedge clk) begin - if (rst) - ifence_in_progress <= 0; - else - ifence_in_progress <= (ifence_in_progress & ~(next_state == PRE_ISSUE_FLUSH)) | (issue.new_request & gc_inputs.is_ifence); + //Exceptions treated like every other unit + generate if (CONFIG.MODES != BARE) begin : gen_gc_exception + always_comb begin + new_exception = 0; + if (issue.new_request) begin + if (current_privilege == USER_PRIVILEGE) + new_exception = is_sfence | is_sret | is_mret; + else if (current_privilege == SUPERVISOR_PRIVILEGE) + new_exception = (is_sfence & tvm) | (is_sret & tsr); + end + end + + always_ff @(posedge clk) begin + if (rst) + local_gc_exception.valid <= 0; + else + local_gc_exception.valid <= new_exception; + end + + assign local_gc_exception.possible = 0; //Not needed because appears on first cycle + assign local_gc_exception.code = ILLEGAL_INST; + assign local_gc_exception.tval = issue_stage.instruction_r; + assign local_gc_exception.pc = 
issue_stage.pc_r; + assign local_gc_exception.discard = 0; end + endgenerate //////////////////////////////////////////////////// //GC Operation - assign post_issue_idle = (post_issue_count == 0) & load_store_status.sq_empty; assign gc.fetch_flush = branch_flush | gc_pc_override; always_ff @ (posedge clk) begin - gc_fetch_hold <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH}; - gc_issue_hold <= processing_csr | (next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD}); - gc_writeback_supress <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, POST_ISSUE_DISCARD}; - gc_retire_hold <= next_state inside {PRE_ISSUE_FLUSH}; + gc_fetch_hold <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, PRE_ISSUE_FLUSH, TLB_CLEAR_STATE, WAIT_WRITE}; + gc_issue_hold <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, WAIT_INTERRUPT, PRE_ISSUE_FLUSH, TLB_CLEAR_STATE, WAIT_WRITE}; gc_init_clear <= next_state inside {INIT_CLEAR_STATE}; + gc_fetch_ifence <= issue.new_request & is_ifence; gc_tlb_flush <= next_state inside {INIT_CLEAR_STATE, TLB_CLEAR_STATE}; - gc_sq_flush <= state inside {POST_ISSUE_DISCARD} & next_state inside {IDLE_STATE}; end //work-around for verilator BLKANDNBLK signal optimizations assign gc.fetch_hold = gc_fetch_hold; - assign gc.issue_hold = gc_issue_hold; - assign gc.writeback_supress = CONFIG.INCLUDE_M_MODE & gc_writeback_supress; - assign gc.retire_hold = gc_retire_hold; + assign gc.issue_hold = gc_issue_hold | possible_exception; assign gc.init_clear = gc_init_clear; - assign gc.tlb_flush = CONFIG.INCLUDE_S_MODE & gc_tlb_flush; - assign gc.sq_flush = CONFIG.INCLUDE_M_MODE & gc_sq_flush; + assign gc.fetch_ifence = CONFIG.INCLUDE_IFENCE & gc_fetch_ifence; + assign sfence = '{ + valid : CONFIG.MODES == MSU & gc_tlb_flush, + asid_only : asid_sfence_r, + asid : asid_r, + addr_only : trivial_sfence_r, + addr : sfence_addr_r + }; + 
//////////////////////////////////////////////////// //GC State Machine always @(posedge clk) begin @@ -249,19 +286,47 @@ module gc_unit PRE_CLEAR_STATE : next_state = INIT_CLEAR_STATE; INIT_CLEAR_STATE : if (init_clear_done) next_state = IDLE_STATE; IDLE_STATE : begin - if (gc.exception.valid)//new pending exception is also oldest instruction + if ((issue.new_request & ~is_wfi & ~new_exception) | gc.exception.valid | csr_frontend_flush) + next_state = PRE_ISSUE_FLUSH; + else if (interrupt_pending) + next_state = WAIT_INTERRUPT; + end + WAIT_INTERRUPT : begin + if (gc.exception.valid | csr_frontend_flush) //Exception overrides interrupt next_state = PRE_ISSUE_FLUSH; - else if (issue.new_request | interrupt_pending | gc.exception_pending) - next_state = POST_ISSUE_DRAIN; + else if (~interrupt_pending) //Something cancelled the interrupt + next_state = IDLE_STATE; + else if (~possible_exception & issue_stage.stage_valid & ~branch_flush) //No more possible exceptions and issue stage has correct PC + next_state = PRE_ISSUE_FLUSH; + end + PRE_ISSUE_FLUSH : begin + if (is_sfence_r) + next_state = TLB_CLEAR_STATE; + else if (is_ifence_r) + next_state = WAIT_WRITE; + else //MRET/SRET, exception, interrupt, CSR flush + next_state = IDLE_STATE; end - TLB_CLEAR_STATE : if (tlb_clear_done) next_state = IDLE_STATE; - POST_ISSUE_DRAIN : if (((ifence_in_progress | ret_in_progress) & post_issue_idle) | gc.exception.valid | interrupt_pending) next_state = PRE_ISSUE_FLUSH; - PRE_ISSUE_FLUSH : next_state = POST_ISSUE_DISCARD; - POST_ISSUE_DISCARD : if ((post_issue_count == 0) & load_store_status.no_released_stores_pending) next_state = IDLE_STATE; + //gc.exception will never be set in these states + TLB_CLEAR_STATE : if (tlb_clear_done) next_state = (load_store_status.outstanding_store) ? 
WAIT_WRITE : IDLE_STATE; + WAIT_WRITE : if (~load_store_status.outstanding_store) next_state = IDLE_STATE; default : next_state = RST_STATE; endcase end + //Will never encounter an exception and can ignore interrupts -> will not have a new instruction on the transition to idle; interrupts can be ignored + //SFENCE: PRE_ISSUE_FLUSH (Override PC) -> TLB_CLEAR -> WAIT_WRITE + //IFENCE: PRE_ISSUE_FLUSH (Override PC) -> WAIT_WRITE + //MRET/SRET: PRE_ISSUE_FLUSH (Override PC) + + //Branch/CSR/LS exceptions: PRE_ISSUE_FLUSH (Override PC) + //Fetch/illegal exception: PRE_ISSUE_FLUSH (Override PC) + + //Interrupt: WAIT_INTERRUPT (capture next PC) -> PRE_ISSUE_FLUSH (Override PC) <- This can be hijacked by an exception + + //Interrupt + //wait until issue/execute exceptions are no longer possible, flush fetch, take exception + //////////////////////////////////////////////////// //State Counter logic [$clog2(INIT_CLEAR_DEPTH):0] state_counter; @@ -272,63 +337,101 @@ module gc_unit state_counter <= state_counter + 1; end assign init_clear_done = state_counter[$clog2(INIT_CLEAR_DEPTH)]; - assign tlb_clear_done = state_counter[$clog2(TLB_CLEAR_DEPTH)]; + assign tlb_clear_done = state_counter[$clog2(TLB_CLEAR_DEPTH)] | trivial_sfence_r; //////////////////////////////////////////////////// //Exception handling - generate if (CONFIG.INCLUDE_M_MODE) begin :gen_gc_m_mode + logic [NUM_EXCEPTION_SOURCES-1:0] exception_valid; + logic [NUM_EXCEPTION_SOURCES-1:0] exception_possible; + + //Separated out because possible exceptions from CSR must still stall even without M + generate for (genvar i = 0; i < NUM_EXCEPTION_SOURCES; i++) begin : gen_possible_exceptions + assign exception_possible[i] = exception[i].possible; + end endgenerate + assign possible_exception = |exception_possible; + assign gc.exception.possible = possible_exception; + +generate if (CONFIG.MODES != BARE) begin :gen_gc_m_mode //Re-assigning interface inputs to array types so that they can be dynamically indexed - 
logic [NUM_EXCEPTION_SOURCES-1:0] exception_pending; exception_code_t [NUM_EXCEPTION_SOURCES-1:0] exception_code; - id_t [NUM_EXCEPTION_SOURCES-1:0] exception_id; logic [NUM_EXCEPTION_SOURCES-1:0][31:0] exception_tval; - logic exception_ack; + logic [NUM_EXCEPTION_SOURCES-1:0][31:0] exception_pc; + logic [NUM_EXCEPTION_SOURCES-1:0] exception_discard; + logic [31:0] muxed_exception_pc; for (genvar i = 0; i < NUM_EXCEPTION_SOURCES; i++) begin - assign exception_pending[i] = exception[i].valid; + assign exception_valid[i] = exception[i].valid; assign exception_code[i] = exception[i].code; - assign exception_id[i] = exception[i].id; assign exception_tval[i] = exception[i].tval; - assign exception[i].ack = exception_ack; - end - - //Exception valid when the oldest instruction is a valid ID. This is done with a level of indirection (through the exception unit table) - //for better scalability, avoiding the need to compare against all exception sources. - always_comb begin - gc.exception_pending = |exception_pending; - gc.exception.valid = (retire_ids_next[0] == exception_id[current_exception_unit]) & exception_pending[current_exception_unit]; - gc.exception.pc = oldest_pc; - gc.exception.code = exception_code[current_exception_unit]; - gc.exception.tval = exception_tval[current_exception_unit]; + assign exception_discard[i] = exception[i].discard; + assign exception_pc[i] = exception[i].pc; end - assign exception_ack = gc.exception.valid; - - assign interrupt_taken = interrupt_pending & (next_state == PRE_ISSUE_FLUSH) & ~(ifence_in_progress | ret_in_progress | gc.exception.valid); - - assign mret = gc_inputs_r.is_mret & ret_in_progress & (next_state == PRE_ISSUE_FLUSH); - assign sret = gc_inputs_r.is_sret & ret_in_progress & (next_state == PRE_ISSUE_FLUSH); - - end endgenerate + assign gc.exception.valid = |exception_valid; + assign gc.exception.source = exception_valid; + + one_hot_mux #(.OPTIONS(NUM_EXCEPTION_SOURCES), .DATA_TYPE(exception_code_t)) code_mux ( + 
.one_hot(exception_valid), + .choices(exception_code), + .sel(gc.exception.code), + .*); + + one_hot_mux #(.OPTIONS(NUM_EXCEPTION_SOURCES), .DATA_TYPE(logic[31:0])) tval_mux ( + .one_hot(exception_valid), + .choices(exception_tval), + .sel(gc.exception.tval), + .*); + + one_hot_mux #(.OPTIONS(NUM_EXCEPTION_SOURCES), .DATA_TYPE(logic[31:0])) pc_mux ( + .one_hot(exception_valid), + .choices(exception_pc), + .sel(muxed_exception_pc), + .*); + assign gc.exception.pc = |exception_valid ? muxed_exception_pc : issue_stage.pc; + + assign interrupt_taken = interrupt_pending & (next_state == PRE_ISSUE_FLUSH) & ~(gc.exception.valid) & ~csr_frontend_flush; + + //Writeback and rename handling + logic gc_writeback_suppress_r; + logic gc_rename_revert; + always_ff @(posedge clk) begin + if (rst) begin + gc_writeback_suppress_r <= 0; + gc_rename_revert <= 0; + end + else begin + gc_writeback_suppress_r <= gc.writeback_suppress; + gc_rename_revert <= gc_writeback_suppress_r; + end + end + assign gc.writeback_suppress = |(exception_valid & exception_discard); + assign gc.rename_revert = gc_rename_revert; +end endgenerate //PC determination (trap, flush or return) //Two cycles: on first cycle the processor front end is flushed, //on the second cycle the new PC is fetched - generate if (CONFIG.INCLUDE_M_MODE || CONFIG.INCLUDE_IFENCE) begin :gen_gc_pc_override +generate if (CONFIG.MODES != BARE || CONFIG.INCLUDE_IFENCE) begin :gen_gc_pc_override always_ff @ (posedge clk) begin gc_pc_override <= next_state inside {PRE_ISSUE_FLUSH, INIT_CLEAR_STATE}; - gc_pc <= - (gc.exception.valid | interrupt_taken) ? exception_target_pc : - (gc_inputs_r.is_ifence) ? 
gc_inputs_r.pc_p4 : - epc; //ret + if (gc.exception.valid | interrupt_taken) + gc_pc <= exception_target_pc; + else if (instruction_issued) begin + if (is_mret) + gc_pc <= mepc; + else if (is_sret) + gc_pc <= sepc; + else //IFENCE, SFENCE, CSR flush + gc_pc <= constant_alu; + end end //work-around for verilator BLKANDNBLK signal optimizations assign gc.pc_override = gc_pc_override; assign gc.pc = gc_pc; - end endgenerate +end endgenerate //////////////////////////////////////////////////// //Decode / Write-back Handshaking //CSR reads are passed through the Load-Store unit @@ -342,12 +445,12 @@ module gc_unit //////////////////////////////////////////////////// //Assertions - `ifdef ENABLE_SIMULATION_ASSERTIONS - generate if (DEBUG_CONVERT_EXCEPTIONS_INTO_ASSERTIONS) begin - unexpected_exception_assertion: - assert property (@(posedge clk) disable iff (rst) (~gc.exception.valid)) - else $error("unexpected exception occured: %s", gc.exception.code.name()); - end endgenerate - `endif + multiple_exceptions_assertion: + assert property (@(posedge clk) disable iff (rst) $onehot0(exception_valid)) + else $error("Simultaneous exceptions"); + + multiple_possible_exceptions_assertion: + assert property (@(posedge clk) disable iff (rst) $onehot0(exception_possible)) + else $error("Simultaneous possible exceptions"); endmodule diff --git a/core/execution_units/load_store_unit/addr_hash.sv b/core/execution_units/load_store_unit/addr_hash.sv index b50e8173..86e6029f 100644 --- a/core/execution_units/load_store_unit/addr_hash.sv +++ b/core/execution_units/load_store_unit/addr_hash.sv @@ -28,7 +28,7 @@ module addr_hash parameter logic USE_BIT_3 = 1 ) ( - input logic [31:0] addr, + input logic [11:0] addr, output addr_hash_t addr_hash ); diff --git a/core/execution_units/load_store_unit/amo_alu.sv b/core/execution_units/load_store_unit/amo_alu.sv old mode 100755 new mode 100644 index 69047346..79773167 --- a/core/execution_units/load_store_unit/amo_alu.sv +++ 
b/core/execution_units/load_store_unit/amo_alu.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2017 Eric Matthews, Lesley Shannon + * Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,44 +18,48 @@ * * Author(s): * Eric Matthews + * Chris Keilbart */ module amo_alu - import cva5_config::*; import riscv_types::*; - import cva5_types::*; + #( + parameter int WIDTH = 32 + ) ( - input amo_alu_inputs_t amo_alu_inputs, - output logic[31:0] result + input amo_t amo_type, + input logic[WIDTH-1:0] rs1, + input logic[WIDTH-1:0] rs2, + output logic[WIDTH-1:0] rd ); + logic signed_op; logic rs1_smaller_than_rs2; - logic signed [32:0] rs1_ext; - logic signed [32:0] rs2_ext; - - //bit 4 for unsigned - assign rs1_ext = {(~amo_alu_inputs.op[4] & amo_alu_inputs.rs1_load[31]), amo_alu_inputs.rs1_load}; - assign rs2_ext = {(~amo_alu_inputs.op[4] & amo_alu_inputs.rs2[31]), amo_alu_inputs.rs2}; - + logic signed [WIDTH:0] rs1_ext; + logic signed [WIDTH:0] rs2_ext; + logic[WIDTH-1:0] logic_result; + logic[WIDTH-1:0] arith_result; + + assign signed_op = amo_type == AMO_MIN_FN5 | amo_type == AMO_MAX_FN5; + assign rs1_ext = {(signed_op & rs1[WIDTH-1]), rs1}; + assign rs2_ext = {(signed_op & rs2[WIDTH-1]), rs2}; assign rs1_smaller_than_rs2 = rs1_ext < rs2_ext; - /* verilator lint_off CASEINCOMPLETE */ always_comb begin - case (amo_alu_inputs.op)// <--unique as not all codes are in use - AMO_SWAP_FN5 : result = amo_alu_inputs.rs2; - AMO_ADD_FN5 : result = amo_alu_inputs.rs1_load + amo_alu_inputs.rs2; - AMO_XOR_FN5 : result = amo_alu_inputs.rs1_load ^ amo_alu_inputs.rs2; - AMO_AND_FN5 : result = amo_alu_inputs.rs1_load & amo_alu_inputs.rs2; - AMO_OR_FN5 : result = amo_alu_inputs.rs1_load | amo_alu_inputs.rs2; - AMO_MIN_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs1_load : amo_alu_inputs.rs2; - AMO_MAX_FN5 : result = rs1_smaller_than_rs2 ? 
amo_alu_inputs.rs2 : amo_alu_inputs.rs1_load; - AMO_MINU_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs1_load : amo_alu_inputs.rs2; - AMO_MAXU_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs2 : amo_alu_inputs.rs1_load; + unique case (amo_type) + AMO_XOR_FN5 : rd = rs1 ^ rs2; + AMO_OR_FN5 : rd = rs1 | rs2; + AMO_AND_FN5 : rd = rs1 & rs2; + AMO_SWAP_FN5 : rd = rs2; + AMO_MIN_FN5 : rd = rs1_smaller_than_rs2 ? rs1 : rs2; + AMO_MAX_FN5 : rd = rs1_smaller_than_rs2 ? rs2 : rs1; + AMO_MINU_FN5 : rd = rs1_smaller_than_rs2 ? rs1 : rs2; + AMO_MAXU_FN5 : rd = rs1_smaller_than_rs2 ? rs2 : rs1; + AMO_ADD_FN5 : rd = rs1 + rs2; + default : rd = 'x; //Default don't care allows some optimization endcase end - /* verilator lint_on CASEINCOMPLETE */ - -endmodule \ No newline at end of file +endmodule diff --git a/core/execution_units/load_store_unit/amo_unit.sv b/core/execution_units/load_store_unit/amo_unit.sv new file mode 100644 index 00000000..216029d1 --- /dev/null +++ b/core/execution_units/load_store_unit/amo_unit.sv @@ -0,0 +1,123 @@ +/* + * Copyright © 2024 Chris Keilbart, Lesley Shannon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. 
+ * + * Author(s): + * Chris Keilbart + */ + +module amo_unit + + import riscv_types::*; + + #( + parameter int NUM_UNITS = 3, + parameter int RESERVATION_WORDS = 4 + ) //TODO: reservation shape and size must be discoverable(?) + ( + input logic clk, + input logic rst, + + amo_interface.amo_unit agents[NUM_UNITS] + ); + + localparam RESERVATION_WIDTH = 30 - $clog2(RESERVATION_WORDS); + typedef logic[RESERVATION_WIDTH-1:0] reservation_t; + + //////////////////////////////////////////////////// + //Interface unpacking + logic[NUM_UNITS-1:0] set_reservation; + logic[NUM_UNITS-1:0] clear_reservation; + reservation_t[NUM_UNITS-1:0] reservation; + reservation_t lr_addr; + logic lr_valid; + + logic[NUM_UNITS-1:0] rmw_valid; + amo_t[NUM_UNITS-1:0] op; + logic[NUM_UNITS-1:0][31:0] rs1; + logic[NUM_UNITS-1:0][31:0] rs2; + logic[31:0] rd; + + generate for (genvar i = 0; i < NUM_UNITS; i++) begin : gen_unpacking + assign set_reservation[i] = agents[i].set_reservation; + assign clear_reservation[i] = agents[i].clear_reservation; + assign reservation[i] = agents[i].reservation[31-:RESERVATION_WIDTH]; + assign agents[i].reservation_valid = lr_valid & lr_addr == reservation[i]; + + assign rmw_valid[i] = agents[i].rmw_valid; + assign op[i] = agents[i].op; + assign rs1[i] = agents[i].rs1; + assign rs2[i] = agents[i].rs2; + assign agents[i].rd = rd; + end endgenerate + + //////////////////////////////////////////////////// + //Multiplexing + //Shared LR-SC and RMW port across all units + reservation_t set_val; + amo_t selected_op; + logic[31:0] selected_rs1; + logic[31:0] selected_rs2; + + one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(amo_t)) op_mux ( + .one_hot(rmw_valid), + .choices(op), + .sel(selected_op), + .*); + + one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(logic[31:0])) rs1_mux ( + .one_hot(rmw_valid), + .choices(rs1), + .sel(selected_rs1), + .*); + + one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(logic[31:0])) rs2_mux ( + .one_hot(rmw_valid), + .choices(rs2), + 
.sel(selected_rs2), + .*); + + one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(reservation_t)) reservation_mux ( + .one_hot(set_reservation), + .choices(reservation), + .sel(set_val), + .*); + + //////////////////////////////////////////////////// + //RISC-V LR-SC + //One address is reserved at a time for all units + //The reservation can be set or cleared at any time by any unit, but set has priority over clear on same cycle + always_ff @(posedge clk) begin + if (rst) + lr_valid <= 0; + else + lr_valid <= (lr_valid & ~|clear_reservation) | |set_reservation; + if (|set_reservation) + lr_addr <= set_val; + end + + //////////////////////////////////////////////////// + //RISC-V Atomic ALU + //Combinational; results valid in same cycle + amo_alu #(.WIDTH(32)) alu_inst ( + .amo_type(selected_op), + .rs1(selected_rs1), + .rs2(selected_rs2), + .rd(rd) + ); + +endmodule diff --git a/core/execution_units/load_store_unit/dcache.sv b/core/execution_units/load_store_unit/dcache.sv index b28b45c4..565af277 100644 --- a/core/execution_units/load_store_unit/dcache.sv +++ b/core/execution_units/load_store_unit/dcache.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2022 Eric Matthews + * Copyright © 2024 Chris Keilbart * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ * Reconfigurable Computing Lab, Simon Fraser University. 
* * Author(s): - * Eric Matthews + * Chris Keilbart */ module dcache @@ -32,296 +32,349 @@ module dcache ( input logic clk, input logic rst, - input logic dcache_on, l1_arbiter_request_interface.master l1_request, l1_arbiter_return_interface.master l1_response, - input logic sc_complete, - input logic sc_success, - input logic clear_reservation, - input amo_details_t amo, - input logic uncacheable_load, - input logic uncacheable_store, - input logic is_load, - input logic load_request, - input logic store_request, - output logic load_ready, - output logic store_ready, - input data_access_shared_inputs_t ls_load, - input data_access_shared_inputs_t ls_store, - memory_sub_unit_interface.responder ls + output logic write_outstanding, + input logic amo, + input amo_t amo_type, + amo_interface.subunit amo_unit, + input logic cbo, + input logic uncacheable, + memory_sub_unit_interface.responder ls, + input logic load_peek, //If the next request may be a load + input logic[31:0] load_addr_peek //The address in that case ); localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.DCACHE, CONFIG.DCACHE_ADDR); - localparam LOG2_WAYS = (CONFIG.DCACHE.WAYS == 1) ? 
1 : $clog2(CONFIG.DCACHE.WAYS); + localparam DB_ADDR_LEN = SCONFIG.LINE_ADDR_W + SCONFIG.SUB_LINE_ADDR_W; + + cache_functions_interface # (.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils (); - localparam bit [SCONFIG.SUB_LINE_ADDR_W-1:0] END_OF_LINE_COUNT = SCONFIG.SUB_LINE_ADDR_W'(CONFIG.DCACHE.LINE_W-1); + typedef logic[SCONFIG.TAG_W-1:0] tag_t; - cache_functions_interface # (.LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils (); + typedef struct packed { + logic valid; + tag_t tag; + } tb_entry_t; - typedef struct packed{ - logic [31:0] addr; - logic uncacheable; - } load_stage2_t; - load_stage2_t stage2_load; - - typedef struct packed{ - logic [31:0] addr; - logic [3:0] be; - logic [31:0] data; - logic cache_op; + typedef struct packed { + logic[31:0] addr; + logic[31:0] data; + logic[3:0] be; + logic rnw; logic uncacheable; - } store_stage2_t; - store_stage2_t stage2_store; - - logic [CONFIG.DCACHE.WAYS-1:0] load_tag_hit_way; - logic [CONFIG.DCACHE.WAYS-1:0] store_tag_hit_way; - - logic [CONFIG.DCACHE.WAYS-1:0] replacement_way; - logic [CONFIG.DCACHE.WAYS-1:0] replacement_way_r; - - logic load_tag_check; - logic load_hit; - logic store_hit; - logic [LOG2_WAYS-1:0] tag_hit_index; - logic [LOG2_WAYS-1:0] replacement_index; - logic [LOG2_WAYS-1:0] replacement_index_r; - logic [LOG2_WAYS-1:0] load_sel; - - logic is_target_word; - logic [SCONFIG.SUB_LINE_ADDR_W-1:0] word_count; - logic miss_data_valid; - logic line_complete; - - logic arb_load_sel; - logic load_l1_arb_ack; - logic store_l1_arb_ack; - - logic [31:0] ram_load_data [CONFIG.DCACHE.WAYS-1:0]; + logic amo; + amo_t amo_type; + logic cbo; + } req_t; - typedef enum { - LOAD_IDLE = 0, - LOAD_HIT_CHECK = 1, - LOAD_L1_REQUEST = 2, - LOAD_FILL = 3 - } load_path_enum_t; - logic [3:0] load_state, load_state_next; + //Implementation + req_t stage0; + req_t stage1; + logic stage1_done; + logic stage0_advance_r; - typedef enum { - 
STORE_IDLE = 0, - STORE_L1_REQUEST = 1 - } store_path_enum_t; - logic [1:0] store_state, store_state_next; + assign write_outstanding = (current_state != IDLE) & (~stage1.rnw | stage1.amo); - //////////////////////////////////////////////////// - //Implementation + //Peeking avoids circular logic + assign ls.ready = (current_state == IDLE) | (stage1_done & ~stage1.cbo & ~(db_wen & load_peek & load_addr_peek[31:DB_ADDR_LEN+2] == stage1.addr[31:DB_ADDR_LEN+2] & load_addr_peek[2+:DB_ADDR_LEN] == db_addr)); - //////////////////////////////////////////////////// - //Load Path - always_ff @ (posedge clk) begin - if (rst) begin - load_state <= 0; - load_state[LOAD_IDLE] <= 1; - end + always_ff @(posedge clk) begin + if (rst) + stage0_advance_r <= 0; else - load_state <= load_state_next; + stage0_advance_r <= ls.new_request; + if (ls.new_request) + stage1 <= stage0; end + assign stage0 = '{ + addr : ls.addr, + data : ls.data_in, + be : ls.be, + rnw : ls.re, + uncacheable : uncacheable, + amo : amo, + amo_type : amo_type, + cbo : cbo + }; + + //Replacement policy + logic[CONFIG.DCACHE.WAYS-1:0] replacement_way; + cycler #(CONFIG.DCACHE.WAYS) replacement_policy ( + .en(ls.new_request), + .one_hot(replacement_way), + .*); + + //Tagbank + tb_entry_t[CONFIG.DCACHE.WAYS-1:0] tb_entries; + tb_entry_t new_entry; + logic[CONFIG.DCACHE.WAYS-1:0] hit_ohot; + logic[CONFIG.DCACHE.WAYS-1:0] hit_ohot_r; + logic hit; + logic hit_r; + logic tb_write; + + assign tb_write = stage0_advance_r & ~stage1.uncacheable & ((~hit & stage1.rnw & ~stage1_is_sc) | (stage1.cbo & hit)); + + assign new_entry = '{ + valid : ~stage1.cbo, + tag : addr_utils.getTag(stage1.addr) + }; + + sdp_ram_padded #( + .ADDR_WIDTH(SCONFIG.LINE_ADDR_W), + .NUM_COL(CONFIG.DCACHE.WAYS), + .COL_WIDTH($bits(tb_entry_t)), + .PIPELINE_DEPTH(0) + ) tagbank ( + .a_en(tb_write), + .a_wbe(replacement_way), + .a_wdata({CONFIG.DCACHE.WAYS{new_entry}}), + .a_addr(addr_utils.getTagLineAddr(stage1.addr)), + .b_en(ls.new_request), + 
.b_addr(addr_utils.getTagLineAddr(stage0.addr)), + .b_rdata(tb_entries), + .*); + + //Hit detection always_comb begin - load_state_next[LOAD_IDLE] = (load_state[LOAD_IDLE] & ~load_request) | ((load_hit & ~load_request) | line_complete); - load_state_next[LOAD_HIT_CHECK] = load_request; - load_state_next[LOAD_L1_REQUEST] = (load_state[LOAD_L1_REQUEST] & ~load_l1_arb_ack) | (load_state[LOAD_HIT_CHECK] & ~load_hit); - load_state_next[LOAD_FILL] = (load_state[LOAD_FILL] & ~line_complete) | (load_state[LOAD_L1_REQUEST] & load_l1_arb_ack); + hit_ohot = '0; + for (int i = 0; i < CONFIG.DCACHE.WAYS; i++) + hit_ohot[i] = tb_entries[i].valid & (tb_entries[i].tag == addr_utils.getTag(stage1.addr)); end - - assign load_ready = (load_state[LOAD_IDLE] | load_hit) & (store_state[STORE_IDLE] | store_l1_arb_ack); - - always_ff @ (posedge clk) begin - if (load_request) begin - stage2_load.addr <= ls_load.addr; - stage2_load.uncacheable <= uncacheable_load; + assign hit = |hit_ohot; + always_ff @(posedge clk) begin + if (stage0_advance_r) begin + hit_r <= hit; + hit_ohot_r <= hit_ohot; end end - assign load_tag_check = load_request & dcache_on & ~uncacheable_load; + //Databank + logic[CONFIG.DCACHE.WAYS-1:0][31:0] db_entries; + logic[31:0] db_hit_entry; + logic db_wen; + logic[CONFIG.DCACHE.WAYS-1:0] db_way; + logic[CONFIG.DCACHE.WAYS-1:0][3:0] db_wbe_full; + logic[31:0] db_wdata; - //////////////////////////////////////////////////// - //Load Miss - always_ff @ (posedge clk) begin - if (load_request) - word_count <= 0; - else - word_count <= word_count + SCONFIG.SUB_LINE_ADDR_W'(l1_response.data_valid); + always_comb begin + for (int i = 0; i < CONFIG.DCACHE.WAYS; i++) + db_wbe_full[i] = {4{db_way[i]}} & stage1.be; end - assign is_target_word = (stage2_load.addr[2 +: SCONFIG.SUB_LINE_ADDR_W] == word_count) | stage2_load.uncacheable; - assign line_complete = l1_response.data_valid & ((word_count == END_OF_LINE_COUNT) | stage2_load.uncacheable); + logic[DB_ADDR_LEN-1:0] db_addr; + 
assign db_addr = current_state == FILLING ? {addr_utils.getTagLineAddr(stage1.addr), word_counter} : addr_utils.getDataLineAddr(stage1.addr); + + sdp_ram #( + .ADDR_WIDTH(DB_ADDR_LEN), + .NUM_COL(4*CONFIG.DCACHE.WAYS), + .COL_WIDTH(8), + .PIPELINE_DEPTH(0) + ) databank ( + .a_en(db_wen), + .a_wbe(db_wbe_full), + .a_wdata({CONFIG.DCACHE.WAYS{db_wdata}}), + .a_addr(db_addr), + .b_en(ls.new_request), + .b_addr(addr_utils.getDataLineAddr(stage0.addr)), + .b_rdata(db_entries), + .*); - //////////////////////////////////////////////////// - //Store Path - always_ff @ (posedge clk) begin - if (rst) begin - store_state <= 0; - store_state[STORE_IDLE] <= 1; + always_comb begin + db_hit_entry = 'x; + for (int i = 0; i < CONFIG.DCACHE.WAYS; i++) begin + if (hit_ohot[i]) + db_hit_entry = db_entries[i]; end - else - store_state <= store_state_next; end - always_comb begin - store_state_next[STORE_IDLE] = (store_state[STORE_IDLE] & (~store_request | (store_request & ls_store.cache_op))) | (store_l1_arb_ack & ~store_request); - store_state_next[STORE_L1_REQUEST] = (store_state[STORE_L1_REQUEST] & ~store_l1_arb_ack) | (store_request & ~ls_store.cache_op); + //Arbiter response + logic correct_word; + logic return_done; + logic[SCONFIG.SUB_LINE_ADDR_W-1:0] word_counter; + assign return_done = l1_response.data_valid & word_counter == SCONFIG.SUB_LINE_ADDR_W'(CONFIG.DCACHE.LINE_W-1); + assign correct_word = l1_response.data_valid & word_counter == stage1.addr[2+:SCONFIG.SUB_LINE_ADDR_W]; + always_ff @(posedge clk) begin + if (l1_response.data_valid) + word_counter <= word_counter+1; + if (ls.new_request) + word_counter <= 0; end - assign store_ready = (store_state[STORE_IDLE] | store_l1_arb_ack) & (load_state[LOAD_IDLE] | load_hit); - assign ls.ready = is_load ? 
load_ready : store_ready; - - always_ff @ (posedge clk) begin - if (store_request) begin - stage2_store.addr <= ls_store.addr; - stage2_store.uncacheable <= uncacheable_store; - stage2_store.be <= ls_store.be; - stage2_store.data <= ls_store.data_in; - stage2_store.cache_op <= ls_store.cache_op; - end + typedef enum { + IDLE, + FIRST_CYCLE, + REQUESTING_READ, + FILLING, + UNCACHEABLE_WAITING_READ, + AMO_WRITE + } stage1_t; + stage1_t current_state; + stage1_t next_state; + + always_ff @(posedge clk) begin + if (rst) + current_state <= IDLE; + else + current_state <= next_state; end - //////////////////////////////////////////////////// - //L1 Arbiter Interface - //Priority to oldest request - fifo_interface #(.DATA_TYPE(logic)) request_order(); - - assign request_order.data_in = load_request; - assign request_order.push = load_request | (store_request & ~ls_store.cache_op); - assign request_order.potential_push = request_order.push; - - assign request_order.pop = l1_request.ack | load_hit; - - cva5_fifo #(.DATA_TYPE(logic), .FIFO_DEPTH(2)) - request_order_fifo ( - .clk (clk), - .rst (rst), - .fifo (request_order) - ); - - assign arb_load_sel = request_order.data_out; - - assign l1_request.addr = arb_load_sel ? stage2_load.addr : stage2_store.addr;//Memory interface aligns request to burst size (done there to support AMO line-read word-write) - assign l1_request.data = stage2_store.data; - assign l1_request.rnw = arb_load_sel; - assign l1_request.be = stage2_store.be; - assign l1_request.size = (arb_load_sel & ~stage2_load.uncacheable) ? 
5'(CONFIG.DCACHE.LINE_W-1) : 0;//LR and AMO ops are included in load - assign l1_request.is_amo = 0; - assign l1_request.amo = 0; - - assign l1_request.request = load_state[LOAD_L1_REQUEST] | store_state[STORE_L1_REQUEST]; - - assign load_l1_arb_ack = l1_request.ack & arb_load_sel; - assign store_l1_arb_ack = l1_request.ack & ~arb_load_sel; - //////////////////////////////////////////////////// - //Replacement policy (free runing one-hot cycler, i.e. pseudo random) - cycler #(CONFIG.DCACHE.WAYS) replacement_policy ( - .clk (clk), - .rst (rst), - .en (1'b1), - .one_hot (replacement_way) - ); - - //////////////////////////////////////////////////// - //Tag banks - dcache_tag_banks #(.CONFIG(CONFIG), .SCONFIG(SCONFIG)) - tag_banks ( - .clk (clk), - .rst (rst), - .load_addr (ls_load.addr), - .load_req (load_tag_check), - .miss_addr (stage2_load.addr), - .miss_req (load_l1_arb_ack), - .miss_way (replacement_way), - .inv_addr ({l1_response.inv_addr, 2'b0}), - .extern_inv (l1_response.inv_valid), - .extern_inv_complete (l1_response.inv_ack), - .store_addr (ls_store.addr), - .store_addr_r (stage2_store.addr), - .store_req (store_request), - .cache_op_req (ls_store.cache_op), - .load_tag_hit (load_hit), - .load_tag_hit_way (load_tag_hit_way), - .store_tag_hit (store_hit), - .store_tag_hit_way (store_tag_hit_way) - ); - - //////////////////////////////////////////////////// - //Data Bank(s) - logic [SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0] data_read_addr; - assign data_read_addr = load_state[LOAD_FILL] ? 
{addr_utils.getTagLineAddr(stage2_load.addr), word_count} : addr_utils.getDataLineAddr(ls_load.addr); - - generate for (genvar i=0; i < CONFIG.DCACHE.WAYS; i++) begin : data_bank_gen - byte_en_bram #(CONFIG.DCACHE.LINES*CONFIG.DCACHE.LINE_W) data_bank ( - .clk(clk), - .addr_a(data_read_addr), - .addr_b(addr_utils.getDataLineAddr(stage2_store.addr)), - .en_a(load_tag_check | (replacement_way_r[i] & l1_response.data_valid)), - .en_b(store_tag_hit_way[i]), - .be_a({4{(replacement_way_r[i] & l1_response.data_valid)}}), - .be_b(stage2_store.be), - .data_in_a(l1_response.data), - .data_in_b(stage2_store.data), - .data_out_a(ram_load_data[i]), - .data_out_b() - ); - end endgenerate - - //////////////////////////////////////////////////// - //Output - //One-hot tag hit / update logic to binary int - one_hot_to_integer #(CONFIG.DCACHE.WAYS) - hit_way_conv ( - .one_hot (load_tag_hit_way), - .int_out (tag_hit_index) - ); - one_hot_to_integer #(CONFIG.DCACHE.WAYS) - replacment_way_conv ( - .one_hot (replacement_way), - .int_out (replacement_index) - ); - always_ff @ (posedge clk) begin - if (load_l1_arb_ack) begin - replacement_way_r <= replacement_way; - replacement_index_r <= replacement_index; - end + //Have to pull this into its own block to prevent a verilator circular dependency + always_comb begin + unique case (current_state) + IDLE : stage1_done = 0; + FIRST_CYCLE : stage1_done = ((~stage1.rnw | (stage1_is_sc & amo_unit.reservation_valid)) & l1_request.ack) | (stage1_is_sc & ~amo_unit.reservation_valid) | (stage1.rnw & hit & (~stage1.amo | stage1_is_lr) & ~stage1.uncacheable) | stage1.cbo; + REQUESTING_READ : stage1_done = 0; + FILLING : stage1_done = return_done & (stage1_is_lr | ~stage1.amo); + UNCACHEABLE_WAITING_READ : stage1_done = l1_response.data_valid & (stage1_is_lr | ~stage1.amo); + AMO_WRITE : stage1_done = l1_request.ack; + endcase end - always_ff @ (posedge clk) miss_data_valid <= l1_response.data_valid & is_target_word; - - logic collision; - logic 
[31:0] saved_data; - logic [3:0] saved_be; - - assign collision = store_state[STORE_L1_REQUEST] & (stage2_store.addr[31:2] == ls_load.addr[31:2]); - always_ff @ (posedge clk) begin - if (load_request) begin - saved_data <= stage2_store.data; - saved_be <= {4{collision}} & stage2_store.be; - end + always_comb begin + unique case (current_state) + IDLE : begin + l1_request.request = 0; + l1_request.addr = 'x; + l1_request.data = 'x; + l1_request.rnw = 'x; + l1_request.size = 'x; + db_wen = 0; + db_wdata = 'x; + db_way = 'x; + ls.data_valid = 0; + ls.data_out = 'x; + next_state = ls.new_request ? FIRST_CYCLE : IDLE; + end + FIRST_CYCLE : begin //Handles writes, read hits, uncacheable reads, and SC + l1_request.request = ~stage1.cbo & (~stage1.rnw | (stage1.uncacheable & ~stage1_is_sc) | (stage1_is_sc & amo_unit.reservation_valid)); + l1_request.addr = stage1.addr; + l1_request.data = stage1.data; + l1_request.rnw = stage1.rnw & ~stage1_is_sc; + l1_request.size = '0; + db_wen = ~stage1.cbo & hit & ~stage1.uncacheable & (~stage1.rnw | (stage1_is_sc & amo_unit.reservation_valid)); + db_wdata = stage1.data; + db_way = hit_ohot; + ls.data_valid = (stage0_advance_r & stage1_is_sc) | (stage1.rnw & ~stage1.uncacheable & hit & ~stage1_is_sc); + ls.data_out = stage1_is_sc ? {31'b0, ~amo_unit.reservation_valid} : db_hit_entry; + if (stage1_done) + next_state = ls.new_request ? 
FIRST_CYCLE : IDLE; + else if (stage1.uncacheable & l1_request.ack) + next_state = UNCACHEABLE_WAITING_READ; + else if (stage1.rnw & ~stage1.uncacheable & ~hit & ~stage1_is_sc) + next_state = REQUESTING_READ; + else if (stage1.amo & hit & ~stage1.uncacheable & ~stage1_is_sc) + next_state = AMO_WRITE; + else + next_state = FIRST_CYCLE; + end + REQUESTING_READ : begin + l1_request.request = 1; + l1_request.addr = stage1.addr; + l1_request.data = 'x; + l1_request.rnw = 1; + l1_request.size = 5'(CONFIG.DCACHE.LINE_W-1); + db_wen = 0; + db_wdata = 'x; + db_way = 'x; + ls.data_valid = 0; + ls.data_out = 'x; + next_state = l1_request.ack ? FILLING : REQUESTING_READ; + end + FILLING : begin + l1_request.request = 0; + l1_request.addr = 'x; + l1_request.data = 'x; + l1_request.rnw = 'x; + l1_request.size = 'x; + db_wen = l1_response.data_valid; + db_wdata = l1_response.data; + db_way = replacement_way; + ls.data_valid = correct_word; + ls.data_out = l1_response.data; + if (return_done) begin + if (stage1.amo & ~stage1_is_lr) + next_state = AMO_WRITE; + else + next_state = ls.new_request ? FIRST_CYCLE : IDLE; + end + else + next_state = FILLING; + end + UNCACHEABLE_WAITING_READ : begin + l1_request.request = 0; + l1_request.addr = 'x; + l1_request.data = 'x; + l1_request.rnw = 'x; + l1_request.size = 'x; + db_wen = 0; + db_wdata = 'x; + db_way = 'x; + ls.data_valid = l1_response.data_valid; + ls.data_out = l1_response.data; + if (l1_response.data_valid) begin + if (stage1.amo & ~stage1_is_lr) + next_state = AMO_WRITE; + else + next_state = ls.new_request ? FIRST_CYCLE : IDLE; + end + else + next_state = UNCACHEABLE_WAITING_READ; + end + AMO_WRITE : begin + l1_request.request = 1; + l1_request.addr = stage1.addr; + l1_request.data = amo_unit.rd; + l1_request.rnw = 0; + l1_request.size = '0; + db_wen = ~stage1.uncacheable; + db_wdata = amo_unit.rd; + db_way = hit_r ? 
hit_ohot_r : replacement_way; + ls.data_valid = 0; + ls.data_out = 'x; + if (l1_request.ack) + next_state = ls.new_request ? FIRST_CYCLE : IDLE; + else + next_state = AMO_WRITE; + end + endcase end - assign load_sel = load_state[LOAD_HIT_CHECK] ? tag_hit_index : replacement_index_r; - always_comb for (int i = 0; i < 4; i++) - ls.data_out[8*i+:8] = saved_be[i] ? saved_data[8*i+:8] : ram_load_data[load_sel][8*i+:8]; - assign ls.data_valid = load_hit | miss_data_valid; + //AMO + logic stage1_is_lr; + logic stage1_is_sc; + + assign stage1_is_lr = stage1.amo & stage1.amo_type == AMO_LR_FN5; + assign stage1_is_sc = stage1.amo & stage1.amo_type == AMO_SC_FN5; + + assign amo_unit.reservation = stage1.addr; + assign amo_unit.rs2 = stage1.data; + assign amo_unit.rmw_valid = (current_state != IDLE) & stage1.amo; + assign amo_unit.op = stage1.amo_type; + assign amo_unit.set_reservation = stage1_is_lr & stage1_done; + assign amo_unit.clear_reservation = stage1_done; + + always_ff @(posedge clk) begin + if (stage0_advance_r) + amo_unit.rs1 <= db_hit_entry; + else if (correct_word | (l1_response.data_valid & stage1.uncacheable)) + amo_unit.rs1 <= l1_response.data; + end - //////////////////////////////////////////////////// - //End of Implementation - //////////////////////////////////////////////////// + assign l1_request.be = stage1.be; + assign l1_request.is_amo = 0; + assign l1_request.amo = '0; //////////////////////////////////////////////////// //Assertions dcache_request_when_not_ready_assertion: - assert property (@(posedge clk) disable iff (rst) load_request |-> load_ready) + assert property (@(posedge clk) disable iff (rst) ls.new_request |-> ls.ready) else $error("dcache received request when not ready"); dache_suprious_l1_ack_assertion: - assert property (@(posedge clk) disable iff (rst) l1_request.ack |-> (load_state[LOAD_L1_REQUEST] | store_state[STORE_L1_REQUEST])) + assert property (@(posedge clk) disable iff (rst) l1_request.ack |-> l1_request.request) else 
$error("dcache received ack without a request"); endmodule diff --git a/core/execution_units/load_store_unit/dcache_tag_banks.sv b/core/execution_units/load_store_unit/dcache_tag_banks.sv deleted file mode 100644 index 2a8b7649..00000000 --- a/core/execution_units/load_store_unit/dcache_tag_banks.sv +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright © 2022 Eric Matthews - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Initial code developed under the supervision of Dr. Lesley Shannon, - * Reconfigurable Computing Lab, Simon Fraser University. 
- * - * Author(s): - * Eric Matthews - */ - -module dcache_tag_banks - - import cva5_config::*; - import cva5_types::*; - - # ( - parameter cpu_config_t CONFIG = EXAMPLE_CONFIG, - parameter derived_cache_config_t SCONFIG = '{LINE_ADDR_W : 9, SUB_LINE_ADDR_W : 2, TAG_W : 15} - ) - - ( - input logic clk, - input logic rst, - - //Port A - input logic[31:0] load_addr, - input logic load_req, - input logic[31:0] miss_addr, - input logic miss_req, - input logic[CONFIG.DCACHE.WAYS-1:0] miss_way, - input logic[31:0] inv_addr, - input logic extern_inv, - output logic extern_inv_complete, - - //Port B - input logic[31:0] store_addr, - input logic[31:0] store_addr_r, - input logic store_req, - input logic cache_op_req, - - output logic load_tag_hit, - output logic store_tag_hit, - output logic[CONFIG.DCACHE.WAYS-1:0] load_tag_hit_way, - output logic[CONFIG.DCACHE.WAYS-1:0] store_tag_hit_way - ); - - typedef struct packed { - logic valid; - logic [SCONFIG.TAG_W-1:0] tag; - } dtag_entry_t; - - cache_functions_interface # (.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils (); - - dtag_entry_t tag_line_a [CONFIG.DCACHE.WAYS-1:0]; - dtag_entry_t tag_line_b [CONFIG.DCACHE.WAYS-1:0]; - - dtag_entry_t new_tagline; - - logic [SCONFIG.LINE_ADDR_W-1:0] porta_addr; - logic [SCONFIG.LINE_ADDR_W-1:0] portb_addr; - - logic external_inv; - logic load_req_r; - logic store_req_r; - //////////////////////////////////////////////////// - //Implementation - always_ff @ (posedge clk) load_req_r <= load_req; - always_ff @ (posedge clk) store_req_r <= store_req & ~cache_op_req; - - assign external_inv = extern_inv & CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS; - - assign porta_addr = miss_req ? addr_utils.getTagLineAddr(miss_addr) : external_inv ? 
addr_utils.getTagLineAddr(inv_addr) : addr_utils.getTagLineAddr(store_addr); - assign portb_addr = addr_utils.getTagLineAddr(load_addr); - - assign extern_inv_complete = external_inv & ~miss_req; - - assign new_tagline = '{valid: miss_req, tag: addr_utils.getTag(miss_addr)}; - - //////////////////////////////////////////////////// - //Memory instantiation and hit detection - generate for (genvar i = 0; i < CONFIG.DCACHE.WAYS; i++) begin : tag_bank_gen - dual_port_bram #(.WIDTH($bits(dtag_entry_t)), .LINES(CONFIG.DCACHE.LINES)) dtag_bank ( - .clk (clk), - .en_a (store_req | (miss_req & miss_way[i]) | external_inv), - .wen_a ((miss_req & miss_way[i]) | external_inv | (store_req & cache_op_req)), - .addr_a (porta_addr), - .data_in_a (new_tagline), - .data_out_a (tag_line_a[i]), - .en_b (load_req), - .wen_b ('0), - .addr_b (portb_addr), - .data_in_b ('0), - .data_out_b(tag_line_b[i]) - ); - assign store_tag_hit_way[i] = ({store_req_r, 1'b1, addr_utils.getTag(store_addr_r)} == {1'b1, tag_line_a[i]}); - assign load_tag_hit_way[i] = ({load_req_r, 1'b1, addr_utils.getTag(miss_addr)} == {1'b1, tag_line_b[i]}); - end endgenerate - - assign load_tag_hit = |load_tag_hit_way; - assign store_tag_hit = |store_tag_hit_way; - -endmodule diff --git a/core/execution_units/load_store_unit/load_store_queue.sv b/core/execution_units/load_store_unit/load_store_queue.sv index 24c550ca..81050459 100644 --- a/core/execution_units/load_store_unit/load_store_queue.sv +++ b/core/execution_units/load_store_unit/load_store_queue.sv @@ -49,20 +49,32 @@ module load_store_queue //ID-based input buffer for Load/Store Unit localparam DOUBLE_MIN_WIDTH = FLEN >= 32 ? 
32 : FLEN; typedef struct packed { - logic [31:0] addr; + logic [11:0] offset; logic [2:0] fn3; logic fp; logic double; + logic amo; + amo_t amo_type; + logic [31:0] amo_wdata; id_t id; logic store_collision; logic [LOG2_SQ_DEPTH-1:0] sq_index; } lq_entry_t; + typedef struct packed { + logic discard; + logic [19:0] addr; + ls_subunit_t subunit; + } addr_entry_t; + logic [LOG2_SQ_DEPTH-1:0] sq_index; logic [LOG2_SQ_DEPTH-1:0] sq_oldest; addr_hash_t addr_hash; logic potential_store_conflict; + logic lq_addr_discard; + logic sq_addr_discard; + logic load_pop; logic load_addr_bit_3; logic [2:0] load_fn3; @@ -72,7 +84,9 @@ module load_store_queue //ID-based input buffer for Load/Store Unit logic [31:0] store_data; fifo_interface #(.DATA_TYPE(lq_entry_t)) lq(); + fifo_interface #(.DATA_TYPE(addr_entry_t)) lq_addr(); store_queue_interface sq(); + fifo_interface #(.DATA_TYPE(addr_entry_t)) sq_addr(); //////////////////////////////////////////////////// //Implementation @@ -85,7 +99,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit //Address hash for load-store collision checking addr_hash #(.USE_BIT_3(~CONFIG.INCLUDE_UNIT.FPU)) lsq_addr_hash ( - .addr (lsq.data_in.addr), + .addr (lsq.data_in.offset), .addr_hash (addr_hash) ); @@ -97,31 +111,49 @@ module load_store_queue //ID-based input buffer for Load/Store Unit .rst(rst), .fifo(lq) ); + cva5_fifo #(.DATA_TYPE(addr_entry_t), .FIFO_DEPTH(MAX_IDS)) + load_queue_addr_fifo ( + .clk(clk), + .rst(rst), + .fifo(lq_addr) + ); //FIFO control signals assign lq.push = lsq.push & lsq.data_in.load; assign lq.potential_push = lsq.potential_push; - assign lq.pop = load_pop; + assign lq.pop = load_pop | lq_addr_discard; + + assign lq_addr.push = lsq.addr_push & lsq.addr_data_in.rnw; + assign lq_addr.potential_push = lq_addr.push; + assign lq_addr.data_in.addr = lsq.addr_data_in.addr; + assign lq_addr.data_in.subunit = lsq.addr_data_in.subunit; + assign lq_addr.data_in.discard = lsq.addr_data_in.discard; + assign 
lq_addr.pop = load_pop | lq_addr_discard; + + assign lq_addr_discard = lq_addr.valid ? lq_addr.data_out.discard : lsq.addr_push & lsq.addr_data_in.rnw & lsq.addr_data_in.discard; //FIFO data ports assign lq.data_in = '{ - addr : lsq.data_in.addr, + offset : lsq.data_in.offset, fn3 : lsq.data_in.fn3, fp : lsq.data_in.fp, double : lsq.data_in.double, + amo : lsq.data_in.amo, + amo_type : lsq.data_in.amo_type, + amo_wdata : lsq.data_in.data, id : lsq.data_in.id, - store_collision : potential_store_conflict, + store_collision : potential_store_conflict | (CONFIG.INCLUDE_AMO & lsq.data_in.amo), //Collision forces sequential consistency sq_index : sq_index }; //////////////////////////////////////////////////// //Store Queue assign sq.push = lsq.push & (lsq.data_in.store | lsq.data_in.cache_op); - assign sq.pop = store_pop; + assign sq.pop = store_pop | sq_addr_discard; assign sq.data_in = lsq.data_in; store_queue # (.CONFIG(CONFIG)) sq_block ( .clk (clk), - .rst (rst | gc.sq_flush), + .rst (rst), .sq (sq), .store_forward_wb_group (store_forward_wb_group), .fp_store_forward_wb_group (fp_store_forward_wb_group), @@ -133,6 +165,22 @@ module load_store_queue //ID-based input buffer for Load/Store Unit .fp_wb_packet (fp_wb_packet), .store_retire (store_retire) ); + cva5_fifo #(.DATA_TYPE(addr_entry_t), .FIFO_DEPTH(CONFIG.SQ_DEPTH)) + store_queue_addr_fifo ( + .clk(clk), + .rst(rst), + .fifo(sq_addr) + ); + + assign sq_addr.push = lsq.addr_push & ~lsq.addr_data_in.rnw; + assign sq_addr.potential_push = sq_addr.push; + assign sq_addr.data_in.addr = lsq.addr_data_in.addr; + assign sq_addr.data_in.subunit = lsq.addr_data_in.subunit; + assign sq_addr.data_in.discard = lsq.addr_data_in.discard; + assign sq_addr.pop = store_pop | sq_addr_discard; + + assign sq_addr_discard = sq.valid & (~lq.valid | load_blocked) & (sq_addr.valid ? 
sq_addr.data_out.discard : lsq.addr_push & ~lsq.addr_data_in.rnw & lsq.addr_data_in.discard); + //////////////////////////////////////////////////// //Output @@ -148,7 +196,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit assign load_fp_hold = ~load_p2 & lq.data_out.double; assign load_pop = lsq.load_pop & ~load_fp_hold; - assign load_addr_bit_3 = load_fp_hold | lq.data_out.addr[2]; + assign load_addr_bit_3 = load_fp_hold | lq.data_out.offset[2]; assign load_fn3 = lq.data_out.fp ? LS_W_fn3 : lq.data_out.fn3; always_comb begin @@ -171,7 +219,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit end else begin : gen_no_load_split //All loads are single cycle (load only the upper word) assign load_pop = lsq.load_pop; - assign load_addr_bit_3 = lq.data_out.addr[2] | lq.data_out.double; + assign load_addr_bit_3 = lq.data_out.offset[2] | lq.data_out.double; assign load_fn3 = lq.data_out.fp ? LS_W_fn3 : lq.data_out.fn3; always_comb begin if (lq.data_out.double) @@ -194,7 +242,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit assign store_fp_hold = ~store_p2 & sq.data_out.double; assign store_pop = lsq.store_pop & ~store_fp_hold; - assign store_addr_bit_3 = sq.data_out.double ? store_p2 : sq.data_out.addr[2]; + assign store_addr_bit_3 = sq.data_out.double ? 
store_p2 : sq.data_out.offset[2]; always_ff @(posedge clk) begin if (rst) @@ -217,11 +265,11 @@ module load_store_queue //ID-based input buffer for Load/Store Unit end else begin : gen_no_fpu //Plain integer memory operations assign load_pop = lsq.load_pop; - assign load_addr_bit_3 = lq.data_out.addr[2]; + assign load_addr_bit_3 = lq.data_out.offset[2]; assign load_fn3 = lq.data_out.fn3; assign load_type = INT_DONE; assign store_pop = lsq.store_pop; - assign store_addr_bit_3 = sq.data_out.addr[2]; + assign store_addr_bit_3 = sq.data_out.offset[2]; assign store_data = sq.data_out.data; end endgenerate @@ -229,35 +277,41 @@ module load_store_queue //ID-based input buffer for Load/Store Unit logic load_blocked; assign load_blocked = (lq.data_out.store_collision & (lq.data_out.sq_index != sq_oldest)); - assign lsq.load_valid = lq.valid & ~load_blocked; - assign lsq.store_valid = sq.valid; + //Requests are only valid if the TLB has returned the physical address and there was no exception + assign lsq.load_valid = lq.valid & ~load_blocked & (lq_addr.valid ? ~lq_addr.data_out.discard : lsq.addr_push & lsq.addr_data_in.rnw & ~lsq.addr_data_in.discard); + assign lsq.store_valid = sq.valid & (sq_addr.valid ? ~sq_addr.data_out.discard : lsq.addr_push & ~lsq.addr_data_in.rnw & ~lsq.addr_data_in.discard); assign lsq.load_data_out = '{ - addr : {lq.data_out.addr[31:3], load_addr_bit_3, lq.data_out.addr[1:0]}, + addr : {(lq_addr.valid ? lq_addr.data_out.addr : lsq.addr_data_in.addr), lq.data_out.offset[11:3], load_addr_bit_3, lq.data_out.offset[1:0]}, load : 1, store : 0, cache_op : 0, - be : 'x, + amo : lq.data_out.amo, + amo_type : lq.data_out.amo_type, + be : '1, fn3 : load_fn3, - data_in : 'x, + subunit : lq_addr.valid ? lq_addr.data_out.subunit : lsq.addr_data_in.subunit, + data_in : CONFIG.INCLUDE_AMO ? 
lq.data_out.amo_wdata : 'x, id : lq.data_out.id, fp_op : load_type }; assign lsq.store_data_out = '{ - addr : {sq.data_out.addr[31:3], store_addr_bit_3, sq.data_out.addr[1:0]}, + addr : {(sq_addr.valid ? sq_addr.data_out.addr : lsq.addr_data_in.addr), sq.data_out.offset[11:3], store_addr_bit_3, sq.data_out.offset[1:0]}, load : 0, store : 1, cache_op : sq.data_out.cache_op, + amo : 0, + amo_type : amo_t'('x), be : sq.data_out.be, fn3 : 'x, + subunit : sq_addr.valid ? sq_addr.data_out.subunit : lsq.addr_data_in.subunit, data_in : store_data, id : 'x, fp_op : fp_ls_op_t'('x) }; assign lsq.sq_empty = sq.empty; - assign lsq.no_released_stores_pending = sq.no_released_stores_pending; assign lsq.empty = ~lq.valid & sq.empty; //////////////////////////////////////////////////// diff --git a/core/execution_units/load_store_unit/load_store_unit.sv b/core/execution_units/load_store_unit/load_store_unit.sv old mode 100755 new mode 100644 index b00f96ca..79235d5f --- a/core/execution_units/load_store_unit/load_store_unit.sv +++ b/core/execution_units/load_store_unit/load_store_unit.sv @@ -26,6 +26,7 @@ module load_store_unit import riscv_types::*; import cva5_types::*; import fpu_types::*; + import csr_types::*; import opcodes::*; # ( @@ -62,7 +63,6 @@ module load_store_unit input logic dcache_on, input logic clear_reservation, tlb_interface.requester tlb, - input logic tlb_on, l1_arbiter_request_interface.master l1_request, l1_arbiter_return_interface.master l1_response, @@ -75,11 +75,17 @@ module load_store_unit local_memory_interface.master data_bram, + //CSR + input logic [1:0] current_privilege, + input envcfg_t menvcfg, + input envcfg_t senvcfg, + //Writeback-Store Interface input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS], input fp_wb_packet_t fp_wb_packet [2], - //Retire release + //Retire + input id_t retire_id, input retire_packet_t store_retire, exception_interface.unit exception, @@ -96,9 +102,10 @@ module load_store_unit localparam DCACHE_ID = 
int'(CONFIG.INCLUDE_DLOCAL_MEM) + int'(CONFIG.INCLUDE_PERIPHERAL_BUS); //Should be equal to pipeline depth of longest load/store subunit - localparam ATTRIBUTES_DEPTH = 1; + localparam ATTRIBUTES_DEPTH = 2; //Subunit signals + amo_interface amo_if[NUM_SUB_UNITS](); addr_utils_interface #(CONFIG.DLOCAL_MEM_ADDR.L, CONFIG.DLOCAL_MEM_ADDR.H) dlocal_mem_addr_utils (); addr_utils_interface #(CONFIG.PERIPHERAL_BUS_ADDR.L, CONFIG.PERIPHERAL_BUS_ADDR.H) dpbus_addr_utils (); addr_utils_interface #(CONFIG.DCACHE_ADDR.L, CONFIG.DCACHE_ADDR.H) dcache_addr_utils (); @@ -111,11 +118,14 @@ module load_store_unit data_access_shared_inputs_t shared_inputs; logic [31:0] unit_data_array [NUM_SUB_UNITS-1:0]; logic [NUM_SUB_UNITS-1:0] unit_ready; + logic [NUM_SUB_UNITS-1:0] unit_write_outstanding; + logic write_outstanding; logic [NUM_SUB_UNITS-1:0] unit_data_valid; - logic [NUM_SUB_UNITS-1:0] last_unit; + logic [NUM_SUB_UNITS_W-1:0] last_unit; logic sub_unit_ready; logic [NUM_SUB_UNITS_W-1:0] subunit_id; + ls_subunit_t padded_subunit_id; logic unit_switch; logic unit_switch_in_progress; @@ -126,6 +136,7 @@ module load_store_unit logic sub_unit_load_issue; logic sub_unit_store_issue; + logic load_response; logic load_complete; logic [31:0] virtual_address; @@ -134,10 +145,20 @@ module load_store_unit logic [31:0] aligned_load_data; logic [31:0] final_load_data; + logic tlb_request_r; + logic tlb_lq; + logic unaligned_addr; - logic load_exception_complete; logic exception_is_fp; + logic exception_is_store; + logic nontrivial_fence; logic fence_hold; + logic illegal_cbo; + logic exception_lsq_push; + logic nomatch_fault; + logic late_exception; + + id_t exception_id; typedef struct packed{ logic is_signed; @@ -166,14 +187,19 @@ module load_store_unit assign unit_needed = instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE} | (CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}) | - (CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, 
DP_FSD}); + (CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD}) | + (CONFIG.INCLUDE_AMO & instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_LR, AMO_SC}); always_comb begin uses_rs = '0; uses_rs[RS1] = instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW} | (CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}) | - (CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD}); - uses_rs[RS2] = CONFIG.INCLUDE_FORWARDING_TO_STORES ? 0 : instruction inside {SB, SH, SW}; - uses_rd = instruction inside {LB, LH, LW, LBU, LHU}; + (CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD}) | + (CONFIG.INCLUDE_AMO & instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_LR, AMO_SC}); + if (CONFIG.INCLUDE_AMO) + uses_rs[RS2] = instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_SC}; + if (~CONFIG.INCLUDE_FORWARDING_TO_STORES) + uses_rs[RS2] |= instruction inside {SB, SH, SW}; + uses_rd = instruction inside {LB, LH, LW, LBU, LHU} | (CONFIG.INCLUDE_AMO & instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_LR, AMO_SC}); fp_uses_rs = '0; fp_uses_rs[RS2] = ~CONFIG.INCLUDE_FORWARDING_TO_STORES & CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FSW, DP_FSD}; fp_uses_rd = CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, DP_FLD}; @@ -186,8 +212,13 @@ module load_store_unit logic is_store; logic is_fence; logic is_cbo; + cbo_t cbo_type; logic is_fpu; logic is_double; + logic nontrivial_fence; + logic is_amo; + amo_t amo_type; + logic rd_zero; logic [11:0] offset; } ls_attr_t; ls_attr_t decode_attr; @@ -198,17 +229,55 @@ module load_store_unit assign load_offset = instruction[31:20]; assign store_offset = {instruction[31:25], instruction[11:7]}; + //Only a reduced subset of 
possible fences require stalling, because of the following guarantees: + //The load queue does not reorder loads + //The store queue does not reorder stores + //Earlier loads are always selected before later stores + //The data cache and local memory are sequentially consistent (no reordering) + //All peripheral busses are sequentially consistent across request types + always_comb begin + if (NUM_SUB_UNITS == 3) + nontrivial_fence = ( + (instruction[27] & (instruction[22] | instruction[20])) | //Peripheral read before any write + (instruction[26] & (instruction[23] | |instruction[21:20])) | //Peripheral write before anything other than a peripheral write + (instruction[25] & instruction[22]) | //Regular read before peripheral write + (instruction[24]) //Regular write before anything + ); + else if (NUM_SUB_UNITS == 2 & ~CONFIG.INCLUDE_PERIPHERAL_BUS) + nontrivial_fence = instruction[24] & |instruction[21:20]; //Regular write before any regular + else if (NUM_SUB_UNITS == 2) + nontrivial_fence = ( + (instruction[27] & (instruction[22] | instruction[20])) | //Peripheral read before any write + (instruction[26] & (instruction[23] | |instruction[21:20])) | //Peripheral write before anything other than a peripheral write + (instruction[25] & instruction[22]) | //Memory read before peripheral write + (instruction[24] & |instruction[23:21]) //Memory write before anything other than a memory write + ); + else if (NUM_SUB_UNITS == 1 & ~CONFIG.INCLUDE_PERIPHERAL_BUS) + nontrivial_fence = instruction[24] & instruction[21]; //Memory write before memory read + else if (NUM_SUB_UNITS == 1 & CONFIG.INCLUDE_PERIPHERAL_BUS) + nontrivial_fence = ( + (instruction[27] & instruction[22]) | //Peripheral read before peripheral write + (instruction[26] & instruction[23]) //Peripheral write before peripheral read + ); + else //0 subunits?? 
+ nontrivial_fence = 0; + end assign decode_attr = '{ - is_load : instruction inside {LB, LH, LW, LBU, LHU} | CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, DP_FLD}, + is_load : ~instruction.upper_opcode[5] & ~instruction.upper_opcode[3], is_store : instruction inside {SB, SH, SW} | CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FSW, DP_FSD}, - is_fence : instruction inside {FENCE}, + is_fence : ~instruction.fn3[1] & instruction.upper_opcode[3], + nontrivial_fence : nontrivial_fence, is_cbo : CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}, - is_fpu : CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD}, - is_double : CONFIG.INCLUDE_UNIT.FPU & instruction inside {DP_FLD, DP_FSD}, - offset : instruction[5] ? store_offset : ((CONFIG.INCLUDE_CBO & instruction[2]) ? '0 : load_offset) + cbo_type : cbo_t'(instruction[21:20]), + is_fpu : CONFIG.INCLUDE_UNIT.FPU & instruction.upper_opcode[3:2] == 2'b01, + is_double : CONFIG.INCLUDE_UNIT.FPU & instruction.fn3[1:0] == 2'b11, + is_amo : CONFIG.INCLUDE_AMO & instruction.upper_opcode[3] & instruction.upper_opcode[5], + amo_type : amo_t'(instruction[31:27]), + rd_zero : ~|instruction.rd_addr, + offset : (CONFIG.INCLUDE_CBO | CONFIG.INCLUDE_AMO) & instruction[3] ? '0 : (instruction[5] ? 
store_offset : load_offset) }; - assign decode_is_store = decode_attr.is_store | decode_attr.is_cbo; + assign decode_is_store = decode_attr.is_store | decode_attr.is_cbo; //Must be exact always_ff @(posedge clk) begin if (issue_stage_ready) @@ -238,8 +307,36 @@ module load_store_unit ); //////////////////////////////////////////////////// - //Alignment Exception - generate if (CONFIG.INCLUDE_M_MODE) begin : gen_ls_exceptions + //CSR Permissions + //Can impact fences, atomic instructions, and CBO + logic fiom; + logic fiom_amo_hold; + generate if (CONFIG.MODES inside {MU, MSU}) begin : gen_csr_env + //Fence on IO implies memory; force all fences to be nontrivial for simplicity + always_comb begin + if (CONFIG.MODES == MU) + fiom = current_privilege == USER_PRIVILEGE & menvcfg.fiom; + else + fiom = (current_privilege != MACHINE_PRIVILEGE & menvcfg.fiom) | (current_privilege == USER_PRIVILEGE & senvcfg.fiom); + end + + //AMO instructions AQ-RL consider all memory regions; force write drain for simplicity + logic fiom_amo_hold_r; + logic set_fiom_amo_hold; + assign set_fiom_amo_hold = lsq.load_valid & shared_inputs.amo & fiom & write_outstanding; + assign fiom_amo_hold = set_fiom_amo_hold | fiom_amo_hold_r; + + always_ff @(posedge clk) begin + if (rst | ~write_outstanding) + fiom_amo_hold_r <= 0; + else + fiom_amo_hold_r <= fiom_amo_hold_r | set_fiom_amo_hold; + end + end endgenerate + + //////////////////////////////////////////////////// + //Exceptions + generate if (CONFIG.MODES != BARE) begin : gen_ls_exceptions logic new_exception; always_comb begin if (issue_stage.fn3 == LS_H_fn3 | issue_stage.fn3 == L_HU_fn3) @@ -254,53 +351,103 @@ module load_store_unit unaligned_addr = 0; end - assign new_exception = unaligned_addr & issue.new_request & ~issue_attr.is_fence; + logic menv_illegal; + logic senv_illegal; + assign menv_illegal = CONFIG.INCLUDE_CBO & (issue_attr.is_cbo & issue_attr.cbo_type == INVAL ? 
menvcfg.cbie == 2'b00 : ~menvcfg.cbcfe); + assign senv_illegal = CONFIG.INCLUDE_CBO & (issue_attr.is_cbo & issue_attr.cbo_type == INVAL ? senvcfg.cbie == 2'b00 : ~senvcfg.cbcfe); + assign illegal_cbo = CONFIG.MODES == MU ? current_privilege == USER_PRIVILEGE & menv_illegal : (current_privilege != MACHINE_PRIVILEGE & menv_illegal) | (current_privilege == USER_PRIVILEGE & senv_illegal); + + assign nomatch_fault = tlb.done & ~|sub_unit_address_match; + assign late_exception = tlb.is_fault | nomatch_fault; + + //Hold writeback exceptions until they are ready to retire + logic rd_zero_r; + logic delay_exception; + logic delayed_exception; + assign delay_exception = ( + (issue.new_request & unaligned_addr & (issue_attr.is_load | issue_attr.is_amo) & issue.id != retire_id & ~issue_attr.rd_zero) | + (late_exception & tlb_lq & exception_id != retire_id & ~rd_zero_r) + ); always_ff @(posedge clk) begin if (rst) - exception.valid <= 0; - else - exception.valid <= (exception.valid & ~exception.ack) | new_exception; + delayed_exception <= 0; + else if (delay_exception) + delayed_exception <= 1; + else if (new_exception) + delayed_exception <= 0; end + assign new_exception = ( + (issue.new_request & ((unaligned_addr & issue_attr.is_store) | illegal_cbo)) | + (issue.new_request & unaligned_addr & (issue_attr.is_load | issue_attr.is_amo) & (issue.id == retire_id | issue_attr.rd_zero)) | + (late_exception & ~tlb_lq) | + (late_exception & tlb_lq & (exception_id == retire_id | rd_zero_r)) | + (delayed_exception & exception_id == retire_id) + ); + always_ff @(posedge clk) begin if (rst) - exception_is_fp <= 0; - else if (new_exception) - exception_is_fp <= CONFIG.INCLUDE_UNIT.FPU & issue_attr.is_fpu; + exception.valid <= 0; + else + exception.valid <= new_exception; end + logic is_load; + logic is_load_r; + assign is_load = issue_attr.is_load & ~(issue_attr.is_amo & issue_attr.amo_type != AMO_LR_FN5); + always_ff @(posedge clk) begin - if (new_exception & ~exception.valid) begin - 
exception.code <= issue_attr.is_store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED; - exception.tval <= virtual_address; - exception.id <= issue.id; + exception_lsq_push <= issue.new_request & ((unaligned_addr & ~issue_attr.is_fence & ~issue_attr.is_cbo) | illegal_cbo); + if (issue.new_request) begin + rd_zero_r <= issue_attr.rd_zero; + exception_is_fp <= CONFIG.INCLUDE_UNIT.FPU & issue_attr.is_fpu; + is_load_r <= is_load; + if (illegal_cbo) begin + exception.code <= ILLEGAL_INST; + exception.tval <= issue_stage.instruction; + end else begin + exception.code <= is_load ? LOAD_ADDR_MISSALIGNED : STORE_AMO_ADDR_MISSALIGNED; + exception.tval <= virtual_address; + end + exception_id <= issue.id; end + else if (tlb.is_fault) + exception.code <= is_load_r ? LOAD_PAGE_FAULT : STORE_OR_AMO_PAGE_FAULT; + else if (nomatch_fault) + exception.code <= is_load_r ? LOAD_FAULT : STORE_AMO_FAULT; end + assign exception.possible = (tlb_request_r & (~tlb.done | ~|sub_unit_address_match)) | exception.valid | delayed_exception; //Must suppress issue for issue-time exceptions too + assign exception.pc = issue_stage.pc_r; + assign exception.discard = tlb_lq & ~rd_zero_r; - always_ff @(posedge clk) begin - if (rst) - load_exception_complete <= 0; - else - load_exception_complete <= exception.valid & exception.ack & (exception.code == LOAD_ADDR_MISSALIGNED); - end + assign exception_is_store = ~tlb_lq; end endgenerate //////////////////////////////////////////////////// //Load-Store status assign load_store_status = '{ - sq_empty : lsq.sq_empty, - no_released_stores_pending : lsq.no_released_stores_pending, - idle : lsq.empty & (~load_attributes.valid) & (&unit_ready) + outstanding_store : ~lsq.sq_empty | write_outstanding, + idle : lsq.empty & (~load_attributes.valid) & (&unit_ready) & (~write_outstanding) }; //////////////////////////////////////////////////// - //TLB interface + //Address calculation assign virtual_address = rf[RS1] + 32'(signed'(issue_attr.offset)); + 
//////////////////////////////////////////////////// + //TLB interface + always_ff @(posedge clk) begin + if (rst) + tlb_request_r <= 0; + else if (tlb.new_request) + tlb_request_r <= 1; + else if (tlb.done | tlb.is_fault) + tlb_request_r <= 0; + end + + assign tlb.rnw = issue_attr.is_load | (issue_attr.is_amo & issue_attr.amo_type == AMO_LR_FN5) | issue_attr.is_cbo; assign tlb.virtual_address = virtual_address; - assign tlb.new_request = tlb_on & issue.new_request; - assign tlb.execute = 0; - assign tlb.rnw = issue_attr.is_load & ~issue_attr.is_store; + assign tlb.new_request = issue.new_request & ~issue_attr.is_fence & (~unaligned_addr | issue_attr.is_cbo) & ~illegal_cbo; //////////////////////////////////////////////////// //Byte enable generation @@ -318,18 +465,22 @@ module load_store_unit end default : be = '1; endcase + if (issue_attr.is_cbo) //Treat CBOM as writes that don't do anything + be = '0; end //////////////////////////////////////////////////// //Load Store Queue assign lsq.data_in = '{ - addr : tlb_on ? 
tlb.physical_address : virtual_address, + offset : virtual_address[11:0], fn3 : issue_stage.fn3, be : be, data : rf[RS2], - load : issue_attr.is_load, + load : issue_attr.is_load | issue_attr.is_amo, store : issue_attr.is_store, cache_op : issue_attr.is_cbo, + amo : issue_attr.is_amo, + amo_type : issue_attr.amo_type, id : issue.id, id_needed : rd_attributes.id, fp : issue_attr.is_fpu, @@ -338,7 +489,7 @@ module load_store_unit }; assign lsq.potential_push = issue.possible_issue; - assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~issue_attr.is_fence; + assign lsq.push = issue.new_request & ~issue_attr.is_fence; load_store_queue # (.CONFIG(CONFIG)) lsq_block ( .clk (clk), @@ -355,48 +506,67 @@ module load_store_unit assign lsq.load_pop = sub_unit_load_issue; assign lsq.store_pop = sub_unit_store_issue; + //Physical address passed separately + assign lsq.addr_push = tlb.done | tlb.is_fault | exception_lsq_push; + assign lsq.addr_data_in = '{ + addr : tlb.physical_address[31:12], + rnw : tlb_lq, + discard : late_exception | exception_lsq_push, + subunit : padded_subunit_id + }; + + always_ff @(posedge clk) begin + if (issue.new_request) + tlb_lq <= ~issue_attr.is_store & ~issue_attr.is_cbo; + end + //////////////////////////////////////////////////// //Unit tracking always_ff @ (posedge clk) begin if (load_attributes.push) - last_unit <= sub_unit_address_match; + last_unit <= subunit_id; end //When switching units, ensure no outstanding loads so that there can be no timing collisions with results - assign unit_switch = lsq.load_valid & (sub_unit_address_match != last_unit) & load_attributes.valid; + assign unit_switch = lsq.load_valid & (subunit_id != last_unit) & load_attributes.valid; always_ff @ (posedge clk) begin unit_switch_in_progress <= (unit_switch_in_progress | unit_switch) & ~load_attributes.valid; end - assign unit_switch_hold = unit_switch | unit_switch_in_progress; + assign unit_switch_hold = unit_switch | 
unit_switch_in_progress | fiom_amo_hold; //////////////////////////////////////////////////// //Primary Control Signals assign sel_load = lsq.load_valid; assign sub_unit_ready = unit_ready[subunit_id] & (~unit_switch_hold); - assign load_complete = |unit_data_valid; + assign load_response = |unit_data_valid; + assign load_complete = load_response & (~exception.valid | exception_is_store); - assign issue.ready = (~tlb_on | tlb.ready) & (~lsq.full) & (~fence_hold) & (~exception.valid); + //TLB status and exceptions can be ignored because they will prevent instructions from issuing + assign issue.ready = ~lsq.full & ~fence_hold; - assign sub_unit_load_issue = sel_load & lsq.load_valid & sub_unit_ready & sub_unit_address_match[subunit_id]; - assign sub_unit_store_issue = (lsq.store_valid & ~sel_load) & sub_unit_ready & sub_unit_address_match[subunit_id]; + assign sub_unit_load_issue = sel_load & lsq.load_valid & sub_unit_ready; + assign sub_unit_store_issue = (lsq.store_valid & ~sel_load) & sub_unit_ready; assign sub_unit_issue = sub_unit_load_issue | sub_unit_store_issue; + assign write_outstanding = |unit_write_outstanding; + always_ff @ (posedge clk) begin if (rst) fence_hold <= 0; else - fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & issue_attr.is_fence); + fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & issue_attr.is_fence & (issue_attr.nontrivial_fence | fiom)); end //////////////////////////////////////////////////// //Load attributes FIFO logic [1:0] final_mux_sel; + assign subunit_id = shared_inputs.subunit[NUM_SUB_UNITS_W-1:0]; one_hot_to_integer #(NUM_SUB_UNITS) sub_unit_select ( .one_hot (sub_unit_address_match), - .int_out (subunit_id) + .int_out (padded_subunit_id[NUM_SUB_UNITS_W-1:0]) ); always_comb begin @@ -431,7 +601,7 @@ module load_store_unit //////////////////////////////////////////////////// //Unit Instantiation generate for (genvar i=0; i < NUM_SUB_UNITS; i++) begin : 
gen_load_store_sources - assign sub_unit[i].new_request = sub_unit_issue & sub_unit_address_match[i]; + assign sub_unit[i].new_request = sub_unit_issue & subunit_id == i; assign sub_unit[i].addr = shared_inputs.addr; assign sub_unit[i].re = shared_inputs.load; assign sub_unit[i].we = shared_inputs.store; @@ -445,10 +615,14 @@ module load_store_unit endgenerate generate if (CONFIG.INCLUDE_DLOCAL_MEM) begin : gen_ls_local_mem - assign sub_unit_address_match[LOCAL_MEM_ID] = dlocal_mem_addr_utils.address_range_check(shared_inputs.addr); + assign sub_unit_address_match[LOCAL_MEM_ID] = dlocal_mem_addr_utils.address_range_check(tlb.physical_address); local_mem_sub_unit d_local_mem ( .clk (clk), .rst (rst), + .write_outstanding (unit_write_outstanding[LOCAL_MEM_ID]), + .amo (shared_inputs.amo), + .amo_type (shared_inputs.amo_type), + .amo_unit (amo_if[LOCAL_MEM_ID]), .unit (sub_unit[LOCAL_MEM_ID]), .local_mem (data_bram) ); @@ -456,27 +630,38 @@ module load_store_unit endgenerate generate if (CONFIG.INCLUDE_PERIPHERAL_BUS) begin : gen_ls_pbus - assign sub_unit_address_match[BUS_ID] = dpbus_addr_utils.address_range_check(shared_inputs.addr); + assign sub_unit_address_match[BUS_ID] = dpbus_addr_utils.address_range_check(tlb.physical_address); if(CONFIG.PERIPHERAL_BUS_TYPE == AXI_BUS) axi_master axi_bus ( .clk (clk), .rst (rst), + .write_outstanding (unit_write_outstanding[BUS_ID]), .m_axi (m_axi), - .size ({1'b0,shared_inputs.fn3[1:0]}), + .amo (shared_inputs.amo), + .amo_type (shared_inputs.amo_type), + .amo_unit (amo_if[BUS_ID]), .ls (sub_unit[BUS_ID]) ); //Lower two bits of fn3 match AXI specification for request size (byte/halfword/word) else if (CONFIG.PERIPHERAL_BUS_TYPE == WISHBONE_BUS) - wishbone_master wishbone_bus ( + wishbone_master #(.LR_WAIT(CONFIG.AMO_UNIT.LR_WAIT), .INCLUDE_AMO(CONFIG.INCLUDE_AMO)) wishbone_bus ( .clk (clk), .rst (rst), + .write_outstanding (unit_write_outstanding[BUS_ID]), .wishbone (dwishbone), + .amo (shared_inputs.amo), + .amo_type 
(shared_inputs.amo_type), + .amo_unit (amo_if[BUS_ID]), .ls (sub_unit[BUS_ID]) ); else if (CONFIG.PERIPHERAL_BUS_TYPE == AVALON_BUS) begin - avalon_master avalon_bus ( + avalon_master #(.LR_WAIT(CONFIG.AMO_UNIT.LR_WAIT), .INCLUDE_AMO(CONFIG.INCLUDE_AMO)) avalon_bus ( .clk (clk), .rst (rst), - .m_avalon (m_avalon), + .write_outstanding (unit_write_outstanding[BUS_ID]), + .m_avalon (m_avalon), + .amo (shared_inputs.amo), + .amo_type (shared_inputs.amo_type), + .amo_unit (amo_if[BUS_ID]), .ls (sub_unit[BUS_ID]) ); end @@ -484,46 +669,39 @@ module load_store_unit endgenerate generate if (CONFIG.INCLUDE_DCACHE) begin : gen_ls_dcache - logic load_ready; - logic store_ready; logic uncacheable_load; logic uncacheable_store; - logic dcache_load_request; - logic dcache_store_request; - assign sub_unit_address_match[DCACHE_ID] = dcache_addr_utils.address_range_check(shared_inputs.addr); + assign sub_unit_address_match[DCACHE_ID] = dcache_addr_utils.address_range_check(tlb.physical_address); assign uncacheable_load = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr); assign uncacheable_store = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr); - assign dcache_load_request = sub_unit_load_issue & sub_unit_address_match[DCACHE_ID]; - assign dcache_store_request = sub_unit_store_issue & sub_unit_address_match[DCACHE_ID]; - - dcache # (.CONFIG(CONFIG)) - data_cache ( - .clk (clk), - .rst (rst), - .dcache_on (dcache_on), - .l1_request (l1_request), - .l1_response (l1_response), - .sc_complete (sc_complete), - .sc_success (sc_success), - .clear_reservation (clear_reservation), - .amo (), - .uncacheable_load (uncacheable_load), - .uncacheable_store (uncacheable_store), - .is_load (sel_load), - .load_ready (load_ready), - .store_ready (store_ready), - .load_request (dcache_load_request), - .store_request (dcache_store_request), - .ls_load (lsq.load_data_out), - .ls_store (lsq.store_data_out), - .ls 
(sub_unit[DCACHE_ID]) - ); + dcache #(.CONFIG(CONFIG)) data_cache ( + .l1_request(l1_request), + .l1_response(l1_response), + .write_outstanding(unit_write_outstanding[DCACHE_ID]), + .amo(shared_inputs.amo), + .amo_type(shared_inputs.amo_type), + .amo_unit(amo_if[DCACHE_ID]), + .uncacheable(uncacheable_load | uncacheable_store), + .cbo(shared_inputs.cache_op), + .ls(sub_unit[DCACHE_ID]), + .load_peek(lsq.load_valid), + .load_addr_peek(lsq.load_data_out.addr), + .*); end endgenerate + generate if (CONFIG.INCLUDE_AMO) begin : gen_amo + amo_unit #( + .NUM_UNITS(NUM_SUB_UNITS), + .RESERVATION_WORDS(CONFIG.AMO_UNIT.RESERVATION_WORDS) + ) amo_inst ( + .agents(amo_if), + .*); + end endgenerate + //////////////////////////////////////////////////// //Output Muxing logic sign_bit_data [4]; @@ -581,13 +759,12 @@ module load_store_unit //////////////////////////////////////////////////// //Output bank assign wb.rd = final_load_data; - assign wb.done = (load_complete & (~CONFIG.INCLUDE_UNIT.FPU | wb_attr.fp_op == INT_DONE)) | (load_exception_complete & ~exception_is_fp); - //TODO: exceptions seemingly clobber load data if it appears on the same cycle - assign wb.id = load_exception_complete ? exception.id : wb_attr.id; + assign wb.done = (load_complete & (~CONFIG.INCLUDE_UNIT.FPU | wb_attr.fp_op == INT_DONE)) | (exception.valid & ~exception_is_fp & ~exception_is_store); + assign wb.id = exception.valid & ~exception_is_store ? exception_id : wb_attr.id; assign fp_wb.rd = fp_result; - assign fp_wb.done = (load_complete & (wb_attr.fp_op == SINGLE_DONE | wb_attr.fp_op == DOUBLE_DONE)) | (load_exception_complete & exception_is_fp); - assign fp_wb.id = load_exception_complete ? exception.id : wb_attr.id; + assign fp_wb.done = (load_complete & (wb_attr.fp_op == SINGLE_DONE | wb_attr.fp_op == DOUBLE_DONE)) | (exception.valid & exception_is_fp & ~exception_is_store); + assign fp_wb.id = exception.valid & ~exception_is_store ? 
exception_id : wb_attr.id; //////////////////////////////////////////////////// //End of Implementation diff --git a/core/execution_units/load_store_unit/store_queue.sv b/core/execution_units/load_store_unit/store_queue.sv index f90f9ef8..94605362 100644 --- a/core/execution_units/load_store_unit/store_queue.sv +++ b/core/execution_units/load_store_unit/store_queue.sv @@ -39,6 +39,7 @@ module store_queue //Address hash (shared by loads and stores) input addr_hash_t addr_hash, + //hash check on adding a load to the queue output logic [$clog2(CONFIG.SQ_DEPTH)-1:0] sq_index, output logic [$clog2(CONFIG.SQ_DEPTH)-1:0] sq_oldest, @@ -73,6 +74,8 @@ module store_queue logic [CONFIG.SQ_DEPTH-1:0] valid; logic [CONFIG.SQ_DEPTH-1:0] valid_next; addr_hash_t [CONFIG.SQ_DEPTH-1:0] hashes; + logic [CONFIG.SQ_DEPTH-1:0] ids_valid; + id_t [CONFIG.SQ_DEPTH-1:0] ids; //LUTRAM-based memory blocks sq_entry_t output_entry; @@ -131,7 +134,7 @@ module store_queue .raddr(sq_oldest_next), .ram_write(sq.push), .new_ram_data('{ - addr : sq.data_in.addr, + offset : sq.data_in.offset, be : sq.data_in.be, cache_op : sq.data_in.cache_op, data : '0, @@ -151,22 +154,28 @@ module store_queue .waddr(sq.data_in.id), .raddr(store_retire.id), .ram_write(sq.push), - .new_ram_data(sq.data_in.addr[1:0]), + .new_ram_data(sq.data_in.offset[1:0]), .ram_data_out(retire_alignment) ); //Compare store addr-hashes against new load addr-hash + //ID collisions also handled to prevent overwriting store data always_comb begin potential_store_conflict = 0; - for (int i = 0; i < CONFIG.SQ_DEPTH; i++) + for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin potential_store_conflict |= {(valid[i] & ~issued_one_hot[i]), addr_hash} == {1'b1, hashes[i]}; + potential_store_conflict |= {(valid[i] & ~issued_one_hot[i] & ids_valid[i]), sq.data_in.id} == {1'b1, ids[i]}; + end end //////////////////////////////////////////////////// //Register-based storage //Address hashes always_ff @ (posedge clk) begin for (int i = 0; i < 
CONFIG.SQ_DEPTH; i++) begin - if (new_request_one_hot[i]) + if (new_request_one_hot[i]) begin hashes[i] <= addr_hash; + ids[i] <= sq.data_in.id_needed; + ids_valid[i] <= CONFIG.INCLUDE_UNIT.FPU & sq.data_in.fp ? |fp_store_forward_wb_group : |store_forward_wb_group; + end end end //////////////////////////////////////////////////// @@ -178,8 +187,6 @@ module store_queue released_count <= released_count + (LOG2_SQ_DEPTH + 1)'(store_retire.valid) - (LOG2_SQ_DEPTH + 1)'(sq.pop); end - assign sq.no_released_stores_pending = ~|released_count; - //////////////////////////////////////////////////// //Forwarding and Store Data //Forwarding is only needed from multi-cycle writeback ports @@ -308,7 +315,7 @@ module store_queue assign sq.valid = |released_count; assign sq.data_out = '{ - addr : output_entry_r.addr, + offset : output_entry_r.offset, be : output_entry_r.be, cache_op : output_entry_r.cache_op, data : sq_data_out[31:0], diff --git a/core/execution_units/mul_unit.sv b/core/execution_units/mul_unit.sv old mode 100755 new mode 100644 diff --git a/core/fetch_stage/branch_predictor.sv b/core/fetch_stage/branch_predictor.sv old mode 100755 new mode 100644 index ef2ea7bf..aa430e27 --- a/core/fetch_stage/branch_predictor.sv +++ b/core/fetch_stage/branch_predictor.sv @@ -45,7 +45,7 @@ module branch_predictor localparam longint BUS_RANGE = 64'(CONFIG.IBUS_ADDR.H) - 64'(CONFIG.IBUS_ADDR.L) + 1; function int get_memory_width(); - if(CONFIG.INCLUDE_S_MODE) + if(CONFIG.MODES == MSU) return 32; else if (CONFIG.INCLUDE_ICACHE && ( (CONFIG.INCLUDE_ILOCAL_MEM && CACHE_RANGE > SCRATCH_RANGE) || @@ -66,6 +66,7 @@ module branch_predictor localparam BTAG_W = get_memory_width() - BRANCH_ADDR_W - 2; cache_functions_interface #(.TAG_W(BTAG_W), .LINE_W(BRANCH_ADDR_W), .SUB_LINE_W(0)) addr_utils(); + typedef logic[1:0] branch_predictor_metadata_t; typedef struct packed { logic valid; logic [BTAG_W-1:0] tag; @@ -76,6 +77,7 @@ module branch_predictor } branch_table_entry_t; 
branch_table_entry_t [CONFIG.BP.WAYS-1:0] if_entry; + branch_table_entry_t muxed_entry; branch_table_entry_t ex_entry; typedef struct packed{ @@ -88,12 +90,12 @@ module branch_predictor logic branch_predictor_direction_changed; logic [31:0] new_jump_addr; logic [CONFIG.BP.WAYS-1:0][31:0] predicted_pc; + logic [31:0] muxed_predicted_pc; logic [CONFIG.BP.WAYS-1:0] tag_matches; logic [CONFIG.BP.WAYS-1:0] replacement_way; logic [CONFIG.BP.WAYS-1:0] tag_update_way; logic [CONFIG.BP.WAYS-1:0] target_update_way; - logic [$clog2(CONFIG.BP.WAYS > 1 ? CONFIG.BP.WAYS : 2)-1:0] hit_way; logic tag_match; logic use_predicted_pc; @@ -102,70 +104,67 @@ module branch_predictor ///////////////////////////////////////// genvar i; - generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR) - for (i=0; i 1) - one_hot_to_integer #(CONFIG.BP.WAYS) - hit_way_conv ( - .one_hot(tag_matches), - .int_out(hit_way) - ); - else - assign hit_way = 0; - endgenerate assign tag_match = |tag_matches; assign use_predicted_pc = CONFIG.INCLUDE_BRANCH_PREDICTOR & tag_match; //Predicted PC and whether the prediction is valid - assign bp.predicted_pc = predicted_pc[hit_way]; + assign bp.predicted_pc = muxed_predicted_pc; assign bp.use_prediction = use_predicted_pc; - assign bp.is_branch = if_entry[hit_way].is_branch; - assign bp.is_return = if_entry[hit_way].is_return; - assign bp.is_call = if_entry[hit_way].is_call; + assign bp.is_branch = muxed_entry.is_branch; + assign bp.is_return = muxed_entry.is_return; + assign bp.is_call = muxed_entry.is_call; //////////////////////////////////////////////////// //Instruction Fetch metadata @@ -184,7 +183,7 @@ module branch_predictor .raddr(br_results.id), .ram_write(bp.pc_id_assigned), .new_ram_data('{ - branch_predictor_metadata : if_entry[hit_way].metadata, + branch_predictor_metadata : muxed_entry.metadata, branch_prediction_used : use_predicted_pc, branch_predictor_update_way : tag_match ? 
tag_matches : replacement_way }), diff --git a/core/fetch_stage/fetch.sv b/core/fetch_stage/fetch.sv index 3352be1c..f18e04d2 100755 --- a/core/fetch_stage/fetch.sv +++ b/core/fetch_stage/fetch.sv @@ -36,7 +36,6 @@ module fetch input logic branch_flush, input gc_outputs_t gc, - input logic exception, //ID Support input id_t pc_id, @@ -77,6 +76,7 @@ module fetch addr_utils_interface #(CONFIG.IBUS_ADDR.L, CONFIG.IBUS_ADDR.H) ibus_addr_utils (); memory_sub_unit_interface sub_unit[NUM_SUB_UNITS-1:0](); + amo_interface unused(); logic [NUM_SUB_UNITS-1:0] sub_unit_address_match; logic [NUM_SUB_UNITS-1:0] unit_ready; @@ -89,6 +89,7 @@ module fetch typedef struct packed{ logic is_predicted_branch_or_jump; logic is_branch; + logic [31:0] early_flush_pc; logic address_valid; logic mmu_fault; logic [NUM_SUB_UNITS_W-1:0] subunit_id; @@ -102,8 +103,9 @@ module fetch logic [31:0] pc_plus_4; - logic [31:0] pc_mux [4]; - logic [1:0] pc_sel; + logic [31:0] early_flush_pc; + logic [31:0] pc_mux [5]; + logic [2:0] pc_sel; logic [31:0] next_pc; logic [31:0] pc; @@ -130,15 +132,16 @@ module fetch assign pc_plus_4 = pc + 4; - priority_encoder #(.WIDTH(4)) + priority_encoder #(.WIDTH(5)) pc_sel_encoder ( - .priority_vector ({1'b1, (bp.use_prediction & ~early_branch_flush), branch_flush, gc.pc_override}), + .priority_vector ({1'b1, bp.use_prediction, early_branch_flush, branch_flush, gc.pc_override}), .encoded_result (pc_sel) ); assign pc_mux[0] = gc.pc; assign pc_mux[1] = bp.branch_flush_pc; - assign pc_mux[2] = bp.is_return ? ras.addr : bp.predicted_pc; - assign pc_mux[3] = pc_plus_4; + assign pc_mux[2] = early_flush_pc; + assign pc_mux[3] = bp.is_return ? 
ras.addr : bp.predicted_pc; + assign pc_mux[4] = pc_plus_4; assign next_pc = pc_mux[pc_sel]; //If an exception occurs here in the fetch logic, @@ -170,15 +173,14 @@ module fetch //////////////////////////////////////////////////// //TLB assign tlb.virtual_address = pc; - assign tlb.execute = 1; - assign tlb.rnw = 0; - assign tlb.new_request = tlb.ready; + assign tlb.rnw = 1; + assign tlb.new_request = tlb.ready & pc_id_available & ~fetch_attr_fifo.full & (~exception_pending) & (~gc.fetch_hold); ////////////////////////////////////////////// //Issue Control Signals assign flush_or_rst = (rst | gc.fetch_flush | early_branch_flush); - assign new_mem_request = tlb.done & pc_id_available & ~fetch_attr_fifo.full & units_ready & (~gc.fetch_hold) & (~exception_pending); + assign new_mem_request = tlb.done & units_ready & (~gc.fetch_hold); assign pc_id_assigned = new_mem_request | tlb.is_fault; ////////////////////////////////////////////// @@ -192,6 +194,7 @@ module fetch assign fetch_attr_fifo.data_in = '{ is_predicted_branch_or_jump : bp.use_prediction, is_branch : (bp.use_prediction & bp.is_branch), + early_flush_pc : pc_plus_4, address_valid : address_valid, mmu_fault : tlb.is_fault, subunit_id : subunit_id @@ -207,19 +210,20 @@ module fetch .fifo (fetch_attr_fifo) ); assign fetch_attr = fetch_attr_fifo.data_out; + assign early_flush_pc = fetch_attr.early_flush_pc; assign inflight_count_next = inflight_count + MAX_OUTSTANDING_REQUESTS_W'(fetch_attr_fifo.push) - MAX_OUTSTANDING_REQUESTS_W'(fetch_attr_fifo.pop); always_ff @(posedge clk) begin if (rst) inflight_count <= 0; else - inflight_count <= inflight_count_next; + inflight_count <= inflight_count_next; end always_ff @(posedge clk) begin if (rst) flush_count <= 0; - else if (gc.fetch_flush) + else if (gc.fetch_flush | early_branch_flush) flush_count <= inflight_count_next; else if (|flush_count & fetch_attr_fifo.pop) flush_count <= flush_count - 1; @@ -231,7 +235,7 @@ module fetch //for any sub unit. 
That request can either be completed or aborted. //In either case, data_valid must NOT be asserted. generate for (i=0; i < NUM_SUB_UNITS; i++) begin : gen_fetch_sources - assign sub_unit[i].new_request = fetch_attr_fifo.push & sub_unit_address_match[i]; + assign sub_unit[i].new_request = fetch_attr_fifo.push & sub_unit_address_match[i] & ~tlb.is_fault; assign sub_unit[i].addr = tlb.physical_address; assign sub_unit[i].re = 1; assign sub_unit[i].we = 0; @@ -249,6 +253,10 @@ module fetch local_mem_sub_unit i_local_mem ( .clk (clk), .rst (rst), + .write_outstanding (), + .amo (1'b0), + .amo_type ('x), + .amo_unit (unused), .unit (sub_unit[LOCAL_MEM_ID]), .local_mem (instruction_bram) ); @@ -260,6 +268,10 @@ module fetch wishbone_master iwishbone_bus ( .clk (clk), .rst (rst), + .write_outstanding (), + .amo (1'b0), + .amo_type ('x), + .amo_unit (unused), .wishbone (iwishbone), .ls (sub_unit[BUS_ID]) ); @@ -267,19 +279,38 @@ module fetch endgenerate generate if (CONFIG.INCLUDE_ICACHE) begin : gen_fetch_icache + //////////////////////////////////////////////////// + //Instruction fence + //A fence first prevents any new instructions from being issued then waits for inflight fetches to complete + //The fence signal can only be delivered to the icache once it is idle + //This logic will be optimized away when instruction fences aren't enabled as gc.fetch_ifence will be constant 0 + logic ifence_pending; + logic ifence_start; + assign ifence_start = ifence_pending & ~|inflight_count_next; + + always_ff @(posedge clk) begin + if (rst) + ifence_pending <= 0; + else begin + if (gc.fetch_ifence) + ifence_pending <= 1; + else if (~|inflight_count_next) + ifence_pending <= 0; + end + end + assign sub_unit_address_match[ICACHE_ID] = icache_addr_utils.address_range_check(tlb.physical_address); icache #(.CONFIG(CONFIG)) i_cache ( .clk (clk), .rst (rst), - .gc (gc), + .ifence (ifence_start), .icache_on (icache_on), .l1_request (l1_request), .l1_response (l1_response), .fetch_sub 
(sub_unit[ICACHE_ID]) ); - end - endgenerate + end endgenerate assign units_ready = &unit_ready; assign address_valid = |sub_unit_address_match; @@ -287,25 +318,25 @@ module fetch //////////////////////////////////////////////////// //Instruction metada updates logic valid_fetch_result; - assign valid_fetch_result = CONFIG.INCLUDE_M_MODE ? (fetch_attr_fifo.valid & fetch_attr.address_valid & (~fetch_attr.mmu_fault)) : 1; + assign valid_fetch_result = CONFIG.MODES != BARE ? (fetch_attr_fifo.valid & fetch_attr.address_valid & (~fetch_attr.mmu_fault)) : 1; assign if_pc = pc; assign fetch_metadata.ok = valid_fetch_result; - assign fetch_metadata.error_code = INST_ACCESS_FAULT; + assign fetch_metadata.error_code = fetch_attr.mmu_fault ? INST_PAGE_FAULT : INST_ACCESS_FAULT; assign fetch_instruction = unit_data_array[fetch_attr.subunit_id]; - assign internal_fetch_complete = fetch_attr_fifo.valid & (fetch_attr.address_valid ? |unit_data_valid : ~valid_fetch_result);//allow instruction to propagate to decode if address is invalid + assign internal_fetch_complete = fetch_attr_fifo.valid & (~valid_fetch_result | |unit_data_valid);//allow instruction to propagate to decode if address is invalid assign fetch_complete = internal_fetch_complete & ~|flush_count; //////////////////////////////////////////////////// //Branch Predictor corruption check //Needed if instruction memory is changed after any branches have been executed - generate if (CONFIG.INCLUDE_IFENCE | CONFIG.INCLUDE_S_MODE) begin : gen_branch_corruption_check + generate if (CONFIG.INCLUDE_IFENCE | CONFIG.MODES == MSU) begin : gen_branch_corruption_check logic is_branch_or_jump; assign is_branch_or_jump = fetch_instruction[6:2] inside {JAL_T, JALR_T, BRANCH_T}; - assign early_branch_flush = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_predicted_branch_or_jump & (~is_branch_or_jump); - assign early_branch_flush_ras_adjust = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_branch & 
(~is_branch_or_jump); + assign early_branch_flush = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_predicted_branch_or_jump & (~is_branch_or_jump) & (~|flush_count); + assign early_branch_flush_ras_adjust = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_branch & (~is_branch_or_jump) & (~|flush_count); end endgenerate //////////////////////////////////////////////////// //End of Implementation diff --git a/core/fetch_stage/icache.sv b/core/fetch_stage/icache.sv old mode 100755 new mode 100644 index a81463ba..6dca27ce --- a/core/fetch_stage/icache.sv +++ b/core/fetch_stage/icache.sv @@ -33,7 +33,7 @@ module icache ( input logic clk, input logic rst, - input gc_outputs_t gc, + input logic ifence, input logic icache_on, l1_arbiter_request_interface.master l1_request, l1_arbiter_return_interface.master l1_response, @@ -46,6 +46,9 @@ module icache cache_functions_interface #(.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils(); + logic ifence_in_progress; + logic[SCONFIG.LINE_ADDR_W-1:0] ifence_counter; + logic tag_hit; logic [CONFIG.ICACHE.WAYS-1:0] tag_hit_way; @@ -59,7 +62,7 @@ module icache logic line_complete; - logic [31:0] data_out [CONFIG.ICACHE.WAYS-1:0]; + logic [CONFIG.ICACHE.WAYS-1:0][31:0] data_out; logic linefill_in_progress; logic request_in_progress; @@ -94,6 +97,29 @@ module icache .rst (rst), .fifo (input_fifo) ); + + //////////////////////////////////////////////////// + //Instruction fence + generate if (CONFIG.INCLUDE_IFENCE) begin : gen_ifence + always_ff @(posedge clk) begin + if (rst) begin + ifence_counter <= '0; + ifence_in_progress <= 0; + end else begin + if (ifence) + ifence_in_progress <= 1; + else if (&ifence_counter) + ifence_in_progress <= 0; + if (ifence_in_progress) + ifence_counter <= ifence_counter+1; + end + end + + end else begin : gen_no_ifence + assign ifence_in_progress = 0; + assign ifence_counter = '0; + end endgenerate + 
//////////////////////////////////////////////////// //Ready determination always_ff @ (posedge clk) begin @@ -103,7 +129,7 @@ module icache request_in_progress <= (request_in_progress & ~fetch_sub.data_valid) | new_request; end - assign fetch_sub.ready = ~input_fifo.full; + assign fetch_sub.ready = ~input_fifo.full & ~ifence_in_progress; //////////////////////////////////////////////////// //General Control Logic @@ -176,6 +202,8 @@ module icache icache_tag_banks ( .clk(clk), .rst(rst), //clears the read_hit_allowed flag + .ifence(ifence_in_progress), + .ifence_addr(ifence_counter), .stage1_line_addr(addr_utils.getTagLineAddr(new_request_addr)), .stage2_line_addr(addr_utils.getTagLineAddr(second_cycle_addr)), .stage2_tag(addr_utils.getTag(second_cycle_addr)), @@ -188,22 +216,20 @@ module icache //////////////////////////////////////////////////// //Data Banks - genvar i; - generate for (i=0; i < CONFIG.ICACHE.WAYS; i++) begin : idata_bank_gen - dual_port_bram #(.WIDTH(32), .LINES(CONFIG.ICACHE.LINES*CONFIG.ICACHE.LINE_W)) idata_bank ( - .clk(clk), - .en_a(new_request), - .wen_a(0), - .addr_a(addr_utils.getDataLineAddr(new_request_addr)), - .data_in_a('0), - .data_out_a(data_out[i]), - .en_b(1), - .wen_b(tag_update_way[i] & l1_response.data_valid), - .addr_b(addr_utils.getDataLineAddr({second_cycle_addr[31:SCONFIG.SUB_LINE_ADDR_W+2], word_count, 2'b0})), - .data_in_b(l1_response.data), - .data_out_b() - ); - end endgenerate + sdp_ram #( + .ADDR_WIDTH(SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W), + .NUM_COL(CONFIG.ICACHE.WAYS), + .COL_WIDTH(32), + .PIPELINE_DEPTH(0) + ) idata_bank ( + .a_en(l1_response.data_valid), + .a_wbe(tag_update_way), + .a_wdata({CONFIG.ICACHE.WAYS{l1_response.data}}), + .a_addr(addr_utils.getDataLineAddr({second_cycle_addr[31:SCONFIG.SUB_LINE_ADDR_W+2], word_count, 2'b0})), + .b_en(new_request), + .b_addr(addr_utils.getDataLineAddr(new_request_addr)), + .b_rdata(data_out), + .*); //////////////////////////////////////////////////// //Miss 
data path diff --git a/core/fetch_stage/icache_tag_banks.sv b/core/fetch_stage/icache_tag_banks.sv old mode 100755 new mode 100644 index f350e916..2ab4b9bc --- a/core/fetch_stage/icache_tag_banks.sv +++ b/core/fetch_stage/icache_tag_banks.sv @@ -33,6 +33,8 @@ module itag_banks ( input logic clk, input logic rst, + input logic ifence, + input logic[SCONFIG.LINE_ADDR_W-1:0] ifence_addr, input logic[SCONFIG.LINE_ADDR_W-1:0] stage1_line_addr, input logic[SCONFIG.LINE_ADDR_W-1:0] stage2_line_addr, @@ -49,7 +51,7 @@ module itag_banks //Valid + tag typedef logic [SCONFIG.TAG_W : 0] itag_entry_t; - itag_entry_t tag_line[CONFIG.ICACHE.WAYS-1:0]; + itag_entry_t[CONFIG.ICACHE.WAYS-1:0] tag_line; logic hit_allowed; @@ -60,25 +62,25 @@ module itag_banks hit_allowed <= stage1_adv; end - genvar i; - generate - for (i=0; i < CONFIG.ICACHE.WAYS; i++) begin : tag_bank_gen - dual_port_bram #(.WIDTH(SCONFIG.TAG_W+1), .LINES(CONFIG.ICACHE.LINES)) itag_bank (.*, - .clk(clk), - .en_a(stage1_adv), - .wen_a('0), - .addr_a(stage1_line_addr), - .data_in_a('0), - .data_out_a(tag_line[i]), - .en_b(update), - .wen_b(update_way[i]), - .addr_b(stage2_line_addr), - .data_in_b({1'b1, stage2_tag}), - .data_out_b() - ); - assign tag_hit_way[i] = ({hit_allowed, 1'b1, stage2_tag} == {1'b1, tag_line[i]}); - end - endgenerate + sdp_ram_padded #( + .ADDR_WIDTH(SCONFIG.LINE_ADDR_W), + .NUM_COL(CONFIG.ICACHE.WAYS), + .COL_WIDTH(SCONFIG.TAG_W+1), + .PIPELINE_DEPTH(0) + ) itag_bank ( + .a_en(update | ifence), + .a_wbe(update_way | {CONFIG.ICACHE.WAYS{ifence}}), + .a_wdata({CONFIG.ICACHE.WAYS{~ifence, stage2_tag}}), + .a_addr(ifence ? 
ifence_addr : stage2_line_addr), + .b_en(stage1_adv), + .b_addr(stage1_line_addr), + .b_rdata(tag_line), + .*); + + always_comb begin + for (int i = 0; i < CONFIG.ICACHE.WAYS; i++) + tag_hit_way[i] = ({hit_allowed, 1'b1, stage2_tag} == {1'b1, tag_line[i]}); + end assign tag_hit = |tag_hit_way; diff --git a/core/fetch_stage/ras.sv b/core/fetch_stage/ras.sv old mode 100755 new mode 100644 index d1218ecf..3fb053e3 --- a/core/fetch_stage/ras.sv +++ b/core/fetch_stage/ras.sv @@ -77,4 +77,4 @@ module ras read_index <= new_index; end -endmodule \ No newline at end of file +endmodule diff --git a/core/instruction_metadata_and_id_management.sv b/core/instruction_metadata_and_id_management.sv index 183920f8..2d4dfcd2 100644 --- a/core/instruction_metadata_and_id_management.sv +++ b/core/instruction_metadata_and_id_management.sv @@ -53,7 +53,6 @@ module instruction_metadata_and_id_management input logic decode_uses_rd, input logic fp_decode_uses_rd, input rs_addr_t decode_rd_addr, - input exception_sources_t decode_exception_unit, input logic decode_is_store, //renamer input phys_addr_t decode_phys_rd_addr, @@ -76,15 +75,11 @@ module instruction_metadata_and_id_management output retire_packet_t fp_wb_retire, output retire_packet_t store_retire, output id_t retire_ids [RETIRE_PORTS], - output id_t retire_ids_next [RETIRE_PORTS], output logic retire_port_valid [RETIRE_PORTS], output logic [LOG2_RETIRE_PORTS : 0] retire_count, //CSR - output logic [LOG2_MAX_IDS:0] post_issue_count, - //Exception - output logic [31:0] oldest_pc, - output logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit + output logic [LOG2_MAX_IDS:0] post_issue_count ); ////////////////////////////////////////// localparam NUM_WB_GROUPS = CONFIG.NUM_WB_GROUPS + 32'(CONFIG.INCLUDE_UNIT.FPU) + 32'(CONFIG.INCLUDE_UNIT.FPU); @@ -115,6 +110,7 @@ module instruction_metadata_and_id_management retire_packet_t fp_wb_retire_next; retire_packet_t store_retire_next; + id_t retire_ids_next [RETIRE_PORTS]; 
logic retire_port_valid_next [RETIRE_PORTS]; logic [LOG2_RETIRE_PORTS : 0] retire_count_next; //////////////////////////////////////////////////// @@ -133,18 +129,6 @@ module instruction_metadata_and_id_management .ram_data_out(decode_pc) ); - generate if (CONFIG.INCLUDE_M_MODE) begin : gen_pc_id_exception_support - lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS)) - pc_table_exception ( - .clk(clk), - .waddr(pc_id), - .raddr(retire_ids_next[0]), - .ram_write(pc_id_assigned), - .new_ram_data(if_pc), - .ram_data_out(oldest_pc) - ); - end endgenerate - //////////////////////////////////////////////////// //Instruction table lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS)) @@ -220,20 +204,6 @@ module instruction_metadata_and_id_management .ram_data_out(wb_phys_addrs) ); - //////////////////////////////////////////////////// - //Exception unit table - generate if (CONFIG.INCLUDE_M_MODE) begin : gen_id_exception_support - lutram_1w_1r #(.DATA_TYPE(logic[$bits(exception_sources_t)-1:0]), .DEPTH(MAX_IDS)) - exception_unit_table ( - .clk(clk), - .waddr(decode_id), - .raddr(retire_ids_next[0]), - .ram_write(decode_advance), - .new_ram_data(decode_exception_unit), - .ram_data_out(current_exception_unit) - ); - end endgenerate - //////////////////////////////////////////////////// //ID Management @@ -270,10 +240,8 @@ module instruction_metadata_and_id_management retire_ids_next[i] <= retire_ids_next[i] + LOG2_MAX_IDS'(retire_count_next); end - always_ff @ (posedge clk) begin - if (~gc.retire_hold) - retire_ids[i] <= retire_ids_next[i]; - end + always_ff @ (posedge clk) + retire_ids[i] <= retire_ids_next[i]; end endgenerate //Represented as a negative value so that the MSB indicates that the decode stage is valid @@ -343,7 +311,6 @@ module instruction_metadata_and_id_management ) id_waiting_for_writeback_toggle_mem_set ( .clk (clk), - .rst (rst), .init_clear (gc.init_clear), .toggle (id_waiting_toggle), .toggle_addr (id_waiting_toggle_addr), @@ -363,13 +330,11 
@@ module instruction_metadata_and_id_management //Supports retiring up to RETIRE_PORTS instructions. The retired block of instructions must be //contiguous and must start with the first retire port. Additionally, only one register file writing //instruction is supported per cycle. - //If an exception is pending, only retire a single intrustuction per cycle. As such, the pending - //exception will have to become the oldest instruction retire_ids[0] before it can retire. logic retire_with_rd_found; logic retire_with_fp_rd_found; logic retire_with_store_found; always_comb begin - contiguous_retire = ~gc.retire_hold; + contiguous_retire = 1; retire_with_rd_found = 0; retire_with_fp_rd_found = 0; retire_with_store_found = 0; @@ -386,7 +351,7 @@ module instruction_metadata_and_id_management retire_with_rd_found |= retire_port_valid_next[i] & retire_type[i] == RD; retire_with_fp_rd_found |= retire_port_valid_next[i] & retire_type[i] == FP_RD; retire_with_store_found |= retire_port_valid_next[i] & retire_type[i] == STORE; - contiguous_retire &= retire_port_valid_next[i] & ~gc.exception_pending; + contiguous_retire &= retire_port_valid_next[i]; if (retire_port_valid_next[i] & retire_type[i] == RD) retire_with_rd_sel = LOG2_RETIRE_PORTS'(i); @@ -423,9 +388,9 @@ module instruction_metadata_and_id_management fp_wb_retire <= fp_wb_retire_next; store_retire <= store_retire_next; - retire_count <= gc.writeback_supress ? '0 : retire_count_next; + retire_count <= retire_count_next; for (int i = 0; i < RETIRE_PORTS; i++) - retire_port_valid[i] <= retire_port_valid_next[i] & ~gc.writeback_supress; + retire_port_valid[i] <= retire_port_valid_next[i]; end //////////////////////////////////////////////////// @@ -439,7 +404,7 @@ module instruction_metadata_and_id_management valid : fetched_count_neg[LOG2_MAX_IDS], pc : decode_pc, instruction : decode_instruction, - fetch_metadata : CONFIG.INCLUDE_M_MODE ? decode_fetch_metadata : ADDR_OK + fetch_metadata : CONFIG.MODES != BARE ? 
decode_fetch_metadata : ADDR_OK }; //////////////////////////////////////////////////// diff --git a/core/l1_arbiter.sv b/core/l1_arbiter.sv old mode 100755 new mode 100644 diff --git a/core/memory_sub_units/avalon_master.sv b/core/memory_sub_units/avalon_master.sv index b9cd606c..80b02d34 100644 --- a/core/memory_sub_units/avalon_master.sv +++ b/core/memory_sub_units/avalon_master.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2019 Eric Matthews, Lesley Shannon + * Copyright © 2019 Eric Matthews, Chris Keilbart, Lesley Shannon * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,75 +18,160 @@ * * Author(s): * Eric Matthews + * Chris Keilbart */ module avalon_master - import cva5_config::*; import riscv_types::*; - import cva5_types::*; + #( + parameter int unsigned LR_WAIT = 32, //The number of cycles lock is held after an LR + parameter logic INCLUDE_AMO = 1 //Required because the tools cannot fully optimize even if amo signals are tied off + ) ( input logic clk, input logic rst, + output logic write_outstanding, avalon_interface.master m_avalon, + + input logic amo, + input amo_t amo_type, + amo_interface.subunit amo_unit, memory_sub_unit_interface.responder ls ); - //implementation + //////////////////////////////////////////////////// + //Implementation + typedef enum { + READY, + REQUESTING, + REQUESTING_AMO_R, + REQUESTING_AMO_M, + REQUESTING_AMO_W, + READY_LR, + REQUESTING_SC + } state_t; + state_t current_state; - always_ff @ (posedge clk) begin - if (ls.new_request) begin - m_avalon.addr <= ls.addr; - m_avalon.byteenable <= ls.be; - m_avalon.writedata <= ls.data_in; - end - end + logic[$clog2(LR_WAIT)-1:0] lock_counter; + logic request_is_sc; + assign request_is_sc = amo & amo_type == AMO_SC_FN5; - always_ff @ (posedge clk) begin - if (rst) - ls.ready <= 1; - else if (ls.new_request) - ls.ready <= 0; - else if (~ls.ready & ~m_avalon.waitrequest) - ls.ready <= 1; - end + assign 
amo_unit.set_reservation = ls.new_request & amo & amo_type == AMO_LR_FN5; + assign amo_unit.clear_reservation = ls.new_request; + assign amo_unit.reservation = ls.addr; + assign amo_unit.rs1 = ls.data_out; + assign amo_unit.rs2 = m_avalon.writedata; - always_ff @ (posedge clk) begin - if (rst) - ls.data_valid <= 0; - else if (m_avalon.read & ~m_avalon.waitrequest) - ls.data_valid <= 1; - else - ls.data_valid <= 0; - end + always_ff @(posedge clk) begin + m_avalon.addr[1:0] <= '0; + unique case (current_state) + READY : begin //Accept any request + ls.ready <= ~ls.new_request | request_is_sc; + ls.data_out <= 32'b1; + ls.data_valid <= ls.new_request & request_is_sc; + m_avalon.addr[31:2] <= ls.addr[31:2]; + m_avalon.byteenable <= ls.be; + m_avalon.writedata <= ls.data_in; + m_avalon.read <= ls.new_request & ls.re & ~request_is_sc; + m_avalon.write <= ls.new_request & ls.we; + m_avalon.lock <= ls.new_request & amo; + write_outstanding <= ls.new_request & (ls.we | amo); + amo_unit.rmw_valid <= 0; + amo_unit.op <= amo_type; + lock_counter <= '0; + if (ls.new_request & (~amo | amo_type == AMO_LR_FN5)) + current_state <= REQUESTING; + else if (ls.new_request & amo & amo_type != AMO_SC_FN5) + current_state <= REQUESTING_AMO_R; + end + REQUESTING : begin //Wait for response + ls.ready <= ~m_avalon.waitrequest; + ls.data_out <= m_avalon.readdata; + ls.data_valid <= m_avalon.read & ~m_avalon.waitrequest; + m_avalon.read <= m_avalon.read & m_avalon.waitrequest; + m_avalon.write <= m_avalon.write & m_avalon.waitrequest; + write_outstanding <= m_avalon.write & ~m_avalon.waitrequest; + if (~m_avalon.waitrequest) + current_state <= m_avalon.lock ? 
READY_LR : READY; + end + REQUESTING_AMO_R : begin //Read for an AMO + if (INCLUDE_AMO) begin + ls.data_out <= m_avalon.readdata; + ls.data_valid <= ~m_avalon.waitrequest; + m_avalon.read <= m_avalon.waitrequest; + amo_unit.rmw_valid <= ~m_avalon.waitrequest; + if (~m_avalon.waitrequest) + current_state <= REQUESTING_AMO_M; + end + end + REQUESTING_AMO_M : begin //One cycle for computing the AMO write value + if (INCLUDE_AMO) begin + ls.data_valid <= 0; + m_avalon.writedata <= amo_unit.rd; + m_avalon.write <= 1; + amo_unit.rmw_valid <= 0; + current_state <= REQUESTING_AMO_W; + end + end + REQUESTING_AMO_W : begin //Write for an AMO + if (INCLUDE_AMO) begin + ls.ready <= ~m_avalon.waitrequest; + m_avalon.write <= m_avalon.waitrequest; + m_avalon.lock <= m_avalon.waitrequest; + write_outstanding <= m_avalon.waitrequest; + if (~m_avalon.waitrequest) + current_state <= READY; + end + end + READY_LR : begin //Lock is held; hold for LR_WAIT cycles + if (INCLUDE_AMO) begin + ls.ready <= ~ls.new_request | (request_is_sc & ~amo_unit.reservation_valid); + ls.data_out <= {31'b0, ~amo_unit.reservation_valid}; + ls.data_valid <= ls.new_request & request_is_sc; + m_avalon.addr[31:2] <= ls.addr[31:2]; + m_avalon.byteenable <= ls.be; + m_avalon.writedata <= ls.data_in; + m_avalon.read <= ls.new_request & ls.re & ~request_is_sc; + m_avalon.write <= ls.new_request & (ls.we | (request_is_sc & amo_unit.reservation_valid)); + + write_outstanding <= ls.new_request & (ls.we | amo); + amo_unit.rmw_valid <= 0; + amo_unit.op <= amo_type; - always_ff @ (posedge clk) begin - if (m_avalon.read & ~m_avalon.waitrequest) - ls.data_out <= m_avalon.readdata; - else - ls.data_out <= 0; - end + if (ls.new_request) + m_avalon.lock <= amo; + else if (32'(lock_counter) == LR_WAIT-1) + m_avalon.lock <= 0; - always_ff @ (posedge clk) begin - if (rst) - m_avalon.read <= 0; - else if (ls.new_request & ls.re) - m_avalon.read <= 1; - else if (~m_avalon.waitrequest) - m_avalon.read <= 0; - end + lock_counter 
<= lock_counter + 1; - always_ff @ (posedge clk) begin + if (ls.new_request & (~amo | amo_type == AMO_LR_FN5)) + current_state <= REQUESTING; + else if (ls.new_request & amo & amo_type != AMO_SC_FN5) + current_state <= REQUESTING_AMO_R; + else if (ls.new_request & amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid) + current_state <= REQUESTING_SC; + else if (32'(lock_counter) == LR_WAIT-1 | ls.new_request) + current_state <= READY; + end + end + REQUESTING_SC : begin //Exclusive write; the SC result was already returned from READY_LR + if (INCLUDE_AMO) begin + ls.ready <= ~m_avalon.waitrequest; + ls.data_valid <= 0; + m_avalon.write <= m_avalon.waitrequest; + m_avalon.lock <= m_avalon.waitrequest; + write_outstanding <= m_avalon.waitrequest; + if (~m_avalon.waitrequest) + current_state <= READY; //Write accepted and lock released; READY is the state that captures the next request + end + end + endcase if (rst) - m_avalon.write <= 0; - else if (ls.new_request & ls.we) - m_avalon.write <= 1; - else if (~m_avalon.waitrequest) - m_avalon.write <= 0; + current_state <= READY; end - - endmodule diff --git a/core/memory_sub_units/axi_master.sv b/core/memory_sub_units/axi_master.sv old mode 100755 new mode 100644 index c43fed0b..2618e566 --- a/core/memory_sub_units/axi_master.sv +++ b/core/memory_sub_units/axi_master.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2017 Eric Matthews, Lesley Shannon + * Copyright © 2024 Eric Matthews, Chris Keilbart, Lesley Shannon * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
@@ -18,89 +18,120 @@ * * Author(s): * Eric Matthews + * Chris Keilbart */ module axi_master - import cva5_config::*; import riscv_types::*; - import cva5_types::*; ( input logic clk, input logic rst, + output logic write_outstanding, axi_interface.master m_axi, - input logic [2:0] size, - memory_sub_unit_interface.responder ls + input logic amo, + input amo_t amo_type, + amo_interface.subunit amo_unit, + memory_sub_unit_interface.responder ls ); - logic ready; - - - //read constants - assign m_axi.arlen = 0; // 1 request - assign m_axi.arburst = 0;// burst type does not matter - assign m_axi.rready = 1; //always ready to receive data - always_ff @ (posedge clk) begin - if (ls.new_request) begin - m_axi.araddr <= ls.addr; - m_axi.arsize <= size; - m_axi.awsize <= size; - m_axi.awaddr <= ls.addr; - m_axi.wdata <= ls.data_in; - m_axi.wstrb <= ls.be; - end - end - - //write constants - assign m_axi.awlen = 0; - assign m_axi.awburst = 0; + //////////////////////////////////////////////////// + //Implementation + typedef enum { + READY, + REQUESTING_WRITE, + REQUESTING_READ, + REQUESTING_AMO_M, + WAITING_READ, + WAITING_WRITE + } state_t; + state_t current_state; + + logic request_is_invalid_sc; + assign request_is_invalid_sc = amo & amo_type == AMO_SC_FN5 & ~amo_unit.reservation_valid; + + assign amo_unit.set_reservation = ls.new_request & amo & amo_type == AMO_LR_FN5; + assign amo_unit.clear_reservation = ls.new_request; + assign amo_unit.reservation = ls.addr; + assign amo_unit.rs1 = ls.data_out; + + logic[29:0] addr; + assign m_axi.awaddr = {addr, 2'b0}; + assign m_axi.araddr = {addr, 2'b0}; + assign m_axi.awlen = '0; + assign m_axi.arlen = '0; + assign m_axi.awburst = '0; + assign m_axi.arburst = '0; + assign m_axi.awid = '0; + assign m_axi.arid = '0; + assign m_axi.rready = 1; assign m_axi.bready = 1; - set_clr_reg_with_rst #(.SET_OVER_CLR(0), .WIDTH(1), .RST_VALUE(1)) ready_m ( - .clk, .rst, - .set(m_axi.rvalid | m_axi.bvalid), - .clr(ls.new_request), - 
.result(ready) - ); - assign ls.ready = ready; - - always_ff @ (posedge clk) begin + always_ff @(posedge clk) begin + unique case (current_state) + READY : begin //Accept any request + ls.ready <= ~ls.new_request | request_is_invalid_sc; + ls.data_out <= 1; //Nonzero result reported when an SC arrives without a valid reservation + ls.data_valid <= ls.new_request & request_is_invalid_sc; + addr <= ls.addr[31:2]; //Word address; awaddr/araddr append two zero bits + m_axi.awlock <= amo & amo_type != AMO_LR_FN5; //Used in WAITING_READ to determine if it was a RMW + m_axi.awvalid <= ls.new_request & (ls.we | (amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid)); + m_axi.arlock <= amo & amo_type != AMO_SC_FN5; //Used in WAITING_WRITE to determine if it was a RMW + m_axi.arvalid <= ls.new_request & ls.re & ~(amo & amo_type == AMO_SC_FN5); + m_axi.wvalid <= ls.new_request & (ls.we | (amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid)); + m_axi.wdata <= ls.data_in; + m_axi.wstrb <= ls.be; + write_outstanding <= ls.new_request & (ls.we | amo); + amo_unit.rmw_valid <= 0; + amo_unit.op <= amo_type; + amo_unit.rs2 <= ls.data_in; //Cannot use wdata because wdata will be overwritten if the RMW is not exclusive + if (ls.new_request & (ls.we | (amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid))) + current_state <= REQUESTING_WRITE; + else if (ls.new_request & ~request_is_invalid_sc) + current_state <= REQUESTING_READ; + end + REQUESTING_READ : begin //Wait for read to be accepted + m_axi.arvalid <= ~m_axi.arready; + if (m_axi.arready) + current_state <= WAITING_READ; + end + WAITING_READ : begin //Wait for read response + ls.ready <= m_axi.rvalid & ~m_axi.awlock; //A RMW stays busy until its write completes + ls.data_out <= m_axi.rdata; + ls.data_valid <= m_axi.rvalid; + amo_unit.rmw_valid <= m_axi.rvalid; + if (m_axi.rvalid) + current_state <= m_axi.awlock ?
REQUESTING_AMO_M : READY; + end + REQUESTING_AMO_M : begin //One cycle for computing the AMO write value + ls.data_valid <= 0; + m_axi.awvalid <= 1; + m_axi.wvalid <= 1; + m_axi.wdata <= amo_unit.rd; + amo_unit.rmw_valid <= 0; + current_state <= REQUESTING_WRITE; + end + REQUESTING_WRITE : begin //Wait for write (address and data) to be accepted + m_axi.awvalid <= m_axi.awvalid & ~m_axi.awready; + m_axi.wvalid <= m_axi.wvalid & ~m_axi.wready; + if ((~m_axi.awvalid | m_axi.awready) & (~m_axi.wvalid | m_axi.wready)) + current_state <= WAITING_WRITE; + end + WAITING_WRITE : begin //Wait for write response; resubmit if RMW was not exclusive + ls.ready <= m_axi.bvalid & (~m_axi.arlock | m_axi.bresp == 2'b01); + ls.data_out <= {31'b0, m_axi.bresp != 2'b01}; + ls.data_valid <= m_axi.bvalid & m_axi.awlock & ~m_axi.arlock; + m_axi.arvalid <= m_axi.bvalid & m_axi.arlock & m_axi.bresp != 2'b01; + write_outstanding <= ~(m_axi.bvalid & (~m_axi.arlock | m_axi.bresp == 2'b01)); + if (m_axi.bvalid) + current_state <= m_axi.arlock & m_axi.bresp != 2'b01 ? 
REQUESTING_READ : READY; + end + endcase if (rst) - ls.data_valid <= 0; - else - ls.data_valid <= m_axi.rvalid; + current_state <= READY; end - //read channel - set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) arvalid_m ( - .clk, .rst, - .set(ls.new_request & ls.re), - .clr(m_axi.arready), - .result(m_axi.arvalid) - ); - - always_ff @ (posedge clk) begin - if (m_axi.rvalid) - ls.data_out <= m_axi.rdata; - end - - //write channel - set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) awvalid_m ( - .clk, .rst, - .set(ls.new_request & ls.we), - .clr(m_axi.awready), - .result(m_axi.awvalid) - ); - - set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) wvalid_m ( - .clk, .rst, - .set(ls.new_request & ls.we), - .clr(m_axi.wready), - .result(m_axi.wvalid) - ); - assign m_axi.wlast = m_axi.wvalid; - endmodule diff --git a/core/memory_sub_units/local_mem_sub_unit.sv b/core/memory_sub_units/local_mem_sub_unit.sv old mode 100755 new mode 100644 index ae558975..0f7739cc --- a/core/memory_sub_units/local_mem_sub_unit.sv +++ b/core/memory_sub_units/local_mem_sub_unit.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2017 Eric Matthews, Lesley Shannon + * Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,35 +18,81 @@ * * Author(s): * Eric Matthews + * Chris Keilbart */ module local_mem_sub_unit - import cva5_config::*; import riscv_types::*; - import cva5_types::*; ( input logic clk, input logic rst, + output logic write_outstanding, + input logic amo, + input amo_t amo_type, + amo_interface.subunit amo_unit, memory_sub_unit_interface.responder unit, local_memory_interface.master local_mem ); - assign unit.ready = 1; + //If amo is tied to 0 and amo_unit is disconnected the tools can optimize most of the logic away - assign local_mem.addr = unit.addr[31:2]; - assign local_mem.en = unit.new_request; - assign local_mem.be = unit.be; - assign local_mem.data_in = unit.data_in; - assign unit.data_out = local_mem.data_out; + logic rmw; //High for the cycle in which the read-modify-write result is written back + logic[31:2] rmw_addr; + logic[31:0] rmw_rs2; + amo_t rmw_op; //Same type as amo_type so no implicit width conversion is needed + logic sc_valid; + logic sc_valid_r; + logic sc_r; //The previous request was an SC (successful or not) + + assign write_outstanding = 0; - always_ff @ (posedge clk) begin - if (rst) + assign sc_valid = amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid; + assign amo_unit.set_reservation = unit.new_request & amo & amo_type == AMO_LR_FN5; + assign amo_unit.clear_reservation = unit.new_request; + assign amo_unit.reservation = unit.addr; + + assign amo_unit.rmw_valid = rmw; + assign amo_unit.op = rmw_op; + assign amo_unit.rs1 = local_mem.data_out; + assign amo_unit.rs2 = rmw_rs2; + + always_comb begin + if (rmw) begin + unit.ready = 0; + local_mem.addr = rmw_addr; + local_mem.en = 1; + local_mem.be = '1; + local_mem.data_in = amo_unit.rd; + unit.data_out = local_mem.data_out; + end else begin + unit.ready = 1; + local_mem.addr = unit.addr[31:2]; + local_mem.en = unit.new_request; + local_mem.be = {4{unit.we | sc_valid}} & unit.be; //SC only writes when it succeeds + local_mem.data_in = unit.data_in; + unit.data_out = sc_r ?
{31'b0, ~sc_valid_r} : local_mem.data_out; //SC returns 0 on success and 1 on failure, matching the bus masters + end + end + + always_ff @(posedge clk) begin + if (rst) begin unit.data_valid <= 0; - else + rmw <= 0; + sc_valid_r <= 0; + sc_r <= 0; + end + else begin unit.data_valid <= unit.new_request & unit.re; + rmw <= unit.new_request & amo & ~(amo_type inside {AMO_LR_FN5, AMO_SC_FN5}); + sc_valid_r <= sc_valid; + sc_r <= amo & amo_type == AMO_SC_FN5; + end + rmw_addr <= unit.addr[31:2]; + rmw_rs2 <= unit.data_in; + rmw_op <= amo_type; end endmodule diff --git a/core/memory_sub_units/wishbone_master.sv b/core/memory_sub_units/wishbone_master.sv index 39d48be5..8239f5fd 100644 --- a/core/memory_sub_units/wishbone_master.sv +++ b/core/memory_sub_units/wishbone_master.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2019 Eric Matthews, Lesley Shannon + * Copyright © 2019 Eric Matthews, Chris Keilbart, Lesley Shannon * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,57 +18,163 @@ * * Author(s): * Eric Matthews + * Chris Keilbart */ module wishbone_master - import cva5_config::*; import riscv_types::*; - import cva5_types::*; + #( + parameter int unsigned LR_WAIT = 32, //The number of cycles the master holds cyc after an LR + parameter logic INCLUDE_AMO = 1 //Required because the tools cannot fully optimize even if amo signals are tied off + ) ( input logic clk, input logic rst, + output logic write_outstanding, wishbone_interface.master wishbone, + + input logic amo, + input amo_t amo_type, + amo_interface.subunit amo_unit, memory_sub_unit_interface.responder ls ); - logic busy; //////////////////////////////////////////////////// //Implementation - assign wishbone.cti = 0; - assign wishbone.bte = 0; + typedef enum { + READY, + REQUESTING, + REQUESTING_AMO_R, + REQUESTING_AMO_M, + REQUESTING_AMO_W, + READY_LR, + REQUESTING_SC + } state_t; + state_t current_state; - always_ff @ (posedge clk) begin - if (ls.new_request) begin - wishbone.adr <= ls.addr[31:2]; - wishbone.sel <= ls.we ?
ls.be : '1; - wishbone.we <= ls.we; - wishbone.dat_w <= ls.data_in; - end - end + logic[$clog2(LR_WAIT)-1:0] cyc_counter; + logic request_is_sc; + assign request_is_sc = amo & amo_type == AMO_SC_FN5; - always_ff @ (posedge clk) begin - if (rst) - busy <= 0; - else - busy <= (busy & ~wishbone.ack) | ls.new_request; - end - assign ls.ready = (~busy); + assign amo_unit.set_reservation = ls.new_request & amo & amo_type == AMO_LR_FN5; + assign amo_unit.clear_reservation = ls.new_request; + assign amo_unit.reservation = ls.addr; + assign amo_unit.rs1 = ls.data_out; + assign amo_unit.rs2 = wishbone.dat_w; - assign wishbone.stb = busy; - assign wishbone.cyc = busy; + assign wishbone.cti = '0; + assign wishbone.bte = '0; - always_ff @ (posedge clk) begin + always_ff @(posedge clk) begin + wishbone.adr[1:0] <= '0; + unique case (current_state) + READY : begin //Accept any request + ls.ready <= ~ls.new_request | request_is_sc; + ls.data_out <= 32'b1; + ls.data_valid <= ls.new_request & request_is_sc; + wishbone.adr[31:2] <= ls.addr[31:2]; + wishbone.sel <= ls.we ? ls.be : '1; + wishbone.dat_w <= ls.data_in; + wishbone.we <= ls.we; + wishbone.stb <= ls.new_request & ~request_is_sc; + wishbone.cyc <= ls.new_request & ~request_is_sc; + write_outstanding <= ls.new_request & (ls.we | amo); + amo_unit.rmw_valid <= 0; + amo_unit.op <= amo_type; + cyc_counter <= amo ? 1 : 0; + if (ls.new_request & (~amo | amo_type == AMO_LR_FN5)) + current_state <= REQUESTING; + else if (ls.new_request & amo & amo_type != AMO_SC_FN5) + current_state <= REQUESTING_AMO_R; + end + REQUESTING : begin //Wait for response + ls.ready <= wishbone.ack; + ls.data_out <= wishbone.dat_r; + ls.data_valid <= ~wishbone.we & wishbone.ack; + wishbone.stb <= ~wishbone.ack; + wishbone.cyc <= ~wishbone.ack | cyc_counter[0]; + write_outstanding <= wishbone.we & ~wishbone.ack; + if (wishbone.ack) + current_state <= cyc_counter[0] ? 
READY_LR : READY; + end + REQUESTING_AMO_R : begin //Read for an AMO + if (INCLUDE_AMO) begin + ls.data_out <= wishbone.dat_r; + ls.data_valid <= wishbone.ack; + wishbone.stb <= ~wishbone.ack; + amo_unit.rmw_valid <= wishbone.ack; + if (wishbone.ack) + current_state <= REQUESTING_AMO_M; + end + end + REQUESTING_AMO_M : begin //One cycle for computing the AMO write value + if (INCLUDE_AMO) begin + ls.data_valid <= 0; + wishbone.dat_w <= amo_unit.rd; + wishbone.stb <= 1; + wishbone.we <= 1; + amo_unit.rmw_valid <= 0; + current_state <= REQUESTING_AMO_W; + end + end + REQUESTING_AMO_W : begin //Write for an AMO + if (INCLUDE_AMO) begin + ls.ready <= wishbone.ack; + wishbone.cyc <= ~wishbone.ack; + wishbone.stb <= ~wishbone.ack; + write_outstanding <= ~wishbone.ack; + if (wishbone.ack) + current_state <= READY; + end + end + READY_LR : begin //Cyc is held; hold for LR_WAIT cycles + if (INCLUDE_AMO) begin + ls.ready <= ~ls.new_request | (request_is_sc & ~amo_unit.reservation_valid); + ls.data_out <= {31'b0, ~amo_unit.reservation_valid}; + ls.data_valid <= ls.new_request & request_is_sc; + wishbone.adr[31:2] <= ls.addr[31:2]; + wishbone.sel <= ls.we ? 
ls.be : '1; + wishbone.dat_w <= ls.data_in; + wishbone.we <= ls.we | request_is_sc; + wishbone.stb <= ls.new_request & ~(request_is_sc & ~amo_unit.reservation_valid); + write_outstanding <= ls.new_request & (ls.we | amo); + amo_unit.rmw_valid <= 0; + amo_unit.op <= amo_type; + + if (ls.new_request) + wishbone.cyc <= ~(request_is_sc & ~amo_unit.reservation_valid); + else if (32'(cyc_counter) == LR_WAIT-1) + wishbone.cyc <= 0; + + cyc_counter <= cyc_counter + 1; + + if (ls.new_request & (~amo | amo_type == AMO_LR_FN5)) + current_state <= REQUESTING; + else if (ls.new_request & amo & amo_type != AMO_SC_FN5) + current_state <= REQUESTING_AMO_R; + else if (ls.new_request & amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid) + current_state <= REQUESTING_SC; + else if (32'(cyc_counter) == LR_WAIT-1 | ls.new_request) + current_state <= READY; + end + end + REQUESTING_SC : begin //Exclusive write; the SC result was already returned from READY_LR + if (INCLUDE_AMO) begin + ls.ready <= wishbone.ack; + ls.data_valid <= 0; + wishbone.stb <= ~wishbone.ack; //Nonblocking, like every other assignment to stb in this always_ff + wishbone.cyc <= ~wishbone.ack; //Nonblocking; releases the cycle held since the LR + write_outstanding <= ~wishbone.ack; + if (wishbone.ack) + current_state <= READY; //REQUESTING would raise stb again once ack drops and repeat the write + end + end + endcase if (rst) - ls.data_valid <= 0; - else - ls.data_valid <= ~wishbone.we & wishbone.ack; - end - always_ff @ (posedge clk) begin - if (wishbone.ack) - ls.data_out <= wishbone.dat_r; + current_state <= READY; end endmodule diff --git a/core/mmu/dtlb.sv b/core/mmu/dtlb.sv new file mode 100644 index 00000000..28af1119 --- /dev/null +++ b/core/mmu/dtlb.sv @@ -0,0 +1,333 @@ +/* + * Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. + * + * Author(s): + * Eric Matthews + * Chris Keilbart + */ + +module dtlb + + import cva5_types::*; + import riscv_types::*; + + #( + parameter WAYS = 2, + parameter DEPTH = 32 + ) + ( + input logic clk, + input logic rst, + input logic translation_on, + input tlb_packet_t sfence, + input logic [ASIDLEN-1:0] asid, + mmu_interface.tlb mmu, + tlb_interface.tlb tlb + ); + ////////////////////////////////////////// + localparam TAG_W = 20 - $clog2(DEPTH); + localparam TAG_W_S = 10 - $clog2(DEPTH); + localparam WAY_W = WAYS == 1 ? 
1 : $clog2(WAYS); + + typedef struct packed { + logic valid; + logic [ASIDLEN-1:0] asid; + logic [TAG_W-1:0] tag; + //Signals from the PTE + logic [9:0] ppn1; + logic [9:0] ppn0; + logic dirty; + logic globe; + logic user; + logic execute; + logic write; + logic read; + } tlb_entry_t; + + typedef struct packed { + logic valid; + logic [ASIDLEN-1:0] asid; + logic [TAG_W_S-1:0] tag; + //Signals from the PTE + logic [9:0] ppn1; + logic dirty; + logic globe; + logic user; + logic execute; + logic write; + logic read; + } tlb_entry_s_t; + + //////////////////////////////////////////////////// + //Implementation + //Regular and super pages stored separately + //Regular pages are set associative and super pages are direct mapped + + //Random replacement + logic[WAYS-1:0] replacement_way; + cycler #(.C_WIDTH(WAYS)) replacement_policy ( + .en(1'b1), + .one_hot(replacement_way), + .*); + + //LUTRAM storage + logic [$clog2(DEPTH)-1:0] tlb_raddr; + logic [$clog2(DEPTH)-1:0] tlb_raddr_s; + logic [$clog2(DEPTH)-1:0] tlb_waddr; + logic [$clog2(DEPTH)-1:0] tlb_waddr_s; + tlb_entry_t [WAYS-1:0] rdata; + tlb_entry_s_t rdata_s; + logic [WAYS-1:0] write; + logic write_s; + tlb_entry_t wdata; + tlb_entry_s_t wdata_s; + + generate for (genvar i = 0; i < WAYS; i++) begin : gen_lut_rams + lutram_1w_1r #(.DATA_TYPE(tlb_entry_t), .DEPTH(DEPTH)) data_table ( + .waddr(tlb_waddr), + .raddr(tlb_raddr), + .ram_write(write[i]), + .new_ram_data(wdata), + .ram_data_out(rdata[i]), + .*); + end endgenerate + lutram_1w_1r #(.DATA_TYPE(tlb_entry_s_t), .DEPTH(DEPTH)) data_table_s ( + .waddr(tlb_waddr_s), + .raddr(tlb_raddr_s), + .ram_write(write_s), + .new_ram_data(wdata_s), + .ram_data_out(rdata_s), + .*); + + //Hit detection + logic [TAG_W-1:0] cmp_tag; + logic [TAG_W_S-1:0] cmp_tag_s; + logic [ASIDLEN-1:0] cmp_asid; + logic [WAYS-1:0] tag_hit; + logic tag_hit_s; + logic [WAYS-1:0] asid_hit; + logic asid_hit_s; + logic [WAYS-1:0] rdata_global; + logic rdata_global_s; + logic [WAYS-1:0][9:0] ppn0; + 
logic [WAYS-1:0][9:0] ppn1; + logic [9:0] ppn1_s; + logic [WAYS-1:0] perms_valid_comb; + logic perms_valid_comb_s; + logic [WAYS-1:0] perms_valid; + logic perms_valid_s; + logic [WAYS-1:0] hit_ohot; + logic hit_ohot_s; + logic [WAY_W-1:0] hit_way; + logic hit; + + assign cmp_tag = sfence.valid ? sfence.addr[31-:TAG_W] : tlb.virtual_address[31-:TAG_W]; + assign cmp_tag_s = sfence.valid ? sfence.addr[31-:TAG_W_S] : tlb.virtual_address[31-:TAG_W_S]; + assign cmp_asid = sfence.valid ? sfence.asid : asid; + + always_ff @(posedge clk) begin + for (int i = 0; i < WAYS; i++) begin + tag_hit[i] <= rdata[i].tag == cmp_tag; + rdata_global[i] <= rdata[i].globe; + ppn0[i] <= rdata[i].ppn0; + ppn1[i] <= rdata[i].ppn1; + asid_hit[i] <= rdata[i].asid == cmp_asid; + perms_valid[i] <= perms_valid_comb[i]; + hit_ohot[i] <= rdata[i].valid & (rdata[i].tag == cmp_tag) & (rdata[i].asid == cmp_asid | rdata[i].globe); + end + tag_hit_s <= rdata_s.tag == cmp_tag_s; + rdata_global_s <= rdata_s.globe; + ppn1_s <= rdata_s.ppn1; + asid_hit_s <= rdata_s.asid == cmp_asid; + perms_valid_s <= perms_valid_comb_s; + hit_ohot_s <= rdata_s.valid & (rdata_s.tag == cmp_tag_s) & (rdata_s.asid == cmp_asid | rdata_s.globe); + end + + assign hit = |hit_ohot | hit_ohot_s; + + priority_encoder #(.WIDTH(WAYS)) hit_cast ( + .priority_vector(hit_ohot), + .encoded_result(hit_way) + ); + + generate for (genvar i = 0; i < WAYS; i++) begin : gen_perms_check + perms_check checks ( + .pte_perms('{ + d : rdata[i].dirty, + a : 1'b1, + u : rdata[i].user, + x : rdata[i].execute, + w : rdata[i].write, + r : rdata[i].read, + default: 'x + }), + .rnw(tlb.rnw), + .execute(1'b0), + .mxr(mmu.mxr), + .sum(mmu.sum), + .privilege(mmu.privilege), + .valid(perms_valid_comb[i]) + ); + end endgenerate + perms_check checks ( + .pte_perms('{ + d : rdata_s.dirty, + a : 1'b1, + u : rdata_s.user, + x : rdata_s.execute, + w : rdata_s.write, + r : rdata_s.read, + default: 'x + }), + .rnw(tlb.rnw), + .execute(1'b0), + .mxr(mmu.mxr), + 
.sum(mmu.sum), + .privilege(mmu.privilege), + .valid(perms_valid_comb_s) + ); + + + //SFENCE + logic sfence_valid_r; + logic [$clog2(DEPTH)-1:0] flush_addr; + lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0)) lfsr_counter ( + .en(1'b1), + .value(flush_addr), + .*); + + always_ff @(posedge clk) begin + if (tlb.new_request | sfence.valid) begin + tlb_waddr <= tlb_raddr; + tlb_waddr_s <= tlb_raddr_s; + end + sfence_valid_r <= sfence.valid; //Other SFENCE signals remain registered and do not need to be saved + end + + always_comb begin + if (sfence.valid) begin + tlb_raddr = sfence.addr_only ? sfence.addr[12 +: $clog2(DEPTH)] : flush_addr; + tlb_raddr_s = sfence.addr_only ? sfence.addr[22 +: $clog2(DEPTH)] : flush_addr; + end + else begin + tlb_raddr = tlb.virtual_address[12 +: $clog2(DEPTH)]; + tlb_raddr_s = tlb.virtual_address[22 +: $clog2(DEPTH)]; + end + end + + assign wdata = '{ + valid : ~sfence_valid_r, + asid : asid, + tag : mmu.virtual_address[31-:TAG_W], + ppn1 : mmu.upper_physical_address[19:10], + ppn0 : mmu.upper_physical_address[9:0], + dirty : mmu.perms.d, + globe : mmu.perms.g, + user : mmu.perms.u, + execute : mmu.perms.x, + write : mmu.perms.w, + read : mmu.perms.r + }; + assign wdata_s = '{ + valid : ~sfence_valid_r, + asid : asid, + tag : mmu.virtual_address[31-:TAG_W_S], + ppn1 : mmu.upper_physical_address[19:10], + dirty : mmu.perms.d, + globe : mmu.perms.g, + user : mmu.perms.u, + execute : mmu.perms.x, + write : mmu.perms.w, + read : mmu.perms.r + }; + + always_comb begin + for (int i = 0; i < WAYS; i++) begin + case ({sfence_valid_r, sfence.addr_only, sfence.asid_only}) + 3'b100: begin //Clear everything + write[i] = 1'b1; + write_s = 1'b1; + end + 3'b101: begin //Clear non global for specified address space + write[i] = ~rdata_global[i] & asid_hit[i]; + write_s = ~rdata_global_s & asid_hit_s; + end + 3'b110: begin //Clear matching addresses + write[i] = tag_hit[i]; + write_s = tag_hit_s; + end + 3'b111: begin //Clear if both + write[i] = 
(~rdata_global[i] & asid_hit[i]) & tag_hit[i]; //Registered global bit belongs to the entry at tlb_waddr, like tag_hit/asid_hit + write_s = (~rdata_global_s & asid_hit_s) & tag_hit_s; //rdata_s now reflects the current read address, not the entry being cleared + end + default: begin + write[i] = mmu.write_entry & ~mmu.superpage & replacement_way[i]; + write_s = mmu.write_entry & mmu.superpage; + end + endcase + end + end + + //Permission fail + logic perm_fail; + assign perm_fail = |(hit_ohot & ~perms_valid) | (hit_ohot_s & ~perms_valid_s); + + + //MMU interface + logic new_request_r; + assign mmu.request = translation_on & new_request_r & ~hit & ~perm_fail; //Page walk only on a miss + assign mmu.execute = 0; + + always_ff @(posedge clk) begin + new_request_r <= tlb.new_request; + if (tlb.new_request) begin + mmu.virtual_address <= tlb.virtual_address; + mmu.rnw <= tlb.rnw; + end + end + + //TLB interface + assign tlb.done = (new_request_r & ((hit & ~perm_fail) | ~translation_on)) | mmu.write_entry; + assign tlb.ready = 1; //Not always ready, but requests will not be sent if it isn't done + assign tlb.is_fault = mmu.is_fault | (new_request_r & translation_on & perm_fail); + + + always_comb begin + tlb.physical_address[11:0] = mmu.virtual_address[11:0]; + if (~translation_on) + tlb.physical_address[31:12] = mmu.virtual_address[31:12]; + else if (new_request_r) begin + tlb.physical_address[31:22] = hit_ohot_s ? ppn1_s : ppn1[hit_way]; + tlb.physical_address[21:12] = hit_ohot_s ? mmu.virtual_address[21:12] : ppn0[hit_way]; + end else begin + tlb.physical_address[31:22] = mmu.upper_physical_address[19:10]; + tlb.physical_address[21:12] = mmu.superpage ?
mmu.virtual_address[21:12] : mmu.upper_physical_address[9:0]; + end + end + + //////////////////////////////////////////////////// + //End of Implementation + //////////////////////////////////////////////////// + + //////////////////////////////////////////////////// + //Assertions + request_on_miss: + assert property (@(posedge clk) disable iff (rst) (mmu.request) |-> ~tlb.new_request) + else $error("Request during miss in TLB!"); + +endmodule diff --git a/core/mmu/itlb.sv b/core/mmu/itlb.sv new file mode 100644 index 00000000..ee96ca51 --- /dev/null +++ b/core/mmu/itlb.sv @@ -0,0 +1,294 @@ +/* + * Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. + * + * Author(s): + * Eric Matthews + * Chris Keilbart + */ + +module itlb //Instruction TLB: a WAYS-way store for regular pages and a single direct-mapped store for superpages, refilled when the MMU asserts write_entry + + import riscv_types::*; + import cva5_types::*; + + #( + parameter WAYS = 2, + parameter DEPTH = 32 + ) + ( + input logic clk, + input logic rst, + + input logic translation_on, + input tlb_packet_t sfence, + input logic abort_request, + input logic [ASIDLEN-1:0] asid, + mmu_interface.tlb mmu, + tlb_interface.tlb tlb + ); + ////////////////////////////////////////// + localparam TAG_W = 20 - $clog2(DEPTH); //Regular page tag: address bits 31:12 left over after the index + localparam TAG_W_S = 10 - $clog2(DEPTH); //Superpage tag: address bits 31:22 left over after the index + localparam WAY_W = WAYS == 1 ? 
1 : $clog2(WAYS); + + typedef struct packed { + logic valid; + logic [ASIDLEN-1:0] asid; + logic [TAG_W-1:0] tag; + //Signals from the PTE + logic [9:0] ppn1; + logic [9:0] ppn0; + logic globe; + logic user; + } tlb_entry_t; + + typedef struct packed { + logic valid; + logic [ASIDLEN-1:0] asid; + logic [TAG_W_S-1:0] tag; + //Signals from the PTE + logic [9:0] ppn1; + logic globe; + logic user; + } tlb_entry_s_t; + + //////////////////////////////////////////////////// + //Implementation + //Regular and super pages stored separately + //Regular pages are set associative and super pages are direct mapped + + //Random replacement + logic[WAYS-1:0] replacement_way; + cycler #(.C_WIDTH(WAYS)) replacement_policy ( + .en(1'b1), + .one_hot(replacement_way), + .*); + + //LUTRAM storage + logic [$clog2(DEPTH)-1:0] tlb_addr; //Shared read/write index of the regular page store + logic [$clog2(DEPTH)-1:0] tlb_addr_s; //Shared read/write index of the superpage store + tlb_entry_t [WAYS-1:0] rdata; + tlb_entry_s_t rdata_s; + logic [WAYS-1:0] write; //Per-way write strobe + logic write_s; + tlb_entry_t wdata; + tlb_entry_s_t wdata_s; + + generate for (genvar i = 0; i < WAYS; i++) begin : gen_lut_rams + lutram_1w_1r #(.DATA_TYPE(tlb_entry_t), .DEPTH(DEPTH)) data_table ( + .waddr(tlb_addr), + .raddr(tlb_addr), + .ram_write(write[i]), + .new_ram_data(wdata), + .ram_data_out(rdata[i]), + .*); + end endgenerate + lutram_1w_1r #(.DATA_TYPE(tlb_entry_s_t), .DEPTH(DEPTH)) data_table_s ( + .waddr(tlb_addr_s), + .raddr(tlb_addr_s), + .ram_write(write_s), + .new_ram_data(wdata_s), + .ram_data_out(rdata_s), + .*); + + + //Hit detection + logic [TAG_W-1:0] cmp_tag; + logic [TAG_W_S-1:0] cmp_tag_s; + logic [ASIDLEN-1:0] cmp_asid; + logic [WAYS-1:0] tag_hit; + logic tag_hit_s; + logic [WAYS-1:0] asid_hit; + logic asid_hit_s; + logic [WAYS-1:0] perms_valid; + logic perms_valid_s; + logic [WAYS-1:0] hit_ohot; + logic hit_ohot_s; + logic [WAY_W-1:0] hit_way; + logic hit; + + assign cmp_tag = sfence.valid ? sfence.addr[31-:TAG_W] : tlb.virtual_address[31-:TAG_W]; //While sfence.valid is high the fence address and ASID are compared instead of the request + assign cmp_tag_s = sfence.valid ? 
sfence.addr[31-:TAG_W_S] : tlb.virtual_address[31-:TAG_W_S]; + assign cmp_asid = sfence.valid ? sfence.asid : asid; + + always_comb begin + for (int i = 0; i < WAYS; i++) begin + tag_hit[i] = rdata[i].tag == cmp_tag; + asid_hit[i] = rdata[i].asid == cmp_asid; + hit_ohot[i] = rdata[i].valid & tag_hit[i] & (asid_hit[i] | rdata[i].globe); //Global entries hit for any ASID + end + tag_hit_s = rdata_s.tag == cmp_tag_s; + asid_hit_s = rdata_s.asid == cmp_asid; + hit_ohot_s = rdata_s.valid & tag_hit_s & (asid_hit_s | rdata_s.globe); + end + assign hit = |hit_ohot | hit_ohot_s; + + priority_encoder #(.WIDTH(WAYS)) hit_cast ( + .priority_vector(hit_ohot), + .encoded_result(hit_way) + ); + + generate for (genvar i = 0; i < WAYS; i++) begin : gen_perms_check + perms_check checks ( + .pte_perms('{ + x : 1'b1, + a : 1'b1, + u : rdata[i].user, + default: 'x + }), //Entries keep only the user bit, so X and A are tied high and the rest is left as don't care + .rnw(tlb.rnw), + .execute(1'b1), + .mxr(mmu.mxr), + .sum(mmu.sum), + .privilege(mmu.privilege), + .valid(perms_valid[i]) + ); + end endgenerate + perms_check checks_s ( + .pte_perms('{ + x : 1'b1, + a : 1'b1, + u : rdata_s.user, + default: 'x + }), + .rnw(tlb.rnw), + .execute(1'b1), + .mxr(mmu.mxr), + .sum(mmu.sum), + .privilege(mmu.privilege), + .valid(perms_valid_s) + ); + + + //SFENCE + logic [$clog2(DEPTH)-1:0] flush_addr; //Always-enabled LFSR value, used as the index when the SFENCE is not address specific + lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0)) lfsr_counter ( + .en(1'b1), + .value(flush_addr), + .*); + + always_comb begin + if (sfence.valid) begin + tlb_addr = sfence.addr_only ? sfence.addr[12 +: $clog2(DEPTH)] : flush_addr; + tlb_addr_s = sfence.addr_only ? 
sfence.addr[22 +: $clog2(DEPTH)] : flush_addr; + end + else begin + tlb_addr = tlb.virtual_address[12 +: $clog2(DEPTH)]; + tlb_addr_s = tlb.virtual_address[22 +: $clog2(DEPTH)]; + end + end + + assign wdata = '{ + valid : ~sfence.valid, //Anything written while sfence.valid is high is stored as an invalid entry + asid : asid, + tag : tlb.virtual_address[31-:TAG_W], + ppn1 : mmu.upper_physical_address[19:10], + ppn0 : mmu.upper_physical_address[9:0], + globe : mmu.perms.g, + user : mmu.perms.u + }; + assign wdata_s = '{ + valid : ~sfence.valid, + asid : asid, + tag : tlb.virtual_address[31-:TAG_W_S], + ppn1 : mmu.upper_physical_address[19:10], + globe : mmu.perms.g, + user : mmu.perms.u + }; + + always_comb begin + for (int i = 0; i < WAYS; i++) begin + case ({sfence.valid, sfence.addr_only, sfence.asid_only}) + 3'b100: begin //Clear everything + write[i] = 1'b1; + write_s = 1'b1; + end + 3'b101: begin //Clear non global for specified address space + write[i] = ~rdata[i].globe & asid_hit[i]; + write_s = ~rdata_s.globe & asid_hit_s; + end + 3'b110: begin //Clear matching addresses + write[i] = tag_hit[i]; + write_s = tag_hit_s; + end + 3'b111: begin //Clear if both + write[i] = (~rdata[i].globe & asid_hit[i]) & tag_hit[i]; + write_s = (~rdata_s.globe & asid_hit_s) & tag_hit_s; + end + default: begin //No SFENCE: store the entry returned by the MMU, in the replacement way or in the superpage store + write[i] = mmu.write_entry & ~mmu.superpage & replacement_way[i]; + write_s = mmu.write_entry & mmu.superpage; + end + endcase + end + end + + //Permission fail + logic perm_fail; + assign perm_fail = |(hit_ohot & ~perms_valid) | (hit_ohot_s & ~perms_valid_s); + + //MMU interface + logic request_in_progress; + always_ff @ (posedge clk) begin + if (rst) + request_in_progress <= 0; + else if (mmu.write_entry | mmu.is_fault | abort_request) + request_in_progress <= 0; + else if (mmu.request) + request_in_progress <= 1; + end + + assign mmu.request = translation_on & tlb.new_request & ~hit & ~perm_fail; //Only a miss with translation enabled is forwarded to the MMU + assign mmu.execute = 1; + assign mmu.rnw = tlb.rnw; + assign mmu.virtual_address = tlb.virtual_address; + + //TLB interface + logic 
mmu_request_complete; + always_ff @(posedge clk) begin + if (rst) + mmu_request_complete <= 0; + else + mmu_request_complete <= mmu.write_entry & ~abort_request; + end + assign tlb.done = translation_on ? (hit & ~perm_fail & (tlb.new_request | mmu_request_complete)) : tlb.new_request; //With translation off every request completes in the cycle it is made + assign tlb.ready = ~request_in_progress & ~mmu_request_complete; //Not ready from an MMU request until it resolves, nor in the cycle after a refill + assign tlb.is_fault = mmu.is_fault | (tlb.new_request & translation_on & perm_fail); + + always_comb begin + tlb.physical_address[11:0] = tlb.virtual_address[11:0]; + if (~translation_on) + tlb.physical_address[31:12] = tlb.virtual_address[31:12]; + else if (hit_ohot_s) begin + tlb.physical_address[21:12] = tlb.virtual_address[21:12]; + tlb.physical_address[31:22] = rdata_s.ppn1; + end + else begin + tlb.physical_address[21:12] = rdata[hit_way].ppn0; + tlb.physical_address[31:22] = rdata[hit_way].ppn1; + end + end + + //////////////////////////////////////////////////// + //End of Implementation + //////////////////////////////////////////////////// + + //////////////////////////////////////////////////// + //Assertions + +endmodule diff --git a/core/mmu.sv b/core/mmu/mmu.sv old mode 100755 new mode 100644 similarity index 79% rename from core/mmu.sv rename to core/mmu/mmu.sv index 9893c661..100834c5 --- a/core/mmu.sv +++ b/core/mmu/mmu.sv @@ -22,9 +22,6 @@ module mmu - import cva5_config::*; - import riscv_types::*; - import cva5_types::*; import csr_types::*; ( @@ -40,14 +37,7 @@ module mmu logic [11:0] ppn1; logic [9:0] ppn0; logic [1:0] reserved; - logic d; - logic a; - logic g; - logic u; - logic x; - logic w; - logic r; - logic v; + pte_perms_t perms; } pte_t; typedef enum { @@ -63,8 +53,7 @@ module mmu logic [6:0] next_state; pte_t pte; - logic access_valid; - logic privilege_valid; + logic perms_valid; localparam MAX_ABORTED_REQUESTS = 4; logic abort_queue_full; @@ -84,7 +73,7 @@ module mmu //Page Table addresses always_ff @ (posedge clk) begin - if (state[IDLE] | l1_response.data_valid) begin + if 
(state[IDLE] | (l1_response.data_valid & ~discard_data)) begin if (state[IDLE]) l1_request.addr <= {mmu.satp_ppn[19:0], mmu.virtual_address[31:22], 2'b00}; else @@ -103,7 +92,7 @@ module mmu logic delayed_abort_complete; assign delayed_abort = abort_request & (state[WAIT_REQUEST_1] | state[WAIT_REQUEST_2]); - assign delayed_abort_complete = discard_data & l1_response.data_valid; + assign delayed_abort_complete = (discard_data | abort_request) & l1_response.data_valid; always_ff @ (posedge clk) begin if (rst) abort_tracking <= 0; @@ -113,18 +102,16 @@ module mmu assign discard_data = abort_tracking[COUNT_W]; assign abort_queue_full = abort_tracking[COUNT_W] & ~|abort_tracking[COUNT_W-1:0]; - //////////////////////////////////////////////////// - //Access and permission checks - //A and D bits are software managed - assign access_valid = - (mmu.execute & pte.x & pte.a) | //fetch - (mmu.rnw & (pte.r | (pte.x & mmu.mxr)) & pte.a) | //load - ((~mmu.rnw & ~mmu.execute) & pte.w & pte.a & pte.d); //store - - assign privilege_valid = - (mmu.privilege == MACHINE_PRIVILEGE) | - ((mmu.privilege == SUPERVISOR_PRIVILEGE) & (~pte.u | (pte.u & mmu.sum))) | - ((mmu.privilege == USER_PRIVILEGE) & pte.u); + + perms_check perm ( + .pte_perms(pte.perms), + .rnw(mmu.rnw), + .execute(mmu.execute), + .mxr(mmu.mxr), + .sum(mmu.sum), + .privilege(mmu.privilege), + .valid(perms_valid) + ); //////////////////////////////////////////////////// //State Machine @@ -139,14 +126,14 @@ module mmu next_state = 2**WAIT_REQUEST_1; state[WAIT_REQUEST_1] : if (l1_response.data_valid & ~discard_data) begin - if (~pte.v | (~pte.r & pte.w)) //page not valid OR invalid xwr pattern + if (~pte.perms.v | (~pte.perms.r & pte.perms.w)) //page not valid OR invalid xwr pattern next_state = 2**COMPLETE_FAULT; - else if (pte.v & (pte.r | pte.x)) begin//superpage (all remaining xwr patterns other than all zeros) - if (access_valid & privilege_valid) + else if (pte.perms.v & (pte.perms.r | pte.perms.x)) 
begin//superpage (all remaining xwr patterns other than all zeros) + if (perms_valid & ~|pte.ppn0) //check for misaligned superpage next_state = 2**COMPLETE_SUCCESS; else next_state = 2**COMPLETE_FAULT; - end else //(pte.v & ~pte.x & ~pte.w & ~pte.r) pointer to next level in page table + end else //(pte.perms.v & ~pte.perms.x & ~pte.perms.w & ~pte.perms.r) pointer to next level in page table next_state = 2**SEND_REQUEST_2; end state[SEND_REQUEST_2] : @@ -154,10 +141,10 @@ module mmu next_state = 2**WAIT_REQUEST_2; state[WAIT_REQUEST_2] : if (l1_response.data_valid & ~discard_data) begin - if (access_valid & privilege_valid) - next_state = 2**COMPLETE_SUCCESS; - else + if (~perms_valid | ~pte.perms.v | (~pte.perms.r & pte.perms.w)) //perm fail or invalid next_state = 2**COMPLETE_FAULT; + else + next_state = 2**COMPLETE_SUCCESS; end state[COMPLETE_SUCCESS], state[COMPLETE_FAULT] : next_state = 2**IDLE; @@ -178,6 +165,15 @@ module mmu //TLB return path always_ff @ (posedge clk) begin if (l1_response.data_valid) begin + mmu.superpage <= state[WAIT_REQUEST_1]; + mmu.perms.d <= pte.perms.d; + mmu.perms.a <= pte.perms.a; + mmu.perms.g <= pte.perms.g | (state[WAIT_REQUEST_2] & mmu.perms.g); + mmu.perms.u <= pte.perms.u; + mmu.perms.x <= pte.perms.x; + mmu.perms.w <= pte.perms.w; + mmu.perms.r <= pte.perms.r; + mmu.perms.v <= pte.perms.v; mmu.upper_physical_address[19:10] <= pte.ppn1[9:0]; mmu.upper_physical_address[9:0] <= state[WAIT_REQUEST_2] ? pte.ppn0 : mmu.virtual_address[21:12]; end @@ -201,7 +197,7 @@ module mmu //the transaction is aborted. 
As such, if TLB request is low and we are not in the //IDLE state, then our current processor state has been corrupted mmu_tlb_state_mismatch: - assert property (@(posedge clk) disable iff (rst) (~mmu.request) |-> (state[IDLE])) + assert property (@(posedge clk) disable iff (rst) (mmu.request) |-> (state[IDLE])) else $error("MMU and TLB state mismatch"); endmodule diff --git a/core/mmu/perms_check.sv b/core/mmu/perms_check.sv new file mode 100644 index 00000000..f4838b63 --- /dev/null +++ b/core/mmu/perms_check.sv @@ -0,0 +1,58 @@ +/* + * Copyright © 2024 Liam Feng, Chris Keilbart, Eric Matthews + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Initial code developed under the supervision of Dr. Lesley Shannon, + * Reconfigurable Computing Lab, Simon Fraser University. 
+ * + * Author(s): + * Liam Feng + * Chris Keilbart + * Eric Matthews + */ + +module perms_check //Combinational permission check of one PTE against an access type and privilege level + + import csr_types::*; + + ( + input pte_perms_t pte_perms, + + input logic rnw, //LS type + input logic execute, //Fetch + input logic mxr, //Make eXecutable Readable + input logic sum, //permit Supervisor User Memory access + input privilege_t privilege, //Effective operating privilege + + output logic valid + ); + + logic access_valid; + logic privilege_valid; + + //Access and permission checks + //A and D bits are software managed; this implementation corresponds to the Svade extension + assign access_valid = + (execute & pte_perms.x & pte_perms.a) | //fetch: X is required whatever rnw is + ((rnw & ~execute) & (pte_perms.r | (pte_perms.x & mxr)) & pte_perms.a) | //load + ((~rnw & ~execute) & pte_perms.w & pte_perms.a & pte_perms.d); //store + + assign privilege_valid = + (privilege == MACHINE_PRIVILEGE) | + ((privilege == SUPERVISOR_PRIVILEGE) & (~pte_perms.u | (sum & ~execute))) | //SUM opens U pages to S-mode loads/stores only, never to fetches + ((privilege == USER_PRIVILEGE) & pte_perms.u); + + assign valid = access_valid & privilege_valid; + +endmodule diff --git a/core/register_file.sv b/core/register_file.sv old mode 100755 new mode 100644 index 4700bd8e..5720d828 --- a/core/register_file.sv +++ b/core/register_file.sv @@ -100,7 +100,6 @@ module register_file ) id_inuse_toggle_mem_set ( .clk (clk), - .rst (rst), .init_clear (gc.init_clear), .toggle (toggle), .toggle_addr (toggle_addr), @@ -118,7 +117,7 @@ module register_file .clk, .waddr(wb_phys_addr[i]), .raddr(decode_phys_rs_addr), - .ram_write(commit[i].valid & ~gc.writeback_supress), + .ram_write(commit[i].valid & ~gc.writeback_suppress), .new_ram_data(commit[i].data), .ram_data_out(regfile_rs_data[i]) ); diff --git a/core/register_free_list.sv b/core/register_free_list.sv index ea3acf2f..03225540 100644 --- a/core/register_free_list.sv +++ b/core/register_free_list.sv @@ -91,4 +91,4 @@ module register_free_list fifo_underflow_assertion: assert property (@(posedge clk) disable iff (rst) fifo.pop |-> 
fifo.valid) else $error("underflow"); -endmodule \ No newline at end of file +endmodule diff --git a/core/renamer.sv b/core/renamer.sv index 89a30eaf..56d888ec 100644 --- a/core/renamer.sv +++ b/core/renamer.sv @@ -96,7 +96,7 @@ module renamer assign free_list.potential_push = (gc.init_clear & ~clear_index[5]) | (wb_retire.valid); assign free_list.push = free_list.potential_push; - assign free_list.data_in = gc.init_clear ? {1'b1, clear_index[4:0]} : (gc.writeback_supress ? inuse_table_output.spec_phys_addr : inuse_table_output.previous_phys_addr); + assign free_list.data_in = gc.init_clear ? {1'b1, clear_index[4:0]} : (gc.rename_revert ? inuse_table_output.spec_phys_addr : inuse_table_output.previous_phys_addr); assign free_list.pop = rename_valid; //////////////////////////////////////////////////// @@ -137,12 +137,12 @@ module renamer rs_addr_t spec_table_write_index; rs_addr_t spec_table_write_index_mux [4]; - assign spec_table_update = rename_valid | rollback | gc.init_clear | (wb_retire.valid & gc.writeback_supress); + assign spec_table_update = rename_valid | rollback | gc.init_clear | gc.rename_revert; logic [1:0] spec_table_sel; one_hot_to_integer #(.C_WIDTH(4)) spec_table_sel_one_hot_to_int ( - .one_hot ({gc.init_clear, rollback, (wb_retire.valid & gc.writeback_supress), 1'b0}), + .one_hot ({gc.init_clear, rollback, gc.rename_revert, 1'b0}), .int_out (spec_table_sel) ); @@ -150,7 +150,7 @@ module renamer assign spec_table_write_index_mux[0] = decode.rd_addr; assign spec_table_next_mux[0].phys_addr = free_list.data_out; assign spec_table_next_mux[0].wb_group = decode.rd_wb_group; - //gc.writeback_supress + //gc.rename_revert assign spec_table_write_index_mux[1] = inuse_table_output.rd_addr; assign spec_table_next_mux[1].phys_addr = inuse_table_output.previous_phys_addr; assign spec_table_next_mux[1].wb_group = inuse_table_output.previous_wb_group; diff --git a/core/tlb_lut_ram.sv b/core/tlb_lut_ram.sv deleted file mode 100755 index db21485c..00000000 --- 
a/core/tlb_lut_ram.sv +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright © 2017 Eric Matthews, Lesley Shannon - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Initial code developed under the supervision of Dr. Lesley Shannon, - * Reconfigurable Computing Lab, Simon Fraser University. - * - * Author(s): - * Eric Matthews - */ - - - -module tlb_lut_ram - - import cva5_config::*; - import riscv_types::*; - import cva5_types::*; - - #( - parameter WAYS = 2, - parameter DEPTH = 32 - ) - ( - input logic clk, - input logic rst, - input gc_outputs_t gc, - input logic abort_request, - input logic [ASIDLEN-1:0] asid, - mmu_interface.tlb mmu, - tlb_interface.tlb tlb - ); - ////////////////////////////////////////// - localparam TLB_TAG_W = 32-12-$clog2(DEPTH); - - typedef struct packed { - logic valid; - logic [TLB_TAG_W-1:0] tag; - logic [19:0] phys_addr; - } tlb_entry_t; - - logic [$clog2(DEPTH)-1:0] tlb_addr; - logic [TLB_TAG_W-1:0] virtual_tag; - - tlb_entry_t ram [DEPTH-1:0][WAYS-1:0]; - logic [DEPTH-1:0] valid [WAYS-1:0]; - - logic [WAYS-1:0] tag_hit; - logic hit; - logic [WAYS-1:0] replacement_way; - - logic [$bits(tlb_entry_t)-1:0] ram_data [WAYS-1:0]; - tlb_entry_t ram_entry [WAYS-1:0]; - tlb_entry_t new_entry; - - logic [$clog2(DEPTH)-1:0] flush_addr; - - logic [WAYS-1:0] tlb_write; - logic request_in_progress; - logic mmu_request_complete; - //////////////////////////////////////////////////// - //Implementation - //LUTRAM-based - //Reset is 
performed sequentially, coordinated by the gc unit - - lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0)) - lfsr_counter ( - .clk (clk), .rst (rst), - .en(gc.tlb_flush), - .value(flush_addr) - ); - - assign tlb_addr = gc.tlb_flush ? flush_addr : tlb.virtual_address[12 +: $clog2(DEPTH)]; - assign tlb_write = {WAYS{gc.tlb_flush}} | replacement_way; - - assign new_entry.valid = ~gc.tlb_flush; - assign new_entry.tag = virtual_tag; - assign new_entry.phys_addr = mmu.upper_physical_address; - - genvar i; - generate - for (i=0; i $onehot(tag_hit)) - else $error("Multiple tag hits in TLB!"); - -endmodule \ No newline at end of file diff --git a/core/types_and_interfaces/csr_types.sv b/core/types_and_interfaces/csr_types.sv index b3bd37ed..510a8884 100644 --- a/core/types_and_interfaces/csr_types.sv +++ b/core/types_and_interfaces/csr_types.sv @@ -29,7 +29,7 @@ package csr_types; typedef enum logic [1:0] { USER_PRIVILEGE = 2'b00, SUPERVISOR_PRIVILEGE = 2'b01, - //reserved + RESERVED_PRIVILEGE = 2'b10, MACHINE_PRIVILEGE = 2'b11 } privilege_t; @@ -72,8 +72,6 @@ package csr_types; logic A; //Atomic } misa_t; - - typedef struct packed { logic sd; logic [7:0] zeros; @@ -86,7 +84,7 @@ package csr_types; logic [1:0] xs; logic [1:0] fs; logic [1:0] mpp; - logic [1:0] zeros1; + logic [1:0] vs; logic spp; logic mpie; logic ube; @@ -121,7 +119,9 @@ package csr_types; typedef struct packed { logic [31:16] custom; - logic [15:12] zeros; + logic [15:14] zeros; + logic lcofip; + logic zero0; logic meip; logic zero1; logic seip; @@ -138,7 +138,9 @@ package csr_types; typedef struct packed { logic [31:16] custom; - logic [15:12] zeros; + logic [15:14] zeros; + logic lcofie; + logic zero0; logic meie; logic zero1; logic seie; @@ -153,12 +155,66 @@ package csr_types; logic zero6; } mie_t; + typedef struct packed { + logic [31:16] custom; + logic [15:14] zeros; + logic lcofipd; + logic [12:10] zero1; + logic seid; + logic [8:6] zero2; + logic stid; + logic [4:2] zero3; + logic ssid; + logic zero4; 
+ } mideleg_t; + typedef struct packed { logic is_interrupt; - logic [XLEN-1-1-ECODE_W:0] zeroes; + logic [XLEN-1-1-ECODE_W:0] zeros; logic [ECODE_W-1:0] code; - } mcause_t; + } cause_t; + + typedef struct packed { + logic [28:0] hpm; + logic ir; + logic tm; + logic cy; + } mcounter_t; + typedef struct packed { + logic [23:0] zeros_high; //bits 31:8 of the low word of menvcfg/senvcfg + logic cbze; //bit 7 + logic cbcfe; //bit 6 + logic [1:0] cbie; //bits 5:4 + logic [2:0] zeros_low; //bits 3:1, three reserved bits so that the fields above sit at their architectural positions + logic fiom; //bit 0 + } envcfg_t; + + typedef struct packed { + logic stce; + logic pbmte; + logic adue; + logic cde; + logic [27:0] zeros; + } envcfgh_t; + + typedef struct packed { + logic [28:0] zeros; + logic jvt; + logic fcsr; + logic c; + } stateen0_t; + + typedef struct packed { + logic se0; + logic envcfg; + logic zero; + logic csrind; + logic aia; + logic imsic; + logic contex; + logic [24:0] zeros; + } mstateen0h_t; typedef struct packed { logic mode; @@ -166,5 +222,15 @@ package csr_types; logic [21:0] ppn; } satp_t; + typedef struct packed { + logic d; + logic a; + logic g; + logic u; + logic x; + logic w; + logic r; + logic v; + } pte_perms_t; endpackage diff --git a/core/types_and_interfaces/cva5_config.sv b/core/types_and_interfaces/cva5_config.sv old mode 100755 new mode 100644 index 534b0c4b..9748f574 --- a/core/types_and_interfaces/cva5_config.sv +++ b/core/types_and_interfaces/cva5_config.sv @@ -32,31 +32,38 @@ package cva5_config; //////////////////////////////////////////////////// //CSR Options - typedef struct packed { - int unsigned COUNTER_W; //CSR counter width (33-64 bits): 48-bits --> 32 days @ 100MHz - bit MCYCLE_WRITEABLE; - bit MINSTR_WRITEABLE; - bit MTVEC_WRITEABLE; - bit INCLUDE_MSCRATCH; - bit INCLUDE_MCAUSE; - bit INCLUDE_MTVAL; - } csr_non_standard_config_t; + typedef enum { + BARE, + M, + MU, + MSU + } modes_t; typedef struct packed { bit [31:0] MACHINE_IMPLEMENTATION_ID; bit [31:0] CPU_ID; bit [31:0] RESET_VEC; //PC value on reset - bit [31:0] RESET_MTVEC; - csr_non_standard_config_t NON_STANDARD_OPTIONS; + bit [31:0] 
RESET_TVEC; + bit [31:0] MCONFIGPTR; + bit INCLUDE_ZICNTR; + bit INCLUDE_ZIHPM; + bit INCLUDE_SSTC; + bit INCLUDE_SMSTATEEN; } csr_config_t; //Memory range [L, H] //Address range is inclusive and must be aligned to its size typedef struct packed { - bit [31:0] L; - bit [31:0] H; + logic [31:0] L; + logic [31:0] H; } memory_config_t; + //Atomic configuration + typedef struct packed { + int unsigned LR_WAIT; //Must be >= the maximum number of cycles a constrained LR-SC can take + int unsigned RESERVATION_WORDS; //The amount of 32-bit words that are reserved by an LR instruction, must be == cache line size (if cache present) + } amo_config_t; + //////////////////////////////////////////////////// //Cache Options //Size in bytes: (LINES * WAYS * LINE_W * 4) @@ -109,7 +116,7 @@ package cva5_config; //Additionally, writeback units must be grouped before non-writeback units localparam MAX_NUM_UNITS = 9; typedef struct packed { - bit IEC; + bit GC; bit BR; //End of Write-Back Units bit CUSTOM; @@ -122,7 +129,7 @@ package cva5_config; } units_t; typedef enum bit [$clog2(MAX_NUM_UNITS)-1:0] { - IEC_ID = 8, + GC_ID = 8, BR_ID = 7, //End of Write-Back Units (insert new writeback units here) CUSTOM_ID = 6, @@ -161,9 +168,7 @@ package cva5_config; typedef struct packed { //ISA options - bit INCLUDE_M_MODE; - bit INCLUDE_S_MODE; - bit INCLUDE_U_MODE; + modes_t MODES; bit INCLUDE_IFENCE; //local mem operations only bit INCLUDE_AMO; @@ -177,6 +182,7 @@ package cva5_config; //Memory Options int unsigned SQ_DEPTH;//CAM-based reasonable max of 4 bit INCLUDE_FORWARDING_TO_STORES; + amo_config_t AMO_UNIT; //Caches bit INCLUDE_ICACHE; cache_config_t ICACHE; @@ -232,9 +238,7 @@ package cva5_config; localparam cpu_config_t EXAMPLE_CONFIG = '{ //ISA options - INCLUDE_M_MODE : 1, - INCLUDE_S_MODE : 0, - INCLUDE_U_MODE : 0, + MODES : MSU, INCLUDE_UNIT : '{ ALU : 1, @@ -245,7 +249,7 @@ package cva5_config; FPU : 1, CUSTOM : 0, BR : 1, - IEC : 1 + GC : 1 }, INCLUDE_IFENCE : 1, @@ -257,20 
+261,20 @@ package cva5_config; MACHINE_IMPLEMENTATION_ID : 0, CPU_ID : 0, RESET_VEC : 32'h80000000, - RESET_MTVEC : 32'h80000100, - NON_STANDARD_OPTIONS : '{ - COUNTER_W : 33, - MCYCLE_WRITEABLE : 0, - MINSTR_WRITEABLE : 0, - MTVEC_WRITEABLE : 1, - INCLUDE_MSCRATCH : 0, - INCLUDE_MCAUSE : 1, - INCLUDE_MTVAL : 1 - } + RESET_TVEC : 32'h00000000, + MCONFIGPTR : '0, + INCLUDE_ZICNTR : 1, + INCLUDE_ZIHPM : 1, + INCLUDE_SSTC : 1, + INCLUDE_SMSTATEEN : 1 }, //Memory Options SQ_DEPTH : 4, INCLUDE_FORWARDING_TO_STORES : 1, + AMO_UNIT : '{ + LR_WAIT : 32, + RESERVATION_WORDS : 8 + }, INCLUDE_ICACHE : 0, ICACHE_ADDR : '{ L: 32'h80000000, @@ -377,12 +381,14 @@ package cva5_config; //////////////////////////////////////////////////// //Exceptions - localparam NUM_EXCEPTION_SOURCES = 3; //LS, Branch, Illegal + localparam NUM_EXCEPTION_SOURCES = 5; //LS, Branch, Illegal, CSR, GC //Stored in a ID table on issue, checked at retire - typedef enum bit [1:0] { + typedef enum bit [2:0] { LS_EXCEPTION = 0, BR_EXCEPTION = 1, - PRE_ISSUE_EXCEPTION = 2 + PRE_ISSUE_EXCEPTION = 2, + CSR_EXCEPTION = 3, + GC_EXCEPTION = 4 } exception_sources_t; //////////////////////////////////////////////////// diff --git a/core/types_and_interfaces/cva5_types.sv b/core/types_and_interfaces/cva5_types.sv old mode 100755 new mode 100644 index 0cb71492..a3e40a4a --- a/core/types_and_interfaces/cva5_types.sv +++ b/core/types_and_interfaces/cva5_types.sv @@ -27,9 +27,10 @@ package cva5_types; localparam LOG2_RETIRE_PORTS = $clog2(RETIRE_PORTS); localparam LOG2_MAX_IDS = $clog2(MAX_IDS); + localparam MAX_LS_SUBUNITS = 3; typedef logic[LOG2_MAX_IDS-1:0] id_t; - typedef logic[1:0] branch_predictor_metadata_t; + typedef logic[$clog2(MAX_LS_SUBUNITS)-1:0] ls_subunit_t; typedef logic [3:0] addr_hash_t; typedef logic [5:0] phys_addr_t; @@ -43,6 +44,8 @@ package cva5_types; typedef struct packed{ logic valid; + logic possible; + logic [NUM_EXCEPTION_SOURCES-1:0] source; exception_code_t code; logic [31:0] tval; logic 
[31:0] pc; @@ -64,7 +67,9 @@ package cva5_types; typedef struct packed{ logic [31:0] pc; + logic [31:0] pc_r; logic [31:0] instruction; + logic [31:0] instruction_r; logic [2:0] fn3; logic [6:0] opcode; @@ -76,7 +81,6 @@ package cva5_types; logic fp_uses_rd; logic is_multicycle; id_t id; - exception_sources_t exception_unit; logic stage_valid; fetch_metadata_t fetch_metadata; } issue_packet_t; @@ -98,18 +102,13 @@ package cva5_types; logic [4:0] op; }amo_alu_inputs_t; - typedef struct packed{ - logic is_lr; - logic is_sc; - logic is_amo; - logic [4:0] op; - } amo_details_t; - typedef struct packed { - logic [31:0] addr; + logic [11:0] offset; logic load; logic store; logic cache_op; + logic amo; + amo_t amo_type; logic [3:0] be; logic [2:0] fn3; logic [31:0] data; @@ -121,7 +120,14 @@ package cva5_types; } lsq_entry_t; typedef struct packed { - logic [31:0] addr; + logic [19:0] addr; + logic rnw; + logic discard; + ls_subunit_t subunit; + } lsq_addr_entry_t; + + typedef struct packed { + logic [11:0] offset; logic [3:0] be; logic cache_op; logic [31:0] data; @@ -131,8 +137,7 @@ package cva5_types; } sq_entry_t; typedef struct packed { - logic sq_empty; - logic no_released_stores_pending; + logic outstanding_store; logic idle; } load_store_status_t; @@ -165,29 +170,32 @@ package cva5_types; logic load; logic store; logic cache_op; + logic amo; + amo_t amo_type; logic [3:0] be; logic [2:0] fn3; + ls_subunit_t subunit; logic [31:0] data_in; id_t id; fp_ls_op_t fp_op; } data_access_shared_inputs_t; - typedef enum { - LUTRAM_FIFO, - NON_MUXED_INPUT_FIFO, - NON_MUXED_OUTPUT_FIFO - } fifo_type_t; + typedef struct packed { + logic valid; + logic asid_only; + logic[ASIDLEN-1:0] asid; + logic addr_only; + logic[31:0] addr; + } tlb_packet_t; typedef struct packed{ logic init_clear; logic fetch_hold; logic issue_hold; logic fetch_flush; - logic writeback_supress; - logic retire_hold; - logic sq_flush; - logic tlb_flush; - logic exception_pending; + logic fetch_ifence; + logic 
writeback_suppress; + logic rename_revert; exception_packet_t exception; logic pc_override; logic [31:0] pc; diff --git a/core/types_and_interfaces/external_interfaces.sv b/core/types_and_interfaces/external_interfaces.sv index 97796e8c..ff7e0def 100644 --- a/core/types_and_interfaces/external_interfaces.sv +++ b/core/types_and_interfaces/external_interfaces.sv @@ -31,6 +31,7 @@ interface axi_interface; logic [1:0] arburst; logic [3:0] arcache; logic [5:0] arid; + logic arlock; //read data logic rready; @@ -50,6 +51,7 @@ interface axi_interface; logic [1:0] awburst; logic [3:0] awcache; logic [5:0] awid; + logic awlock; //write data logic wready; @@ -65,12 +67,12 @@ interface axi_interface; logic [5:0] bid; modport master (input arready, rvalid, rdata, rresp, rlast, rid, awready, wready, bvalid, bresp, bid, - output arvalid, araddr, arlen, arsize, arburst, arcache, arid, rready, awvalid, awaddr, awlen, awsize, awburst, awcache, awid, + output arvalid, araddr, arlen, arsize, arburst, arcache, arlock, arid, rready, awvalid, awaddr, awlen, awsize, awburst, awcache, awid, awlock, wvalid, wdata, wstrb, wlast, bready); - modport slave (input arvalid, araddr, arlen, arsize, arburst, arcache, + modport slave (input arvalid, araddr, arlen, arsize, arburst, arcache, arlock, rready, - awvalid, awaddr, awlen, awsize, awburst, awcache, arid, + awvalid, awaddr, awlen, awsize, awburst, awcache, awlock, arid, wvalid, wdata, wstrb, wlast, awid, bready, output arready, rvalid, rdata, rresp, rlast, rid, @@ -79,9 +81,9 @@ interface axi_interface; bvalid, bresp, bid); `ifdef __CVA5_FORMAL__ - modport formal (input arready, arvalid, araddr, arlen, arsize, arburst, arcache, + modport formal (input arready, arvalid, araddr, arlen, arsize, arburst, arcache, arlock, rready, rvalid, rdata, rresp, rlast, rid, - awready, awvalid, awaddr, awlen, awsize, awburst, awcache, arid, + awready, awvalid, awaddr, awlen, awsize, awburst, awcache, awlock, arid, wready, wvalid, wdata, wstrb, wlast, awid, 
bready, bvalid, bresp, bid); `endif @@ -92,6 +94,7 @@ interface avalon_interface; logic [31:0] addr; logic read; logic write; + logic lock; logic [3:0] byteenable; logic [31:0] readdata; logic [31:0] writedata; @@ -100,13 +103,13 @@ interface avalon_interface; logic writeresponsevalid; modport master (input readdata, waitrequest, readdatavalid, writeresponsevalid, - output addr, read, write, byteenable, writedata); + output addr, read, write, lock, byteenable, writedata); modport slave (output readdata, waitrequest, readdatavalid, writeresponsevalid, - input addr, read, write, byteenable, writedata); + input addr, read, write, lock, byteenable, writedata); `ifdef __CVA5_FORMAL__ modport formal (input readdata, waitrequest, readdatavalid, writeresponsevalid, - addr, read, write, byteenable, writedata); + addr, read, write, lock, byteenable, writedata); `endif endinterface diff --git a/core/types_and_interfaces/internal_interfaces.sv b/core/types_and_interfaces/internal_interfaces.sv old mode 100755 new mode 100644 index 07aff104..041b40b1 --- a/core/types_and_interfaces/internal_interfaces.sv +++ b/core/types_and_interfaces/internal_interfaces.sv @@ -98,14 +98,15 @@ interface exception_interface; import cva5_types::*; logic valid; - logic ack; + logic possible; exception_code_t code; - id_t id; logic [31:0] tval; + logic [31:0] pc; + logic discard; - modport unit (output valid, code, id, tval, input ack); - modport econtrol (input valid, code, id, tval, output ack); + modport unit (output valid, possible, code, tval, pc, discard); + modport econtrol (input valid, possible, code, tval, pc, discard); endinterface interface fifo_interface #(parameter type DATA_TYPE = logic); @@ -122,6 +123,8 @@ interface fifo_interface #(parameter type DATA_TYPE = logic); endinterface interface mmu_interface; + import csr_types::*; + //From TLB logic request; logic execute; @@ -130,6 +133,8 @@ interface mmu_interface; //TLB response logic write_entry; + logic superpage; + pte_perms_t 
perms; logic [19:0] upper_physical_address; logic is_fault; @@ -137,10 +142,10 @@ interface mmu_interface; logic [21:0] satp_ppn; logic mxr; //Make eXecutable Readable logic sum; //permit Supervisor User Memory access - logic [1:0] privilege; + privilege_t privilege; - modport mmu (input virtual_address, request, execute, rnw, satp_ppn, mxr, sum, privilege, output write_entry, upper_physical_address, is_fault); - modport tlb (input write_entry, upper_physical_address, is_fault, output request, virtual_address, execute, rnw); + modport mmu (input virtual_address, request, execute, rnw, satp_ppn, mxr, sum, privilege, output write_entry, superpage, perms, upper_physical_address, is_fault); + modport tlb (input write_entry, superpage, perms, upper_physical_address, is_fault, mxr, sum, privilege, output request, virtual_address, execute, rnw); modport csr (output satp_ppn, mxr, sum, privilege); endinterface @@ -154,18 +159,17 @@ interface tlb_interface; //TLB Inputs logic [31:0] virtual_address; logic rnw; - logic execute; //TLB Outputs logic is_fault; logic [31:0] physical_address; modport tlb ( - input new_request, virtual_address, rnw, execute, + input new_request, virtual_address, rnw, output ready, done, is_fault, physical_address ); modport requester ( - output new_request, virtual_address, rnw, execute, + output new_request, virtual_address, rnw, input ready, done, is_fault, physical_address ); endinterface @@ -181,6 +185,10 @@ interface load_store_queue_interface; logic load_pop; logic store_pop; + //Address translation + logic addr_push; + lsq_addr_entry_t addr_data_in; + //LSQ outputs data_access_shared_inputs_t load_data_out; data_access_shared_inputs_t store_data_out; @@ -193,15 +201,14 @@ interface load_store_queue_interface; //LSQ status logic sq_empty; logic empty; - logic no_released_stores_pending; modport queue ( - input data_in, potential_push, push, load_pop, store_pop, - output full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, 
empty, no_released_stores_pending + input data_in, potential_push, push, addr_push, addr_data_in, load_pop, store_pop, + output full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty ); modport ls ( - output data_in, potential_push, push, load_pop, store_pop, - input full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty, no_released_stores_pending + output data_in, potential_push, push, addr_push, addr_data_in, load_pop, store_pop, + input full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty ); endinterface @@ -221,15 +228,14 @@ interface store_queue_interface; //SQ status logic empty; - logic no_released_stores_pending; modport queue ( input data_in, push, pop, - output full, data_out, valid, empty, no_released_stores_pending + output full, data_out, valid, empty ); modport ls ( output data_in, push, pop, - input full, data_out, valid, empty, no_released_stores_pending + input full, data_out, valid, empty ); endinterface @@ -258,23 +264,14 @@ interface cache_functions_interface #(parameter int TAG_W = 8, parameter int LIN endinterface -interface addr_utils_interface #(parameter bit [31:0] BASE_ADDR = 32'h00000000, parameter bit [31:0] UPPER_BOUND = 32'hFFFFFFFF); - //Based on the lower and upper address ranges, - //find the number of bits needed to uniquely identify this memory range. - //Assumption: address range is aligned to its size - function int unsigned bit_range (); - for(int i=0; i < 32; i++) begin - if (BASE_ADDR[i] == UPPER_BOUND[i]) - return (32 - i); - end - return 0; - endfunction - - localparam int unsigned BIT_RANGE = bit_range(); - - /* verilator lint_off SELRANGE */ +interface addr_utils_interface #(parameter logic [31:0] BASE_ADDR = 32'h00000000, parameter logic [31:0] UPPER_BOUND = 32'hFFFFFFFF); + //The range should be aligned for performance function address_range_check (input logic[31:0] addr); - return (BIT_RANGE == 0) ? 
1 : (addr[31:32-BIT_RANGE] == BASE_ADDR[31:32-BIT_RANGE]); + /* verilator lint_off UNSIGNED */ + /* verilator lint_off CMPCONST */ + return addr >= BASE_ADDR & addr <= UPPER_BOUND; + /* verilator lint_on UNSIGNED */ + /* verilator lint_on CMPCONST */ endfunction endinterface @@ -406,3 +403,30 @@ interface fp_intermediate_wb_interface; input id, done, rd, expo_overflow, fflags, rm, hidden, grs, clz, carry, safe, subnormal, right_shift, right_shift_amt, ignore_max_expo, d2s ); endinterface + +interface amo_interface; + import riscv_types::*; + + //Atomic Load Reserved and Store Conditional + logic set_reservation; + logic clear_reservation; + logic[31:0] reservation; + logic reservation_valid; + + //Atomic Read-Modify-Write + logic rmw_valid; + amo_t op; + logic[31:0] rs1; + logic[31:0] rs2; + logic[31:0] rd; + + modport subunit ( + input reservation_valid, rd, + output set_reservation, clear_reservation, reservation, rmw_valid, op, rs1, rs2 + ); + modport amo_unit ( + output reservation_valid, rd, + input set_reservation, clear_reservation, reservation, rmw_valid, op, rs1, rs2 + ); + +endinterface \ No newline at end of file diff --git a/core/types_and_interfaces/opcodes.sv b/core/types_and_interfaces/opcodes.sv index 606f8a7d..b5f8f933 100644 --- a/core/types_and_interfaces/opcodes.sv +++ b/core/types_and_interfaces/opcodes.sv @@ -148,8 +148,8 @@ package opcodes; localparam [31:0] AMO_MINU = 32'b11000????????????010?????0101111; localparam [31:0] AMO_MAXU = 32'b11100????????????010?????0101111; localparam [31:0] AMO_SWAP = 32'b00001????????????010?????0101111; - localparam [31:0] LR = 32'b00010??00000?????010?????0101111; - localparam [31:0] SC = 32'b00011????????????010?????0101111; + localparam [31:0] AMO_LR = 32'b00010??00000?????010?????0101111; + localparam [31:0] AMO_SC = 32'b00011????????????010?????0101111; //Machine/Supervisor localparam [31:0] SRET = 32'b00010000001000000000000001110011; diff --git a/core/types_and_interfaces/riscv_types.sv 
b/core/types_and_interfaces/riscv_types.sv index 53c41610..fd6d4a57 100644 --- a/core/types_and_interfaces/riscv_types.sv +++ b/core/types_and_interfaces/riscv_types.sv @@ -113,15 +113,23 @@ package riscv_types; URET_imm = 12'b000000000010, SRET_imm = 12'b000100000010, MRET_imm = 12'b001100000010, - SFENCE_imm = 12'b0001001????? + SFENCE_imm = 12'b0001001?????, + WFI_imm = 12'b000100000101 } imm_sys_t; + //Other registers exist but are not supported typedef enum logic [11:0] { + //Floating Point + FFLAGS = 12'h001, + FRM = 12'h002, + FCSR = 12'h003, + //Machine info MVENDORID = 12'hF11, MARCHID = 12'hF12, MIMPID = 12'hF13, MHARTID = 12'hF14, + MCONFIGPTR = 12'hF15, //Machine trap setup MSTATUS = 12'h300, MISA = 12'h301, @@ -130,55 +138,79 @@ package riscv_types; MIE = 12'h304, MTVEC = 12'h305, MCOUNTEREN = 12'h306, + MSTATUSH = 12'h310, + MEDELEGH = 12'h312, //Machine trap handling MSCRATCH = 12'h340, MEPC = 12'h341, MCAUSE = 12'h342, MTVAL = 12'h343, MIP = 12'h344, - - + //Machine configuration + MENVCFG = 12'h30A, + MENVCFGH = 12'h31A, + //No optional mseccfg/mseccfgh + //No PMP //Machine Counters MCYCLE = 12'hB00, MINSTRET = 12'hB02, + MHPMCOUNTER3 = 12'hB03, + MHPMCOUNTER31 = 12'hB1F, MCYCLEH = 12'hB80, MINSTRETH = 12'hB82, + MHPMCOUNTER3H = 12'hB83, + MHPMCOUNTER31H = 12'hB9F, + //Machine counter setup + MCOUNTINHIBIT = 12'h320, + MHPMEVENT3 = 12'h323, + MHPMEVENT31 = 12'h33F, + MHPMEVENT3H = 12'h723, + MHPMEVENT31H = 12'h73F, + //Machine state enable + MSTATEEN0 = 12'h30C, + MSTATEEN1 = 12'h30D, + MSTATEEN2 = 12'h30E, + MSTATEEN3 = 12'h30F, + MSTATEEN0H = 12'h31C, + MSTATEEN1H = 12'h31D, + MSTATEEN2H = 12'h31E, + MSTATEEN3H = 12'h31F, //Supervisor regs //Supervisor Trap Setup SSTATUS = 12'h100, - SEDELEG = 12'h102, - SIDELEG = 12'h103, SIE = 12'h104, STVEC = 12'h105, SCOUNTEREN = 12'h106, + //Supervisor configuration + SENVCFG = 12'h10A, //Supervisor trap handling SSCRATCH = 12'h140, SEPC = 12'h141, SCAUSE = 12'h142, STVAL = 12'h143, SIP = 12'h144, - + 
STIMECMP = 12'h14D, + STIMECMPH = 12'h15D, //Supervisor address translation and protection SATP = 12'h180, + //Supervisor state enable + SSTATEEN0 = 12'h10C, + SSTATEEN1 = 12'h10D, + SSTATEEN2 = 12'h10E, + SSTATEEN3 = 12'h10F, - //User regs - //USER Floating Point - FFLAGS = 12'h001, - FRM = 12'h002, - FCSR = 12'h003, - //User Counter Timers + //Timers and counters CYCLE = 12'hC00, TIME = 12'hC01, INSTRET = 12'hC02, + HPMCOUNTER3 = 12'hC03, + HPMCOUNTER31 = 12'hC1F, CYCLEH = 12'hC80, TIMEH = 12'hC81, INSTRETH = 12'hC82, - - //Debug regs - DCSR = 12'h7B0, - DPC = 12'h7B1, - DSCRATCH = 12'h7B2 + HPMCOUNTER3H = 12'hC83, + HPMCOUNTER31H = 12'hC9F } csr_reg_addr_t; typedef enum logic [2:0] { @@ -198,11 +230,6 @@ package riscv_types; CSR_RC = 2'b11 } csr_op_t; - typedef enum logic [4:0] { - BARE = 5'd0, - SV32 = 5'd8 - } vm_t; - localparam ASIDLEN = 9;//pid typedef enum logic [ECODE_W-1:0] { @@ -221,7 +248,9 @@ package riscv_types; INST_PAGE_FAULT = 5'd12, LOAD_PAGE_FAULT = 5'd13, //reserved - STORE_OR_AMO_PAGE_FAULT = 5'd15 + STORE_OR_AMO_PAGE_FAULT = 5'd15, + SOFTWARE_CHECK = 5'd18, + HARDWARE_ERROR = 5'd19 //reserved } exception_code_t; @@ -238,7 +267,9 @@ package riscv_types; //RESERVED S_EXTERNAL_INTERRUPT = 5'd9, //RESERVED - M_EXTERNAL_INTERRUPT = 5'd11 + M_EXTERNAL_INTERRUPT = 5'd11, + //RESERVED + LOCAL_COUNT_OVERFLOW_INTERRUPT = 5'd13 } interrupt_code_t; typedef enum bit [4:0] { @@ -255,6 +286,12 @@ package riscv_types; AMO_MAXU_FN5 = 5'b11100 } amo_t; + typedef enum bit [1:0] { + INVAL = 2'b00, + CLEAN = 2'b01, + FLUSH = 2'b10 + } cbo_t; + //Assembly register definitions for simulation purposes typedef struct packed{ logic [XLEN-1:0] zero; diff --git a/debug_module/debug_module.sv b/debug_module/debug_module.sv index 66b8c8b4..49eaa6e4 100644 --- a/debug_module/debug_module.sv +++ b/debug_module/debug_module.sv @@ -624,4 +624,4 @@ for (index = 0; index < NUM_CPUS; index=index+1) begin end end endgenerate -endmodule \ No newline at end of file +endmodule diff 
--git a/examples/litex/l1_to_wishbone.sv b/examples/litex/l1_to_wishbone.sv index d355c9ba..abdb60a4 100644 --- a/examples/litex/l1_to_wishbone.sv +++ b/examples/litex/l1_to_wishbone.sv @@ -67,7 +67,7 @@ module l1_to_wishbone assign data_fifo.pop = wishbone.we & wishbone.ack; assign data_fifo.data_in = '{ data : cpu.wr_data, - be : cpu_wr_data_be + be : cpu.wr_data_be }; assign data_request = data_fifo.data_out; diff --git a/examples/litex/litex_wrapper.sv b/examples/litex/litex_wrapper.sv index a775895b..cb9c4dac 100755 --- a/examples/litex/litex_wrapper.sv +++ b/examples/litex/litex_wrapper.sv @@ -1,5 +1,5 @@ /* - * Copyright © 2022 Eric Matthews, Lesley Shannon + * Copyright © 2022 Eric Matthews, Lesley Shannon * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,42 +24,22 @@ module litex_wrapper import cva5_config::*; import cva5_types::*; import l2_config_and_types::*; + import riscv_types::*; #( - parameter LITEX_VARIANT = 0, parameter bit [31:0] RESET_VEC = 0, parameter bit [31:0] NON_CACHABLE_L = 32'h80000000, - parameter bit [31:0] NON_CACHABLE_H =32'hFFFFFFFF + parameter bit [31:0] NON_CACHABLE_H = 32'hFFFFFFFF ) ( input logic clk, input logic rst, - input logic [15:0] litex_interrupt, - - output logic [29:0] ibus_adr, - output logic [31:0] ibus_dat_w, - output logic [3:0] ibus_sel, - output logic ibus_cyc, - output logic ibus_stb, - output logic ibus_we, - output logic ibus_cti, - output logic ibus_bte, - input logic [31:0] ibus_dat_r, - input logic ibus_ack, - input logic ibus_err, - - output logic [29:0] dbus_adr, - output logic [31:0] dbus_dat_w, - output logic [3:0] dbus_sel, - output logic dbus_cyc, - output logic dbus_stb, - output logic dbus_we, - output logic dbus_cti, - output logic dbus_bte, - input logic [31:0] dbus_dat_r, - input logic dbus_ack, - input logic dbus_err, + input logic cpu_m_interrupt, + input logic cpu_s_interrupt, + input logic 
cpu_software_in, + input logic cpu_timer_in, + input logic [63:0] mtime, output logic [29:0] idbus_adr, output logic [31:0] idbus_dat_w, @@ -74,122 +54,6 @@ module litex_wrapper input logic idbus_err ); - - localparam wb_group_config_t MINIMAL_WB_GROUP_CONFIG = '{ - 0 : '{0: ALU_ID, default : NON_WRITEBACK_ID}, - 1 : '{0: LS_ID, 1: CSR_ID, default : NON_WRITEBACK_ID}, - default : '{default : NON_WRITEBACK_ID} - }; - - localparam cpu_config_t MINIMAL_CONFIG = '{ - //ISA options - INCLUDE_M_MODE : 1, - INCLUDE_S_MODE : 0, - INCLUDE_U_MODE : 0, - INCLUDE_UNIT : '{ - ALU : 1, - LS : 1, - MUL : 0, - DIV : 0, - CSR : 1, - CUSTOM : 0, - BR : 1, - IEC : 1 - }, - INCLUDE_IFENCE : 0, - INCLUDE_AMO : 0, - //CSR constants - CSRS : '{ - MACHINE_IMPLEMENTATION_ID : 0, - CPU_ID : 0, - RESET_VEC : RESET_VEC, - RESET_MTVEC : 32'h00000000, - NON_STANDARD_OPTIONS : '{ - COUNTER_W : 33, - MCYCLE_WRITEABLE : 0, - MINSTR_WRITEABLE : 0, - MTVEC_WRITEABLE : 1, - INCLUDE_MSCRATCH : 0, - INCLUDE_MCAUSE : 1, - INCLUDE_MTVAL : 1 - } - }, - //Memory Options - SQ_DEPTH : 2, - INCLUDE_FORWARDING_TO_STORES : 0, - INCLUDE_ICACHE : 0, - ICACHE_ADDR : '{ - L: 32'h40000000, - H: 32'h4FFFFFFF - }, - ICACHE : '{ - LINES : 512, - LINE_W : 4, - WAYS : 2, - USE_EXTERNAL_INVALIDATIONS : 0, - USE_NON_CACHEABLE : 0, - NON_CACHEABLE : '{ - L: 32'h00000000, - H: 32'h00000000 - } - }, - ITLB : '{ - WAYS : 2, - DEPTH : 64 - }, - INCLUDE_DCACHE : 0, - DCACHE_ADDR : '{ - L: 32'h40000000, - H: 32'h4FFFFFFF - }, - DCACHE : '{ - LINES : 512, - LINE_W : 4, - WAYS : 2, - USE_EXTERNAL_INVALIDATIONS : 0, - USE_NON_CACHEABLE : 0, - NON_CACHEABLE : '{ - L: 32'h00000000, - H: 32'h00000000 - } - }, - DTLB : '{ - WAYS : 2, - DEPTH : 64 - }, - INCLUDE_ILOCAL_MEM : 0, - ILOCAL_MEM_ADDR : '{ - L : 32'h80000000, - H : 32'h8FFFFFFF - }, - INCLUDE_DLOCAL_MEM : 0, - DLOCAL_MEM_ADDR : '{ - L : 32'h80000000, - H : 32'h8FFFFFFF - }, - INCLUDE_IBUS : 1, - IBUS_ADDR : '{ - L : 32'h00000000, - H : 32'hFFFFFFFF - }, - 
INCLUDE_PERIPHERAL_BUS : 1, - PERIPHERAL_BUS_ADDR : '{ - L : 32'h00000000, - H : 32'hFFFFFFFF - }, - PERIPHERAL_BUS_TYPE : WISHBONE_BUS, - //Branch Predictor Options - INCLUDE_BRANCH_PREDICTOR : 0, - BP : '{ - WAYS : 2, - ENTRIES : 512, - RAS_ENTRIES : 8 - }, - //Writeback Options - NUM_WB_GROUPS : 2, - WB_GROUP : MINIMAL_WB_GROUP_CONFIG - }; - localparam wb_group_config_t STANDARD_WB_GROUP_CONFIG = '{ 0 : '{0: ALU_ID, default : NON_WRITEBACK_ID}, 1 : '{0: LS_ID, default : NON_WRITEBACK_ID}, @@ -199,48 +63,49 @@ module litex_wrapper localparam cpu_config_t STANDARD_CONFIG = '{ //ISA options - INCLUDE_M_MODE : 1, - INCLUDE_S_MODE : 0, - INCLUDE_U_MODE : 0, + MODES : MSU, INCLUDE_UNIT : '{ ALU : 1, LS : 1, MUL : 1, DIV : 1, CSR : 1, + FPU : 0, CUSTOM : 0, BR : 1, - IEC : 1 + GC : 1 }, - INCLUDE_IFENCE : 0, - INCLUDE_AMO : 0, + INCLUDE_IFENCE : 1, + INCLUDE_AMO : 1, + INCLUDE_CBO : 0, + //CSR constants CSRS : '{ MACHINE_IMPLEMENTATION_ID : 0, CPU_ID : 0, RESET_VEC : RESET_VEC, - RESET_MTVEC : 32'h00000000, - NON_STANDARD_OPTIONS : '{ - COUNTER_W : 33, - MCYCLE_WRITEABLE : 0, - MINSTR_WRITEABLE : 0, - MTVEC_WRITEABLE : 1, - INCLUDE_MSCRATCH : 0, - INCLUDE_MCAUSE : 1, - INCLUDE_MTVAL : 1 - } + RESET_TVEC : 32'h00000000, + MCONFIGPTR : '0, + INCLUDE_ZICNTR : 1, + INCLUDE_ZIHPM : 1, + INCLUDE_SSTC : 1, + INCLUDE_SMSTATEEN : 1 }, //Memory Options SQ_DEPTH : 4, INCLUDE_FORWARDING_TO_STORES : 1, + AMO_UNIT : '{ + LR_WAIT : 8, + RESERVATION_WORDS : 8 + }, INCLUDE_ICACHE : 1, ICACHE_ADDR : '{ L : 32'h00000000, - H : 32'hFFFFFFFF + H : 32'h7FFFFFFF }, ICACHE : '{ LINES : 512, - LINE_W : 4, + LINE_W : 8, WAYS : 2, USE_EXTERNAL_INVALIDATIONS : 0, USE_NON_CACHEABLE : 0, @@ -260,7 +125,7 @@ module litex_wrapper }, DCACHE : '{ LINES : 512, - LINE_W : 4, + LINE_W : 8, WAYS : 2, USE_EXTERNAL_INVALIDATIONS : 0, USE_NON_CACHEABLE : 1, @@ -306,23 +171,15 @@ module litex_wrapper WB_GROUP : STANDARD_WB_GROUP_CONFIG }; - function cpu_config_t config_select (input integer variant); - case 
(variant) - 0 : config_select = MINIMAL_CONFIG; - 1 : config_select = STANDARD_CONFIG; - default : config_select = STANDARD_CONFIG; - endcase - endfunction - - localparam cpu_config_t LITEX_CONFIG = config_select(LITEX_VARIANT); - - //Unused interfaces axi_interface m_axi(); avalon_interface m_avalon(); local_memory_interface instruction_bram(); local_memory_interface data_bram(); interrupt_t s_interrupt; + assign s_interrupt.software = 0; + assign s_interrupt.timer = cpu_timer_in; + assign s_interrupt.external = cpu_s_interrupt; //L2 to Wishbone l2_requester_interface l2(); @@ -334,50 +191,24 @@ module litex_wrapper //Timer and External interrupts interrupt_t m_interrupt; - assign m_interrupt.software = 0; - assign m_interrupt.timer = litex_interrupt[1]; - assign m_interrupt.external = litex_interrupt[0]; - - cva5 #(.CONFIG(LITEX_CONFIG)) cpu(.*); - - generate if (LITEX_VARIANT != 0) begin : l1_arb_gen - l1_to_wishbone arb(.*, .cpu(l2), .wishbone(idwishbone)); - assign idbus_adr = idwishbone.adr; - assign idbus_dat_w = idwishbone.dat_w; - assign idbus_sel = idwishbone.sel; - assign idbus_cyc = idwishbone.cyc; - assign idbus_stb = idwishbone.stb; - assign idbus_we = idwishbone.we; - assign idbus_cti = idwishbone.cti; - assign idbus_bte = idwishbone.bte; - assign idwishbone.dat_r = idbus_dat_r; - assign idwishbone.ack = idbus_ack; - assign idwishbone.err = idbus_err; - end else begin - assign ibus_adr = iwishbone.adr; - assign ibus_dat_w = iwishbone.dat_w; - assign ibus_sel = iwishbone.sel; - assign ibus_cyc = iwishbone.cyc; - assign ibus_stb = iwishbone.stb; - assign ibus_we = iwishbone.we; - assign ibus_cti = iwishbone.cti; - assign ibus_bte = iwishbone.bte; - assign iwishbone.dat_r = ibus_dat_r; - assign iwishbone.ack = ibus_ack; - assign iwishbone.err = ibus_err; - - assign dbus_adr = dwishbone.adr; - assign dbus_dat_w = dwishbone.dat_w; - assign dbus_sel = dwishbone.sel; - assign dbus_cyc = dwishbone.cyc; - assign dbus_stb = dwishbone.stb; - assign dbus_we = 
dwishbone.we; - assign dbus_cti = dwishbone.cti; - assign dbus_bte = dwishbone.bte; - assign dwishbone.dat_r = dbus_dat_r; - assign dwishbone.ack = dbus_ack; - assign dwishbone.err = dbus_err; - end endgenerate - + assign m_interrupt.software = cpu_software_in; + assign m_interrupt.timer = 0; + //assign m_interrupt.timer = cpu_timer_in; + assign m_interrupt.external = cpu_m_interrupt; + + cva5 #(.CONFIG(STANDARD_CONFIG)) cpu(.*); + + l1_to_wishbone arb(.*, .cpu(l2), .wishbone(idwishbone)); + assign idbus_adr = idwishbone.adr; + assign idbus_dat_w = idwishbone.dat_w; + assign idbus_sel = idwishbone.sel; + assign idbus_cyc = idwishbone.cyc; + assign idbus_stb = idwishbone.stb; + assign idbus_we = idwishbone.we; + assign idbus_cti = idwishbone.cti; + assign idbus_bte = idwishbone.bte; + assign idwishbone.dat_r = idbus_dat_r; + assign idwishbone.ack = idbus_ack; + assign idwishbone.err = idbus_err; endmodule diff --git a/examples/nexys/nexys_config.sv b/examples/nexys/nexys_config.sv index b99494fd..a9658ba3 100644 --- a/examples/nexys/nexys_config.sv +++ b/examples/nexys/nexys_config.sv @@ -34,19 +34,17 @@ package nexys_config; localparam cpu_config_t NEXYS_CONFIG = '{ //ISA options - INCLUDE_M_MODE : 1, - INCLUDE_S_MODE : 0, - INCLUDE_U_MODE : 0, + MODES : MSU, INCLUDE_UNIT : '{ ALU : 1, LS : 1, MUL : 1, DIV : 1, CSR : 1, - FPU : 0, + FPU : 1, CUSTOM : 0, BR : 1, - IEC : 1 + GC : 1 }, INCLUDE_IFENCE : 0, INCLUDE_AMO : 0, @@ -57,20 +55,20 @@ package nexys_config; MACHINE_IMPLEMENTATION_ID : 0, CPU_ID : 0, RESET_VEC : 32'h80000000, - RESET_MTVEC : 32'h80000000, - NON_STANDARD_OPTIONS : '{ - COUNTER_W : 33, - MCYCLE_WRITEABLE : 0, - MINSTR_WRITEABLE : 0, - MTVEC_WRITEABLE : 1, - INCLUDE_MSCRATCH : 0, - INCLUDE_MCAUSE : 1, - INCLUDE_MTVAL : 1 - } + RESET_TVEC : 32'h00000000, + MCONFIGPTR : '0, + INCLUDE_ZICNTR : 1, + INCLUDE_ZIHPM : 1, + INCLUDE_SSTC : 1, + INCLUDE_SMSTATEEN : 1 }, //Memory Options SQ_DEPTH : 8, INCLUDE_FORWARDING_TO_STORES : 1, + AMO_UNIT : '{ + LR_WAIT 
: 32, + RESERVATION_WORDS : 8 //Must be the same size as the DCACHE line width + }, INCLUDE_ICACHE : 1, ICACHE_ADDR : '{ L : 32'h80000000, @@ -126,10 +124,10 @@ package nexys_config; L : 32'h00000000, H : 32'hFFFFFFFF }, - INCLUDE_PERIPHERAL_BUS : 0, + INCLUDE_PERIPHERAL_BUS : 1, PERIPHERAL_BUS_ADDR : '{ - L : 32'h00000000, - H : 32'hFFFFFFFF + L : 32'h60000000, + H : 32'h6FFFFFFF }, PERIPHERAL_BUS_TYPE : AXI_BUS, //Branch Predictor Options diff --git a/examples/nexys/nexys_sim.sv b/examples/nexys/nexys_sim.sv index 10cc8c16..63d7b26e 100644 --- a/examples/nexys/nexys_sim.sv +++ b/examples/nexys/nexys_sim.sv @@ -102,13 +102,9 @@ module cva5_sim output logic store_queue_empty ); - parameter SCRATCH_MEM_KB = 128; - parameter MEM_LINES = (SCRATCH_MEM_KB*1024)/4; - parameter UART_ADDR = 32'h88001000; - parameter UART_ADDR_LINE_STATUS = 32'h88001014; - interrupt_t s_interrupt; interrupt_t m_interrupt; + logic[63:0] mtime; assign s_interrupt = '{default: 0}; assign m_interrupt = '{default: 0}; @@ -139,14 +135,50 @@ module cva5_sim l1_to_axi arb(.*, .cpu(l2), .axi(axi)); cva5 #(.CONFIG(NEXYS_CONFIG)) cpu(.*); - initial begin - write_uart = 0; - uart_byte = 0; - end //Capture writes to UART always_ff @(posedge clk) begin - write_uart <= (axi.wvalid && axi.wready && axi.awaddr == UART_ADDR); - uart_byte <= axi.wdata[7:0]; + if (rst) begin + m_axi.awready <= 1; + m_axi.wready <= 0; + m_axi.bvalid <= 0; + write_uart <= 0; + end + else begin + write_uart <= 0; + if (m_axi.awvalid & m_axi.awready) begin + m_axi.awready <= 0; + m_axi.wready <= 1; + end + else if (m_axi.wvalid & m_axi.wready) begin + m_axi.wready <= 0; + m_axi.bvalid <= 1; + write_uart <= 1; + end + else if (m_axi.bvalid & m_axi.bready) begin + m_axi.bvalid <= 0; + m_axi.awready <= 1; + end + end + uart_byte <= m_axi.wdata[7:0]; + end + + //Simulate UART read response + assign m_axi.rdata = 32'hFFFFFF21; + always_ff @(posedge clk) begin + if (rst) begin + m_axi.arready <= 1; + m_axi.rvalid <= 0; + end + else 
begin + if (m_axi.arvalid & m_axi.arready) begin + m_axi.arready <= 0; + m_axi.rvalid <= 1; + end + else if (m_axi.rvalid & m_axi.rready) begin + m_axi.rvalid <= 0; + m_axi.arready <= 1; + end + end end //////////////////////////////////////////////////// @@ -261,9 +293,9 @@ module cva5_sim end endgenerate generate if (NEXYS_CONFIG.INCLUDE_DCACHE) begin - assign dcache_hit = `DCACHE_P.load_hit; - assign dcache_miss = `DCACHE_P.line_complete; - assign darb_stall = cpu.l1_request[L1_DCACHE_ID].request & ~cpu.l1_request[L1_DCACHE_ID].ack; + // assign dcache_hit = `DCACHE_P.load_hit; + // assign dcache_miss = `DCACHE_P.line_complete; + // assign darb_stall = cpu.l1_request[L1_DCACHE_ID].request & ~cpu.l1_request[L1_DCACHE_ID].ack; end endgenerate logic [MAX_NUM_UNITS-1:0] unit_ready; @@ -414,7 +446,7 @@ module cva5_sim assign retire_ports_valid[i] = cpu.retire_port_valid[i]; end endgenerate - assign store_queue_empty = cpu.load_store_status.sq_empty; + assign store_queue_empty = ~cpu.load_store_status.outstanding_store; //////////////////////////////////////////////////// //Assertion Binding diff --git a/examples/nexys/nexys_wrapper.sv b/examples/nexys/nexys_wrapper.sv index 4ad4e545..0da6026a 100644 --- a/examples/nexys/nexys_wrapper.sv +++ b/examples/nexys/nexys_wrapper.sv @@ -139,4 +139,3 @@ module nexys_wrapper cva5 #(.CONFIG(NEXYS_CONFIG)) cpu(.rst(rst_r2), .*); endmodule - diff --git a/examples/zedboard/README.md b/examples/zedboard/README.md old mode 100755 new mode 100644 diff --git a/examples/zedboard/arm.tcl b/examples/zedboard/arm.tcl old mode 100755 new mode 100644 diff --git a/examples/zedboard/cva5.png b/examples/zedboard/cva5.png old mode 100755 new mode 100644 diff --git a/examples/zedboard/cva5_small.png b/examples/zedboard/cva5_small.png old mode 100755 new mode 100644 diff --git a/examples/zedboard/cva5_wrapper.sv b/examples/zedboard/cva5_wrapper.sv old mode 100755 new mode 100644 index 654384f2..a5f8f684 --- a/examples/zedboard/cva5_wrapper.sv +++ 
b/examples/zedboard/cva5_wrapper.sv @@ -236,7 +236,7 @@ module cva5_wrapper ( //design_2 infra(.*); generate - if (EXAMPLE_CONFIG.INCLUDE_S_MODE || EXAMPLE_CONFIG.INCLUDE_ICACHE || EXAMPLE_CONFIG.INCLUDE_DCACHE) begin + if (EXAMPLE_CONFIG.MODES == MSU || EXAMPLE_CONFIG.INCLUDE_ICACHE || EXAMPLE_CONFIG.INCLUDE_DCACHE) begin l2_arbiter l2_arb (.*, .request(l2)); axi_to_arb l2_to_mem (.*, .l2(mem)); end @@ -258,4 +258,4 @@ module cva5_wrapper ( .data_out_b(data_bram.data_out) ); -endmodule \ No newline at end of file +endmodule diff --git a/examples/zedboard/simulator_output_example.png b/examples/zedboard/simulator_output_example.png old mode 100755 new mode 100644 diff --git a/examples/zedboard/system.png b/examples/zedboard/system.png old mode 100755 new mode 100644 diff --git a/examples/zedboard/system_periperhals.tcl b/examples/zedboard/system_periperhals.tcl old mode 100755 new mode 100644 diff --git a/formal/interfaces/axi4_basic_props.sv b/formal/interfaces/axi4_basic_props.sv old mode 100755 new mode 100644 diff --git a/formal/models/cva5_fbm.sv b/formal/models/cva5_fbm.sv old mode 100755 new mode 100644 diff --git a/formal/models/cva5_formal_wrapper.sv b/formal/models/cva5_formal_wrapper.sv old mode 100755 new mode 100644 diff --git a/l2_arbiter/axi_to_arb.sv b/l2_arbiter/axi_to_arb.sv old mode 100755 new mode 100644 index 1c7ef02f..0224fd19 --- a/l2_arbiter/axi_to_arb.sv +++ b/l2_arbiter/axi_to_arb.sv @@ -238,4 +238,3 @@ module axi_to_arb assign l2.rd_data_valid = axi_rvalid; endmodule - diff --git a/l2_arbiter/l2_arbiter.sv b/l2_arbiter/l2_arbiter.sv old mode 100755 new mode 100644 diff --git a/l2_arbiter/l2_config_and_types.sv b/l2_arbiter/l2_config_and_types.sv old mode 100755 new mode 100644 diff --git a/l2_arbiter/l2_fifo.sv b/l2_arbiter/l2_fifo.sv old mode 100755 new mode 100644 diff --git a/l2_arbiter/l2_interfaces.sv b/l2_arbiter/l2_interfaces.sv old mode 100755 new mode 100644 diff --git a/l2_arbiter/l2_reservation_logic.sv 
b/l2_arbiter/l2_reservation_logic.sv old mode 100755 new mode 100644 diff --git a/l2_arbiter/l2_round_robin.sv b/l2_arbiter/l2_round_robin.sv old mode 100755 new mode 100644 diff --git a/test_benches/axi_mem_sim.sv b/test_benches/axi_mem_sim.sv old mode 100755 new mode 100644 diff --git a/test_benches/cva5_tb.sv b/test_benches/cva5_tb.sv old mode 100755 new mode 100644 diff --git a/test_benches/cva5_tb.wcfg b/test_benches/cva5_tb.wcfg old mode 100755 new mode 100644 diff --git a/test_benches/sim_mem.sv b/test_benches/sim_mem.sv old mode 100755 new mode 100644 diff --git a/test_benches/unit_test_benches/alu_unit_tb.sv b/test_benches/unit_test_benches/alu_unit_tb.sv old mode 100755 new mode 100644 diff --git a/test_benches/unit_test_benches/div_unit_tb.sv b/test_benches/unit_test_benches/div_unit_tb.sv old mode 100755 new mode 100644 diff --git a/test_benches/unit_test_benches/mul_unit_tb.sv b/test_benches/unit_test_benches/mul_unit_tb.sv old mode 100755 new mode 100644 diff --git a/test_benches/verilator/CVA5Tracer.cc b/test_benches/verilator/CVA5Tracer.cc index 9a336e1a..74c859d4 100644 --- a/test_benches/verilator/CVA5Tracer.cc +++ b/test_benches/verilator/CVA5Tracer.cc @@ -152,13 +152,15 @@ void CVA5Tracer::start_tracer(const char *trace_file) { } - +uint64_t CVA5Tracer::cycle_count = 0; uint64_t CVA5Tracer::get_cycle_count() { return cycle_count; } CVA5Tracer::CVA5Tracer(std::ifstream& programFile) { + cycle_count = 0; + #ifdef TRACE_ON Verilated::traceEverOn(true); #endif diff --git a/test_benches/verilator/CVA5Tracer.h b/test_benches/verilator/CVA5Tracer.h index e34e6be8..4e51790c 100644 --- a/test_benches/verilator/CVA5Tracer.h +++ b/test_benches/verilator/CVA5Tracer.h @@ -54,7 +54,7 @@ class CVA5Tracer { void set_log_file(std::ofstream* logFile); void set_pc_file(std::ofstream* pcFile); void start_tracer(const char *trace_file); - uint64_t get_cycle_count(); + static uint64_t get_cycle_count(); //DDR Simulation Vcva5_sim *tb; @@ -71,7 +71,7 @@ class 
CVA5Tracer { int reset_length = 64; int stall_limit = 2000; int stall_count = 0; - uint64_t cycle_count = 0; + static uint64_t cycle_count; bool program_complete = false; diff --git a/test_benches/verilator/cva5_sim.cc b/test_benches/verilator/cva5_sim.cc index 978ce8ad..1f61cc33 100644 --- a/test_benches/verilator/cva5_sim.cc +++ b/test_benches/verilator/cva5_sim.cc @@ -12,7 +12,7 @@ CVA5Tracer *cva5Tracer; char* csv_log_name; //For time index on assertions double sc_time_stamp () { - return cva5Tracer->get_cycle_count(); + return CVA5Tracer::get_cycle_count(); } const char* cva5_csv_log_file_name () { diff --git a/test_benches/verilator/cva5_sim.sv b/test_benches/verilator/cva5_sim.sv index df3841de..99840826 100644 --- a/test_benches/verilator/cva5_sim.sv +++ b/test_benches/verilator/cva5_sim.sv @@ -649,7 +649,7 @@ module cva5_sim assign retire_ports_valid[i] = cpu.retire_port_valid[i]; end endgenerate - assign store_queue_empty = cpu.load_store_status.sq_empty; + assign store_queue_empty = ~cpu.load_store_status.outstanding_store; //////////////////////////////////////////////////// //Assertion Binding diff --git a/tools/compile_order b/tools/compile_order index 197f6b4f..58eacd03 100644 --- a/tools/compile_order +++ b/tools/compile_order @@ -14,11 +14,14 @@ local_memory/local_mem.sv core/types_and_interfaces/internal_interfaces.sv core/types_and_interfaces/external_interfaces.sv -core/common_components/lutram_1w_1r.sv -core/common_components/lutram_1w_mr.sv -core/common_components/dual_port_bram.sv +core/common_components/ram/lutram_1w_1r.sv +core/common_components/ram/lutram_1w_mr.sv +core/common_components/ram/sdp_ram.sv +core/common_components/ram/sdp_ram_padded.sv +core/common_components/ram/dual_port_bram.sv core/common_components/set_clr_reg_with_rst.sv core/common_components/one_hot_to_integer.sv +core/common_components/one_hot_mux.sv core/common_components/cycler.sv core/common_components/lfsr.sv core/common_components/cva5_fifo.sv @@ -44,8 +47,8 @@ 
core/memory_sub_units/axi_master.sv core/memory_sub_units/avalon_master.sv core/memory_sub_units/wishbone_master.sv -core/execution_units/load_store_unit/dcache_tag_banks.sv core/execution_units/load_store_unit/amo_alu.sv +core/execution_units/load_store_unit/amo_unit.sv core/execution_units/load_store_unit/dcache.sv core/execution_units/load_store_unit/addr_hash.sv core/execution_units/load_store_unit/store_queue.sv @@ -90,8 +93,10 @@ core/fetch_stage/fetch.sv core/instruction_metadata_and_id_management.sv -core/tlb_lut_ram.sv -core/mmu.sv +core/mmu/perms_check.sv +core/mmu/itlb.sv +core/mmu/dtlb.sv +core/mmu/mmu.sv core/decode_and_issue.sv From e8f35161b8dc54a0eb53e340f5110b5bd4b4942a Mon Sep 17 00:00:00 2001 From: Chris Keilbart Date: Wed, 11 Sep 2024 13:10:33 -0700 Subject: [PATCH 2/4] Fix local memory AMO --- core/memory_sub_units/local_mem_sub_unit.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/memory_sub_units/local_mem_sub_unit.sv b/core/memory_sub_units/local_mem_sub_unit.sv index 0f7739cc..60450603 100644 --- a/core/memory_sub_units/local_mem_sub_unit.sv +++ b/core/memory_sub_units/local_mem_sub_unit.sv @@ -42,7 +42,7 @@ module local_mem_sub_unit logic rmw; logic[31:2] rmw_addr; logic[31:0] rmw_rs2; - logic[31:0] rmw_op; + amo_t rmw_op; logic sc_valid; logic sc_valid_r; From 0d9f727dc83879287647c9119bc5f02fa2001672 Mon Sep 17 00:00:00 2001 From: Chris Keilbart Date: Sun, 22 Sep 2024 14:30:46 -0700 Subject: [PATCH 3/4] Fetch FIFO and ID fix --- core/fetch_stage/fetch.sv | 6 +++--- core/instruction_metadata_and_id_management.sv | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/fetch_stage/fetch.sv b/core/fetch_stage/fetch.sv index f18e04d2..1f466a0a 100755 --- a/core/fetch_stage/fetch.sv +++ b/core/fetch_stage/fetch.sv @@ -150,7 +150,7 @@ module fetch always_ff @(posedge clk) begin if (flush_or_rst) exception_pending <= 0; - else if (tlb.is_fault | (new_mem_request & ~address_valid)) + else if 
((tlb.is_fault & ~fetch_attr_fifo.full) | (new_mem_request & ~address_valid)) exception_pending <= 1; end @@ -180,8 +180,8 @@ module fetch //Issue Control Signals assign flush_or_rst = (rst | gc.fetch_flush | early_branch_flush); - assign new_mem_request = tlb.done & units_ready & (~gc.fetch_hold); - assign pc_id_assigned = new_mem_request | tlb.is_fault; + assign new_mem_request = tlb.done & units_ready & ~gc.fetch_hold & ~fetch_attr_fifo.full; + assign pc_id_assigned = new_mem_request | (tlb.is_fault & ~fetch_attr_fifo.full); ////////////////////////////////////////////// //Subunit Tracking diff --git a/core/instruction_metadata_and_id_management.sv b/core/instruction_metadata_and_id_management.sv index 2d4dfcd2..066bd85a 100644 --- a/core/instruction_metadata_and_id_management.sv +++ b/core/instruction_metadata_and_id_management.sv @@ -225,7 +225,7 @@ module instruction_metadata_and_id_management decode_id <= oldest_pre_issue_id; end else begin - pc_id <= (early_branch_flush ? fetch_id : pc_id) + LOG2_MAX_IDS'(pc_id_assigned); + pc_id <= early_branch_flush ? 
fetch_id + LOG2_MAX_IDS'(fetch_complete) : pc_id + LOG2_MAX_IDS'(pc_id_assigned); fetch_id <= fetch_id + LOG2_MAX_IDS'(fetch_complete); decode_id <= decode_id + LOG2_MAX_IDS'(decode_advance); end From 3a124ae292032bd95f0fe9364719e2b483002d76 Mon Sep 17 00:00:00 2001 From: Chris Keilbart Date: Sun, 22 Sep 2024 19:13:16 -0700 Subject: [PATCH 4/4] Fix one hot mux assertion for branch predictor --- core/common_components/one_hot_mux.sv | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/core/common_components/one_hot_mux.sv b/core/common_components/one_hot_mux.sv index 83c9a7fe..cac3006c 100644 --- a/core/common_components/one_hot_mux.sv +++ b/core/common_components/one_hot_mux.sv @@ -62,8 +62,25 @@ module one_hot_mux //////////////////////////////////////////////////// //Assertions + //Support inputs that aren't one hot as long as they are identical; the flag is AND-accumulated so any selected input that differs from the previous selected one clears it + logic supported_inputs; + logic saw_first; + casted_t queried_input; + always_comb begin + supported_inputs = 1; + saw_first = 0; + queried_input = 'x; + for (int i = 0; i < OPTIONS; i++) begin + if (one_hot[i]) begin + supported_inputs &= ~saw_first | (queried_input == choices_casted[i]); + saw_first = 1; + queried_input = choices_casted[i]; + end + end + end + ohot_assertion: - assert property (@(posedge clk) disable iff (rst) $onehot0(one_hot)) + assert property (@(posedge clk) disable iff (rst) supported_inputs) else $error("Selection mux not one hot"); endmodule