diff --git a/Bender.yml b/Bender.yml index f5a5a21b..6e065cdf 100644 --- a/Bender.yml +++ b/Bender.yml @@ -46,6 +46,7 @@ sources: - src/lfsr_8bit.sv - src/lossy_valid_to_stream.sv - src/mv_filter.sv + - src/mem_multibank_pwrgate.sv - src/onehot_to_bin.sv - src/plru_tree.sv - src/passthrough_stream_fifo.sv @@ -129,6 +130,7 @@ sources: - test/fifo_tb.sv - test/graycode_tb.sv - test/id_queue_tb.sv + - test/mem_multibank_pwrgate_tb.sv - test/passthrough_stream_fifo_tb.sv - test/popcount_tb.sv - test/rr_arb_tree_tb.sv diff --git a/src/mem_multibank_pwrgate.sv b/src/mem_multibank_pwrgate.sv new file mode 100644 index 00000000..419bb817 --- /dev/null +++ b/src/mem_multibank_pwrgate.sv @@ -0,0 +1,199 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Lorenzo Leone + +// ## Description: +// A wrapper for `tc_sram_impl` that instantiates logic banks with retention mode +// or power-off capability. +// This module can be used for power-aware simulations, with control signals driven +// directly by UPF signals. +// +// ## Goal: +// In a memory with multiple banks that support power gating and retention, +// each bank’s addressing must ensure that interleaving remains intact. During retention +// or power-off states, only contiguous addresses should be switched. +// The memory should always appear as a set of contiguous addresses, with no gaps in the +// address mapping. 
+// This module is responsible for managing the correct memory addressing:
+// the upper address bits select the logic bank, the lower bits address the word inside that bank.
+module mem_multibank_pwrgate #(
+  parameter int unsigned NumWords      = 32'd1024,  // Number of Words in data array
+  parameter int unsigned DataWidth     = 32'd128,   // Data signal width
+  parameter int unsigned ByteWidth     = 32'd8,     // Width of a data byte
+  parameter int unsigned NumPorts      = 32'd2,     // Number of read and write ports
+  parameter int unsigned Latency       = 32'd1,     // Latency when the read data is available
+  parameter int unsigned NumLogicBanks = 32'd1,     // Logic banks for Power Management (0 is fatal)
+  parameter              SimInit       = "none",    // Simulation initialization
+  parameter bit          PrintSimCfg   = 1'b0,      // Print configuration
+  parameter              ImplKey       = "none",    // Reference to specific implementation
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
+  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth,  // ceil_div
+  parameter type         addr_t    = logic [AddrWidth-1:0],
+  parameter type         data_t    = logic [DataWidth-1:0],
+  parameter type         be_t      = logic [BeWidth-1:0]
+) (
+  input  logic                      clk_i,        // Clock
+  input  logic                      rst_ni,       // Asynchronous reset active low
+  // input ports
+  input  logic  [     NumPorts-1:0] req_i,        // request
+  input  logic  [     NumPorts-1:0] we_i,         // write enable
+  input  addr_t [     NumPorts-1:0] addr_i,       // request address
+  input  data_t [     NumPorts-1:0] wdata_i,      // write data
+  input  be_t   [     NumPorts-1:0] be_i,         // write byte enable
+  input  logic  [NumLogicBanks-1:0] deepsleep_i,  // deep sleep enable, one bit per logic bank
+  input  logic  [NumLogicBanks-1:0] powergate_i,  // power gate enable, one bit per logic bank
+  // output ports
+  output data_t [     NumPorts-1:0] rdata_o       // read data
+);
+
+  // Implementation type for Power Gating and Deepsleep ports
+  typedef struct packed {
+    logic deepsleep;
+    logic powergate;
+  } impl_in_t;
+  // Bank generation: zero banks is an elaboration error, a single bank maps directly onto one
+  // `tc_sram_impl`, several banks additionally need request demuxing and read-data muxing.
+  if (NumLogicBanks == 32'd0) begin : gen_no_logic_bank
+    $fatal(1, "Error: %d logic banks are not supported", NumLogicBanks);
+  end else if (NumLogicBanks == 32'd1) begin : gen_simple_sram
+    tc_sram_impl #(
+      .NumWords   (NumWords),
+      .DataWidth  (DataWidth),
+      .ByteWidth  (ByteWidth),
+      .NumPorts   (NumPorts),
+      .Latency    (Latency),
+      .SimInit    (SimInit),
+      .PrintSimCfg(PrintSimCfg),
+      .ImplKey    (ImplKey),
+      .impl_in_t  (impl_in_t),
+      .impl_out_t (impl_in_t)
+    ) i_tc_sram_impl (
+      .clk_i,
+      .rst_ni,
+      .impl_i({deepsleep_i, powergate_i}),
+      .impl_o(),
+      .req_i,
+      .we_i,
+      .addr_i,
+      .wdata_i,
+      .be_i,
+      .rdata_o
+    );
+
+  end else begin : gen_logic_bank  // NumLogicBanks > 1: one SRAM macro per logic bank
+    // Words per logic bank (integer division; checked against a power of two below)
+    localparam int unsigned LogicBankSize = NumWords / NumLogicBanks;
+    // Number of address MSBs used to select the logic bank
+    localparam int unsigned BankSelWidth = (NumLogicBanks > 32'd1) ? $clog2(NumLogicBanks) : 32'd1;
+
+    if (LogicBankSize != 2 ** (AddrWidth - BankSelWidth))
+      $fatal(1, "Logic Bank size is not a power of two: UNSUPPORTED ");
+
+    // Signals from/to logic banks
+    logic  [NumLogicBanks-1:0][NumPorts-1:0]                             req_cut;
+    logic  [NumLogicBanks-1:0][NumPorts-1:0]                             we_cut;
+    logic  [NumLogicBanks-1:0][NumPorts-1:0][AddrWidth-BankSelWidth-1:0] addr_cut;
+    data_t [NumLogicBanks-1:0][NumPorts-1:0]                             wdata_cut;
+    be_t   [NumLogicBanks-1:0][NumPorts-1:0]                             be_cut;
+    data_t [NumLogicBanks-1:0][NumPorts-1:0]                             rdata_cut;
+
+    // Signals to select the right bank (out_mux_sel_* are only used when Latency != 0)
+    logic [NumPorts-1:0][BankSelWidth-1:0] bank_sel;
+    logic [NumPorts-1:0][Latency-1:0][BankSelWidth-1:0] out_mux_sel_d, out_mux_sel_q;
+
+    // Identify the bank from the BankSelWidth most significant bits of the address
+    for (genvar PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_bank_sel
+      assign bank_sel[PortIdx] = addr_i[PortIdx][AddrWidth-1-:BankSelWidth];
+    end
+
+    // Read Data Mux Logic:
+    // With Latency == 0 the current bank_sel drives the read mux directly.
+    // If the memory has Latency != 0, the read data will arrive after a certain delay.
+    // During this time, the bank_select signal must be stored in order to
+    // correctly select the output bank after the expected latency.
+    if (Latency == 32'd0) begin : gen_no_latency
+      for (genvar PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_read_mux_signals
+        assign rdata_o[PortIdx] = rdata_cut[bank_sel[PortIdx]][PortIdx];
+      end
+    end else begin : gen_read_latency
+      always_comb begin
+        for (int PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_read_mux_signals
+          rdata_o[PortIdx] = rdata_cut[out_mux_sel_q[PortIdx][0]][PortIdx];  // oldest entry
+          for (int shift_idx = 0; shift_idx < (Latency - 1); shift_idx++) begin : gen_shift
+            out_mux_sel_d[PortIdx][shift_idx] = out_mux_sel_q[PortIdx][shift_idx+1];
+          end
+          out_mux_sel_d[PortIdx][Latency-1] = bank_sel[PortIdx];  // sampled every cycle
+        end
+      end
+
+      // Bank-select delay line. The reset check sits at the top level of the process (the usual
+      // asynchronous-reset template) and the whole packed array is updated with non-blocking
+      // assignments, so the registered value does not depend on process evaluation order.
+      // out_mux_sel_d already holds the shifted value for every port and every stage.
+      always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+          out_mux_sel_q <= '0;
+        end else begin
+          out_mux_sel_q <= out_mux_sel_d;
+        end
+      end
+    end : gen_read_latency
+
+    // Write data Mux Logic
+    // One `tc_sram_impl` per logic bank; a request only reaches the bank its address selects.
+    for (genvar BankIdx = 0; BankIdx < NumLogicBanks; BankIdx++) begin : gen_logic_bank
+      for (genvar PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin
+        // DEMUX the input signals to the correct logic bank (non-selected banks see zeros)
+        // Assign req channel to the correct logic bank
+        assign req_cut[BankIdx][PortIdx] = req_i[PortIdx] && (bank_sel[PortIdx] == BankIdx);
+        // Assign lowest part of the address to the correct logic bank
+        assign addr_cut[BankIdx][PortIdx] = req_cut[BankIdx][PortIdx] ? addr_i[PortIdx][AddrWidth-BankSelWidth-1:0] : '0;
+        // Assign data, write enable and byte enable to the correct logic bank
+        assign wdata_cut[BankIdx][PortIdx] = req_cut[BankIdx][PortIdx] ? wdata_i[PortIdx] : '0;
+        assign we_cut[BankIdx][PortIdx] = req_cut[BankIdx][PortIdx] ? we_i[PortIdx] : '0;
+        assign be_cut[BankIdx][PortIdx] = req_cut[BankIdx][PortIdx] ? be_i[PortIdx] : '0;
+      end
+
+      tc_sram_impl #(
+        .NumWords   (LogicBankSize),
+        .DataWidth  (DataWidth),
+        .ByteWidth  (ByteWidth),
+        .NumPorts   (NumPorts),
+        .Latency    (Latency),
+        .SimInit    (SimInit),
+        .PrintSimCfg(PrintSimCfg),
+        .ImplKey    (ImplKey),
+        .impl_in_t  (impl_in_t),
+        .impl_out_t (impl_in_t)
+      ) i_tc_sram_impl (
+        .clk_i,
+        .rst_ni,
+        .impl_i ({deepsleep_i[BankIdx], powergate_i[BankIdx]}),
+        .impl_o (),
+        .req_i  (req_cut[BankIdx]),
+        .we_i   (we_cut[BankIdx]),
+        .addr_i (addr_cut[BankIdx]),
+        .wdata_i(wdata_cut[BankIdx]),
+        .be_i   (be_cut[BankIdx]),
+        .rdata_o(rdata_cut[BankIdx])
+      );
+    end
+  end
+
+  // Trigger warnings when power signals (deepsleep_i and powergate_i) are not connected.
+  // Usually those signals must be linked through the UPF.
+`ifndef VERILATOR
+`ifndef TARGET_SYNTHESIS
+  initial begin
+    assert (!$isunknown(deepsleep_i))
+    else $warning("deepsleep_i has some unconnected signals");
+    assert (!$isunknown(powergate_i))
+    else $warning("powergate_i has some unconnected signals");
+  end
+`endif
+`endif
+
+endmodule  // mem_multibank_pwrgate
diff --git a/test/mem_multibank_pwrgate_tb.sv b/test/mem_multibank_pwrgate_tb.sv
new file mode 100644
index 00000000..f48ef6ed
--- /dev/null
+++ b/test/mem_multibank_pwrgate_tb.sv
@@ -0,0 +1,212 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Wolfgang Roenninger , ETH Zurich
+//
+// ## Description:
+// Test to address the multibanked powergated SRAM and check correct address handling.
+
+module mem_multibank_pwrgate_tb #(
+  parameter int unsigned NumPorts      = 32'd2,
+  parameter int unsigned Latency       = 32'd1,
+  parameter int unsigned NumWords      = 32'd1024,
+  parameter int unsigned DataWidth     = 32'd64,
+  parameter int unsigned ByteWidth     = 32'd8,
+  parameter int unsigned NoReq         = 32'd200000,
+  parameter int unsigned NumLogicBanks = 32'd1,
+  parameter string       SimInit       = "zeros",
+  parameter time         CyclTime      = 10ns,
+  parameter time         ApplTime      = 2ns,
+  parameter time         TestTime      = 8ns
+);
+
+  //-----------------------------------
+  // Clock generator
+  //-----------------------------------
+  logic clk, rst_n;
+  clk_rst_gen #(
+    .ClkPeriod   (CyclTime),
+    .RstClkCycles(5)
+  ) i_clk_gen (
+    .clk_o (clk),
+    .rst_no(rst_n)
+  );
+
+  logic [NumPorts-1:0] done;  // per-port flag, set once the port has issued all NoReq requests
+
+  localparam int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1;
+  localparam int unsigned BeWidth = (DataWidth + ByteWidth - 32'd1) / ByteWidth;
+
+  typedef logic [AddrWidth-1:0] addr_t;
+  typedef logic [DataWidth-1:0] data_t;
+  typedef logic [BeWidth-1:0] be_t;
+
+  // signal declarations for each sram port
+  logic  [NumPorts-1:0] req, we;
+  addr_t [NumPorts-1:0] addr;
+  data_t [NumPorts-1:0] wdata, rdata;
+  be_t   [NumPorts-1:0] be;
+
+  // golden model: reference memory contents and number of mismatching read bits
+  data_t memory [NumWords-1:0];
+  longint unsigned failed_test;
+
+  // This process drives the requests on the port with random data.
+  for (genvar i = 0; i < NumPorts; i++) begin : gen_stimuli
+    initial begin : proc_drive_port
+      automatic logic  stim_write;
+      automatic addr_t stim_addr;
+      automatic data_t stim_data;
+      automatic be_t   stim_be;
+
+      done[i]  <= 1'b0;
+      req[i]   <= 1'b0;
+      we[i]    <= 1'b0;
+      addr[i]  <= addr_t'(0);
+      wdata[i] <= data_t'(0);
+      be[i]    <= be_t'(0);
+
+      @(posedge rst_n);
+      repeat (10) @(posedge clk);
+
+      for (int unsigned j = 0; j < NoReq; j++) begin
+        stim_write = bit'($urandom());
+        for (int unsigned k = 0; k < AddrWidth; k++) begin
+          stim_addr[k] = bit'($urandom());
+        end
+        // this statement makes sure that only valid addresses are in a request
+        while (stim_addr >= NumWords) begin
+          for (int unsigned k = 0; k < AddrWidth; k++) begin
+            stim_addr[k] = bit'($urandom());
+          end
+        end
+        for (int unsigned k = 0; k < DataWidth; k++) begin
+          stim_data[k] = bit'($urandom());
+        end
+        for (int unsigned k = 0; k < BeWidth; k++) begin
+          stim_be[k] = bit'($urandom());
+        end
+
+        req[i]   <= #ApplTime 1'b1;
+        we[i]    <= #ApplTime stim_write;
+        addr[i]  <= #ApplTime stim_addr;
+        wdata[i] <= #ApplTime stim_data;
+        be[i]    <= #ApplTime stim_be;
+        @(posedge clk);
+        req[i]   <= #ApplTime 1'b0;
+        we[i]    <= #ApplTime 1'b0;
+        addr[i]  <= #ApplTime addr_t'(0);
+        wdata[i] <= #ApplTime data_t'(0);
+        be[i]    <= #ApplTime be_t'(0);
+
+        repeat ($urandom_range(0, 5)) @(posedge clk);  // 0 to 5 idle cycles between requests
+      end
+      done[i] <= 1'b1;
+    end
+  end
+
+  // This process controls the golden model
+  // - The memory array is initialized according to the parameter
+  // - Data is written exactly at the clock edge, if there is a write request on a port.
+  // - At `TestTime` a process is launched on read requests which lives for `Latency` cycles.
+  //   This process asserts the expected read output at `TestTime` in the respective cycle.
+  initial begin : proc_golden_model
+    failed_test = 0;
+    for (int unsigned i = 0; i < NumWords; i++) begin
+      for (int unsigned j = 0; j < DataWidth; j++) begin
+        case (SimInit)
+          "zeros": memory[i][j] = 1'b0;
+          "ones":  memory[i][j] = 1'b1;
+          default: memory[i][j] = 1'bx;
+        endcase
+      end
+    end
+
+    @(posedge rst_n);
+
+    forever begin
+      @(posedge clk);
+      // writes get latched at clock in golden model array
+      for (int unsigned i = 0; i < NumPorts; i++) begin
+        if (req[i] && we[i]) begin
+          for (int unsigned j = 0; j < DataWidth; j++) begin
+            if (be[i][j/ByteWidth]) begin
+              memory[addr[i]][j] = wdata[i][j];
+            end
+          end
+        end
+      end
+
+      // read test process is launched at `TestTime`
+      #TestTime;
+      fork  // NOTE(review): the for loop is ONE forked process, ports are checked back-to-back
+        for (int unsigned i = 0; i < NumPorts; i++) begin
+          check_read(i, addr[i]);
+        end
+      join_none
+    end
+  end
+
+  // Read test process. This task lives for a number of cycles determined by `Latency`.
+  task automatic check_read(input int unsigned port, input addr_t read_addr);
+    // only continue if there is a read request at this port
+    if (req[port] && !we[port]) begin
+      data_t exp_data = memory[read_addr];
+
+      if (Latency > 0) begin
+        repeat (Latency) @(posedge clk);
+        #TestTime;
+      end
+
+      for (int unsigned i = 0; i < DataWidth; i++) begin
+        if (!$isunknown(exp_data[i])) begin
+          assert (exp_data[i] === rdata[port][i])
+          else begin  // $error, not $warning: simulate.sh passes a run on `grep "Errors: 0,"`
+            $error("Port: %0d unexpected bit[%0h], Addr: %0h expected: %0h, measured: %0h",
+                   port, i, read_addr, exp_data[i], rdata[port][i]);
+            failed_test++;
+          end
+        end
+      end
+    end
+  endtask : check_read
+
+  // Stop the simulation once every port is done and report the number of mismatching bits.
+  initial begin : proc_stop
+    @(posedge rst_n);
+    wait (&done);
+    repeat (10) @(posedge clk);
+    $info("Simulation done, errors: %0d", failed_test);
+    $stop();
+  end
+
+  mem_multibank_pwrgate #(
+    .NumWords     (NumWords),       // Number of Words in data array
+    .DataWidth    (DataWidth),      // Data signal width
+    .ByteWidth    (ByteWidth),      // Width of a data byte
+    .NumPorts     (NumPorts),       // Number of read and write ports
+    .Latency      (Latency),        // Latency when the read data is available
+    .NumLogicBanks(NumLogicBanks),  // Number of Logic Banks for power gating/retention
+    .SimInit      (SimInit),        // Simulation initialization
+    .PrintSimCfg  (1'b1)            // Print configuration
+  ) i_tc_sram_dut (
+    .clk_i      (clk),    // Clock
+    .rst_ni     (rst_n),  // Asynchronous reset active low
+    .req_i      (req),    // request
+    .we_i       (we),     // write enable
+    .addr_i     (addr),   // request address
+    .wdata_i    (wdata),  // write data
+    .be_i       (be),     // write byte enable
+    .deepsleep_i('0),     // Tied to zero to suppress Warnings
+    .powergate_i('0),     // Tied to zero to suppress Warnings
+    .rdata_o    (rdata)   // read data
+  );
+
+endmodule
diff --git a/test/simulate.sh b/test/simulate.sh
index e0544fe9..8fd30fe1 100755
--- a/test/simulate.sh
+++ b/test/simulate.sh
@@ -23,6 +23,21 @@ call_vsim() {
   grep "Errors: 0," vsim.log
 }
 
+for PORTS in 1 ; do
+  for LATENCY in 1 ; do
+    for WORDS in 1024; do
+      for DWIDTH in 64; do
+        for BYTEWIDTH in 9; do
+          for BANKS in 1 2 4 8; do
+            call_vsim mem_multibank_pwrgate_tb -gNumPorts=$PORTS -gLatency=$LATENCY -gNumWords=$WORDS -gDataWidth=$DWIDTH -gByteWidth=$BYTEWIDTH -gNumLogicBanks=$BANKS
+          done
+        done
+      done
+    done
+  done
+done
+
+
 #call_vsim cdc_fifo_tb # currently broken
 for tb in cdc_2phase_tb fifo_tb graycode_tb id_queue_tb popcount_tb stream_register_tb addr_decode_tb; do
   call_vsim $tb