diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0538262c..64b9d36cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -277,7 +277,7 @@ jobs: ln -s $VERILATOR_ROOT/share/verilator/include $VERILATOR_ROOT/include ln -s $VERILATOR_ROOT/share/verilator/bin/verilator_includer $VERILATOR_ROOT/bin/verilator_includer - name: Download RTL submodules - run: git submodule update --init --recursive hardware + run: make -C hardware update - name: Compile Verilated model of Ara run: | sudo apt-get install libelf-dev @@ -454,9 +454,10 @@ jobs: # are mandatory there if they exist. - name: Check for trailing whitespaces and tabs run: | - git diff --check $base HEAD -- \ - apps config hardware .github \ - *.md Bender.* Makefile + bash -O extglob -c \ + "git diff --check $base HEAD -- \ + apps config .github *.md Bender.* \ + Makefile hardware/!(patches)" ##################### # Benchmark stage # diff --git a/.gitmodules b/.gitmodules index 824e85ae0..4221e4dff 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,21 +9,6 @@ [submodule "toolchain/verilator"] path = toolchain/verilator url = https://github.com/verilator/verilator -[submodule "hardware/deps/axi"] - path = hardware/deps/axi - url = https://github.com/pulp-platform/axi.git -[submodule "hardware/deps/common_cells"] - path = hardware/deps/common_cells - url = https://github.com/pulp-platform/common_cells.git -[submodule "hardware/deps/tech_cells_generic"] - path = hardware/deps/tech_cells_generic - url = https://github.com/pulp-platform/tech_cells_generic.git -[submodule "hardware/deps/common_verification"] - path = hardware/deps/common_verification - url = https://github.com/pulp-platform/common_verification.git -[submodule "hardware/deps/cva6"] - path = hardware/deps/cva6 - url = https://github.com/pulp-platform/cva6.git [submodule "toolchain/newlib"] path = toolchain/newlib url = https://sourceware.org/git/newlib-cygwin.git @@ -32,6 +17,3 @@ path = toolchain/riscv-llvm url = https://github.com/llvm/llvm-project.git ignore = dirty -[submodule "hardware/deps/apb"] - path = hardware/deps/apb - url = https://github.com/pulp-platform/apb.git diff --git a/Bender.local b/Bender.local new file mode 100644 index 000000000..5d8caebc0 --- /dev/null +++ b/Bender.local @@ -0,0 +1,7 @@ +# Copyright 2023 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 + +overrides: + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.1 } + tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.13 } diff --git a/Bender.lock b/Bender.lock index fc78b497e..b1a28f968 100644 --- a/Bender.lock +++ b/Bender.lock @@ -1,4 +1,3 @@ ---- packages: apb: revision: 77ddf073f194d44b9119949d2421be59789e69ae @@ -7,38 +6,58 @@ packages: Git: https://github.com/pulp-platform/apb.git dependencies: - common_cells + ariane: + revision: 2ebe023f7289300348c68e99267afcc03256f3ed + version: null + source: + Git: https://github.com/pulp-platform/cva6.git + dependencies: + - axi + - common_cells + - fpnew + - tech_cells_generic axi: - revision: 442ff3375710513623f95944d66cc2bd09b2f155 - version: 0.29.1 + revision: fccffb5953ec8564218ba05e20adbedec845e014 + version: 0.39.1 source: - Git: "https://github.com/pulp-platform/axi.git" + Git: https://github.com/pulp-platform/axi.git dependencies: - - common_cells - - common_verification + - common_cells + - common_verification + - tech_cells_generic common_cells: - revision: 015917ff33e5f944e866814f72f2074fb0f4220f - version: 1.22.1 + revision: 2bd027cb87eaa9bf7d17196ec5f69864b35b630f + version: 1.32.0 source: - Git: "https://github.com/pulp-platform/common_cells.git" + Git: https://github.com/pulp-platform/common_cells.git dependencies: - - common_verification - - tech_cells_generic + - common_verification + - tech_cells_generic common_verification: - revision: 6fc76fb013315af9fabbb90b431863d498df2d6d - version: 0.2.0 + revision: 9c07fa860593b2caabd9b5681740c25fac04b878 + version: 0.2.3 source: - Git: "https://github.com/pulp-platform/common_verification.git" + Git: https://github.com/pulp-platform/common_verification.git dependencies: [] - cva6: - revision: 3245e44ec49c1cdcd19eb298cd81f0672eaf81ca - version: ~ + fpnew: + revision: 3116391bf66660f806b45e212b9949c528b4e270 + version: 0.7.0 source: - Git: "https://github.com/pulp-platform/cva6.git" - dependencies: [] + Git: https://github.com/openhwgroup/cvfpu.git + dependencies: + - common_cells + - fpu_div_sqrt_mvp + fpu_div_sqrt_mvp: + revision: 86e1f558b3c95e91577c41b2fc452c86b04e85ac + version: 1.0.4 + source: + Git: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git + dependencies: + - common_cells tech_cells_generic: - revision: 203038f857158ae4634c47ce0281f402cc2a1344 - version: 0.2.4 + revision: 7968dd6e6180df2c644636bc6d2908a49f2190cf + version: 0.2.13 source: - Git: "https://github.com/pulp-platform/tech_cells_generic.git" + Git: https://github.com/pulp-platform/tech_cells_generic.git dependencies: - - common_verification \ No newline at end of file + - common_verification diff --git a/Bender.yml b/Bender.yml index 051313258..ffe56f936 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,11 +8,11 @@ package: - "Paul Scheffler " dependencies: - axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.29.1 } - common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.22.1 } - cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: acc_port } - tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.1 } - apb: { git: "https://github.com/pulp-platform/apb.git", version: 0.2.4 } + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.1 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.22.1 } + ariane: { git: "https://github.com/pulp-platform/cva6.git", rev: 2ebe023f7289300348c68e99267afcc03256f3ed } # mp/acc_port_rebase + tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.13 } + apb: { git: "https://github.com/pulp-platform/apb.git", version: 0.2.4 } workspace: checkout_dir: "hardware/deps" diff --git a/CHANGELOG.md b/CHANGELOG.md index 454448f22..c3513976f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Reset gating registers before the integer multipliers in `vmfpu` - Fix narrowing for `vnclip` and `vnclipu` - NaN-box the scalar value before forwarding back to CVA6 + - Filter operand queue ready_i from addrgen and sldu selectively when they should not handshake + - CI: don't check the patch directory for whitespace changes ### Added @@ -189,6 +191,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Simplify the datapath of the slide unit. The `sldu` supports only powers of two, and cannot slide and reshuffle at the same time. Non-power-of-two slides are now handled with micro operations. - Bump Verilator to v5.012 - Only allow one workflow at a time per branch/PR + - Bump CVA6 version following OpenHW Group's modification + - Bump fpnew to CVFPU + - Switch from git submodules to Bender to handle hardware dependencies + - Bump AXI, tech_cells_generic, and common_cells dependencies ## 2.2.0 - 2021-11-02 diff --git a/README.md b/README.md index 96719e765..4412208b7 100644 --- a/README.md +++ b/README.md @@ -97,13 +97,38 @@ make riscv_tests ## RTL Simulation -To simulate the Ara system with ModelSim, go to the `hardware` folder, which contains all the SystemVerilog files. Use the following command to run your simulation: +### Hardware dependencies + +The Ara repository depends on external IPs and uses Bender to handle the IP dependencies. +To install Bender and initialize all the hardware IPs, run the following commands: + +```bash +# Go to the hardware folder +cd hardware +# Install Bender and checkout all the IPs +make update +``` + +### Patches (only once!) + +Note: this step is required only once, and needs to be repeated ONLY if the IP hardware dependencies are deleted and checked out again. + +Some of the IPs need to be patched to work with Verilator. ```bash # Go to the hardware folder cd hardware # Apply the patches (only need to run this once) make apply-patches +``` + +### Simulation + +To simulate the Ara system with ModelSim, go to the `hardware` folder, which contains all the SystemVerilog files. Use the following command to run your simulation: + +```bash +# Go to the hardware folder +cd hardware # Only compile the hardware without running the simulation. make compile # Run the simulation with the *hello_world* binary loaded diff --git a/hardware/Makefile b/hardware/Makefile index 0625fcc28..0b72af6a6 100644 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -10,6 +10,9 @@ ARA_DIR := $(shell git rev-parse --show-toplevel 2>/dev/null || echo $$ARA_DIR) INSTALL_DIR := $(abspath $(ROOT_DIR)/../install) VERILATOR_INCLUDE := $(INSTALL_DIR)/verilator/share/verilator/include/vltstd +BENDER := $(ROOT_DIR)/../hardware/bender +BENDER_VERSION := 0.27.3 + # Choose Ara's configuration ifndef config ifdef ARA_CONFIGURATION @@ -101,8 +104,14 @@ dpi := $(patsubst tb/dpi/%.cc,$(buildpath)/$(dpi_library)/%.o,$(wildcard tb/dp vlog_args += -suppress vlog-2583 -suppress vlog-13314 -suppress vlog-13233 vlog_args += -work $(library) +# Bender # Defines -bender_defs += --define NR_LANES=$(nr_lanes) --define VLEN=$(vlen) --define RVV_ARIANE=1 +bender_defs += --define NR_LANES=$(nr_lanes) --define VLEN=$(vlen) --define ARIANE_ACCELERATOR_PORT=1 +# Targets +bender_common_targs := -t rtl -t cv64a6_imafdcv_sv39 -t tech_cells_generic_include_tc_sram -t tech_cells_generic_include_tc_clk +bender_targs_simc := $(bender_common_targs) -t ara_test -t cva6_test +bender_targs_veril := $(bender_common_targs) -t ara_test -t cva6_test -t verilator +bender_targs_spyglass := $(bender_common_targs) -t spyglass # Default target all: compile @@ -111,12 +120,22 @@ all: compile $(buildpath): mkdir -p $(buildpath) +.PHONY: bender update # Bender -bender: - @[ -x ./bender ] && echo "Bender already exists." || \ - curl --proto '=https' --tlsv1.2 https://fabianschuiki.github.io/bender/init -sSf | sh -s -- 0.23.1 +bender: $(BENDER) +$(BENDER): + @[ -x $(BENDER) ] && echo "Bender already exists." || \ + curl --proto '=https' --tlsv1.2 https://pulp-platform.github.io/bender/init -sSf | sh -s -- $(BENDER_VERSION) @echo "$$(./bender --version) available." +update: $(BENDER) $(ROOT_DIR)/../Bender.yml + rm -rf $(ROOT_DIR)/../hardware/deps/* + $(BENDER) update -f + $(BENDER) checkout + +checkout: $(BENDER) + $(BENDER) checkout + # Patches .PHONY: apply-patches apply-patches: patches @@ -132,11 +151,11 @@ $(buildpath)/$(library): .PHONY: compile compile: dpi lib $(buildpath) bender $(buildpath)/compile_$(config).tcl $(buildpath)/compile_$(config).tcl: $(config_file) Makefile ../Bender.yml $(shell find src -type f) $(shell find ../config -type f) $(shell find include -type f) $(shell find tb -type f) $(shell find deps -type f) - ./bender script vsim --vlog-arg="$(vlog_args)" -t rtl -t asic -t ara_test -t cva6_test $(bender_defs) > $(buildpath)/compile_$(config).tcl + $(BENDER) script vsim --vlog-arg="$(vlog_args)" $(bender_targs_simc) $(bender_defs) > $(buildpath)/compile_$(config).tcl echo "exit" >> $(buildpath)/compile_$(config).tcl cd $(buildpath) && $(questa_cmd) vsim -work $(library) -c -do compile_$(config).tcl - # Remove the file if compilation did not succeed - if [ `cat $(buildpath)/transcript | grep "\*\* Error" | wc -l` -ne 0 ]; then rm $(buildpath)/compile_$(config).tcl; fi + # Rename the file if compilation did not succeed + if [ `cat $(buildpath)/transcript | grep "\*\* Error" | wc -l` -ne 0 ]; then mv $(buildpath)/compile_$(config).tcl $(buildpath)/compile_$(config).tcl.ERROR; fi # Simulation .PHONY: sim @@ -164,11 +183,13 @@ verilate: $(buildpath) bender $(veril_library)/V$(veril_top) $(veril_library)/V$(veril_top): $(config_file) Makefile ../Bender.yml $(shell find src -type f) $(shell find ../config -type f) $(shell find include -type f) $(shell find tb -type f) $(shell find deps -type f) rm -rf $(veril_library); mkdir -p $(veril_library) - ./bender script verilator -t rtl -t ara_test -t cva6_test -t verilator $(bender_defs) > $(veril_library)/bender_script_$(config) + $(BENDER) script verilator $(bender_targs_veril) $(bender_defs) > $(veril_library)/bender_script_$(config) # Verilate the design $(veril_path)/verilator -f $(veril_library)/bender_script_$(config) \ -GNrLanes=$(nr_lanes) \ -O3 \ + -Wno-fatal \ + -Wno-PINCONNECTEMPTY \ -Wno-BLKANDNBLK \ -Wno-CASEINCOMPLETE \ -Wno-CMPCONST \ @@ -180,7 +201,7 @@ $(veril_library)/V$(veril_top): $(config_file) Makefile ../Bender.yml $(shell fi -Wno-WIDTH \ -Wno-WIDTHCONCAT \ -Wno-ENUMVALUE \ - -Wno-COMBDLY \ + -Wno-COMBDLY \ --hierarchical \ tb/verilator/waiver.vlt \ --Mdir $(veril_library) \ @@ -224,7 +245,7 @@ lint: spyglass/tmp/files spyglass/sdc/func.sdc spyglass/scripts/run_lint.tcl spyglass/tmp/files: $(bender) mkdir -p spyglass/tmp - ./bender script verilator -t rtl -t spyglass -t cva6_test $(bender_defs) --define SPYGLASS > spyglass/tmp/files + $(BENDER) script verilator $(bender_targs_spyglass) $(bender_defs) --define SPYGLASS > spyglass/tmp/files # DPIs .PHONY: dpi @@ -242,4 +263,4 @@ $(buildpath)/$(dpi_library)/ara_dpi.so: $(dpi) .PHONY: clean clean: rm -rf $(buildpath) - rm -f bender + rm -f $(BENDER) diff --git a/hardware/deps/apb b/hardware/deps/apb deleted file mode 160000 index 77ddf073f..000000000 --- a/hardware/deps/apb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 77ddf073f194d44b9119949d2421be59789e69ae diff --git a/hardware/deps/axi b/hardware/deps/axi deleted file mode 160000 index 442ff3375..000000000 --- a/hardware/deps/axi +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 442ff3375710513623f95944d66cc2bd09b2f155 diff --git a/hardware/deps/common_cells b/hardware/deps/common_cells deleted file mode 160000 index 015917ff3..000000000 --- a/hardware/deps/common_cells +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 015917ff33e5f944e866814f72f2074fb0f4220f diff --git a/hardware/deps/common_verification b/hardware/deps/common_verification deleted file mode 160000 index 6fc76fb01..000000000 --- a/hardware/deps/common_verification +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6fc76fb013315af9fabbb90b431863d498df2d6d diff --git a/hardware/deps/cva6 b/hardware/deps/cva6 deleted file mode 160000 index bebbc1475..000000000 --- a/hardware/deps/cva6 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bebbc1475f9ffba661e8354d8773e27ab9338db1 diff --git a/hardware/deps/tech_cells_generic b/hardware/deps/tech_cells_generic deleted file mode 160000 index 203038f85..000000000 --- a/hardware/deps/tech_cells_generic +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 203038f857158ae4634c47ce0281f402cc2a1344 diff --git a/hardware/include/ara_pkg.sv b/hardware/include/ara_pkg.sv index 014b00473..1f8e50cfa 100644 --- a/hardware/include/ara_pkg.sv +++ b/hardware/include/ara_pkg.sv @@ -239,8 +239,8 @@ package ara_pkg; ///////////////////////////// // Use Ariane's accelerator interface. - typedef ariane_pkg::accelerator_req_t accelerator_req_t; - typedef ariane_pkg::accelerator_resp_t accelerator_resp_t; + typedef acc_pkg::accelerator_req_t accelerator_req_t; + typedef acc_pkg::accelerator_resp_t accelerator_resp_t; ///////////////////////// // Backend interface // diff --git a/hardware/patches/0001-tech-cells-generic-sram.patch b/hardware/patches/0001-tech-cells-generic-sram.patch index dc1e1a99b..054143245 100644 --- a/hardware/patches/0001-tech-cells-generic-sram.patch +++ b/hardware/patches/0001-tech-cells-generic-sram.patch @@ -1,38 +1,51 @@ diff --git a/src/rtl/tc_sram.sv b/src/rtl/tc_sram.sv -index 53530e0..075dcea 100644 +index b702a11..eeef776 100644 --- a/src/rtl/tc_sram.sv +++ b/src/rtl/tc_sram.sv -@@ -124,9 +124,11 @@ module tc_sram #( - // write memory array - always_ff @(posedge clk_i or negedge rst_ni) begin - if (!rst_ni) begin -+ `ifndef VERILATOR - for (int unsigned i = 0; i < NumWords; i++) begin - sram[i] <= init_val[i]; - end -+ `endif - for (int i = 0; i < NumPorts; i++) begin - r_addr_q[i] <= {AddrWidth{1'b0}}; - // initialize the read output register for each port -@@ -149,12 +151,14 @@ module tc_sram #( - for (int unsigned i = 0; i < NumPorts; i++) begin - if (req_i[i]) begin - if (we_i[i]) begin +@@ -145,12 +145,14 @@ module tc_sram #( + for (int unsigned i = 0; i < NumPorts; i++) begin + if (req_i[i]) begin + if (we_i[i]) begin + `ifndef VERILATOR - // update value when write is set at clock - for (int unsigned j = 0; j < DataWidth; j++) begin - if (be_i[i][j/ByteWidth]) begin - sram[addr_i[i]][j] <= wdata_i[i][j]; + // update value when write is set at clock + for (int unsigned j = 0; j < BeWidth; j++) begin + if (be_i[i][j]) begin + sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth]; + end end - end + `endif - end else begin - // otherwise update read address for subsequent non request cycles - r_addr_q[i] <= addr_i[i]; -@@ -164,6 +168,23 @@ module tc_sram #( - end // if !rst_ni + end else begin + // otherwise update read address for subsequent non request cycles + r_addr_q[i] <= addr_i[i]; +@@ -163,7 +165,9 @@ module tc_sram #( + // write memory array + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin ++ `ifndef VERILATOR + sram <= init_val; ++ `endif + for (int i = 0; i < NumPorts; i++) begin + r_addr_q[i] <= {AddrWidth{1'b0}}; + // initialize the read output register for each port +@@ -186,12 +190,14 @@ module tc_sram #( + for (int unsigned i = 0; i < NumPorts; i++) begin + if (req_i[i]) begin + if (we_i[i]) begin ++ `ifndef VERILATOR + // update value when write is set at clock + for (int unsigned j = 0; j < BeWidth; j++) begin + if (be_i[i][j]) begin + sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth]; + end + end ++ `endif + end else begin + // otherwise update read address for subsequent non request cycles + r_addr_q[i] <= addr_i[i]; +@@ -202,6 +208,23 @@ module tc_sram #( + end end - + + `ifdef VERILATOR + for (genvar i = 0; i < NumPorts; i++) begin + // update value when write is set at clock @@ -53,7 +66,7 @@ index 53530e0..075dcea 100644 // Validate parameters. // pragma translate_off `ifndef VERILATOR -@@ -204,4 +225,59 @@ module tc_sram #( +@@ -242,4 +265,59 @@ module tc_sram #( `endif `endif // pragma translate_on diff --git a/hardware/scripts/wave_core.tcl b/hardware/scripts/wave_core.tcl index 7f0434ad7..757f814e7 100644 --- a/hardware/scripts/wave_core.tcl +++ b/hardware/scripts/wave_core.tcl @@ -7,15 +7,15 @@ add wave -noupdate -group CVA6 -group core /ara_tb/dut/i_ara_soc/i_system/i_ariane/* add wave -noupdate -group CVA6 -group frontend /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/* -add wave -noupdate -group CVA6 -group frontend -group icache /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_cache_subsystem/i_cva6_icache/* -add wave -noupdate -group CVA6 -group frontend -group ras /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_ras/* -add wave -noupdate -group CVA6 -group frontend -group btb /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_btb/* -add wave -noupdate -group CVA6 -group frontend -group bht /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_bht/* +add wave -noupdate -group CVA6 -group frontend -group icache /ara_tb/dut/i_ara_soc/i_system/i_ariane/genblk4/i_cache_subsystem/* +# add wave -noupdate -group CVA6 -group frontend -group ras /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_ras/* +# add wave -noupdate -group CVA6 -group frontend -group btb /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_btb/* +# add wave -noupdate -group CVA6 -group frontend -group bht /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_bht/* # add wave -noupdate -group CVA6 -group frontend -group instr_scan /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/*/i_instr_scan/* # add wave -noupdate -group CVA6 -group frontend -group fetch_fifo /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_frontend/i_fetch_fifo/* add wave -noupdate -group CVA6 -group id_stage -group decoder /ara_tb/dut/i_ara_soc/i_system/i_ariane/id_stage_i/decoder_i/* -add wave -noupdate -group CVA6 -group id_stage -group compressed_decoder /ara_tb/dut/i_ara_soc/i_system/i_ariane/id_stage_i/compressed_decoder_i/* +add wave -noupdate -group CVA6 -group id_stage -group compressed_decoder /ara_tb/dut/i_ara_soc/i_system/i_ariane/id_stage_i/genblk1/compressed_decoder_i/* add wave -noupdate -group CVA6 -group id_stage /ara_tb/dut/i_ara_soc/i_system/i_ariane/id_stage_i/* add wave -noupdate -group CVA6 -group issue_stage -group scoreboard /ara_tb/dut/i_ara_soc/i_system/i_ariane/issue_stage_i/i_scoreboard/* @@ -32,10 +32,10 @@ add wave -noupdate -group CVA6 -group ex_stage -group fpu -group fpnew /ara_tb/d add wave -noupdate -group CVA6 -group ex_stage -group lsu /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/* add wave -noupdate -group CVA6 -group ex_stage -group lsu -group lsu_bypass /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/lsu_bypass_i/* -add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/i_mmu/* -add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu -group itlb /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/i_mmu/i_itlb/* -add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu -group dtlb /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/i_mmu/i_dtlb/* -add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu -group ptw /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/i_mmu/i_ptw/* +add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/gen_mmu_sv39/i_cva6_mmu/* +add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu -group itlb /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/gen_mmu_sv39/i_cva6_mmu/i_itlb/* +add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu -group dtlb /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/gen_mmu_sv39/i_cva6_mmu/i_dtlb/* +add wave -noupdate -group CVA6 -group ex_stage -group lsu -group mmu -group ptw /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/gen_mmu_sv39/i_cva6_mmu/i_ptw/* add wave -noupdate -group CVA6 -group ex_stage -group lsu -group store_unit /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/i_store_unit/* add wave -noupdate -group CVA6 -group ex_stage -group lsu -group store_unit -group store_buffer /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/lsu_i/i_store_unit/store_buffer_i/* @@ -46,7 +46,6 @@ add wave -noupdate -group CVA6 -group ex_stage -group branch_unit /ara_tb/dut/i_ add wave -noupdate -group CVA6 -group ex_stage -group csr_buffer /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/csr_buffer_i/* -add wave -noupdate -group CVA6 -group ex_stage -group dispatcher /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/gen_accelerator/i_acc_dispatcher/* add wave -noupdate -group CVA6 -group ex_stage /ara_tb/dut/i_ara_soc/i_system/i_ariane/ex_stage_i/* add wave -noupdate -group CVA6 -group commit_stage /ara_tb/dut/i_ara_soc/i_system/i_ariane/commit_stage_i/* @@ -55,10 +54,12 @@ add wave -noupdate -group CVA6 -group csr_file /ara_tb/dut/i_ara_soc/i_system/i_ add wave -noupdate -group CVA6 -group controller /ara_tb/dut/i_ara_soc/i_system/i_ariane/controller_i/* -add wave -noupdate -group CVA6 -group wt_dcache /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_cache_subsystem/i_wt_dcache/* -add wave -noupdate -group CVA6 -group wt_dcache -group miss_handler /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_cache_subsystem/i_wt_dcache/i_wt_dcache_missunit/* +add wave -noupdate -group CVA6 -group wt_dcache /ara_tb/dut/i_ara_soc/i_system/i_ariane/genblk4/i_cache_subsystem/i_wt_dcache/* +add wave -noupdate -group CVA6 -group wt_dcache -group miss_handler /ara_tb/dut/i_ara_soc/i_system/i_ariane/genblk4/i_cache_subsystem/i_wt_dcache/i_wt_dcache_missunit/* -add wave -noupdate -group CVA6 -group wt_dcache -group load {/ara_tb/dut/i_ara_soc/i_system/i_ariane/i_cache_subsystem/i_wt_dcache/gen_rd_ports[0]/i_wt_dcache_ctrl/*} -add wave -noupdate -group CVA6 -group wt_dcache -group ptw {/ara_tb/dut/i_ara_soc/i_system/i_ariane/i_cache_subsystem/i_wt_dcache/gen_rd_ports[1]/i_wt_dcache_ctrl/*} +add wave -noupdate -group CVA6 -group wt_dcache -group load {/ara_tb/dut/i_ara_soc/i_system/i_ariane/genblk4/i_cache_subsystem/i_wt_dcache/gen_rd_ports[0]/i_wt_dcache_ctrl/*} +add wave -noupdate -group CVA6 -group wt_dcache -group ptw {/ara_tb/dut/i_ara_soc/i_system/i_ariane/genblk4/i_cache_subsystem/i_wt_dcache/gen_rd_ports[1]/i_wt_dcache_ctrl/*} -add wave -noupdate -group CVA6 -group perf_counters /ara_tb/dut/i_ara_soc/i_system/i_ariane/i_perf_counters/* +add wave -noupdate -group CVA6 -group dispatcher /ara_tb/dut/i_ara_soc/i_system/i_ariane/gen_accelerator/i_acc_dispatcher/* + +add wave -noupdate -group CVA6 -group perf_counters /ara_tb/dut/i_ara_soc/i_system/i_ariane/gen_perf_counter/perf_counters_i/* diff --git a/hardware/src/accel_dispatcher_ideal.sv b/hardware/src/accel_dispatcher_ideal.sv index 8c564b34c..b89d93474 100644 --- a/hardware/src/accel_dispatcher_ideal.sv +++ b/hardware/src/accel_dispatcher_ideal.sv @@ -25,11 +25,7 @@ module accel_dispatcher_ideal import axi_pkg::*; import ara_pkg::*; ( input logic rst_ni, // Accelerator interaface output accelerator_req_t acc_req_o, - output logic acc_req_valid_o, - input logic acc_req_ready_i, - input accelerator_resp_t acc_resp_i, - input logic acc_resp_valid_i, - output logic acc_resp_ready_o + input accelerator_resp_t acc_resp_i ); localparam string vtrace = `STRINGIFY(`VTRACE); @@ -69,7 +65,7 @@ module accel_dispatcher_ideal import axi_pkg::*; import ara_pkg::*; ( status_cnt_n = status_cnt_q; fifo_data_raw = fifo_q[read_pointer_q]; - if (acc_req_ready_i && ~fifo_empty) begin + if (acc_resp_i.req_ready && ~fifo_empty) begin // read from the queue is a default assignment // but increment the read pointer... if (read_pointer_n == N_VINSN - 1) @@ -94,16 +90,16 @@ module accel_dispatcher_ideal import axi_pkg::*; import ara_pkg::*; ( assign fifo_empty = (status_cnt_q == 0); - // Always valid until empty - assign acc_req_valid_o = ~fifo_empty; - // Flush the answer - assign acc_resp_ready_o = 1'b1; // Output assignment assign fifo_data = fifo_payload_t'(fifo_data_raw); assign acc_req_o = '{ insn : fifo_data.insn, rs1 : fifo_data.rs1, rs2 : fifo_data.rs2, + // Always valid until empty + req_valid : ~fifo_empty, + // Flush the answer + resp_ready : 1'b1, default : '0 }; @@ -133,7 +129,7 @@ module accel_dispatcher_ideal import axi_pkg::*; import ara_pkg::*; ( // Stop the computation when the instructions are over and ara has returned idle // Just check that we are after reset always_ff @(posedge clk_i) begin - if (rst_ni && was_reset && !acc_req_valid_o && i_system.i_ara.ara_idle) begin + if (rst_ni && was_reset && !acc_req_o.req_valid && i_system.i_ara.ara_idle) begin $display("[hw-cycles]: %d", int'(perf_cnt_q)); $info("Core Test ", $sformatf("*** SUCCESS *** (tohost = %0d)", 0)); $finish(0); @@ -160,10 +156,10 @@ endmodule fifo_payload_t payload; acc_req_o = '0; - acc_req_valid_o = 1'b0; + acc_req_o.req_valid = 1'b0; // Flush the answer - acc_resp_ready_o = 1'b1; + acc_req_o.resp_ready = 1'b1; acc_req_o = '0; acc_req_o.frm = fpnew_pkg::RNE; @@ -176,17 +172,17 @@ endmodule while ($fscanf(fd, "%h", payload) == 1) begin // Always valid - acc_req_valid_o = 1'b1; + acc_req_o.req_valid = 1'b1; acc_req_o.insn = payload.insn; acc_req_o.rs1 = payload.rs1; // Wait for the handshake - wait(acc_req_ready_i); + wait(acc_resp_i.req_ready); @(posedge clk_i); @(negedge clk_i); end // Stop dispatching - acc_req_valid_o = 1'b0; + acc_req_o.req_valid = 1'b0; $fclose(fd); end diff --git a/hardware/src/ara.sv b/hardware/src/ara.sv index c6976be6f..0583d1eea 100644 --- a/hardware/src/ara.sv +++ b/hardware/src/ara.sv @@ -39,11 +39,7 @@ module ara import ara_pkg::*; #( output logic scan_data_o, // Interface with Ariane input accelerator_req_t acc_req_i, - input logic acc_req_valid_i, - output logic acc_req_ready_o, output accelerator_resp_t acc_resp_o, - output logic acc_resp_valid_o, - input logic acc_resp_ready_i, // AXI interface output axi_req_t axi_req_o, input axi_resp_t axi_resp_i @@ -95,11 +91,7 @@ module ara import ara_pkg::*; #( .rst_ni (rst_ni ), // Interface with Ariane .acc_req_i (acc_req_i ), - .acc_req_valid_i (acc_req_valid_i ), - .acc_req_ready_o (acc_req_ready_o ), .acc_resp_o (acc_resp_o ), - .acc_resp_valid_o (acc_resp_valid_o), - .acc_resp_ready_i (acc_resp_ready_i), // Interface with the sequencer .ara_req_o (ara_req ), .ara_req_valid_o (ara_req_valid ), diff --git a/hardware/src/ara_dispatcher.sv b/hardware/src/ara_dispatcher.sv index 2eb6e2ce3..998e84230 100644 --- a/hardware/src/ara_dispatcher.sv +++ b/hardware/src/ara_dispatcher.sv @@ -22,11 +22,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( input logic rst_ni, // Interfaces with Ariane input accelerator_req_t acc_req_i, - input logic acc_req_valid_i, - output logic acc_req_ready_o, output accelerator_resp_t acc_resp_o, - output logic acc_resp_valid_o, - input logic acc_resp_ready_i, // Interface with Ara's backend output ara_req_t ara_req_o, output logic ara_req_valid_o, @@ -276,8 +272,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( is_decoding = 1'b0; in_lane_op = 1'b0; - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; acc_resp_o = '{ trans_id : acc_req_i.trans_id, load_complete : load_zero_vl | load_complete_q, @@ -326,8 +322,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // Stall the interface, wait for the backend to accept the injected uop - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; // Handle LMUL > 1 rs_lmul_cnt_d = rs_lmul_cnt_q; @@ -428,11 +424,11 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase if (state_d == NORMAL_OPERATION && state_q != RESHUFFLE) begin - if (acc_req_valid_i && ara_req_ready_i && acc_resp_ready_i) begin + if (acc_req_i.req_valid && ara_req_ready_i && acc_req_i.resp_ready) begin // Decoding is_decoding = 1'b1; // Acknowledge the request - acc_req_ready_o = ara_req_ready_i; + acc_resp_o.req_ready = ara_req_ready_i; // Decode the instructions based on their opcode unique case (acc_req_i.insn.itype.opcode) @@ -445,14 +441,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // These always respond at the same cycle - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; // Decode based on their func3 field unique case (insn.varith_type.func3) // Configuration instructions OPCFG: begin: opcfg // These can be acknowledged regardless of the state of Ara - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; is_config = 1'b1; // Update vtype @@ -1216,8 +1212,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b010000: begin // VWXUNARY0 // vmv.x.s // Stall the interface until we get the result - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; case (insn.varith_type.rs1) 5'b00000: begin @@ -1256,10 +1252,10 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Wait until the back-end answers to acknowledge those instructions if (ara_resp_valid_i) begin - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; acc_resp_o.result = ara_resp_i.resp; acc_resp_o.error = ara_resp_i.error; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; ara_req_valid_d = 1'b0; end end @@ -1894,8 +1890,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 6'b010000: begin // VWFUNARY0 // vmv.f.s // Stall the interface until we get the result - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; ara_req_d.op = ara_pkg::VFMVFS; ara_req_d.use_vd = 1'b0; @@ -1930,10 +1926,10 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Wait until the back-end answers to acknowledge those instructions if (ara_resp_valid_i) begin - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; acc_resp_o.result = vfmvfs_result; acc_resp_o.error = ara_resp_i.error; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; ara_req_valid_d = 1'b0; end end @@ -2513,7 +2509,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( is_vload = 1'b1; // Wait before acknowledging this instruction - acc_req_ready_o = 1'b0; + acc_resp_o.req_ready = 1'b0; // These generate a request to Ara's backend ara_req_d.vd = insn.vmem_type.rd; @@ -2558,9 +2554,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end end default: begin // Invalid. Element is too wide, or encoding is non-existant. - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; acc_resp_o.error = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; ara_req_valid_d = 1'b0; end endcase @@ -2582,13 +2578,13 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( 5'b10000: begin // Unit-strided, fault-only first // TODO: Not implemented illegal_insn = 1'b1; - acc_req_ready_o = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.req_ready = 1'b1; + acc_resp_o.resp_valid = 1'b1; end default: begin // Reserved illegal_insn = 1'b1; - acc_req_ready_o = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.req_ready = 1'b1; + acc_resp_o.resp_valid = 1'b1; end endcase end @@ -2617,7 +2613,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // But the new eew is greater than vsew if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) > 0) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end end // The new emul is greater than the previous lmul @@ -2625,7 +2621,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // But the new eew is lower than vsew if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) < 0) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end end default:; @@ -2636,19 +2632,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case (ara_req_d.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end LMUL_RSVD: begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end default:; endcase @@ -2659,8 +2655,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( ignore_zero_vl_check = 1'b1; // The LMUL value is kept in the instruction itself illegal_insn = 1'b0; - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; ara_req_valid_d = 1'b1; // Maximum vector length. VLMAX = nf * VLEN / EW8. @@ -2691,9 +2687,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // Wait until the back-end answers to acknowledge those instructions if (ara_resp_valid_i) begin - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; acc_resp_o.error = ara_resp_i.error; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; ara_req_valid_d = 1'b0; // In case of error, modify vstart if (ara_resp_i.error) @@ -2719,7 +2715,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( is_vstore = 1'b1; // Wait before acknowledging this instruction - acc_req_ready_o = 1'b0; + acc_resp_o.req_ready = 1'b0; // vl depends on the EEW encoded in the instruction. // Ara does not reshuffle source vregs upon vector stores, @@ -2771,9 +2767,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end end default: begin // Invalid. Element is too wide, or encoding is non-existant. - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; acc_resp_o.error = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; ara_req_valid_d = 1'b0; end endcase @@ -2794,8 +2790,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( end default: begin // Reserved illegal_insn = 1'b1; - acc_req_ready_o = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.req_ready = 1'b1; + acc_resp_o.resp_valid = 1'b1; end endcase end @@ -2824,7 +2820,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // But the new eew is greater than vsew if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) > 0) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end end // The new emul is greater than the previous lmul @@ -2832,7 +2828,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // But the new eew is lower than vsew if (signed'(ara_req_d.vtype.vsew - vtype_q.vsew) < 0) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end end default:; @@ -2843,19 +2839,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( unique case (ara_req_d.emul) LMUL_2: if ((insn.varith_type.rd & 5'b00001) != 5'b00000) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end LMUL_4: if ((insn.varith_type.rd & 5'b00011) != 5'b00000) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end LMUL_8: if ((insn.varith_type.rd & 5'b00111) != 5'b00000) begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end LMUL_RSVD: begin illegal_insn = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end default:; endcase @@ -2892,16 +2888,16 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( endcase illegal_insn = 1'b0; - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; ara_req_valid_d = 1'b1; end // Wait until the back-end answers to acknowledge those instructions if (ara_resp_valid_i) begin - acc_req_ready_o = 1'b1; + acc_resp_o.req_ready = 1'b1; acc_resp_o.error = ara_resp_i.error; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; ara_req_valid_d = 1'b0; // If there is an error, change vstart if (ara_resp_i.error) @@ -2915,7 +2911,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( riscv::OpcodeSystem: begin // These always respond at the same cycle - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; is_config = 1'b1; unique case (acc_req_i.insn.itype.funct3) @@ -3083,7 +3079,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( default: begin // Trigger an illegal instruction acc_resp_o.error = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end endcase end @@ -3091,7 +3087,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( default: begin // Trigger an illegal instruction acc_resp_o.error = 1'b1; - acc_resp_valid_o = 1'b1; + acc_resp_o.resp_valid = 1'b1; end endcase end @@ -3148,8 +3144,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( automatic rvv_instruction_t insn = rvv_instruction_t'(acc_req_i.insn.instr); // Stall the interface, and inject a reshuffling instruction - acc_req_ready_o = 1'b0; - acc_resp_valid_o = 1'b0; + acc_resp_o.req_ready = 1'b0; + acc_resp_o.resp_valid = 1'b0; ara_req_valid_d = 1'b0; // Initialize the reshuffle counter limit to handle LMUL > 1 @@ -3215,8 +3211,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #( // operation was resolved (to decrement its pending load/store counter) // This can collide with the same signal from the vector load/store unit, so we must // delay the zero_vl acknowledge by 1 cycle - acc_req_ready_o = ~((is_vload & load_complete_q) | (is_vstore & store_complete_q)); - acc_resp_valid_o = ~((is_vload & load_complete_q) | (is_vstore & store_complete_q)); + acc_resp_o.req_ready = ~((is_vload & load_complete_q) | (is_vstore & store_complete_q)); + acc_resp_o.resp_valid = ~((is_vload & load_complete_q) | (is_vstore & store_complete_q)); ara_req_valid_d = 1'b0; load_zero_vl = is_vload; store_zero_vl = is_vstore; diff --git a/hardware/src/ara_soc.sv b/hardware/src/ara_soc.sv index f65d37160..2806587e7 100644 --- a/hardware/src/ara_soc.sv +++ b/hardware/src/ara_soc.sv @@ -133,6 +133,7 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( MaxSlvTrans : 4, FallThrough : 1'b0, LatencyMode : axi_pkg::CUT_MST_PORTS, + PipelineStages : 0, AxiIdWidthSlvPorts: AxiSocIdWidth, AxiIdUsedSlvPorts : AxiSocIdWidth, UniqueIds : 1'b0, @@ -188,8 +189,8 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( axi_atop_filter #( .AxiIdWidth (AxiSocIdWidth ), .AxiMaxWriteTxns(4 ), - .req_t (soc_wide_req_t ), - .resp_t (soc_wide_resp_t) + .axi_req_t (soc_wide_req_t ), + .axi_resp_t (soc_wide_resp_t) ) i_l2mem_atop_filter ( .clk_i (clk_i ), .rst_ni (rst_ni ), @@ -457,7 +458,7 @@ module ara_soc import axi_pkg::*; import ara_pkg::*; #( CachedRegionAddrBase : {DRAMBase}, CachedRegionLength : {DRAMLength}, // cache config - Axi64BitCompliant : 1'b1, + AxiCompliant : 1'b1, SwapEndianess : 1'b0, // debug DmBaseAddress : 64'h0, diff --git a/hardware/src/ara_system.sv b/hardware/src/ara_system.sv index f8c32d44e..1e8d639b4 100644 --- a/hardware/src/ara_system.sv +++ b/hardware/src/ara_system.sv @@ -73,13 +73,11 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( // Ara and Ariane // ////////////////////// - import ariane_pkg::accelerator_req_t; - import ariane_pkg::accelerator_resp_t; + import acc_pkg::accelerator_req_t; + import acc_pkg::accelerator_resp_t; // Accelerator ports accelerator_req_t acc_req; - logic acc_req_valid; - logic acc_req_ready; accelerator_resp_t acc_resp; logic acc_resp_valid; logic acc_resp_ready; @@ -92,21 +90,39 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( logic [63:0] hart_id; assign hart_id = {'0, hart_id_i}; + // Pack invalidation interface into acc interface + accelerator_resp_t acc_resp_pack; + always_comb begin : pack_inval + acc_resp_pack = acc_resp; + acc_resp_pack.inval_valid = inval_valid; + acc_resp_pack.inval_addr = inval_addr; + inval_ready = acc_req.inval_ready; + acc_cons_en = acc_req.acc_cons_en; + end + `ifdef IDEAL_DISPATCHER // Perfect dispatcher to Ara accel_dispatcher_ideal i_accel_dispatcher_ideal ( .clk_i (clk_i ), .rst_ni (rst_ni ), .acc_req_o (acc_req ), - .acc_req_valid_o (acc_req_valid ), - .acc_req_ready_i (acc_req_ready ), .acc_resp_i (acc_resp ), .acc_resp_valid_i (acc_resp_valid ), .acc_resp_ready_o (acc_resp_ready ) ); `else - ariane #( - .ArianeCfg(ArianeCfg) + cva6 #( + .ArianeCfg(ArianeCfg), + .cvxif_req_t (acc_pkg::accelerator_req_t), + .cvxif_resp_t (acc_pkg::accelerator_resp_t), + .AxiAddrWidth ( AxiAddrWidth ), + .AxiDataWidth ( AxiNarrowDataWidth ), + .AxiIdWidth ( AxiIdWidth ), + .axi_ar_chan_t (ariane_axi_ar_t), + .axi_aw_chan_t (ariane_axi_aw_t), + .axi_w_chan_t (ariane_axi_w_t), + .axi_req_t (ariane_axi_req_t), + .axi_rsp_t (ariane_axi_resp_t) ) i_ariane ( .clk_i (clk_i ), .rst_ni (rst_ni ), @@ -116,19 +132,15 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( .ipi_i ('0 ), .time_irq_i ('0 ), .debug_req_i ('0 ), - .axi_req_o (ariane_narrow_axi_req ), - .axi_resp_i (ariane_narrow_axi_resp), + .rvfi_o ( ), // Accelerator ports - .acc_req_o (acc_req ), - .acc_req_valid_o (acc_req_valid ), - .acc_req_ready_i (acc_req_ready ), - .acc_resp_i (acc_resp ), - .acc_resp_valid_i (acc_resp_valid ), - .acc_resp_ready_o (acc_resp_ready ), - .acc_cons_en_o (acc_cons_en ), - .inval_addr_i (inval_addr ), - .inval_valid_i (inval_valid ), - .inval_ready_o (inval_ready ) + .cvxif_req_o (acc_req ), + .cvxif_resp_i (acc_resp_pack ), + .l15_req_o ( ), + .l15_rtrn_i ( '0 ), + // Memory interface + .axi_req_o (ariane_narrow_axi_req ), + .axi_resp_i (ariane_narrow_axi_resp) ); `endif @@ -211,11 +223,7 @@ module ara_system import axi_pkg::*; import ara_pkg::*; #( .scan_data_i (1'b0 ), .scan_data_o (/* Unused */ ), .acc_req_i (acc_req ), - .acc_req_valid_i (acc_req_valid ), - .acc_req_ready_o (acc_req_ready ), .acc_resp_o (acc_resp ), - .acc_resp_valid_o(acc_resp_valid), - .acc_resp_ready_i(acc_resp_ready), .axi_req_o (ara_axi_req ), .axi_resp_i (ara_axi_resp ) ); diff --git a/hardware/src/cva6_accel_first_pass_decoder.sv b/hardware/src/cva6_accel_first_pass_decoder.sv index 0519d58b7..74c7e14e2 100644 --- a/hardware/src/cva6_accel_first_pass_decoder.sv +++ b/hardware/src/cva6_accel_first_pass_decoder.sv @@ -7,36 +7,49 @@ // instruction, whether it reads scalar registers, and whether // it writes to a destination scalar register -module cva6_accel_first_pass_decoder import rvv_pkg::*; ( - input logic [31:0] instruction_i, // instruction from IF - output logic is_accel_o, // is a vector instruction - output logic is_rs1_o, - output logic is_rs2_o, - output logic is_rd_o, - output logic is_fs1_o, - output logic is_fs2_o, - output logic is_fd_o, - output logic is_vfp_o, // is a vector floating-point instruction - output logic is_load_o, - output logic is_store_o +module cva6_accel_first_pass_decoder import rvv_pkg::*; import ariane_pkg::*; ( + input logic [31:0] instruction_i, // instruction from IF + input riscv::xs_t fs_i, // floating point extension status + input riscv::xs_t vs_i, // vector extension status + output logic is_accel_o, // is a vector instruction + output scoreboard_entry_t instruction_o, // predecoded instruction + output logic illegal_instr_o, // is an illegal instruction + output logic is_control_flow_instr_o ); + logic is_rs1; + logic is_rs2; + logic is_rd; + logic is_fs1; + logic is_fs2; + logic is_fd; + logic is_vfp; // is a vector floating-point instruction + logic is_load; + logic is_store; + // Cast instruction into the `rvv_instruction_t` struct rvv_instruction_t instr; assign instr = rvv_instruction_t'(instruction_i); + // Cast instruction into scalar `instruction_t` struct + riscv::instruction_t instr_scalar; + assign instr_scalar = riscv::instruction_t'(instruction_i); + + // Vector instructions never change control flow + assign is_control_flow_instr_o = 1'b0; + always_comb begin // Default values is_accel_o = 1'b0; - is_rs1_o = 1'b0; - is_rs2_o = 1'b0; - is_rd_o = 1'b0; - is_fs1_o = 1'b0; - is_fs2_o = 1'b0; - is_fd_o = 1'b0; - is_vfp_o = 1'b0; - is_load_o = instr.i_type.opcode == riscv::OpcodeLoadFp; - is_store_o = instr.i_type.opcode == riscv::OpcodeStoreFp; + is_rs1 = 1'b0; + is_rs2 = 1'b0; + is_rd = 1'b0; + is_fs1 = 1'b0; + is_fs2 = 1'b0; + is_fd = 1'b0; + is_vfp = 1'b0; + is_load = instr.i_type.opcode == riscv::OpcodeLoadFp; + is_store = instr.i_type.opcode == riscv::OpcodeStoreFp; // Decode based on the opcode case (instr.i_type.opcode) @@ -46,20 +59,20 @@ module cva6_accel_first_pass_decoder import rvv_pkg::*; ( is_accel_o = 1'b1; case (instr.varith_type.func3) OPFVV: begin - is_fd_o = instr.varith_type.func6 == 6'b010_000; // VFWUNARY0 - is_vfp_o = 1'b1; + is_fd = instr.varith_type.func6 == 6'b010_000; // VFWUNARY0 + is_vfp = 1'b1; end - OPMVV: is_rd_o = instr.varith_type.func6 == 6'b010_000; // VWXUNARY0 - OPIVX: is_rs1_o = 1'b1 ; + OPMVV: is_rd = instr.varith_type.func6 == 6'b010_000; // VWXUNARY0 + OPIVX: is_rs1 = 1'b1 ; OPFVF: begin - is_fs1_o = 1'b1; - is_vfp_o = 1'b1; + is_fs1 = 1'b1; + is_vfp = 1'b1; end - OPMVX: is_rs1_o = 1'b1 ; + OPMVX: is_rs1 = 1'b1 ; OPCFG: begin - is_rs1_o = instr.vsetivli_type.func2 != 2'b11; // not vsetivli - is_rs2_o = instr.vsetvl_type.func7 == 7'b100_0000; // vsetvl - is_rd_o = 1'b1 ; + is_rs1 = instr.vsetivli_type.func2 != 2'b11; // not vsetivli + is_rs2 = instr.vsetvl_type.func7 == 7'b100_0000; // vsetvl + is_rd = 1'b1 ; end endcase end @@ -77,8 +90,8 @@ module cva6_accel_first_pass_decoder import rvv_pkg::*; ( 4'b1110, //VLxE512/VSxE512 4'b1111: begin //VLxE1024/VSxE1024 is_accel_o = 1'b1 ; - is_rs1_o = 1'b1 ; - is_rs2_o = instr.vmem_type.mop == 2'b10; // Strided operation + is_rs1 = 1'b1 ; + is_rs2 = instr.vmem_type.mop == 2'b10; // Strided operation end endcase end @@ -91,7 +104,7 @@ module cva6_accel_first_pass_decoder import rvv_pkg::*; ( 3'b110, //VAMO*EI32.V 3'b111: begin //VAMO*EI64.V is_accel_o = 1'b1; - is_rs1_o = 1'b1; + is_rs1 = 1'b1; end endcase end @@ -106,13 +119,44 @@ module cva6_accel_first_pass_decoder import rvv_pkg::*; ( 3'b110, //CSRRSI 3'b111: begin //CSRRCI is_accel_o = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); - is_rs1_o = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); - is_rs2_o = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); - is_rd_o = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); + is_rs1 = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); + is_rs2 = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); + is_rd = is_vector_csr(riscv::csr_reg_t'(instr.i_type.imm)); end endcase end endcase end + always_comb begin + instruction_o = '0; + illegal_instr_o = 1'b1; + + if (is_accel_o && vs_i != riscv::Off) begin // trigger illegal instruction if the vector extension is turned off + // TODO: Instruction going to other accelerators might need to distinguish whether the value of vs_i is needed or not. + // Send accelerator instructions to the coprocessor + instruction_o.fu = ACCEL; + instruction_o.vfp = is_vfp; + instruction_o.rs1 = (is_rs1 || is_fs1) ? instr_scalar.rtype.rs1 : {REG_ADDR_SIZE{1'b0}}; + instruction_o.rs2 = (is_rs2 || is_fs2) ? instr_scalar.rtype.rs2 : {REG_ADDR_SIZE{1'b0}}; + instruction_o.rd = (is_rd || is_fd) ? instr_scalar.rtype.rd : {REG_ADDR_SIZE{1'b0}}; + + // Decode the vector operation + unique case ({is_store, is_load, is_fs1, is_fs2, is_fd}) + 5'b10000: instruction_o.op = ACCEL_OP_STORE; + 5'b01000: instruction_o.op = ACCEL_OP_LOAD; + 5'b00100: instruction_o.op = ACCEL_OP_FS1; + 5'b00001: instruction_o.op = ACCEL_OP_FD; + 5'b00000: instruction_o.op = ACCEL_OP; + endcase + + // Check that mstatus.FS is not OFF if we have a FP instruction for the accelerator + illegal_instr_o = (is_vfp && (fs_i == riscv::Off)) ? 1'b1 : 1'b0; + + // result holds the undecoded instruction + instruction_o.result = { {riscv::XLEN-32{1'b0}}, instruction_i[31:0] }; + instruction_o.use_imm = 1'b0; + end + end + endmodule : cva6_accel_first_pass_decoder diff --git a/hardware/src/lane/operand_queues_stage.sv b/hardware/src/lane/operand_queues_stage.sv index 5ed714522..467584510 100644 --- a/hardware/src/lane/operand_queues_stage.sv +++ b/hardware/src/lane/operand_queues_stage.sv @@ -208,25 +208,41 @@ module operand_queues_stage import ara_pkg::*; import rvv_pkg::*; import cf_math * Slide Unit * ****************/ + // This operand queue is common to slide unit and addrgen. + // Since it's shared, we should be sure not to sample a + // spurious ready from the wrong unit, i.e., when we are + // feeding the addrgen, we don't want spurious readies from + // the slide unit. The units should be responsible for avoiding + // sampling wrong data, but spurious ready signals can happen in + // specific corner cases. Fixing this without impacting timing is + // hard, so we just mask the ready signals here as well to avoid + // bugs. + logic sldu_operand_ready_filtered; + logic addrgen_operand_ready_filtered; + assign sldu_operand_ready_filtered = sldu_operand_ready_i & + (sldu_addrgen_operand_target_fu_o == ALU_SLDU); + assign addrgen_operand_ready_filtered = addrgen_operand_ready_i & + (sldu_addrgen_operand_target_fu_o == MFPU_ADDRGEN); + operand_queue #( .CmdBufDepth (VlduInsnQueueDepth), .DataBufDepth (2 ), .FPUSupport (FPUSupport ), .NrLanes (NrLanes ) ) i_operand_queue_slide_addrgen_a ( - .clk_i (clk_i ), - .rst_ni (rst_ni ), - .lane_id_i (lane_id_i ), - .operand_queue_cmd_i (operand_queue_cmd_i[SlideAddrGenA] ), - .operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[SlideAddrGenA] ), - .operand_i (operand_i[SlideAddrGenA] ), - .operand_valid_i (operand_valid_i[SlideAddrGenA] ), - .operand_issued_i (operand_issued_i[SlideAddrGenA] ), - .operand_queue_ready_o (operand_queue_ready_o[SlideAddrGenA] ), - .operand_o (sldu_addrgen_operand_o ), - .operand_target_fu_o (sldu_addrgen_operand_target_fu_o ), - .operand_valid_o (sldu_addrgen_operand_valid_o ), - .operand_ready_i (addrgen_operand_ready_i | sldu_operand_ready_i) + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .lane_id_i (lane_id_i ), + .operand_queue_cmd_i (operand_queue_cmd_i[SlideAddrGenA] ), + .operand_queue_cmd_valid_i(operand_queue_cmd_valid_i[SlideAddrGenA] ), + .operand_i (operand_i[SlideAddrGenA] ), + .operand_valid_i (operand_valid_i[SlideAddrGenA] ), + .operand_issued_i (operand_issued_i[SlideAddrGenA] ), + .operand_queue_ready_o (operand_queue_ready_o[SlideAddrGenA] ), + .operand_o (sldu_addrgen_operand_o ), + .operand_target_fu_o (sldu_addrgen_operand_target_fu_o ), + .operand_valid_o (sldu_addrgen_operand_valid_o ), + .operand_ready_i (addrgen_operand_ready_filtered | sldu_operand_ready_filtered) ); ///////////////// diff --git a/hardware/src/lane/vmfpu.sv b/hardware/src/lane/vmfpu.sv index c00844552..544225b04 100644 --- a/hardware/src/lane/vmfpu.sv +++ b/hardware/src/lane/vmfpu.sv @@ -782,7 +782,8 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; }; // Don't compress classify result - localparam int unsigned TrueSIMDClass = 1; + localparam int unsigned TrueSIMDClass = 1; + localparam int unsigned EnableSIMDMask = 1; operation_e fp_op; logic fp_opmod; @@ -983,9 +984,8 @@ module vmfpu import ara_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; .Features (FPUFeatures ), .Implementation(FPUImplementation), .TagType (strb_t ), - .NumLanes (FPULanes ), .TrueSIMDClass (TrueSIMDClass ), - .MaskType (fpu_mask_t ) + .EnableSIMDMask(EnableSIMDMask ) ) i_fpnew_bulk ( .clk_i (clk_i ), .rst_ni (rst_ni ), diff --git a/hardware/src/vlsu/addrgen.sv b/hardware/src/vlsu/addrgen.sv index 2fbe05e55..a25d086a1 100644 --- a/hardware/src/vlsu/addrgen.sv +++ b/hardware/src/vlsu/addrgen.sv @@ -324,9 +324,10 @@ module addrgen import ara_pkg::*; import rvv_pkg::*; #( // Bump lane pointer elm_ptr_d = '0; word_lane_ptr_d += 1; - if (word_lane_ptr_q == NrLanes - 1) - // Ready for the next full word - addrgen_operand_ready_o = 1'b1; + if (word_lane_ptr_q == NrLanes - 1) begin + // Ready for the next full word + addrgen_operand_ready_o = 1'b1; + end end else begin // Bump element pointer elm_ptr_d += 1; diff --git a/hardware/src/vlsu/vlsu.sv b/hardware/src/vlsu/vlsu.sv index aa2e05283..b6904850c 100644 --- a/hardware/src/vlsu/vlsu.sv +++ b/hardware/src/vlsu/vlsu.sv @@ -84,13 +84,13 @@ module vlsu import ara_pkg::*; import rvv_pkg::*; #( axi_resp_t axi_resp; axi_cut #( - .ar_chan_t(axi_ar_t ), - .r_chan_t (axi_r_t ), - .aw_chan_t(axi_aw_t ), - .w_chan_t (axi_w_t ), - .b_chan_t (axi_b_t ), - .req_t (axi_req_t ), - .resp_t (axi_resp_t) + .ar_chan_t (axi_ar_t ), + .r_chan_t (axi_r_t ), + .aw_chan_t (axi_aw_t ), + .w_chan_t (axi_w_t ), + .b_chan_t (axi_b_t ), + .axi_req_t (axi_req_t ), + .axi_resp_t(axi_resp_t) ) i_axi_cut ( .clk_i (clk_i ), .rst_ni (rst_ni ), diff --git a/hardware/tb/ara_testharness.sv b/hardware/tb/ara_testharness.sv index 09901b262..84edf4c8e 100644 --- a/hardware/tb/ara_testharness.sv +++ b/hardware/tb/ara_testharness.sv @@ -153,7 +153,7 @@ module ara_testharness #( // If disabled if (!runtime_cnt_en_q) // Start only if the software allowed the enable and we detect the first V instruction - runtime_cnt_en_d = i_ara_soc.i_system.i_ara.acc_req_valid_i & cnt_en_mask; + runtime_cnt_en_d = i_ara_soc.i_system.i_ara.acc_req_i.req_valid & cnt_en_mask; // If enabled if (runtime_cnt_en_q) // Stop counting only if the software disabled the counter and Ara returned idle @@ -177,14 +177,14 @@ module ara_testharness #( runtime_to_be_updated_d = runtime_to_be_updated_q; // Assert the update flag upon a new valid vector instruction - if (!runtime_to_be_updated_q && i_ara_soc.i_system.i_ara.acc_req_valid_i) begin + if (!runtime_to_be_updated_q && i_ara_soc.i_system.i_ara.acc_req_i.req_valid) begin runtime_to_be_updated_d = 1'b1; end // Update the internal runtime and reset the update flag if (runtime_to_be_updated_q && i_ara_soc.i_system.i_ara.ara_idle && - !i_ara_soc.i_system.i_ara.acc_req_valid_i) begin + !i_ara_soc.i_system.i_ara.acc_req_i.req_valid) begin runtime_buf_d = runtime_cnt_q; runtime_to_be_updated_d = 1'b0; end diff --git a/hardware/tb/dpi/elfloader.cc b/hardware/tb/dpi/elfloader.cc index 60f06c358..0df988270 120000 --- a/hardware/tb/dpi/elfloader.cc +++ b/hardware/tb/dpi/elfloader.cc @@ -1 +1 @@ -../../deps/cva6/tb/dpi/elfloader.cc \ No newline at end of file +../../deps/ariane/corev_apu/tb/dpi/elfloader.cc \ No newline at end of file diff --git a/scripts/check_cycles.py b/scripts/check_cycles.py index 7b7040c07..24a861d1a 100644 --- a/scripts/check_cycles.py +++ b/scripts/check_cycles.py @@ -24,19 +24,19 @@ import numpy as np threshold = { - 'imatmul' : 300, - 'fmatmul' : 300, - 'iconv2d' : 300, - 'fconv2d' : 300, - 'fconv3d' : 300, - 'jacobi2d' : 300, - 'dropout' : 300, - 'fft' : 300, - 'dwt' : 300, - 'exp' : 300, - 'softmax' : 300, - 'pathfinder' : 300, - 'roi_align' : 300, + 'imatmul' : 500, + 'fmatmul' : 500, + 'iconv2d' : 500, + 'fconv2d' : 500, + 'fconv3d' : 500, + 'jacobi2d' : 500, + 'dropout' : 500, + 'fft' : 500, + 'dwt' : 500, + 'exp' : 500, + 'softmax' : 500, + 'pathfinder' : 500, + 'roi_align' : 500, } skip_check = {