diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49e8c4080..aa9fffc3e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,14 +28,14 @@ jobs: tc-llvm: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Recover the submodule commit hash id: recover_hash run: | git submodule status toolchain/riscv-llvm | cut -d' ' -f1 echo "tc-llvm-hash=`git submodule status toolchain/riscv-llvm | cut -d' ' -f1`" >> $GITHUB_ENV - name: Cache the LLVM toolchain - uses: actions/cache@v3 + uses: actions/cache@v4 id: tc-llvm-cache env: cache-name: cache-llvm @@ -59,7 +59,7 @@ jobs: - name: Tar LLVM run: tar -cvf tc-llvm.tar install/riscv-llvm - name: Upload LLVM - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: tc-llvm path: tc-llvm.tar @@ -67,14 +67,14 @@ jobs: tc-gcc: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Recover the submodule commit hash id: recover_hash run: | git submodule status toolchain/riscv-gnu-toolchain | cut -d' ' -f1 echo "tc-gcc-hash=`git submodule status toolchain/riscv-gnu-toolchain | cut -d' ' -f1`" >> $GITHUB_ENV - name: Cache the GCC toolchain - uses: actions/cache@v3 + uses: actions/cache@v4 id: tc-gcc-cache env: cache-name: cache-gcc @@ -94,7 +94,7 @@ jobs: - name: Tar GCC run: tar -cvf tc-gcc.tar install/riscv-gcc - name: Upload GCC - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: tc-gcc path: tc-gcc.tar @@ -102,14 +102,14 @@ jobs: tc-isa-sim: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Recover the submodule commit hash id: recover_hash run: | git submodule status toolchain/riscv-isa-sim | cut -d' ' -f1 echo "tc-isa-sim-hash=`git submodule status toolchain/riscv-isa-sim | cut -d' ' -f1`" >> $GITHUB_ENV - name: Cache Spike - uses: actions/cache@v3 + uses: actions/cache@v4 id: tc-isa-sim-cache env: cache-name: cache-spike @@ -129,7 +129,7 @@ jobs: - name: Tar Spike run: tar -cvf tc-isa-sim.tar install/riscv-isa-sim - name: Upload Spike - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: tc-isa-sim path: tc-isa-sim.tar @@ -137,14 +137,14 @@ jobs: tc-verilator: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Recover the submodule commit hash id: recover_hash run: | git submodule status toolchain/verilator | cut -d' ' -f1 echo "tc-verilator-hash=`git submodule status toolchain/verilator | cut -d' ' -f1`" >> $GITHUB_ENV - name: Cache Verilator - uses: actions/cache@v3 + uses: actions/cache@v4 id: tc-verilator-cache env: cache-name: cache-verilator @@ -165,7 +165,7 @@ jobs: - name: Tar Verilator run: tar -cvf tc-verilator.tar install/verilator - name: Upload Verilator - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: tc-verilator path: tc-verilator.tar @@ -182,7 +182,7 @@ jobs: ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: tc-llvm steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.6 @@ -193,7 +193,7 @@ jobs: git submodule update --init --recursive -- toolchain/riscv-isa-sim git submodule foreach --recursive git reset --hard - name: Download the LLVM toolchain - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-llvm - name: Untar LLVM @@ -201,7 +201,7 @@ jobs: - name: Compile applications run: config=${{ matrix.ara_config }} make -C apps - name: Upload applications - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: compile-apps-${{ matrix.ara_config }} path: apps/bin @@ -214,25 +214,25 @@ jobs: ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["tc-llvm", "tc-gcc", "tc-isa-sim"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download Spike run: | git submodule update --init --recursive -- toolchain/riscv-isa-sim git submodule foreach --recursive git reset --hard - name: Get Spike artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-isa-sim - name: Untar Spike run: tar xvf tc-isa-sim.tar - name: Get LLVM toolchain artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-llvm - name: Untar LLVM run: tar xvf tc-llvm.tar - name: Get GCC toolchain artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-gcc - name: Untar GCC @@ -240,7 +240,7 @@ jobs: - name: Compile applications run: config=${{ matrix.ara_config }} make -C apps riscv_tests - name: Upload applications - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: compile-riscv-tests-${{ matrix.ara_config }} path: apps/bin @@ -253,19 +253,19 @@ jobs: ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["tc-verilator", "tc-isa-sim"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download Spike run: | git submodule update --init --recursive -- toolchain/riscv-isa-sim git submodule foreach --recursive git reset --hard - name: Get Spike artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-isa-sim - name: Untar Spike run: tar xvf tc-isa-sim.tar - name: Get Verilator artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-verilator - name: Untar Verilator @@ -286,7 +286,7 @@ jobs: - name: Tar Verilated model of Ara run: tar -cvf ara.tar hardware/build/verilator hardware/bender - name: Upload Ara Verilated model - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: compile-ara-${{ matrix.ara_config }} path: ara.tar @@ -304,21 +304,21 @@ jobs: ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["compile-ara", "compile-apps"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get Spike artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-isa-sim - name: Untar Spike run: tar xvf tc-isa-sim.tar - name: Get Verilated model of Ara - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: compile-ara-${{ matrix.ara_config }} - name: Untar Verilated model of Ara run: tar xvf ara.tar - name: Get applications - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: compile-apps-${{ matrix.ara_config }} path: apps/bin @@ -337,21 +337,21 @@ jobs: ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["compile-ara", "compile-riscv-tests"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get Spike artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-isa-sim - name: Untar Spike run: tar xvf tc-isa-sim.tar - name: Get Verilated model of Ara - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: compile-ara-${{ matrix.ara_config }} - name: Untar Verilated model of Ara run: tar xvf ara.tar - name: Get RISC-V tests - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: compile-riscv-tests-${{ matrix.ara_config }} path: apps/bin @@ -362,21 +362,21 @@ jobs: runs-on: ubuntu-20.04 needs: ["tc-isa-sim", "compile-riscv-tests"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get Spike artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-isa-sim - name: Untar Spike run: tar xvf tc-isa-sim.tar - name: Download the LLVM toolchain - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-llvm - name: Untar LLVM run: tar xvf tc-llvm.tar - name: Download the GCC toolchain - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-gcc - name: Untar GCC @@ -386,7 +386,7 @@ jobs: make -C apps/riscv-tests/isa clean make -C apps riscv_tests_spike - name: Upload dumps - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: riscv-tests-spike path: | @@ -400,7 +400,7 @@ jobs: check-license: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.6 @@ -413,14 +413,14 @@ jobs: runs-on: ubuntu-20.04 needs: ['tc-llvm'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.6 - name: Install Python requirements run: pip install -r python-requirements.txt - name: Download the LLVM toolchain - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-llvm - name: Untar LLVM @@ -438,7 +438,7 @@ jobs: check-trailing-whitespaces: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Determine base commit @@ -471,26 +471,26 @@ jobs: ara_config: [2_lanes, 4_lanes, 8_lanes, 16_lanes] needs: ["compile-ara", "compile-apps"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.6 - name: Install Python requirements run: pip install -r python-requirements.txt - name: Download the LLVM toolchain - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-llvm - name: Untar LLVM run: tar xvf tc-llvm.tar - name: Get Spike artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: tc-isa-sim - name: Untar Spike run: tar xvf tc-isa-sim.tar - name: Get Verilated model of Ara - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: compile-ara-${{ matrix.ara_config }} - name: Untar Verilated model of Ara @@ -505,12 +505,12 @@ jobs: run: | tar -cvf benchmarks-${{ matrix.ara_config }}.tar *.benchmark - name: Upload [f]dotproduct runtime results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dotproducts-${{ matrix.ara_config }} path: dotproducts-${{ matrix.ara_config }}.tar - name: Upload runtime results - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: benchmark-${{ matrix.ara_config }} path: benchmarks-${{ matrix.ara_config }}.tar @@ -519,43 +519,43 @@ jobs: runs-on: ubuntu-20.04 needs: benchmark steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.6 - name: Install Python requirements run: pip install -r python-requirements.txt - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Get [f]dotproduct results (2 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dotproducts-2_lanes - name: Get [f]dotproduct results (4 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dotproducts-4_lanes - name: Get [f]dotproduct results (8 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dotproducts-8_lanes - name: Get [f]dotproduct results (16 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: dotproducts-16_lanes - name: Get benchmark results (2 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: benchmark-2_lanes - name: Get benchmark results (4 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: benchmark-4_lanes - name: Get benchmark results (8 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: benchmark-8_lanes - name: Get benchmark results (16 lanes) - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: benchmark-16_lanes - name: Untar the [f]dotproduct results @@ -587,77 +587,77 @@ jobs: - name: Plot the rooflines run: gnuplot -c scripts/benchmark.gnuplot - name: Upload the imatmul roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: imatmul_roofline path: imatmul.png - name: Upload the fmatmul roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: fmatmul_roofline path: fmatmul.png - name: Upload the iconv2d roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: iconv2d_roofline path: iconv2d.png - name: Upload the fconv2d roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: fconv2d_roofline path: fconv2d.png - name: Upload the fconv3d roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: fconv3d_roofline path: fconv3d.png - name: Upload the jacobi2d roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: jacobi2d_roofline path: jacobi2d.png - name: Upload the dropout roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dropout_roofline path: dropout.png - name: Upload the fft roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: fft_roofline path: fft.png - name: Upload the dwt roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dwt_roofline path: dwt.png - name: Upload the exp roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: exp_roofline path: exp.png - name: Upload the softmax roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: softmax_roofline path: softmax.png - name: Upload the fdotproduct roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: fdotproduct_plots path: fdotproduct.tar - name: Upload the dotproduct roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: dotproduct_plots path: dotproduct.tar - name: Upload the pathfinder roofline - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: pathfinder_roofline path: pathfinder.png - name: Upload the roi_align roofline - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: roi_align_roofline path: roi_align.png @@ -671,9 +671,9 @@ jobs: if: always() needs: ["simulate", "riscv-tests-spike", "riscv-tests-simv"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Delete artifacts - uses: geekyeggo/delete-artifact@v2 + uses: geekyeggo/delete-artifact@v5 with: name: | tc-llvm @@ -691,9 +691,9 @@ jobs: if: always() needs: ["simulate", "riscv-tests-spike", "riscv-tests-simv"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Delete artifacts - uses: geekyeggo/delete-artifact@v2 + uses: geekyeggo/delete-artifact@v5 with: name: | compile-ara-${{ matrix.ara_config }} diff --git a/.gitmodules b/.gitmodules index 4221e4dff..bb7a2e822 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,3 +17,6 @@ path = toolchain/riscv-llvm url = https://github.com/llvm/llvm-project.git ignore = dirty +[submodule "cheshire/sw/cva6-sdk"] + path = cheshire/sw/cva6-sdk + url = git@github.com:moimfeld/cva6-sdk.git diff --git a/Bender.lock b/Bender.lock index 4318389f4..a63b5e779 100644 --- a/Bender.lock +++ b/Bender.lock @@ -30,7 +30,7 @@ packages: Git: https://github.com/pulp-platform/common_verification.git dependencies: [] cva6: - revision: f9ebe5006fefe97fc1d2a8b1515fa2fd321fec6d + revision: 5086bff7a039f02a2d9604bca7b1b3e9d961713e version: null source: Git: https://github.com/pulp-platform/cva6.git diff --git a/Bender.yml b/Bender.yml index 89ef503d6..58a20d33e 100644 --- a/Bender.yml +++ b/Bender.yml @@ -10,7 +10,7 @@ package: dependencies: axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.39.1 } common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.22.1 } - cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: f9ebe5006fefe97fc1d2a8b1515fa2fd321fec6d } # mp/pulp-v1-araOS + cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: 5086bff7a039f02a2d9604bca7b1b3e9d961713e } # mp/pulp-v1-araOS tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.13 } apb: { git: "https://github.com/pulp-platform/apb.git", version: 0.2.4 } diff --git a/CHANGELOG.md b/CHANGELOG.md index d8eb04fc3..3c364dde0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Fix `acc_dispatcher` CVA6 bug for instructions with side effects - Fix NaN/subnormal floating-point handling in opqueues - Stall vfdiv/vfsqrt instructions following/preceding other fp instructions + - Fix vector slicing in the operand requesters ### Added @@ -34,6 +35,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Add virtual->physical address translation for Ara by sharing CVA6 MMU - Add Ara VLSU support for MMU exceptions - Add multi-precision conv3d + - Add Cheshire bare-metal FPGA flow for vcu128 and vcu118 + - Add cva6-sdk submodule + - Add Cheshire Linux FPGA flow for vcu128 and vcu118 ### Changed @@ -56,6 +60,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Memory size is now constant with NrLanes - Enable hierarchical verilation - Bump AXI and common cells to solve verilation warnings + - Update all Github Actions for CI + - Update READMEs with FPGA implementation instructions ## 3.0.0 - 2023-09-08 diff --git a/README.md b/README.md index 8f7c713bf..af94ef4bd 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,12 @@ Currently, the following kernels support automatic VCD dumping: `fmatmul`, `fcon We also provide Synopsys Spyglass linting scripts in the hardware/spyglass. Run make lint in the hardware folder, with a specific MemPool configuration, to run the tests associated with the lint_rtl target. +## FPGA implementation and Linux flow + +Ara supports Cheshire's FPGA flow and can be currently implemented on VCU128 and VCU118 in bare-metal and with Linux. The tested configuration is with 2 lanes. + +For information about the FPGA bare-metal and Linux flows, please refer to `cheshire/README.md`. + ## Publications If you want to use Ara, you can cite us: diff --git a/apps/Makefile b/apps/Makefile index 3116addbf..7b2907b89 100644 --- a/apps/Makefile +++ b/apps/Makefile @@ -37,6 +37,13 @@ CVA6_BINARIES := $(addprefix bin/, $(cva6_tests)) ARA_EXTENSIONS := rv64uv ARA_BINARIES := $(addprefix bin/, $(ara_tests)) +# Suffix for binaries +ifeq ($(LINUX),1) +BIN_SUFFIX := -linux +else +BIN_SUFFIX := +endif + # FFT requires special treatment because of its header files ifeq ($(ENV_DEFINES),) bin/fft: ENV_DEFINES += -DFFT_SAMPLES=$(subst ",,$(firstword $(def_args_fft))) @@ -98,9 +105,9 @@ endef $(foreach app,$(APPS),$(eval $(call app_compile_template_spike,$(app)))) define app_compile_template -bin/$1: $1/data.S.o $(addsuffix .o, $(shell find $(1) -name "*.c" -o -name "*.S")) $(RUNTIME_LLVM) linker_script +bin/$1$(BIN_SUFFIX): $1/data.S.o $(addsuffix .o, $(shell find $(1) -name "*.c" -o -name "*.S")) $(RUNTIME_LLVM) linker_script mkdir -p bin/ - $$(RISCV_CC) -Iinclude $(RISCV_CCFLAGS) -o $$@ $$(addsuffix .o, $$(shell find $(1) -name "*.c" -o -name "*.S")) $(RUNTIME_LLVM) $$(RISCV_LDFLAGS) -T$$(CURDIR)/common/link.ld + $$(RISCV_CC) -Iinclude $(RISCV_CCFLAGS) -o $$@ $$(addsuffix .o, $$(shell find $(1) -name "*.c" -o -name "*.S")) $(RUNTIME_LLVM) $$(RISCV_LDFLAGS) $$(RISCV_OBJDUMP) $$(RISCV_OBJDUMP_FLAGS) -D $$@ > $$@.dump $$(RISCV_STRIP) $$@ -S --strip-unneeded endef @@ -114,7 +121,7 @@ TESTS_$(1) := $(addprefix bin/, $($(addsuffix _ara_tests, $1))) bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_GCC) linker_script mkdir -p bin/ - $$(RISCV_CC_GCC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS_GCC) $$(RISCV_LDFLAGS_GCC) -o $$@ $$< $(RUNTIME_GCC) -T$$(CURDIR)/common/link.ld + $$(RISCV_CC_GCC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS_GCC) $$(RISCV_LDFLAGS_GCC) -o $$@ $$< $(RUNTIME_GCC) $$(RISCV_OBJDUMP) $$(RISCV_OBJDUMP_FLAGS) -D $$@ > $$@.dump $$(RISCV_STRIP) $$@ -S --strip-unneeded endef @@ -124,7 +131,7 @@ TESTS_$(1) := $(addprefix bin/, $($(addsuffix _ara_tests, $1))) bin/$(1)-ara-%: $(TESTS_DIR)/$(1)/%.$(2) $(RUNTIME_LLVM) linker_script mkdir -p bin/ - $$(RISCV_CC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS) $$(RISCV_LDFLAGS) -o $$@ $$< $(RUNTIME_LLVM) -T$$(CURDIR)/common/link.ld + $$(RISCV_CC) -Iinclude -I$$(TESTS_DIR)/macros/scalar -I$$(TESTS_DIR)/macros/vector $$(RISCV_CCFLAGS) $$(RISCV_LDFLAGS) -o $$@ $$< $(RUNTIME_LLVM) $$(RISCV_OBJDUMP) $$(RISCV_OBJDUMP_FLAGS) -D $$@ > $$@.dump $$(RISCV_STRIP) $$@ -S --strip-unneeded endef diff --git a/apps/common/runtime.mk b/apps/common/runtime.mk index adac4f902..7998ebf02 100644 --- a/apps/common/runtime.mk +++ b/apps/common/runtime.mk @@ -44,7 +44,18 @@ RISCV_ARCH ?= rv$(RISCV_XLEN)gcv RISCV_ABI ?= lp64d RISCV_TARGET ?= riscv$(RISCV_XLEN)-unknown-elf -# Use LLVM +# Use LLVM for bare-metal RVV and GCC for Linux RVV +ifeq ($(LINUX),1) +RISCV_PREFIX ?= $(ARA_DIR)/cheshire/sw/cva6-sdk/buildroot/output/host/bin/riscv64-buildroot-linux-gnu- +RISCV_CC ?= $(RISCV_PREFIX)gcc +RISCV_CXX ?= $(RISCV_PREFIX)g++ +RISCV_OBJDUMP ?= $(RISCV_PREFIX)objdump +RISCV_OBJCOPY ?= $(RISCV_PREFIX)objcopy +RISCV_AS ?= $(RISCV_PREFIX)as +RISCV_AR ?= $(RISCV_PREFIX)ar +RISCV_LD ?= $(RISCV_PREFIX)ld.lld +RISCV_STRIP ?= $(RISCV_PREFIX)strip +else RISCV_PREFIX ?= $(LLVM_INSTALL_DIR)/bin/ RISCV_CC ?= $(RISCV_PREFIX)clang RISCV_CXX ?= $(RISCV_PREFIX)clang++ @@ -54,6 +65,7 @@ RISCV_AS ?= $(RISCV_PREFIX)llvm-as RISCV_AR ?= $(RISCV_PREFIX)llvm-ar RISCV_LD ?= $(RISCV_PREFIX)ld.lld RISCV_STRIP ?= $(RISCV_PREFIX)llvm-strip +endif # Use gcc to compile scalar riscv-tests RISCV_CC_GCC ?= $(GCC_INSTALL_DIR)/bin/$(RISCV_TARGET)-gcc @@ -75,6 +87,9 @@ PYTHON ?= python3 # Defines ENV_DEFINES ?= +ifeq ($(LINUX),1) +ENV_DEFINES += -DARA_LINUX=1 +endif ifeq ($(vcd_dump),1) ENV_DEFINES += -DVCD_DUMP=1 endif @@ -88,27 +103,38 @@ RISCV_WARNINGS += -Wunused-variable -Wall -Wextra -Wno-unused-command-line-argum LLVM_FLAGS ?= -march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -mabi=$(RISCV_ABI) -mno-relax -fuse-ld=lld LLVM_V_FLAGS ?= -fno-vectorize -mllvm -scalable-vectorization=off -mllvm -riscv-v-vector-bits-min=0 -Xclang -target-feature -Xclang +no-optimized-zero-stride-load RISCV_FLAGS ?= $(LLVM_FLAGS) $(LLVM_V_FLAGS) -mcmodel=medany -I$(CURDIR)/common -std=gnu99 -O3 -ffast-math -fno-common -fno-builtin-printf $(DEFINES) $(RISCV_WARNINGS) +ifeq ($(LINUX),1) +RISCV_CCFLAGS ?= -march=rv64gcv -mabi=$(RISCV_ABI) -I$(CURDIR)/common -O3 $(DEFINES) +RISCV_LDFLAGS ?= +else RISCV_CCFLAGS ?= $(RISCV_FLAGS) -ffunction-sections -fdata-sections +RISCV_LDFLAGS ?= -static -nostartfiles -lm -Wl,--gc-sections -T$(CURDIR)/common/link.ld +endif RISCV_CCFLAGS_SPIKE ?= $(RISCV_FLAGS) $(SPIKE_CCFLAGS) -ffunction-sections -fdata-sections RISCV_CXXFLAGS ?= $(RISCV_FLAGS) -ffunction-sections -fdata-sections -RISCV_LDFLAGS ?= -static -nostartfiles -lm -Wl,--gc-sections RISCV_LDFLAGS_SPIKE ?= $(RISCV_LDFLAGS) $(SPIKE_LDFLAGS) -Wl,--gc-sections # GCC Flags RISCV_FLAGS_GCC ?= -mcmodel=medany -march=$(RISCV_ARCH) -mabi=$(RISCV_ABI) -I$(CURDIR)/common -static -std=gnu99 -O3 -ffast-math -fno-common -fno-builtin-printf $(DEFINES) $(RISCV_WARNINGS) RISCV_CCFLAGS_GCC ?= $(RISCV_FLAGS_GCC) RISCV_CXXFLAGS_GCC ?= $(RISCV_FLAGS_GCC) -RISCV_LDFLAGS_GCC ?= -static -nostartfiles -lm -lgcc $(RISCV_FLAGS_GCC) +RISCV_LDFLAGS_GCC ?= -static -nostartfiles -lm -lgcc $(RISCV_FLAGS_GCC) -T$(CURDIR)/common/link.ld ifeq ($(COMPILER),gcc) RISCV_OBJDUMP_FLAGS ?= else +ifneq ($(LINUX),1) RISCV_OBJDUMP_FLAGS ?= --mattr=v endif +endif # Compile two different versions of the runtime, since we cannot link code compiled with two different toolchains RUNTIME_GCC ?= common/crt0-gcc.S.o common/printf-gcc.c.o common/string-gcc.c.o common/serial-gcc.c.o common/util-gcc.c.o +ifeq ($(LINUX),1) +RUNTIME_LLVM ?= common/util-llvm.c.o +else RUNTIME_LLVM ?= common/crt0-llvm.S.o common/printf-llvm.c.o common/string-llvm.c.o common/serial-llvm.c.o common/util-llvm.c.o +endif RUNTIME_SPIKE ?= $(spike_env_dir)/benchmarks/common/crt.S.o.spike $(spike_env_dir)/benchmarks/common/syscalls.c.o.spike common/util.c.o.spike .INTERMEDIATE: $(RUNTIME_GCC) $(RUNTIME_LLVM) diff --git a/apps/common/util.c b/apps/common/util.c index ffaadb3a7..6287ed17b 100644 --- a/apps/common/util.c +++ b/apps/common/util.c @@ -22,6 +22,8 @@ int *__dummy__errno__ptr__; +unsigned long int timer; + // Floating-point similarity check with threshold int similarity_check(double a, double b, double threshold) { double diff = a - b; diff --git a/apps/conjugate_gradient/main.c b/apps/conjugate_gradient/main.c index 408226118..23f0718b5 100644 --- a/apps/conjugate_gradient/main.c +++ b/apps/conjugate_gradient/main.c @@ -25,10 +25,13 @@ #include "shared_kernel/spmv.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include "util.h" #include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" #endif #define USE_SPMV 1 diff --git a/apps/cos/main.c b/apps/cos/main.c index be86b450d..aa6f1a46a 100644 --- a/apps/cos/main.c +++ b/apps/cos/main.c @@ -20,10 +20,18 @@ #include #include "kernel/cos.h" -#include "printf.h" #include "runtime.h" + #include "util.h" +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" +#endif + extern size_t N_f64; extern double angles_f64[] __attribute__((aligned(4 * NR_LANES))); extern double results_f64[] __attribute__((aligned(4 * NR_LANES))); diff --git a/apps/dotproduct/main.c b/apps/dotproduct/main.c index 50b1911de..75e75c9a9 100644 --- a/apps/dotproduct/main.c +++ b/apps/dotproduct/main.c @@ -23,10 +23,14 @@ #include "kernel/dotproduct.h" -#ifndef SPIKE -#include "printf.h" -#else +#include "util.h" + +#ifdef SPIKE #include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" #endif // Run also the scalar benchmark diff --git a/apps/dropout/kernel/dropout.h b/apps/dropout/kernel/dropout.h index 6806b181e..6ee1bb7af 100644 --- a/apps/dropout/kernel/dropout.h +++ b/apps/dropout/kernel/dropout.h @@ -19,7 +19,15 @@ #ifndef _DROPOUT_H_ #define _DROPOUT_H_ +#include "util.h" + +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" +#endif #include @@ -29,10 +37,6 @@ #include "runtime.h" -#ifndef SPIKE -#include "printf.h" -#endif - void dropout_gold(const unsigned int n, const float *i, const float scale, const uint8_t *sel_ptr, float *o); void dropout_vec(const unsigned int n, const float *i, const float scale, diff --git a/apps/dtype-conv3d/main.c b/apps/dtype-conv3d/main.c index b522b62d0..4e73b1840 100644 --- a/apps/dtype-conv3d/main.c +++ b/apps/dtype-conv3d/main.c @@ -22,7 +22,11 @@ #include "runtime.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/dtype-matmul/main.c b/apps/dtype-matmul/main.c index 0e1caafec..7c33699c2 100644 --- a/apps/dtype-matmul/main.c +++ b/apps/dtype-matmul/main.c @@ -18,13 +18,16 @@ // Samuel Riedel, ETH Zurich #include -#include #include #include "runtime.h" #include "util.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/dwt/main.c b/apps/dwt/main.c index 1e89e2531..330e69b53 100644 --- a/apps/dwt/main.c +++ b/apps/dwt/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif #define CHECK diff --git a/apps/exp/main.c b/apps/exp/main.c index 4c9e33eda..9c2e42984 100644 --- a/apps/exp/main.c +++ b/apps/exp/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif extern size_t N_f64; diff --git a/apps/fconv2d/main.c b/apps/fconv2d/main.c index 5e551acdf..f26312677 100644 --- a/apps/fconv2d/main.c +++ b/apps/fconv2d/main.c @@ -17,14 +17,17 @@ // Author: Matteo Perotti #include -#include #include #include "fconv2d.h" #include "runtime.h" #include "util.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/fconv3d/main.c b/apps/fconv3d/main.c index 991691e29..79668955f 100644 --- a/apps/fconv3d/main.c +++ b/apps/fconv3d/main.c @@ -17,14 +17,17 @@ // Author: Matteo Perotti #include -#include #include #include "fconv3d.h" #include "runtime.h" #include "util.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/fdotproduct/main.c b/apps/fdotproduct/main.c index 44f736a26..7277f3d60 100644 --- a/apps/fdotproduct/main.c +++ b/apps/fdotproduct/main.c @@ -24,10 +24,12 @@ #include "kernel/fdotproduct.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif // Threshold for FP comparisons diff --git a/apps/fft/main.c b/apps/fft/main.c index 3bdc31ac7..02514510f 100644 --- a/apps/fft/main.c +++ b/apps/fft/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif #define MAX_NFFT 256 diff --git a/apps/fmatmul/main.c b/apps/fmatmul/main.c index 7f97d09ec..a9d2dc185 100644 --- a/apps/fmatmul/main.c +++ b/apps/fmatmul/main.c @@ -18,14 +18,17 @@ // Samuel Riedel, ETH Zurich // Matteo Perotti, ETH Zurich -#include #include #include "kernel/fmatmul.h" #include "runtime.h" #include "util.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/gemv/main.c b/apps/gemv/main.c index a7b1e4d1e..8d3bb3448 100644 --- a/apps/gemv/main.c +++ b/apps/gemv/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif #define M_ROW 1024 diff --git a/apps/hello_world/main.c b/apps/hello_world/main.c index a3a95a9c1..668a00ee9 100644 --- a/apps/hello_world/main.c +++ b/apps/hello_world/main.c @@ -19,11 +19,13 @@ #include #include -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE #include "util.h" #include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" #endif int main() { diff --git a/apps/iconv2d/main.c b/apps/iconv2d/main.c index 53b9bcadc..2990bd960 100644 --- a/apps/iconv2d/main.c +++ b/apps/iconv2d/main.c @@ -23,7 +23,13 @@ #include "iconv2d.h" #include "runtime.h" -#ifndef SPIKE +#include "util.h" + +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/imatmul/main.c b/apps/imatmul/main.c index 5740eae9c..b2a609a40 100644 --- a/apps/imatmul/main.c +++ b/apps/imatmul/main.c @@ -18,14 +18,17 @@ // Samuel Riedel, ETH Zurich #include -#include #include #include "kernel/imatmul.h" #include "runtime.h" #include "util.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/jacobi2d/kernel/jacobi2d.c b/apps/jacobi2d/kernel/jacobi2d.c index 67390bef6..1779ebc5b 100644 --- a/apps/jacobi2d/kernel/jacobi2d.c +++ b/apps/jacobi2d/kernel/jacobi2d.c @@ -104,6 +104,8 @@ void j2d_v(uint64_t r, uint64_t c, DATA_TYPE *A, DATA_TYPE *B, } } +// Not compatible with Linux compiler +#ifndef ARA_LINUX void j2d_kernel_v(uint64_t r, uint64_t c, DATA_TYPE *A, DATA_TYPE *B) { vfloat64m1_t xU; vfloat64m1_t xUtmp; @@ -143,6 +145,7 @@ void j2d_kernel_v(uint64_t r, uint64_t c, DATA_TYPE *A, DATA_TYPE *B) { } } } +#endif // Optimized version of the jacobi2d kernel void j2d_kernel_adhoc_warm(uint64_t r, uint64_t c, DATA_TYPE *A, DATA_TYPE *B) { diff --git a/apps/jacobi2d/main.c b/apps/jacobi2d/main.c index df22fa7dc..06de28c7c 100644 --- a/apps/jacobi2d/main.c +++ b/apps/jacobi2d/main.c @@ -80,7 +80,11 @@ WITH ACCESS OR USE OF THE SOFTWARE. #include "runtime.h" #include "util.h" -#ifndef SPIKE +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else #include "printf.h" #endif diff --git a/apps/log/main.c b/apps/log/main.c index 778a7d5e9..823273f3c 100644 --- a/apps/log/main.c +++ b/apps/log/main.c @@ -20,10 +20,17 @@ #include #include "kernel/log.h" -#include "printf.h" #include "runtime.h" #include "util.h" +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" +#endif + #define THRESHOLD 1 #define CHECK diff --git a/apps/pathfinder/main.c b/apps/pathfinder/main.c index 696f3b1ba..94054bd90 100644 --- a/apps/pathfinder/main.c +++ b/apps/pathfinder/main.c @@ -15,11 +15,14 @@ #include "runtime.h" #include "kernel/pathfinder.h" +#include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif #define CHECK diff --git a/apps/roi_align/main.c b/apps/roi_align/main.c index 2605b10c8..526d11f03 100644 --- a/apps/roi_align/main.c +++ b/apps/roi_align/main.c @@ -14,10 +14,14 @@ #include "kernel/roi_align.h" -#ifndef SPIKE -#include "printf.h" -#else +#include "util.h" + +#ifdef SPIKE #include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" #endif // Execute only the central kernel with fake data diff --git a/apps/softmax/main.c b/apps/softmax/main.c index fe1f88b9a..7ccac904b 100644 --- a/apps/softmax/main.c +++ b/apps/softmax/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif // Check the results using a threshold diff --git a/apps/spmv/main.c b/apps/spmv/main.c index c697eeaa9..bfed8c3bb 100644 --- a/apps/spmv/main.c +++ b/apps/spmv/main.c @@ -23,10 +23,12 @@ #include "runtime.h" #include "util.h" -#ifndef SPIKE -#include "printf.h" -#else +#ifdef SPIKE +#include +#elif defined ARA_LINUX #include +#else +#include "printf.h" #endif extern uint64_t R; diff --git a/apps/vfredsum/main.c b/apps/vfredsum/main.c index 0c3bf4786..4d9823109 100644 --- a/apps/vfredsum/main.c +++ b/apps/vfredsum/main.c @@ -17,13 +17,19 @@ // Author: Matteo Perotti #include -#include #include #include "kernel/vfredsum.h" -#include "printf.h" #include "runtime.h" +#ifdef SPIKE +#include +#elif defined ARA_LINUX +#include +#else +#include "printf.h" +#endif + // Size of the largest possible vector register in Bytes (LMUL == 8) #define MAX_BYTE_LMUL8 VLEN diff --git a/cheshire/Makefile b/cheshire/Makefile new file mode 100644 index 000000000..692db5e2f --- /dev/null +++ b/cheshire/Makefile @@ -0,0 +1,58 @@ +# Copyright 2024 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 +# +# Author: Moritz Imfeld +# Author: Matteo Perotti +# Author: Mojtaba Rostami + +# Chshire root reposiotry +MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) +ARA_ROOT := $(MAKEFILE_DIR)/.. +BACKREF_CHS_ROOT ?= $(realpath ../../../../..) +BACKREF_CHS_XIL_SCRIPTS := $(BACKREF_CHS_ROOT)/target/xilinx/scripts +BACKREF_CHS_SIM_SCRIPTS := $(BACKREF_CHS_ROOT)/target/sim/vsim/ + +# Tools +VIVADO ?= 'vitis-2020.2 vivado' + +# Set up Bender targets and defines +# default configuration for Cheshire + Ara is 2_lanes +ARA_CONFIGURATION ?= 2_lanes +include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk +BOARD ?= vcu128 +VLOG_ARGS ?= -suppress 2583 -suppress 13314 +COMMON_CUSTOM_TARGETS := -t cv64a6_imafdcv_sv39 -t cva6 --define ARA --define NR_LANES=$(nr_lanes) --define VLEN=$(vlen) +CUSTOM_SIM_BENDER_TARGETS := $(COMMON_CUSTOM_TARGETS) -t sim -t test -t rtl --vlog-arg="$(VLOG_ARGS)" +CUSTOM_XIL_BENDER_TARGETS := $(COMMON_CUSTOM_TARGETS) -t fpga -t $(BOARD) + +# Define XILINX FPGA URL and PATH for programming +CHS_XILINX_HWS_URL ?= +CHS_XILINX_HWS_PATH ?= + +.PHONY: ara-chs-xilinx ara-chs-flash ara-chs-xilinx-program update_xilinx_src update_vsim_src clean + +ara-chs-xilinx: update_xilinx_src + make -C $(BACKREF_CHS_ROOT) chs-xilinx-$(BOARD) + +ara-chs-image: + make -C $(BACKREF_CHS_ROOT) $(BACKREF_CHS_ROOT)/sw/boot/linux.$(BOARD).gpt.bin -B + +ara-chs-xilinx-flash: + make -C $(BACKREF_CHS_ROOT) chs-xilinx-flash-$(BOARD) VIVADO="$(VIVADO)" CHS_XILINX_HWS_URL="$(CHS_XILINX_HWS_URL)" CHS_XILINX_HWS_PATH_$(BOARD)="$(CHS_XILINX_HWS_PATH)" + +ara-chs-xilinx-program: + make -C $(BACKREF_CHS_ROOT) chs-xilinx-program-$(BOARD) VIVADO="$(VIVADO)" CHS_XILINX_HWS_URL="$(CHS_XILINX_HWS_URL)" CHS_XILINX_HWS_PATH_$(BOARD)="$(CHS_XILINX_HWS_PATH)" + +update_xilinx_src: + cd $(BACKREF_CHS_ROOT) && \ + bender script vivado $(CUSTOM_XIL_BENDER_TARGETS) > $(BACKREF_CHS_XIL_SCRIPTS)/add_sources.$(BOARD).tcl + +update_vsim_src: + cd $(BACKREF_CHS_ROOT) && \ + bender script vsim $(CUSTOM_SIM_BENDER_TARGETS) > $(BACKREF_CHS_SIM_SCRIPTS)/compile.cheshire_soc.tcl + echo 'vlog "$(realpath $(BACKREF_CHS_ROOT))/target/sim/src/elfloader.cpp" -ccflags "-std=c++11"' >> $(BACKREF_CHS_SIM_SCRIPTS)/compile.cheshire_soc.tcl + +clean: + rm $(BACKREF_CHS_XIL_SCRIPTS)/add_sources.$(BOARD).tcl + rm $(MAKEFILE_DIR)/add_sources.$(BOARD).tcl diff --git a/cheshire/README.md b/cheshire/README.md new file mode 100644 index 000000000..8476ba41b --- /dev/null +++ b/cheshire/README.md @@ -0,0 +1,183 @@ +## Introduction + +Ara can be synthesized on a VCU128 FPGA and boot Linux through the Cheshire SoC. This folder provides the necessary targets and flow to build RVV-Linux and deploy Ara on FPGA. To make them work, this repository should be deployed as a submodule of Cheshire. + +Our entry point is to generate a custom `add_sources.vcu128.tcl` file with specific Ara targets, copy this file into the Cheshire directory, and then use the default Cheshire compile flow, which will use our provided TCL file + +## How to Use + +Ara should be instantiated as a submodule of Cheshire. This means that the Ara repo should be downloaded through `bender checkout` from the Cheshire directory. Then, Ara's path can be retrived using `bender path ara`. + +```bash +git clone git@github.com:pulp-platform/cheshire.git +cd cheshire +git checkout ${COMMIT} +bender checkout +ARA_ROOT=$(bender path ara) +cd ${ARA_ROOT} +``` + +## FPGA and OS flow + +### LINUX-RVV Kernels +Compile kernels to be run on the FPGA under Linux (this will also install the buildroot toolchain) + +```bash +cd ${ARA_ROOT}/cheshire/sw + +# Choose a kernel from the apps directory +kernel=fmatmul +make ${kernel}-linux +``` + +### Generate the Linux IMG +Generate the Linux image (containing all the RVV kernels previously built) + +```bash +# Generate the Linux img +cd ${ARA_ROOT}/cheshire/sw +make linux-img + +# Generate Cheshire's Linux img +cd ${ARA_ROOT}/cheshire +make ara-chs-image +``` + +### Generate the FPGA bitstream + +```bash +cd ${ARA_ROOT}/cheshire +make ara-chs-xilinx +``` + +### Flash the Linux image on the SD card + +```bash +cd ${ARA_ROOT}/cheshire +make ara-chs-xilinx-flash +``` + +### Program the bitstream + +```bash +cd ${ARA_ROOT}/cheshire +make ara-chs-xilinx-program +``` + +For more information, see Cheshire's documentation (https://pulp-platform.github.io/cheshire/tg/xilinx). + +### Example + +Example script to boot Linux on a VCU128 FPGA board. Modify the variables as needed. + +There should be an open Hardware Target for the VCU128 board. Moreover, a UART terminal is required. + +Note: this script requires `bender`. Also, some Cheshire targets may require an up-to-date RISC-V compiler. + +```bash +export CHS_ROOT=$(pwd)/cheshire + +# Cheshire commit +# FILL ME +export CHS_HASH= + +# Do we need a specific GCC/G++ version to build the buildroot GCC compiler? +HOST_TOOLCHAIN_SUFFIX= + +# Which RVV kernels to run under Linux +export RVV_KERNELS="hello_world-linux fmatmul-linux fconv3d-linux jacobi2d-linux fdotproduct-linux" + +# FPGA details +# FILL ME +export BOARD="vcu128" +export CHS_XILINX_HWS_URL= +export CHS_XILINX_HWS_PATH= + +# Info +echo "Using the VCU128 ${CHS_XILINX_HWS_URL} ${CHS_XILINX_HWS_PATH}" +# Clone Cheshire +echo "Cloning Cheshire" +git clone git@github.com:pulp-platform/cheshire.git +cd ${CHS_ROOT} +git checkout ${CHS_HASH} +# Checkout Ara +echo 'Checkout hardware deps' +bender checkout +export ARA_ROOT=$(bender -d ${CHS_ROOT} path ara) +# Compile RVV kernels +echo 'Install the Linux compiler and compile the LINUX RVV kernels' +make -C ${ARA_ROOT}/cheshire/sw ${RVV_KERNELS} HOST_TOOLCHAIN_SUFFIX=${HOST_TOOLCHAIN_SUFFIX} +# Compile the Linux image +echo 'Compile the Linux image' +make -C ${ARA_ROOT}/cheshire/sw linux-img HOST_TOOLCHAIN_SUFFIX=${HOST_TOOLCHAIN_SUFFIX} +# Generate Cheshire's Linux image +echo 'Generate Cheshire Linux image' +make -C ${ARA_ROOT}/cheshire ara-chs-image BOARD=${BOARD} +# Generate the bitstream +echo 'Generate the bitstream' +make -C ${ARA_ROOT}/cheshire ara-chs-xilinx BOARD=${BOARD} +# Flash the SD with Linux +echo 'Flash the SD with Linux' +make -C ${ARA_ROOT}/cheshire ara-chs-xilinx-flash BOARD=${BOARD} CHS_XILINX_HWS_URL=${CHS_XILINX_HWS_URL} CHS_XILINX_HWS_PATH=${CHS_XILINX_HWS_PATH} +# Program the bitstream +echo 'Program the bitstream' +make -C ${ARA_ROOT}/cheshire ara-chs-xilinx-flash BOARD=${BOARD} CHS_XILINX_HWS_URL=${CHS_XILINX_HWS_URL} CHS_XILINX_HWS_PATH=${CHS_XILINX_HWS_PATH} +``` + +## Bare-metal flow + +### Compile the bare-metal programs in `${ARA_ROOT}/cheshire/sw/src` + +```bash +cd ${ARA_ROOT}/cheshire/sw +make chs-sw-all +``` + +### Generate the FPGA bitstream + +```bash +cd ${ARA_ROOT}/cheshire +make ara-chs-xilinx +``` + +### Program the bitstream + +Provided that an Hardware Target is available: + +```bash +cd ${ARA_ROOT}/cheshire +make ara-chs-xilinx-program +``` + +### Run programs on the FPGA +The programs can now be injected in the FPGA via JTAG (OpenOCD + GDB). +For more information, see Cheshire's documentation (https://pulp-platform.github.io/cheshire/tg/xilinx). + +## Back-Referencing Explained + +Here's how we use back-referencing in our setup: + +1. **Generate Custom TCL File**: + + - We generate a custom `add_sources.vcu128.tcl` file using the `bender script vivado` command with our specific targets (`-t fpga -t vcu128 -t cv64a6_imafdcv_sv39 -t cva6 --define ARA --define NR_LANES=$(nr_lanes) --define VLEN=$(vlen)`). + - This custom TCL file includes all the necessary sources and configurations required for the FPGA synthesis with Cheshire + Ara. + +2. **Copy Custom TCL File**: + + - The generated custom TCL file is then copied into the Cheshire directory (`$(BACKREF_CHS_XIL_SCRIPTS)/add_sources.vcu128.tcl`). + +3. **Invoke Cheshire Compile Flow**: + + - With the custom TCL file in place, we invoke the Cheshire compile flow by running `make -C $(BACKREF_CHS_ROOT) chs-xilinx-all`. + - The Cheshire compile flow target depends on the `add_sources.vcu128.tcl` file, and since we have provided our custom version, it will use ours for the synthesis process. + +This method ensures that we can extend and customize the compile flow for our specific needs without modifying the Cheshire repository directly. + +## Notes + +### Variables + - ARA_CONFIGURATION: thus far, only Ara with 2 lanes has been tested (ARA_CONFIGURATION=2_lanes). + - HOST_TOOLCHAIN_SUFFIX: the host GCC and G++ should be sufficiently up to date to build the buildroot cross compiler. For environments that track the program version with suffixes, this variable helps choose the correct host compiler version. Use this variable only if needed when installing the buildroot toolchain. + - BOARD: name of the board, e.g., `vcu128`. + - CHS_XILINX_HWS_URL: URL of the FPGA, if connected to the net. + - CHS_XILINX_HWS_PATH: physical PATH of the FPGA. \ No newline at end of file diff --git a/cheshire/patches/device_tree.patch b/cheshire/patches/device_tree.patch new file mode 100644 index 000000000..198f95509 --- /dev/null +++ b/cheshire/patches/device_tree.patch @@ -0,0 +1,13 @@ +diff --git a/sw/boot/cheshire.dtsi b/sw/boot/cheshire.dtsi +index 76decc8..5561e87 100644 +--- a/sw/boot/cheshire.dtsi ++++ b/sw/boot/cheshire.dtsi +@@ -30,7 +30,7 @@ + status = "okay"; + compatible = "eth,ariane", "riscv"; + clock-frequency = <50000000>; // 50 MHz +- riscv,isa = "rv64imafdc"; ++ riscv,isa = "rv64imafdcv"; + mmu-type = "riscv,sv39"; + tlb-split; + reg = <0>; diff --git a/cheshire/sw/Makefile b/cheshire/sw/Makefile new file mode 100644 index 000000000..9f3a8d919 --- /dev/null +++ b/cheshire/sw/Makefile @@ -0,0 +1,44 @@ +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Matteo Perotti +# +# Copy and compile vector software on Cheshire + +CHS_ROOT ?= $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../../../../../.. +ARA_ROOT := $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/../.. +CHS_SW := $(CHS_ROOT)/sw +ARA_SW := $(ARA_ROOT)/cheshire/sw +ARA_APPS := $(ARA_ROOT)/apps + +APPS := $(patsubst $(ARA_APPS)/%/main.c,%,$(shell find $(ARA_APPS) -name "main.c")) +SW_C := $(wildcard $(ARA_SW)/src/*.c) +DEPS_H := $(wildcard $(ARA_SW)/include/*.h) + +# Hardware configuration for the Ara RVV kernels +# Can be chosen in [2|4|8|16]_lanes +ARA_CONFIGURATION ?= 2_lanes +include $(ARA_ROOT)/config/$(ARA_CONFIGURATION).mk + +# Get the original compiler options and add the support for vector extension +CHS_SW_FLAGS ?= $(shell grep "^CHS_SW_FLAGS\s\+?=\s\+" -- $(CHS_SW)/sw.mk | sed 's/^.*?= //' | sed s/rv64gc/rv64gcv/) +# Tweak the compilation to include Cheshire-related headers and files +CHS_SW_FLAGS += -DCHESHIRE -DNR_LANES=$(nr_lanes) -DVLEN=$(vlen) + +# Vars and rules to make the Linux image +include cva6-sdk.mk + +.PHONY: chs-sw-all copy_vector_sw copy-vector-deps + +# Forward build command to the main Cheshire makefile and attach the correct -march +chs-sw-all: copy-vector-sw copy-vector-deps + make -C $(CHS_ROOT) $@ CHS_SW_FLAGS="$(CHS_SW_FLAGS)" + +# Copy the dependencies from this folder to Cheshire +copy-vector-deps: $(DEPS_H) + cp $^ $(CHS_SW)/tests + +# Copy the vector programs from the src folder to cheshire +copy-vector-sw: $(SW_C) + cp $^ $(CHS_SW)/tests diff --git a/cheshire/sw/README.md b/cheshire/sw/README.md new file mode 100644 index 000000000..643e6961a --- /dev/null +++ b/cheshire/sw/README.md @@ -0,0 +1,53 @@ +# Build software for Cheshire Ara + +Ara should be instantiated as a submodule of Cheshire. This means that the Ara repo should be downloaded through `bender checkout` from the Cheshire directory. Then, Ara's path can be retrived using `bender path ara`. + +```bash +git clone git@github.com:pulp-platform/cheshire.git +cd cheshire +git checkout ${COMMIT} +bender checkout +ARA_ROOT=$(bender path ara) +cd ${ARA_ROOT} +``` +## Operating System + +### Build an RVV-ready Linux Image with vector kernels + +1) Compile kernels to be run on the FPGA under Linux (this will also install the buildroot toolchain) + +```bash +cd ${ARA_ROOT}/cheshire/sw + +# Choose a kernel from the apps directory +kernel=fmatmul +make ${kernel}-linux +``` + +2) Generate the Linux image (containing all the RVV kernels previously built) + +```bash +# Generate the Linux img +cd ${ARA_ROOT}/cheshire/sw +make linux-img +``` + +## Bare-metal + +Compile the source files with the vector extension support enable: + +```bash +make chs-sw-all +``` + +This command will also copy the necessary dependencies to `sw/tests` and enable the vector extension at compile time. + +## Notes + +If the version of the default host compiler is too low, the build of the buildroot toolchain can fail. +Host `gcc` and `g++` version 11.2.0 work. + +For IIS builds: +``` +make [linux_img, ${kernel}-linux] TOOLCHAIN_SUFFIX=-11.2.0 +``` diff --git a/cheshire/sw/cva6-sdk b/cheshire/sw/cva6-sdk new file mode 160000 index 000000000..ed582f3b8 --- /dev/null +++ b/cheshire/sw/cva6-sdk @@ -0,0 +1 @@ +Subproject commit ed582f3b8ea78a3d0a20f81b3da0f36a73d7e86b diff --git a/cheshire/sw/cva6-sdk.mk b/cheshire/sw/cva6-sdk.mk new file mode 100644 index 000000000..486bac277 --- /dev/null +++ b/cheshire/sw/cva6-sdk.mk @@ -0,0 +1,70 @@ +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# Matteo Perotti +# +# Build an RVV-ready Linux image + +# CVA6-SDK subpath +CVA6_SDK_ROOT = $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))/cva6-sdk + +# Linux should be RVV-ready? +RVV_LINUX := 1 +# Suffix for host toolchain (to build buildroot toolchain) +# For IIS users: HOST_TOOLCHAIN_SUFFIX=-11.2.0 +HOST_TOOLCHAIN_SUFFIX ?= +# Buildroot toolchain +TARGET_OS_TOOLCHAIN := $(CVA6_SDK_ROOT)/buildroot/output/host/bin/riscv64-buildroot-linux-gnu-gcc +# Github submodule update tokens +CVA6_SDK_UPDATED := $(ARA_ROOT)/cheshire/sw/.cva6-sdk.updated + +.PHONY: %-linux linux-img + +################ +## Build deps ## +################ + +.PRECIOUS: $(CVA6_SDK_UPDATED) +$(CVA6_SDK_UPDATED): + git submodule update --init --recursive -- $(CVA6_SDK_ROOT) + touch $@ + +$(TARGET_OS_TOOLCHAIN): $(CVA6_SDK_UPDATED) + @echo "Building the RISC-V CVA6-SDK Linux TOOLCHAIN" + @echo "Your gcc version is: $$(gcc -dumpfullversion). This build worked with gcc and g++ version 11.2.0. Please adjust this if needed." + make -C $(CVA6_SDK_ROOT) $@ \ + HOSTCC=gcc$(HOST_TOOLCHAIN_SUFFIX) \ + HOSTCXX=g++$(HOST_TOOLCHAIN_SUFFIX) \ + HOSTCPP=cpp$(HOST_TOOLCHAIN_SUFFIX) \ + RVV=$(RVV_LINUX) + touch $@ + +######################## +## Build RVV Software ## +######################## + +$(ARA_APPS)/bin/%-linux: $(shell find $(ARA_APPS)/$* -name "*.c" -o -name "*.S") $(TARGET_OS_TOOLCHAIN) + make -C $(ARA_APPS) bin/$*-linux LINUX=1 config=${ARA_CONFIGURATION} + +.PRECIOUS: $(CVA6_SDK_ROOT)/rootfs/% +$(CVA6_SDK_ROOT)/rootfs/%: $(ARA_APPS)/bin/%-linux + cp $< $@ + +%-linux: $(CVA6_SDK_ROOT)/rootfs/% + @echo "$@ built and copied." + +##################### +## Build Linux IMG ## +##################### + +$(CVA6_SDK_ROOT)/install64/vmlinux: $(CVA6_SDK_UPDATED) $(TARGET_OS_TOOLCHAIN) $(TARGET_KERNELS) + make -C $(ARA_SW)/cva6-sdk images RVV=$(RVV_LINUX) + +# Softlink the linux image and create a bootable Cheshire image +linux-img: $(CVA6_SDK_ROOT)/install64/vmlinux + if [ -d "$(CHS_SW)/deps/cva6-sdk/install64" ]; then \ + echo "$(CHS_SW)/deps/cva6-sdk/install64 already exists, creating a backup..."; \ + mv $(CHS_SW)/deps/cva6-sdk/install64 $(CHS_SW)/deps/cva6-sdk/install64.bak_$(shell date +%Y%m%d_%H%M%S); \ + fi + cp -r $(CVA6_SDK_ROOT)/install64 $(CHS_SW)/deps/cva6-sdk/ diff --git a/cheshire/sw/include/cheshire_util.h b/cheshire/sw/include/cheshire_util.h new file mode 100644 index 000000000..9d57d7cc4 --- /dev/null +++ b/cheshire/sw/include/cheshire_util.h @@ -0,0 +1,26 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Matteo Perotti +// +// Cheshire-related util + +#ifndef __CHESHIRE_UTIL_H__ +#define __CHESHIRE_UTIL_H__ + +#include "printf.h" + +void cheshire_start() { + // Initialize Cheshire's UART + uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500); + uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE); +} + +void cheshire_end() { + // Flush teh UART + uart_write_flush(&__base_uart); +} + +#endif diff --git a/cheshire/sw/include/encoding.h b/cheshire/sw/include/encoding.h new file mode 120000 index 000000000..674da338d --- /dev/null +++ b/cheshire/sw/include/encoding.h @@ -0,0 +1 @@ +../../../apps/common/encoding.h \ No newline at end of file diff --git a/cheshire/sw/include/fmatmul.c.h b/cheshire/sw/include/fmatmul.c.h new file mode 120000 index 000000000..1aa8fb602 --- /dev/null +++ b/cheshire/sw/include/fmatmul.c.h @@ -0,0 +1 @@ +../../../apps/fmatmul/kernel/fmatmul.c \ No newline at end of file diff --git a/cheshire/sw/include/fmatmul.h b/cheshire/sw/include/fmatmul.h new file mode 120000 index 000000000..928a355e6 --- /dev/null +++ b/cheshire/sw/include/fmatmul.h @@ -0,0 +1 @@ +../../../apps/fmatmul/kernel/fmatmul.h \ No newline at end of file diff --git a/cheshire/sw/include/vector_util.h b/cheshire/sw/include/vector_util.h new file mode 100644 index 000000000..02959e2c0 --- /dev/null +++ b/cheshire/sw/include/vector_util.h @@ -0,0 +1,57 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Matteo Perotti +// +// Custom vector util + +#ifndef __VECTOR_UTIL_H__ +#define __VECTOR_UTIL_H__ + +// Compile with version(GCC) >= 13 +#include +#include "encoding.h" + +#define FABS(x) ((x < 0) ? -x : x) + +unsigned int timer; + +// Return the current value of the cycle counter +int64_t get_cycle_count() { + int64_t cycle_count; + // The fence is needed to be sure that Ara is idle, and it is not performing + // the last vector stores when we read mcycle with stop_timer() + asm volatile("fence; csrr %[cycle_count], cycle" : [cycle_count] "=r"(cycle_count)); + return cycle_count; +}; + +// Start and stop the counter +void start_timer() { timer = -get_cycle_count(); } +void stop_timer() { timer += get_cycle_count(); } + +// Get the value of the timer +int64_t get_timer() { return timer; } + +inline void enable_rvv() { + asm volatile ("li t0, %0" :: "i"(MSTATUS_VS)); + asm volatile ("csrs mstatus, t0" ); +} + +inline int similarity_check(double a, double b, double threshold) { + double diff = a - b; + if (FABS(diff) > threshold) + return 0; + else + return 1; +} + +inline int similarity_check_32b(float a, float b, float threshold) { + float diff = a - b; + if (FABS(diff) > threshold) + return 0; + else + return 1; +} + +#endif diff --git a/cheshire/sw/src/fmatmul.c b/cheshire/sw/src/fmatmul.c new file mode 100644 index 000000000..9f014dddf --- /dev/null +++ b/cheshire/sw/src/fmatmul.c @@ -0,0 +1,118 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Matteo Perotti +// +// fmatmul wrapper for Cheshire + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "dif/uart.h" +#include "params.h" +#include "util.h" + +#include "cheshire_util.h" +#include "vector_util.h" + +#include "fmatmul.c.h" + +#ifndef _MM_SIZE_ +#define _MM_SIZE_ 32 +#endif + +// Define Matrix dimensions: +// C = AB with A=[MxN], B=[NxP], C=[MxP] +uint64_t M = _MM_SIZE_; +uint64_t N = _MM_SIZE_; +uint64_t P = _MM_SIZE_; + +// Max matrix size: 256x256 +double a[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); +double b[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); +double c[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); +// Gold results +double g[_MM_SIZE_*_MM_SIZE_] __attribute__((aligned(32 * NR_LANES))); + +#define THRESHOLD 0.001 + +// Verify the matrix +int verify_matrix(double *result, double *gold, size_t R, size_t C, + double threshold) { + for (uint64_t i = 0; i < R; ++i) { + for (uint64_t j = 0; j < C; ++j) { + int idx = i * C + j; + if (!similarity_check(result[idx], gold[idx], threshold)) { + return (i + j) == 0 ? -1 : idx; + } + } + } + return 0; +} + +int main() { + printf("fmatmul kernel:\r\n"); + + cheshire_start(); + enable_rvv(); + + unsigned int s = M; + + // Initialize matrices + for (unsigned int i = 0; i < s; ++i) { + for (unsigned int k = 0; k < s; ++k) { + a[k + i*s] = (double) (i + k); + } + } + for (unsigned int k = 0; k < s; ++k) { + for (unsigned int j = 0; j < s; ++j) { + b[j + k*s] = (double) (k - j); + } + } + + // Run scalar check + printf("Calculating fmatmul on scalar core...\r\n"); + for (unsigned int i = 0; i < s; ++i) { + for (unsigned int j = 0; j < s; ++j) { + double sum = 0; + for (unsigned int k = 0; k < s; ++k) { + sum += a[k + i * s] * b[j + k * s]; + } + g[j + i * s] = sum; + } + } + + // Run vector kernel + printf("Calculating fmatmul on vector core...\r\n"); + start_timer(); + fmatmul(c, a, b, s, s, s); + stop_timer(); + + // Metrics + int64_t runtime = get_timer(); + float performance = 2.0 * s * s * s / runtime; + float utilization = 100 * performance / (2.0 * NR_LANES); + + printf("The execution took %d cycles.\r\n", runtime); + printf("The performance is %f FLOP/cycle (%f%% utilization).\r\n", + performance, utilization); + + // Verify the result only for s == M (to keep it simple) + if (s == M) { + printf("Verifying result...\r\n"); + int error = verify_matrix(c, g, s, s, THRESHOLD); + if (error != 0) { + printf("Error code %d\r\n", error); + printf("c[%d]=%f != %f\r\n", error, c[error], g[error]); + cheshire_end(); + return error; + } else { + printf("Passed.\r\n"); + } + } + + + cheshire_end(); + + return 0; +} diff --git a/cheshire/sw/src/vector_helloworld.c b/cheshire/sw/src/vector_helloworld.c new file mode 100644 index 000000000..05943ed61 --- /dev/null +++ b/cheshire/sw/src/vector_helloworld.c @@ -0,0 +1,37 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Matteo Perotti +// +// Simple vector memcpy for Hello World! + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "dif/uart.h" +#include "params.h" +#include "util.h" + +#include "cheshire_util.h" +#include "vector_util.h" + +unsigned char buf[64]; + +int main(void) { + cheshire_start(); + enable_rvv(); + + const unsigned char str[] = "Hello Vector World!\r\n"; + vuint8m1_t str_v; + + // Copy the hello world string to buf + str_v = __riscv_vle8_v_u8m1(str, sizeof(str)); + __riscv_vse8_v_u8m1(buf, str_v, sizeof(str)); + + // Print buf + printf("%s", str_v); + + cheshire_end(); + + return 0; +} diff --git a/hardware/src/lane/operand_requester.sv b/hardware/src/lane/operand_requester.sv index 4d46d161a..1baec0780 100644 --- a/hardware/src/lane/operand_requester.sv +++ b/hardware/src/lane/operand_requester.sv @@ -215,7 +215,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( // A set bit indicates that the the master q is requesting access to the bank b // Masters 0 to NrOperandQueues-1 correspond to the operand queues. // The remaining four masters correspond to the ALU, the MFPU, the MASKU, the VLDU, and the SLDU. - localparam NrMasters = NrOperandQueues + 5; + localparam NrGlobalMasters = 5; + localparam NrMasters = NrOperandQueues + NrGlobalMasters; typedef struct packed { vaddr_t addr; @@ -225,7 +226,9 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( opqueue_e opqueue; } payload_t; - logic [NrBanks-1:0][NrMasters-1:0] operand_req; + logic [NrBanks-1:0][NrOperandQueues-1:0] lane_operand_req; + logic [NrOperandQueues-1:0][NrBanks-1:0] lane_operand_req_transposed; + logic [NrBanks-1:0][NrGlobalMasters-1:0] ext_operand_req; logic [NrBanks-1:0][NrMasters-1:0] operand_gnt; payload_t [NrMasters-1:0] operand_payload; @@ -251,6 +254,12 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( logic [NrVInsn-1:0] waw_hazard_counter; } requester_metadata_t; + for (genvar b = 0; b < NrBanks; b++) begin + for (genvar r = 0; r < NrOperandQueues; r++) begin + assign lane_operand_req[b][r] = lane_operand_req_transposed[r][b]; + end + end + for (genvar requester_index = 0; requester_index < NrOperandQueues; requester_index++) begin : gen_operand_requester // State of this operand requester_index state_t state_d, state_q; @@ -293,7 +302,7 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( // Make no requests to the VRF operand_payload[requester_index] = '0; - for (int bank = 0; bank < NrBanks; bank++) operand_req[bank][requester_index] = 1'b0; + for (int b = 0; b < NrBanks; b++) lane_operand_req_transposed[requester_index][b] = 1'b0; // Do not acknowledge any operand requester_index commands operand_request_ready_o[requester_index] = 1'b0; @@ -395,7 +404,7 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( automatic vlen_t num_bytes; // Operand request - operand_req[bank][requester_index] = !stall; + lane_operand_req_transposed[requester_index][bank] = !stall; operand_payload[requester_index] = '{ addr : requester_metadata_q.addr >> $clog2(NrBanks), opqueue: opqueue_e'(requester_index), @@ -462,11 +471,11 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( // Reset state state_d = IDLE; // Don't wake up the store queue (redundant, as it will be flushed anyway) - operand_queue_cmd_valid_o[StA] = 1'b0; + operand_queue_cmd_valid_o[requester_index] = 1'b0; // Clear metadata requester_metadata_d = '0; // Flush this request - operand_req[bank][StA] = '0; + lane_operand_req_transposed[requester_index][bank] = '0; end : vstu_exception_idle end : operand_requester @@ -489,11 +498,11 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( always_comb begin // Default assignment for (int bank = 0; bank < NrBanks; bank++) begin - operand_req[bank][NrOperandQueues + VFU_Alu] = 1'b0; - operand_req[bank][NrOperandQueues + VFU_MFpu] = 1'b0; - operand_req[bank][NrOperandQueues + VFU_MaskUnit] = 1'b0; - operand_req[bank][NrOperandQueues + VFU_SlideUnit] = 1'b0; - operand_req[bank][NrOperandQueues + VFU_LoadUnit] = 1'b0; + ext_operand_req[bank][VFU_Alu] = 1'b0; + ext_operand_req[bank][VFU_MFpu] = 1'b0; + ext_operand_req[bank][VFU_MaskUnit] = 1'b0; + ext_operand_req[bank][VFU_SlideUnit] = 1'b0; + ext_operand_req[bank][VFU_LoadUnit] = 1'b0; end // Generate the payloads for write back operations @@ -534,15 +543,15 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( }; // Store their request value - operand_req[alu_result_addr_i[idx_width(NrBanks)-1:0]][NrOperandQueues + VFU_Alu] = + ext_operand_req[alu_result_addr_i[idx_width(NrBanks)-1:0]][VFU_Alu] = alu_result_req_i; - operand_req[mfpu_result_addr_i[idx_width(NrBanks)-1:0]][NrOperandQueues + VFU_MFpu] = + ext_operand_req[mfpu_result_addr_i[idx_width(NrBanks)-1:0]][VFU_MFpu] = mfpu_result_req_i; - operand_req[masku_result_addr[idx_width(NrBanks)-1:0]][NrOperandQueues + VFU_MaskUnit] = + ext_operand_req[masku_result_addr[idx_width(NrBanks)-1:0]][VFU_MaskUnit] = masku_result_req; - operand_req[sldu_result_addr[idx_width(NrBanks)-1:0]][NrOperandQueues + VFU_SlideUnit] = + ext_operand_req[sldu_result_addr[idx_width(NrBanks)-1:0]][VFU_SlideUnit] = sldu_result_req; - operand_req[ldu_result_addr[idx_width(NrBanks)-1:0]][NrOperandQueues + VFU_LoadUnit] = + ext_operand_req[ldu_result_addr[idx_width(NrBanks)-1:0]][VFU_LoadUnit] = ldu_result_req; // Generate the grant signals @@ -577,8 +586,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( .rr_i ('0 ), .data_i ({operand_payload[MulFPUC:AluA], operand_payload[NrOperandQueues + VFU_MFpu:NrOperandQueues + VFU_Alu]} ), - .req_i ({operand_req[bank][MulFPUC:AluA], - operand_req[bank][NrOperandQueues + VFU_MFpu:NrOperandQueues + VFU_Alu]}), + .req_i ({lane_operand_req[bank][MulFPUC:AluA], + ext_operand_req[bank][VFU_MFpu:VFU_Alu]}), .gnt_o ({operand_gnt[bank][MulFPUC:AluA], operand_gnt[bank][NrOperandQueues + VFU_MFpu:NrOperandQueues + VFU_Alu]}), .data_o (payload_hp ), @@ -602,8 +611,8 @@ module operand_requester import ara_pkg::*; import rvv_pkg::*; #( .rr_i ('0 ), .data_i ({operand_payload[SlideAddrGenA:MaskB], operand_payload[NrOperandQueues + VFU_LoadUnit:NrOperandQueues + VFU_SlideUnit]} ), - .req_i ({operand_req[bank][SlideAddrGenA:MaskB], - operand_req[bank][NrOperandQueues + VFU_LoadUnit:NrOperandQueues + VFU_SlideUnit]}), + .req_i ({lane_operand_req[bank][SlideAddrGenA:MaskB], + ext_operand_req[bank][VFU_LoadUnit:VFU_SlideUnit]}), .gnt_o ({operand_gnt[bank][SlideAddrGenA:MaskB], operand_gnt[bank][NrOperandQueues + VFU_LoadUnit:NrOperandQueues + VFU_SlideUnit]}), .data_o (payload_lp ),