diff --git a/.github/cases/blastoise/default.json b/.github/cases/blastoise/default.json index 3a46b2e41..14a4b6fd1 100644 --- a/.github/cases/blastoise/default.json +++ b/.github/cases/blastoise/default.json @@ -5,9 +5,9 @@ "mlir.stripmining": 26736, "asm.mmm": 92573, "asm.smoke": 4867, - "intrinsic.conv2d_less_m2": 2612, + "intrinsic.conv2d_less_m2": 2594, "intrinsic.linear_normalization": 3280, - "intrinsic.softmax": 8348, + "intrinsic.softmax": 8347, "codegen.vaadd_vv": 91859, "codegen.vaadd_vx": 253663, "codegen.vaaddu_vv": 91859, @@ -499,14 +499,14 @@ "codegen.vfredusum_vs": 122275, "codegen.vfredmax_vs": 122275, "codegen.vfredmin_vs": 122275, - "rvv_bench.ascii_to_utf16": 1583921, - "rvv_bench.ascii_to_utf32": 704111, - "rvv_bench.byteswap": 3353234, + "rvv_bench.ascii_to_utf16": 1583663, + "rvv_bench.ascii_to_utf32": 703954, + "rvv_bench.byteswap": 3353148, "rvv_bench.chacha20": 2, - "rvv_bench.mandelbrot": 4056018, - "rvv_bench.memcpy": 2152069, - "rvv_bench.memset": 439171, - "rvv_bench.mergelines": 3338064, + "rvv_bench.mandelbrot": 4055961, + "rvv_bench.memcpy": 2131904, + "rvv_bench.memset": 438545, + "rvv_bench.mergelines": 3337870, "rvv_bench.poly1305": 2, "rvv_bench.strlen": 877539, "rvv_bench.utf8_count": 6340756 diff --git a/.github/cases/machamp/default.json b/.github/cases/machamp/default.json index 000e0c79f..d7cde8503 100644 --- a/.github/cases/machamp/default.json +++ b/.github/cases/machamp/default.json @@ -5,7 +5,7 @@ "mlir.stripmining": 13425, "asm.mmm": 91428, "asm.smoke": 5005, - "intrinsic.conv2d_less_m2": 2612, + "intrinsic.conv2d_less_m2": 2585, "codegen.vaadd_vv": 90595, "codegen.vaadd_vx": 250851, "codegen.vaaddu_vv": 90595, @@ -435,13 +435,13 @@ "codegen.vxor_vx": 63090, "codegen.vzext_vf2": 39398, "codegen.vzext_vf4": 6420, - "rvv_bench.ascii_to_utf16": 1460381, - "rvv_bench.ascii_to_utf32": 631370, - "rvv_bench.byteswap": 3259113, + "rvv_bench.ascii_to_utf16": 1460078, + "rvv_bench.ascii_to_utf32": 631187, + "rvv_bench.byteswap": 3259002, "rvv_bench.chacha20": 2, - "rvv_bench.memcpy": 1905444, - "rvv_bench.memset": 244925, - "rvv_bench.mergelines": 3137885, + "rvv_bench.memcpy": 1875713, + "rvv_bench.memset": 244319, + "rvv_bench.mergelines": 3137759, "rvv_bench.poly1305": 2, "rvv_bench.strlen": 710421, "rvv_bench.utf8_count": 5729721 diff --git a/.github/cases/sandslash/default.json b/.github/cases/sandslash/default.json index 6fa9e360a..32e69fe3a 100644 --- a/.github/cases/sandslash/default.json +++ b/.github/cases/sandslash/default.json @@ -5,7 +5,7 @@ "mlir.stripmining": 3577, "asm.mmm": 91437, "asm.smoke": 3558, - "intrinsic.conv2d_less_m2": 2612, + "intrinsic.conv2d_less_m2": 2576, "codegen.vaadd_vv": 119061, "codegen.vaadd_vx": 336401, "codegen.vaaddu_vv": 119061, @@ -435,13 +435,13 @@ "codegen.vxor_vx": 84837, "codegen.vzext_vf2": 134873, "codegen.vzext_vf4": 19800, - "rvv_bench.ascii_to_utf16": 1371886, - "rvv_bench.ascii_to_utf32": 583519, - "rvv_bench.byteswap": 3556432, + "rvv_bench.ascii_to_utf16": 1371550, + "rvv_bench.ascii_to_utf32": 583318, + "rvv_bench.byteswap": 3556315, "rvv_bench.chacha20": 2, - "rvv_bench.memcpy": 1739436, - "rvv_bench.memset": 131052, - "rvv_bench.mergelines": 3038652, + "rvv_bench.memcpy": 1704869, + "rvv_bench.memset": 130483, + "rvv_bench.mergelines": 3038600, "rvv_bench.poly1305": 2, "rvv_bench.strlen": 715272, "rvv_bench.utf8_count": 4797732 diff --git a/difftest/default.nix b/difftest/default.nix index 41ed87e64..efcd77d71 100644 --- a/difftest/default.nix +++ b/difftest/default.nix @@ -1,4 +1,5 @@ { lib +, libspike , callPackage , elaborateConfig @@ -48,6 +49,8 @@ let env = { VERILATED_INC_DIR = "${verilated}/include"; VERILATED_LIB_DIR = "${verilated}/lib"; + SPIKE_LIB_DIR = "${libspike}/lib"; + SPIKE_INTERFACES_LIB_DIR = "${spike_interfaces}/lib"; DESIGN_VLEN = elaborateConfig.parameter.vLen; DESIGN_DLEN = elaborateConfig.parameter.dLen; }; diff --git a/difftest/spike_interfaces/CMakeLists.txt b/difftest/spike_interfaces/CMakeLists.txt index 6da21334c..fe5272891 100644 --- a/difftest/spike_interfaces/CMakeLists.txt +++ b/difftest/spike_interfaces/CMakeLists.txt @@ -4,7 +4,7 @@ set(CMAKE_CXX_STANDARD 17) find_package(libspike REQUIRED) -add_library(${CMAKE_PROJECT_NAME} SHARED spike_interfaces.cc) +add_library(${CMAKE_PROJECT_NAME} STATIC spike_interfaces.cc) target_link_libraries(${CMAKE_PROJECT_NAME} PUBLIC libspike) diff --git a/difftest/spike_rs/build.rs b/difftest/spike_rs/build.rs new file mode 100644 index 000000000..9399fdaf0 --- /dev/null +++ b/difftest/spike_rs/build.rs @@ -0,0 +1,18 @@ +use std::env; + +fn main() { + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_LIB_DIR").expect("SPIKE_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=riscv"); + println!("cargo::rustc-link-lib=static=softfloat"); + println!("cargo::rustc-link-lib=static=disasm"); + println!("cargo::rustc-link-lib=static=fesvr"); + println!("cargo::rustc-link-lib=static=fdt"); + + println!("cargo::rustc-link-search=native={}", env::var("SPIKE_INTERFACES_LIB_DIR").expect("SPIKE_INTERFACES_LIB_DIR should be set")); + println!("cargo::rustc-link-lib=static=spike_interfaces"); + + println!("cargo::rerun-if-env-changed=SPIKE_LIB_DIR"); + println!("cargo::rerun-if-env-changed=SPIKE_INTERFACES_LIB_DIR"); + + println!("cargo::rustc-link-lib=stdc++"); +} diff --git a/nix/overlay.nix b/nix/overlay.nix index 662ae18ef..c6027ba31 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -18,7 +18,6 @@ rec { espresso = final.callPackage ./pkgs/espresso.nix { }; dramsim3 = final.callPackage ./pkgs/dramsim3.nix { }; libspike = final.callPackage ./pkgs/libspike.nix { }; - libspike_interfaces = final.callPackage ../difftest/libspike_interfaces { }; buddy-mlir = final.callPackage ./pkgs/buddy-mlir.nix { }; fetchMillDeps = final.callPackage ./pkgs/mill-builder.nix { }; circt-full = final.callPackage ./pkgs/circt-full.nix { }; diff --git a/nix/pkgs/libspike.nix b/nix/pkgs/libspike.nix index dc3a28fdc..1ecb1c317 100644 --- a/nix/pkgs/libspike.nix +++ b/nix/pkgs/libspike.nix @@ -7,7 +7,7 @@ stdenv.mkDerivation { env.cmakeConfig = '' add_library(libspike STATIC IMPORTED GLOBAL) set_target_properties(libspike PROPERTIES - IMPORTED_LOCATION "${placeholder "out"}/lib/libriscv.so") + IMPORTED_LOCATION "${placeholder "out"}/lib/libriscv.a") target_include_directories(libspike AFTER INTERFACE "${placeholder "out"}/include" "${placeholder "out"}/include/riscv" @@ -37,6 +37,7 @@ stdenv.mkDerivation { cp softfloat/*.h $out/include/softfloat cp config.h $out/include cp *.so $out/lib + cp *.a $out/lib echo "$cmakeConfig" > $out/lib/cmake/libspike/libspike-config.cmake runHook postInstall ''; diff --git a/t1/src/lsu/StoreUnit.scala b/t1/src/lsu/StoreUnit.scala index fb5d9ee4a..69f28a6a0 100644 --- a/t1/src/lsu/StoreUnit.scala +++ b/t1/src/lsu/StoreUnit.scala @@ -58,7 +58,6 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { val lastDataGroupReg: UInt = RegEnable(lastDataGroupForInstruction, 0.U, lsuRequest.valid) val nextDataGroup: UInt = Mux(lsuRequest.valid, 0.U, dataGroup + 1.U) val isLastRead: Bool = dataGroup === lastDataGroupReg - val lastGroupAndNeedAlign: Bool = initOffset.orR && isLastRead // stage1, 读vrf // todo: need hazardCheck? @@ -159,12 +158,12 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // 存每条cache 的mask, 也许能优化, 暂时先这样 val maskForBufferData: Vec[UInt] = RegInit(VecInit(Seq.fill(8)(0.U(param.lsuTransposeSize.W)))) val maskForBufferDequeue: UInt = maskForBufferData(cacheLineIndexInBuffer) - val tailLeft2: Bool = RegInit(false.B) + val lastDataGroupInDataBuffer: Bool = RegInit(false.B) val alignedDequeueFire: Bool = memRequest.fire // cache 不对齐的时候的上一条残留 val cacheLineTemp: UInt = RegEnable(dataBuffer.head, 0.U((param.lsuTransposeSize * 8).W), alignedDequeueFire) val maskTemp: UInt = RegInit(0.U(param.lsuTransposeSize.W)) - val tailValid: Bool = RegInit(false.B) + val canSendTail: Bool = RegInit(false.B) val isLastCacheLineInBuffer: Bool = cacheLineIndexInBuffer === lsuRequestReg.instructionInformation.nf val bufferWillClear: Bool = alignedDequeueFire && isLastCacheLineInBuffer accessBufferDequeueReady := !bufferValid || (memRequest.ready && isLastCacheLineInBuffer) @@ -176,7 +175,7 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { // 把数据regroup, 然后放去 [[dataBuffer]] when(accessBufferDequeueFire) { maskForBufferData := cutUInt(fillBySeg, param.lsuTransposeSize) - tailLeft2 := lastGroupAndNeedAlign + lastDataGroupInDataBuffer := isLastRead // todo: 只是因为参数恰好是一个方形的, 需要写一个反的 dataBuffer := Mux1H(dataEEWOH, Seq.tabulate(3) { sewSize => // 每个数据块 2 ** sew byte @@ -238,11 +237,12 @@ class StoreUnit(param: MSHRParam) extends StrideBase(param) with LSUPublic { when(lsuRequest.valid || alignedDequeueFire) { maskTemp := Mux(lsuRequest.valid, 0.U, maskForBufferDequeue) - tailValid := Mux(lsuRequest.valid, false.B, bufferValid && tailLeft2 && isLastCacheLineInBuffer) + canSendTail := !lsuRequest.valid && bufferValid && isLastCacheLineInBuffer && lastDataGroupInDataBuffer } // 连接 alignedDequeue - memRequest.valid := bufferValid || tailValid + val needSendTail: Bool = bufferBaseCacheLineIndex === cacheLineNumberReg + memRequest.valid := bufferValid || (canSendTail && needSendTail) // aligned memRequest.bits.data := multiShifter(right = false, multiSize = 8)(dataBuffer.head ## cacheLineTemp, initOffset) >> cacheLineTemp.getWidth