From 0488c46d478f26f6da9fadc54c32d3649a4e05c6 Mon Sep 17 00:00:00 2001 From: HidetaroTanaka Date: Mon, 6 Nov 2023 18:25:14 +0900 Subject: [PATCH] a --- .../hajime/common/HajimeCoreParams.scala | 1 + src/main/scala/hajime/simple4Stage/Core.scala | 107 +----------------- .../scala/hajime/vectorOoO/Dispatcher.scala | 41 +++++++ .../hajime/vectorOoO/FrontEndForOoO.scala | 45 ++++++-- .../hajime/vectormodules/VectorCpu.scala | 4 - 5 files changed, 78 insertions(+), 120 deletions(-) create mode 100644 src/main/scala/hajime/vectorOoO/Dispatcher.scala diff --git a/src/main/scala/hajime/common/HajimeCoreParams.scala b/src/main/scala/hajime/common/HajimeCoreParams.scala index 9a6a3c6f..60c9b6b9 100644 --- a/src/main/scala/hajime/common/HajimeCoreParams.scala +++ b/src/main/scala/hajime/common/HajimeCoreParams.scala @@ -34,6 +34,7 @@ case class HajimeCoreParams( vlen: Int = 256, vecAluExecUnitNum: Int = 2, ) { + def physicalRegWidth: Int = log2Up(physicalRegFileEntriesFor1Thread) def robTagWidth: Int = log2Up(robEntries) def generateDefaultMISA: UInt = { Cat((xprlen match { diff --git a/src/main/scala/hajime/simple4Stage/Core.scala b/src/main/scala/hajime/simple4Stage/Core.scala index cc9a081c..88d226e4 100644 --- a/src/main/scala/hajime/simple4Stage/Core.scala +++ b/src/main/scala/hajime/simple4Stage/Core.scala @@ -164,9 +164,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons val csrUnit = Module(new CSRUnit()) csrUnit.io := DontCare val multiplier = if(params.useMulDiv) Some(Module(new NonPipelinedMultiplierWrap())) else None - val vectorDecoder = if(params.useVector) Some(Module(new VectorDecoder())) else None - val vecCtrlUnit = if(params.useVector) Some(Module(new VecCtrlUnit())) else None - val vecRegFile = if(params.useVector) Some(Module(new VecRegFile(vrfPortNum = 2))) else None if(params.useMulDiv) multiplier.get.io := DontCare ldstUnit.io.dcache_axi4lite <> io.dcache_axi4lite @@ -257,20 +254,6 @@ class CPU(implicit params: 
HajimeCoreParams) extends CpuModule with ScalarOpCons bypassingUnit.io.ID.out.rs2_bypassMatchAtWB -> (!bypassingUnit.io.WB.in.bits.rd.valid), )) - if(params.useVector) { - vectorDecoder.get.io.inst := decoded_inst - when(decoder.io.out.valid && decoder.io.out.bits.vector.get) { - ID_EX_REG.bits.vectorCtrlSignals.get := vectorDecoder.get.io.out - } - // 0 -> v0.mask[i]が1ならば書き込み,0ならば書き込まない - // 1 -> マスクなし,全て書き込む - // (マスクを使わないベクタ命令は全てvm=1か?) - ID_EX_REG.bits.vectorDataSignals.get.mask := decoded_inst.bits(25) - ID_EX_REG.bits.vectorDataSignals.get.vs1 := decoded_inst.rs1 - ID_EX_REG.bits.vectorDataSignals.get.vs2 := decoded_inst.rs2 - ID_EX_REG.bits.vectorDataSignals.get.vd := decoded_inst.rd - } - if(params.debug) { ID_EX_REG.bits.debug.get.instruction := decoded_inst.bits ID_EX_REG.bits.debug.get.pc := io.frontend.resp.bits.pc @@ -287,44 +270,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons } // START OF EX STAGE - val idxReg = if(params.useVector) Some(RegInit(0.U(log2Up(params.vlen/8).W))) else None - val EX_WB_idxReg = if(params.useVector) Some(RegNext(idxReg.get)) else None - // TODO: ロードストアユニット内に入れる,他のベクタ実行ユニットも同様 - val vecValid = if(params.useVector) Some(RegInit(false.B)) else None - val vecDataReg = if(params.useVector) Some(RegNext(ID_EX_REG.bits.vectorDataSignals.get)) else None - if (params.useVector) { - // ベクタ命令がベクタレジスタに書き込み,かつinst.vmが1またはv0.mask[i]=1ならば書き込み - val vecWriteBack = ID_EX_REG.bits.vectorCtrlSignals.get.vrfWrite && (ID_EX_REG.bits.vectorDataSignals.get.mask || vecRegFile.get.io.readReq(0).resp.vm) - // vsetvli系でないベクタ命令が実行され,かつ最終要素でないならばインクリメント,それ以外ならばリセット - idxReg.get := MuxCase(0.U, Seq( - (ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && !ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst && - ((idxReg.get + 1.U) < EX_WB_REG.bits.vectorCsrPorts.get.vl)) -> (idxReg.get + 1.U) - )) - // Mux(!EX_stall && ID_inst_valid && decoder.io.out.bits.vector.get && 
!vectorDecoder.get.io.out.isConfsetInst, 0.U, idxReg.get + 1.U) - vecValid.get := ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && vecWriteBack - - // vecRegFileへの入力 - vecRegFile.get.io.readReq(1) := DontCare - vecRegFile.get.io.writeReq(1) := DontCare - vecRegFile.get.io.readReq(0).req.sew := EX_WB_REG.bits.vectorCsrPorts.get.vtype.vsew - vecRegFile.get.io.readReq(0).req.idx := idxReg.get - vecRegFile.get.io.readReq(0).req.vs1 := ID_EX_REG.bits.vectorDataSignals.get.vs1 - vecRegFile.get.io.readReq(0).req.vs2 := ID_EX_REG.bits.vectorDataSignals.get.vs2 - vecRegFile.get.io.readReq(0).req.vd := ID_EX_REG.bits.vectorDataSignals.get.vd - - vecRegFile.get.io.writeReq(0).valid := vecValid.get - vecRegFile.get.io.writeReq(0).bits.vd := vecDataReg.get.vd - vecRegFile.get.io.writeReq(0).bits.vtype := EX_WB_REG.bits.vectorCsrPorts.get.vtype - vecRegFile.get.io.writeReq(0).bits.index := EX_WB_idxReg.get - vecRegFile.get.io.writeReq(0).bits.last := (EX_WB_idxReg.get-1.U) === EX_WB_REG.bits.vectorCsrPorts.get.vl - vecRegFile.get.io.writeReq(0).bits.data := ldstUnit.io.cpu.resp.bits.data - vecRegFile.get.io.writeReq(0).bits.vm := false.B - vecRegFile.get.io.writeReq(0).bits.writeReq := vecValid.get - - if(params.debug) { - io.debug_io.get.vrfMap.get := vecRegFile.get.io.debug.get - } - } alu.io.in1 := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.value1, 0.U)(Seq( Value1.RS1.asUInt -> ID_EX_REG.bits.dataSignals.rs1, @@ -337,18 +282,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons Value2.PC.asUInt -> ID_EX_REG.bits.dataSignals.pc.addr, )) - if (params.useVector) { - // ベクタメモリアクセス命令が有効ならaluへの入力を上書き - // UNIT_STRIDEならrs1+index*elen - when(ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && ID_EX_REG.bits.vectorCtrlSignals.get.mop === MOP.UNIT_STRIDE.asUInt) { - alu.io.in2 := idxReg.get << MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.memory_length, 0.U)(Seq( - MEM_LEN.B.asUInt -> 0.U, - MEM_LEN.H.asUInt -> 1.U, - 
MEM_LEN.W.asUInt -> 2.U, - MEM_LEN.D.asUInt -> 3.U, - )) - } - } alu.io.funct := ID_EX_REG.bits.ctrlSignals.decode branch_evaluator.io.req.bits.ALU_Result := alu.io.out @@ -370,36 +303,21 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons multiplier.get.io.resp.ready := !(EX_WB_REG.valid && WB_stall) } - if(params.useVector) { - vecCtrlUnit.get.io.req.valid := ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst - vecCtrlUnit.get.io.req.bits.vDecode := ID_EX_REG.bits.vectorCtrlSignals.get - vecCtrlUnit.get.io.req.bits.rs1_value := ID_EX_REG.bits.dataSignals.rs1 - vecCtrlUnit.get.io.req.bits.rs2_value := ID_EX_REG.bits.dataSignals.rs2 - vecCtrlUnit.get.io.req.bits.zimm := ID_EX_REG.bits.dataSignals.zimm - vecCtrlUnit.get.io.req.bits.uimm := ID_EX_REG.bits.dataSignals.imm - } - val EX_arithmetic_result = if(params.useMulDiv) { Mux(ID_EX_REG.bits.ctrlSignals.decode.use_MUL, multiplier.get.io.resp.bits, alu.io.out) } else { alu.io.out } - val EX_vector_result = if(params.useVector) Some(vecCtrlUnit.get.io.resp.bits.vl) else None - - ldstUnit.io.cpu.req.valid := ID_EX_REG.valid && !EX_flush && (ID_EX_REG.bits.ctrlSignals.decode.memValid || (if(params.useVector) { - // マスク無しまたは要素が有効な場合にのみtrue - ID_EX_REG.bits.vectorDataSignals.get.mask || vecRegFile.get.io.readReq(0).resp.vm - } else true.B)) + ldstUnit.io.cpu.req.valid := ID_EX_REG.valid && !EX_flush && (ID_EX_REG.bits.ctrlSignals.decode.memValid) ldstUnit.io.cpu.req.bits.addr := alu.io.out - ldstUnit.io.cpu.req.bits.data := (if(params.useVector) Mux(ID_EX_REG.bits.vectorCtrlSignals.get.mop === MOP.UNIT_STRIDE.asUInt, vecRegFile.get.io.readReq(0).resp.vdOut, ID_EX_REG.bits.dataSignals.rs2) else ID_EX_REG.bits.dataSignals.rs2) + ldstUnit.io.cpu.req.bits.data := ID_EX_REG.bits.dataSignals.rs2 ldstUnit.io.cpu.req.bits.funct := ID_EX_REG.bits.ctrlSignals.decode bypassingUnit.io.EX.in.bits.rd.bits.index := 
ID_EX_REG.bits.ctrlSignals.rd_index bypassingUnit.io.EX.in.bits.rd.bits.value := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeback_selector, 0.U)(Seq( WB_SEL.PC4.asUInt -> ID_EX_REG.bits.dataSignals.pc.nextPC, WB_SEL.ARITH.asUInt -> EX_arithmetic_result, - WB_SEL.VECTOR.asUInt -> (if(params.useVector) EX_vector_result.get else 0.U) )) bypassingUnit.io.EX.in.bits.rd.valid := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeback_selector, false.B)(Seq( WB_SEL.PC4.asUInt -> true.B, @@ -407,7 +325,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons WB_SEL.CSR.asUInt -> false.B, WB_SEL.MEM.asUInt -> false.B, WB_SEL.NONE.asUInt -> false.B, - WB_SEL.VECTOR.asUInt -> (if(params.useVector) true.B else false.B) )) && ID_EX_REG.valid bypassingUnit.io.EX.in.valid := ID_EX_REG.bits.ctrlSignals.decode.write_to_rd && ID_EX_REG.valid @@ -415,12 +332,10 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons // 乗算命令であればmultiplier.respがvalidである必要がある // vsetvl系でないベクタ命令ならば最終要素の実行である必要がある(idxReg == vl) EX_WB_REG.valid := ID_EX_REG.valid && (!ID_EX_REG.bits.ctrlSignals.decode.memValid || ldstUnit.io.cpu.req.ready) && - (if(params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.use_MUL || multiplier.get.io.resp.valid else true.B) && - (if(params.useVector) !ID_EX_REG.bits.ctrlSignals.decode.vector.get || ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst || ((idxReg.get + 1.U((idxReg.get.getWidth+1).W)) === EX_WB_REG.bits.vectorCsrPorts.get.vl) else true.B) + (if(params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.use_MUL || multiplier.get.io.resp.valid else true.B) EX_WB_REG.bits.dataSignals.pc := ID_EX_REG.bits.dataSignals.pc EX_WB_REG.bits.dataSignals.exResult := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeback_selector, 0.U)(Seq( WB_SEL.ARITH.asUInt -> EX_arithmetic_result, - WB_SEL.VECTOR.asUInt -> (if(params.useVector) EX_vector_result.get else 0.U), )) EX_WB_REG.bits.dataSignals.datatoCSR := 
Mux(ID_EX_REG.bits.ctrlSignals.decode.value1 === Value1.RS1.asUInt, ID_EX_REG.bits.dataSignals.rs1, ID_EX_REG.bits.dataSignals.imm) EX_WB_REG.bits.dataSignals.csr_addr := ID_EX_REG.bits.dataSignals.zimm @@ -436,21 +351,12 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons )) Mux(ID_EX_REG.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt, 0xb.U(params.xprlen.W), 0.U) - if(params.useVector) { - when(vecCtrlUnit.get.io.resp.valid) { - EX_WB_REG.bits.vectorCsrPorts.get := vecCtrlUnit.get.io.resp.bits - } - EX_WB_REG.bits.vectorExecNum.get := 0.U - } - if(params.debug) EX_WB_REG.bits.debug.get := ID_EX_REG.bits.debug.get // WBステージがvalidかつ破棄できないかつEXステージに有効な値がある場合,またはメモリアクセス命令かつldstUnit.reqがreadyでない,または乗算命令で乗算器がvalidでない // またはベクタ命令実行完了前にスカラ命令がID_EXレジスタにある,またはチェイニング不可能なベクタ命令(構造ハザード・0要素目の値が用意できていないなど) EX_stall := ID_EX_REG.valid && ((EX_WB_REG.valid && WB_stall) || (ID_EX_REG.bits.ctrlSignals.decode.memValid && !ldstUnit.io.cpu.req.ready) || (if(params.useMulDiv) { ID_EX_REG.bits.ctrlSignals.decode.use_MUL && !multiplier.get.io.resp.valid - } else false.B) || (if(params.useVector) { - ID_EX_REG.bits.ctrlSignals.decode.vector.get && !ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst && (idxReg.get < EX_WB_REG.bits.vectorCsrPorts.get.vl-1.U) } else false.B)) when(WB_stall) { @@ -479,7 +385,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons WB_SEL.ARITH -> EX_WB_REG.bits.dataSignals.exResult, WB_SEL.CSR -> csrUnit.io.resp.data, WB_SEL.MEM -> ldstUnit.io.cpu.resp.bits.data, - WB_SEL.VECTOR -> (if(params.useVector) EX_WB_REG.bits.dataSignals.exResult else 0.U) ).map{ case (wb_sel, data) => (wb_sel.asUInt, data) }) @@ -498,16 +403,10 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons csrUnit.io.fromCPU.hartid := io.hartid csrUnit.io.fromCPU.cpu_operating := cpu_operating csrUnit.io.fromCPU.inst_retire := WB_inst_can_retire - if(params.useVector) { - 
csrUnit.io.fromCPU.vectorExecNum.get.valid := false.B - csrUnit.io.fromCPU.vectorExecNum.get.bits := DontCare - } csrUnit.io.exception.valid := (EX_WB_REG.bits.exceptionSignals.valid || dmemoryAccessException) && EX_WB_REG.valid csrUnit.io.exception.bits.mepc_write := EX_WB_REG.bits.dataSignals.pc.addr csrUnit.io.exception.bits.mcause_write := Mux(dmemoryAccessException, ldstUnit.io.cpu.resp.bits.exceptionSignals.bits, EX_WB_REG.bits.exceptionSignals.bits) - if(params.useVector) csrUnit.io.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get - // EXまたはWBステージにfence, ecall, mretがある sysInst_in_pipeline := (ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.isSysInst) || (EX_WB_REG.valid && EX_WB_REG.bits.ctrlSignals.decode.isSysInst) diff --git a/src/main/scala/hajime/vectorOoO/Dispatcher.scala b/src/main/scala/hajime/vectorOoO/Dispatcher.scala new file mode 100644 index 00000000..c3372f21 --- /dev/null +++ b/src/main/scala/hajime/vectorOoO/Dispatcher.scala @@ -0,0 +1,41 @@ +package hajime.vectorOoO + +import chisel3._ +import circt.stage.ChiselStage +import chisel3.util._ +import hajime.common.BundleInitializer._ +import hajime.common._ +import hajime.simple4Stage._ +import hajime.vectormodules.VectorDecoderResp + +class DispatcherDataSignals(implicit params: HajimeCoreParams) extends Bundle { + import params._ + val pc = new ProgramCounter() + val renamedRs1 = Valid(UInt(physicalRegWidth.W)) + val renamedRs2 = Valid(UInt(physicalRegWidth.W)) + val renamedRd = Valid(UInt(physicalRegWidth.W)) + // jalr: immVal1 -> inst[31,20], immVal2 -> pc from RAS + // csr: immVal1 -> inst[31,20] (csr addr), immVal2 -> inst[4:0] + // vsetvli: immVal1 -> inst[30,20] + // vsetivli: immVal1 -> inst[29,20], immVal2 -> inst[4:0] + // vop.vi: immVal2 -> inst[4:0] + val immVal1 = UInt(xprlen.W) + val immVal2 = UInt(xprlen.W) +} +class DispatcherOutput(implicit params: HajimeCoreParams) extends Bundle { + val dataSignals = new DispatcherDataSignals() + val ctrlSignals = new 
BasicCtrlSignals() + val exceptionSignals = new Valid(UInt(params.xprlen.W)) + val vectorCtrlSignals = if(params.useVector) Some(new VectorDecoderResp()) else None + val debug = if(params.debug) Some(new Debug_Info()) else None +} + +class DispatcherIO(implicit params: HajimeCoreParams) extends Bundle { /* port bundle (Flipped/Input/Decoupled fields only), hence a Bundle rather than a hardware Module */ + val frontend = Flipped(new FrontEndCpuIO()) + val hartid = Input(UInt(params.xprlen.W)) + val toExecutor = new DecoupledIO(new DispatcherOutput()) +} + +class Dispatcher(implicit params: HajimeCoreParams) extends Module { + +} diff --git a/src/main/scala/hajime/vectorOoO/FrontEndForOoO.scala b/src/main/scala/hajime/vectorOoO/FrontEndForOoO.scala index 44367b48..230d7e2a 100644 --- a/src/main/scala/hajime/vectorOoO/FrontEndForOoO.scala +++ b/src/main/scala/hajime/vectorOoO/FrontEndForOoO.scala @@ -1,27 +1,48 @@ package hajime.vectorOoO -import circt.stage.ChiselStage import chisel3._ +import circt.stage.ChiselStage import chisel3.util._ -import hajime.axiIO.AXI4liteIO +import hajime.common.BundleInitializer._ import hajime.common._ import hajime.simple4Stage._ -import hajime.common.BundleInitializer._ class FrontEndForOoO(implicit params: HajimeCoreParams) extends Module { val io = IO(new FrontEndIO()) + io := DontCare val pc_reg = RegInit(Valid(new ProgramCounter()).Init( - _.valid -> true.B, + _.valid -> false.B, _.bits.addr -> io.reset_vector, )) - val toAxiAR = MuxCase(pc_reg.bits.nextPC, Seq( - io.cpu.req.valid -> io.cpu.req.bits.addr, - // axiがreadyでなければPCを維持 - (!io.icache_axi4lite.ar.ready || !io.icache_axi4lite.r.valid || !io.cpu.resp.ready) -> pc_reg.bits.addr - )) - // cpuがFrontEndから命令を読み取ればaddr - when(io.cpu.resp.valid && io.cpu.resp.ready) { - pc_reg := io.cpu.req.bits + // The PC is updated by the CPU (via io.cpu.req) + when(io.cpu.req.valid) { + pc_reg := io.cpu.req + } + .otherwise { + pc_reg.valid := false.B } + + io.icache_axi4lite.ar.bits.addr := Mux(io.cpu.req.valid, io.cpu.req.bits.addr, pc_reg.bits.addr) + io.icache_axi4lite.ar.bits.prot := 0.U + io.icache_axi4lite.ar.valid := 
io.cpu.req.valid || pc_reg.valid + + io.cpu.resp.bits.pc := pc_reg.bits + io.cpu.resp.bits.inst.bits := io.icache_axi4lite.r.bits.data + io.cpu.resp.valid := io.icache_axi4lite.r.valid + io.icache_axi4lite.r.ready := io.cpu.resp.ready + + val instAccessFault = pc_reg.bits.addr > 0x1FFC.U + val instAddressMisaligned = pc_reg.bits.addr(1, 0) =/= 0.U + io.cpu.resp.bits.exceptionSignals.bits := MuxCase(0.U, Seq( + instAccessFault -> Causes.fetch_access.U, + instAddressMisaligned -> Causes.misaligned_fetch.U, + )) + io.cpu.resp.bits.exceptionSignals.valid := instAccessFault || instAddressMisaligned +} + +object FrontEndForOoO extends App { + implicit val params: HajimeCoreParams = HajimeCoreParams() + def apply(implicit params: HajimeCoreParams): FrontEndForOoO = new FrontEndForOoO() + ChiselStage.emitSystemVerilogFile(new FrontEndForOoO(), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) } diff --git a/src/main/scala/hajime/vectormodules/VectorCpu.scala b/src/main/scala/hajime/vectormodules/VectorCpu.scala index dd74bd12..70516cba 100644 --- a/src/main/scala/hajime/vectormodules/VectorCpu.scala +++ b/src/main/scala/hajime/vectormodules/VectorCpu.scala @@ -293,10 +293,6 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar when(decoder.io.out.valid && decoder.io.out.bits.vector.get) { ID_EX_REG.bits.vectorCtrlSignals.get := vectorDecoder.io.out } - // 0 -> v0.mask[i]が1ならば書き込み,0ならば書き込まない - // 1 -> マスクなし,全て書き込む - // (マスクを使わないベクタ命令は全てvm=1か?) - ID_EX_REG.bits.vectorDataSignals.get.mask := vectorDecoder.io.out.vm ID_EX_REG.bits.vectorDataSignals.get.vs1 := decoded_inst.rs1 ID_EX_REG.bits.vectorDataSignals.get.vs2 := decoded_inst.rs2 ID_EX_REG.bits.vectorDataSignals.get.vd := decoded_inst.rd