diff --git a/src/main/scala/hajime/simple4Stage/Core.scala b/src/main/scala/hajime/simple4Stage/Core.scala index 1b4f61c..bece556 100644 --- a/src/main/scala/hajime/simple4Stage/Core.scala +++ b/src/main/scala/hajime/simple4Stage/Core.scala @@ -18,7 +18,7 @@ class debugIO(implicit params: HajimeCoreParams) extends Bundle { }) val debugAbiMap = new debug_map_physical_to_abi() val vrfMap = if(params.useVector) Some(Vec(32, UInt(params.vlen.W))) else None - // val ID_EX_Reg = Valid(new ID_EX_IO()) + // val ID_EX_Reg = Valid(new idExIo()) } object debugIO { @@ -121,7 +121,7 @@ class VectorDataSignals(implicit params: HajimeCoreParams) extends Bundle { val vd = UInt(5.W) } -class ID_EX_IO(implicit params: HajimeCoreParams) extends Bundle { +class idExIo(implicit params: HajimeCoreParams) extends Bundle { val dataSignals = new ID_EX_dataSignals() val ctrlSignals = new BasicCtrlSignals() val exceptionSignals = new Valid(UInt(params.xprlen.W)) @@ -130,7 +130,7 @@ class ID_EX_IO(implicit params: HajimeCoreParams) extends Bundle { val debug = if(params.debug) Some(new Debug_Info()) else None } -class EX_WB_IO(implicit params: HajimeCoreParams) extends Bundle { +class exWbIo(implicit params: HajimeCoreParams) extends Bundle { val dataSignals = new EX_WB_dataSignals() val ctrlSignals = new BasicCtrlSignals() val exceptionSignals = new Valid(UInt(params.xprlen.W)) @@ -139,7 +139,7 @@ class EX_WB_IO(implicit params: HajimeCoreParams) extends Bundle { val debug = if(params.debug) Some(new Debug_Info()) else None } -class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpConstants with VectorOpConstants { +class Cpu(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpConstants with VectorOpConstants { // val io = IO(new CpuIo()) io := DontCare @@ -190,8 +190,8 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons val decodedInst = Wire(new InstBundle()) decodedInst := io.frontend.resp.bits.inst - val idExReg = Reg(Valid(new ID_EX_IO())) - val exWbReg = Reg(Valid(new EX_WB_IO())) + val idExReg = Reg(Valid(new idExIo())) + val exWbReg = Reg(Valid(new exWbIo())) // io.debugIo.get.ID_EX_Reg := idExReg // EXステージがvalidであり,かつEXステージが破棄できない場合,またはIDステージで必要なレジスタ値を取得できない場合,またはfence命令がある場合にreadyを下げる @@ -518,6 +518,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons } } -object CPU extends App { - def apply(implicit params: HajimeCoreParams): CPU = new CPU() +object Cpu extends App { + def apply(implicit params: HajimeCoreParams): Cpu = new Cpu() } \ No newline at end of file diff --git a/src/main/scala/hajime/vectormodules/VectorCpu.scala b/src/main/scala/hajime/vectormodules/VectorCpu.scala index c4bd62d..2af7ec1 100644 --- a/src/main/scala/hajime/vectormodules/VectorCpu.scala +++ b/src/main/scala/hajime/vectormodules/VectorCpu.scala @@ -49,22 +49,22 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar cpuOperating := true.B } - val EX_stall = WireInit(false.B) - val WB_stall = WireInit(false.B) - val ID_stall = WireInit(false.B) - val ID_flush = WireInit(false.B) - val EX_flush = WireInit(false.B) + val exStall = WireInit(false.B) + val wbStall = WireInit(false.B) + val idStall = WireInit(false.B) + val idFlush = WireInit(false.B) + val exFlush = WireInit(false.B) // update PC in WB stage for ecall, mret or exception - val WB_pc_redirect = WireInit(false.B) - val rs1_required_but_not_valid = WireInit(false.B) - val rs2_required_but_not_valid = WireInit(false.B) + val wbPcRedirect = WireInit(false.B) + val rs1RequiredButNotValid = WireInit(false.B) + val rs2RequiredButNotValid = WireInit(false.B) - val decoded_inst = Wire(new InstBundle()) - decoded_inst := io.frontend.resp.bits.inst - val ID_EX_REG = RegInit(Valid(new ID_EX_IO()).Lit( + val decodedInst = Wire(new InstBundle()) + decodedInst := io.frontend.resp.bits.inst + val idExReg = RegInit(Valid(new idExIo()).Lit( _.valid -> false.B, )) - val EX_WB_REG = RegInit(Valid(new EX_WB_IO()).Lit( + val exWbReg = RegInit(Valid(new exWbIo()).Lit( _.valid -> false.B, )) @@ -95,7 +95,7 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar when(vecExecUnitsToExWbRegValid.map(_.asUInt).reduce (_ +& _) > 1.U) { printf("Multiple VecUnits last Error.\n") printf("Debug Info:\n") - val toExWbRegList: Seq[Valid[EX_WB_IO]] = Seq(vectorLdstUnit.io.toExWbReg) ++ vecAluExecUnit.map(_.io.toExWbReg) + val toExWbRegList: Seq[Valid[exWbIo]] = Seq(vectorLdstUnit.io.toExWbReg) ++ vecAluExecUnit.map(_.io.toExWbReg) toExWbRegList.zipWithIndex.foreach { case (d, i) => { printf("vecExecUnit%d:\n", i.U) @@ -107,9 +107,12 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar } } + /* + // Not used val vecExecUnitHasInstButNotRetire: Seq[Bool] = (vecExecUnitsReady.map(!_) zip (Seq(!vectorLdstUnit.io.toExWbReg.valid) ++ vecAluExecUnit.map(!_.io.toExWbReg.valid))).map { case (notReady, notRetire) => notReady && notRetire } + */ // START OF ID STAGE // TODO: 可読性向上のため,validのみのブロックとvalid && readyのブロックに分ける.EX,WBも同様 @@ -119,89 +122,88 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar // vecExecUnitのうちreadyでないものが存在する val vecInstInEx = !vecExecUnitsReady.reduce(_ && _) // flushならばストールさせる必要はない - ID_stall := !ID_flush && ((ID_EX_REG.valid && EX_stall) || rs1_required_but_not_valid || rs2_required_but_not_valid || sysInstInPipeline || vectorInstStall + idStall := !idFlush && ((idExReg.valid && exStall) || rs1RequiredButNotValid || rs2RequiredButNotValid || sysInstInPipeline || vectorInstStall || (vecInstInEx && decoder.io.out.valid && (!decoder.io.out.bits.vector.get || vectorDecoder.io.out.isConfsetInst || vectorDecoder.io.out.vecPermutation))) - io.frontend.resp.ready := cpuOperating && !ID_stall + io.frontend.resp.ready := cpuOperating && !idStall - io.frontend.req := Mux(branchEvaluator.io.out.valid && ID_EX_REG.valid, branchEvaluator.io.out, branchPredictor.io.out) - io.frontend.req.valid := WB_pc_redirect || (branchEvaluator.io.out.valid && ID_EX_REG.valid) || (branchPredictor.io.out.valid && io.frontend.resp.valid && io.frontend.resp.ready) + io.frontend.req := Mux(branchEvaluator.io.out.valid && idExReg.valid, branchEvaluator.io.out, branchPredictor.io.out) + io.frontend.req.valid := wbPcRedirect || (branchEvaluator.io.out.valid && idExReg.valid) || (branchPredictor.io.out.valid && io.frontend.resp.valid && io.frontend.resp.ready) branchPredictor.io.pc := io.frontend.resp.bits.pc - branchPredictor.io.imm := Mux(decoder.io.out.bits.isCondBranch, decoded_inst.b_imm, decoded_inst.j_imm) + branchPredictor.io.imm := Mux(decoder.io.out.bits.isCondBranch, decodedInst.b_imm, decodedInst.j_imm) branchPredictor.io.BranchType := decoder.io.out.bits.branch - decoder.io.inst := decoded_inst - rf.io.rs1 := decoded_inst.rs1 - rf.io.rs2 := decoded_inst.rs2 - - val ID_inst_valid = io.frontend.resp.valid && io.frontend.resp.ready - val ID_fetchException = io.frontend.resp.bits.exceptionSignals.valid && ID_inst_valid - val ID_illegal_instruction = !decoder.io.out.valid && ID_inst_valid - val ID_ecall = decoder.io.out.valid && (decoder.io.out.bits.branch === Branch.ECALL.asUInt) && ID_inst_valid - val ID_exception = ID_fetchException || ID_illegal_instruction || ID_ecall - - ID_EX_REG.valid := ID_inst_valid - ID_EX_REG.bits.exceptionSignals.valid := ID_exception - ID_EX_REG.bits.exceptionSignals.bits := MuxCase(0.U, Seq( - ID_fetchException -> io.frontend.resp.bits.exceptionSignals.bits, - ID_illegal_instruction -> Causes.illegal_instruction.U, - ID_ecall -> Causes.machine_ecall.U, + decoder.io.inst := decodedInst + rf.io.rs1 := decodedInst.rs1 + rf.io.rs2 := decodedInst.rs2 + + val idInstValid = io.frontend.resp.valid && io.frontend.resp.ready + val idFetchException = io.frontend.resp.bits.exceptionSignals.valid && idInstValid + val idIllegalInstruction = !decoder.io.out.valid && idInstValid + val idEcall = decoder.io.out.valid && (decoder.io.out.bits.branch === Branch.ECALL.asUInt) && idInstValid + val idException = idFetchException || idIllegalInstruction || idEcall + + idExReg.valid := idInstValid + idExReg.bits.exceptionSignals.valid := idException + idExReg.bits.exceptionSignals.bits := MuxCase(0.U, Seq( + idFetchException -> io.frontend.resp.bits.exceptionSignals.bits, + idIllegalInstruction -> Causes.illegal_instruction.U, + idEcall -> Causes.machine_ecall.U, )) - ID_EX_REG.bits.dataSignals.pc := io.frontend.resp.bits.pc - ID_EX_REG.bits.dataSignals.bpDestPc := branchPredictor.io.out.bits.pc - ID_EX_REG.bits.dataSignals.bpTaken := branchPredictor.io.out.valid - ID_EX_REG.bits.dataSignals.imm := MuxCase(0.U, Seq( - (decoder.io.out.bits.value1 === Value1.U_IMM.asUInt) -> decoded_inst.u_imm, - (decoder.io.out.bits.value1 === Value1.UIMM19_15.asUInt) -> decoded_inst.uimm19To15, - (decoder.io.out.bits.value2 === Value2.I_IMM.asUInt) -> decoded_inst.i_imm, - (decoder.io.out.bits.value2 === Value2.S_IMM.asUInt) -> decoded_inst.s_imm, + idExReg.bits.dataSignals.pc := io.frontend.resp.bits.pc + idExReg.bits.dataSignals.bpDestPc := branchPredictor.io.out.bits.pc + idExReg.bits.dataSignals.bpTaken := branchPredictor.io.out.valid + idExReg.bits.dataSignals.imm := MuxCase(0.U, Seq( + (decoder.io.out.bits.value1 === Value1.U_IMM.asUInt) -> decodedInst.u_imm, + (decoder.io.out.bits.value1 === Value1.UIMM19_15.asUInt) -> decodedInst.uimm19To15, + (decoder.io.out.bits.value2 === Value2.I_IMM.asUInt) -> decodedInst.i_imm, + (decoder.io.out.bits.value2 === Value2.S_IMM.asUInt) -> decodedInst.s_imm, )) val rs1ValueToEX = Mux(bypassingUnit.io.ID.out.rs1_value.valid, bypassingUnit.io.ID.out.rs1_value.bits, rf.io.rs1_out) val rs2ValueToEX = Mux(bypassingUnit.io.ID.out.rs2_value.valid, bypassingUnit.io.ID.out.rs2_value.bits, rf.io.rs2_out) - ID_EX_REG.bits.dataSignals.rs1 := rs1ValueToEX - ID_EX_REG.bits.dataSignals.rs2 := rs2ValueToEX - ID_EX_REG.bits.dataSignals.zimm := decoded_inst.zimm - ID_EX_REG.bits.ctrlSignals.decode := decoder.io.out.bits - ID_EX_REG.bits.ctrlSignals.rdIndex := decoded_inst.rd + idExReg.bits.dataSignals.rs1 := rs1ValueToEX + idExReg.bits.dataSignals.rs2 := rs2ValueToEX + idExReg.bits.dataSignals.zimm := decodedInst.zimm + idExReg.bits.ctrlSignals.decode := decoder.io.out.bits + idExReg.bits.ctrlSignals.rdIndex := decodedInst.rd // ベクトル命令を追加 - bypassingUnit.io.ID.in.rs1_index.bits := decoded_inst.rs1 + bypassingUnit.io.ID.in.rs1_index.bits := decodedInst.rs1 // exceptionの際にこれを下げる必要があるかもしれない bypassingUnit.io.ID.in.rs1_index.valid := decoder.io.out.bits.useRs1 && decoder.io.out.valid && io.frontend.resp.valid - bypassingUnit.io.ID.in.rs2_index.bits := decoded_inst.rs2 + bypassingUnit.io.ID.in.rs2_index.bits := decodedInst.rs2 bypassingUnit.io.ID.in.rs2_index.valid := decoder.io.out.bits.useRs2 && decoder.io.out.valid && io.frontend.resp.valid - rs1_required_but_not_valid := MuxCase(false.B, Seq( + rs1RequiredButNotValid := MuxCase(false.B, Seq( bypassingUnit.io.ID.out.rs1_bypassMatchAtEX -> (!bypassingUnit.io.EX.in.bits.rd.valid), bypassingUnit.io.ID.out.rs1_bypassMatchAtWB -> (!bypassingUnit.io.WB.in.bits.rd.valid), )) - rs2_required_but_not_valid := MuxCase(false.B, Seq( + rs2RequiredButNotValid := MuxCase(false.B, Seq( bypassingUnit.io.ID.out.rs2_bypassMatchAtEX -> (!bypassingUnit.io.EX.in.bits.rd.valid), bypassingUnit.io.ID.out.rs2_bypassMatchAtWB -> (!bypassingUnit.io.WB.in.bits.rd.valid), )) val vecConfBypass = Wire(new VecCtrlUnitResp()) val vtypeBypass = vecConfBypass.vtype - val vlBypass = vecConfBypass.vl - vecConfBypass := Mux(vecCtrlUnit.io.resp.valid, vecCtrlUnit.io.resp.bits, EX_WB_REG.bits.vectorCsrPorts.get) + vecConfBypass := Mux(vecCtrlUnit.io.resp.valid, vecCtrlUnit.io.resp.bits, exWbReg.bits.vectorCsrPorts.get) - vectorDecoder.io.inst := decoded_inst + vectorDecoder.io.inst := decodedInst // ベクトル命令がベクトル設定・メモリアクセスでなく,かつvvならばvs1を使用する vrfReadyTable.io.vs1Check.valid := decoder.io.out.valid && decoder.io.out.bits.vector.get && !vectorDecoder.io.out.isConfsetInst && vectorDecoder.io.out.mop === MOP.NONE.asUInt && (vectorDecoder.io.out.vSource === VSOURCE.VV.asUInt) - vrfReadyTable.io.vs1Check.bits.idx := decoded_inst.rs1 + vrfReadyTable.io.vs1Check.bits.idx := decodedInst.rs1 vrfReadyTable.io.vs1Check.bits.vtype := vtypeBypass vrfReadyTable.io.vs1Check.bits.vm := vectorDecoder.io.out.veuFun.isMaskInst // ベクトル設定命令でなく,かつメモリアクセスでないまたはインデックスならばvs2を使用する vrfReadyTable.io.vs2Check.valid := decoder.io.out.valid && decoder.io.out.bits.vector.get && !vectorDecoder.io.out.isConfsetInst && ((vectorDecoder.io.out.mop === MOP.NONE.asUInt) || (vectorDecoder.io.out.mop === MOP.IDX_ORDERED.asUInt)) - vrfReadyTable.io.vs2Check.bits.idx := decoded_inst.rs2 + vrfReadyTable.io.vs2Check.bits.idx := decodedInst.rs2 vrfReadyTable.io.vs2Check.bits.vtype := vtypeBypass vrfReadyTable.io.vs2Check.bits.vm := vectorDecoder.io.out.veuFun.isMaskInst // ベクトル設定命令でないならばvdを使用する vrfReadyTable.io.vdCheck.valid := decoder.io.out.valid && decoder.io.out.bits.vector.get && !vectorDecoder.io.out.isConfsetInst - vrfReadyTable.io.vdCheck.bits.idx := decoded_inst.rd + vrfReadyTable.io.vdCheck.bits.idx := decodedInst.rd vrfReadyTable.io.vdCheck.bits.vtype := vtypeBypass vrfReadyTable.io.vdCheck.bits.vm := vectorDecoder.io.out.veuFun.writeAsMask // vmフィールドが1ならばvmを使用する @@ -212,7 +214,7 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar !vectorDecoder.io.out.isConfsetInst && !decoder.io.out.bits.memWrite && !vectorDecoder.io.out.vecPermutation // vecAluExecUnitを使用するなら,空いている方をvalidにする - when(ID_flush || ID_stall) { + when(idFlush || idStall) { vecAluExecUnit.foreach(e => { e.io.signalIn.valid := false.B e.io.signalIn.bits := DontCare @@ -231,17 +233,17 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar printf("vecAluExecUnit%d valid\n", i.U) val vecSigs = x.io.signalIn.bits x.io.signalIn.valid := true.B - vecSigs.vs1 := decoded_inst.rs1 - vecSigs.vs2 := decoded_inst.rs2 - vecSigs.vd := decoded_inst.rd - vecSigs.scalarVal := Mux(vectorDecoder.io.out.vSource === VSOURCE.VX.asUInt, rs1ValueToEX, decoded_inst.imm19To15) + vecSigs.vs1 := decodedInst.rs1 + vecSigs.vs2 := decodedInst.rs2 + vecSigs.vd := decodedInst.rd + vecSigs.scalarVal := Mux(vectorDecoder.io.out.vSource === VSOURCE.VX.asUInt, rs1ValueToEX, decodedInst.imm19To15) vecSigs.vectorDecode := vectorDecoder.io.out vecSigs.scalarDecode := decoder.io.out.bits vecSigs.scalarDecode.vector.get := decoder.io.out.bits.vector.get vecSigs.vecConf := vecConfBypass vecSigs.pc := io.frontend.resp.bits.pc if(params.debug) { - x.io.signalIn.bits.debug.get.instruction := decoded_inst.bits + x.io.signalIn.bits.debug.get.instruction := decodedInst.bits x.io.signalIn.bits.debug.get.pc := io.frontend.resp.bits.pc } vecAluExecUnitAssigned(i) := true.B @@ -259,18 +261,18 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar ) } // vecLdstUnitに対しても同様 - when(ID_flush || ID_stall) { + when(idFlush || idStall) { vectorLdstUnit.io.signalIn.valid := false.B vectorLdstUnit.io.signalIn.bits := DontCare } .elsewhen(io.frontend.resp.valid && decoder.io.out.valid && decoder.io.out.bits.vector.get && vectorDecoder.io.out.useVecLdstExec && vecLdstUnitReady) { vectorLdstUnit.io.signalIn.valid := true.B vectorLdstUnit.io.signalIn.bits.scalar.rs2Value := rs2ValueToEX - vectorLdstUnit.io.signalIn.bits.scalar.immediate := Mux(decoder.io.out.bits.memRead, decoded_inst.i_imm, decoded_inst.s_imm) - vectorLdstUnit.io.signalIn.bits.scalar.rdIndex := decoded_inst.rd + vectorLdstUnit.io.signalIn.bits.scalar.immediate := Mux(decoder.io.out.bits.memRead, decodedInst.i_imm, decodedInst.s_imm) + vectorLdstUnit.io.signalIn.bits.scalar.rdIndex := decodedInst.rd val vecSigs = vectorLdstUnit.io.signalIn.bits.vector - vecSigs.vs1 := decoded_inst.rs1 - vecSigs.vs2 := decoded_inst.rs2 - vecSigs.vd := decoded_inst.rd + vecSigs.vs1 := decodedInst.rs1 + vecSigs.vs2 := decodedInst.rs2 + vecSigs.vd := decodedInst.rd vecSigs.scalarVal := rs1ValueToEX vecSigs.vectorDecode := vectorDecoder.io.out vecSigs.scalarDecode := decoder.io.out.bits @@ -278,14 +280,14 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar vecSigs.pc := io.frontend.resp.bits.pc if(params.debug) { vecSigs.debug.get.pc := io.frontend.resp.bits.pc - vecSigs.debug.get.instruction := decoded_inst.bits + vecSigs.debug.get.instruction := decodedInst.bits } // スカラメモリアクセスの場合 } .elsewhen(io.frontend.resp.valid && decoder.io.out.valid && decoder.io.out.bits.memValid && vecLdstUnitReady) { vectorLdstUnit.io.signalIn.valid := true.B vectorLdstUnit.io.signalIn.bits.scalar.rs2Value := rs2ValueToEX - vectorLdstUnit.io.signalIn.bits.scalar.immediate := Mux(decoder.io.out.bits.memRead, decoded_inst.i_imm, decoded_inst.s_imm) - vectorLdstUnit.io.signalIn.bits.scalar.rdIndex := decoded_inst.rd + vectorLdstUnit.io.signalIn.bits.scalar.immediate := Mux(decoder.io.out.bits.memRead, decodedInst.i_imm, decodedInst.s_imm) + vectorLdstUnit.io.signalIn.bits.scalar.rdIndex := decodedInst.rd vectorLdstUnit.io.signalIn.bits.vector := DontCare vectorLdstUnit.io.signalIn.bits.vector.scalarVal := rs1ValueToEX vectorLdstUnit.io.signalIn.bits.vector.scalarDecode := decoder.io.out.bits @@ -297,29 +299,29 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar } when(decoder.io.out.valid && decoder.io.out.bits.vector.get) { - ID_EX_REG.bits.vectorCtrlSignals.get := vectorDecoder.io.out + idExReg.bits.vectorCtrlSignals.get := vectorDecoder.io.out } // 0 -> v0.mask[i]が1ならば書き込み,0ならば書き込まない // 1 -> マスクなし,全て書き込む // (マスクを使わないベクタ命令は全てvm=1か?) - ID_EX_REG.bits.vectorDataSignals.get.mask := vectorDecoder.io.out.vm - ID_EX_REG.bits.vectorDataSignals.get.vs1 := decoded_inst.rs1 - ID_EX_REG.bits.vectorDataSignals.get.vs2 := decoded_inst.rs2 - ID_EX_REG.bits.vectorDataSignals.get.vd := decoded_inst.rd + idExReg.bits.vectorDataSignals.get.mask := vectorDecoder.io.out.vm + idExReg.bits.vectorDataSignals.get.vs1 := decodedInst.rs1 + idExReg.bits.vectorDataSignals.get.vs2 := decodedInst.rs2 + idExReg.bits.vectorDataSignals.get.vd := decodedInst.rd if (params.debug) { - ID_EX_REG.bits.debug.get.instruction := decoded_inst.bits - ID_EX_REG.bits.debug.get.pc := io.frontend.resp.bits.pc + idExReg.bits.debug.get.instruction := decodedInst.bits + idExReg.bits.debug.get.pc := io.frontend.resp.bits.pc } // retain the ID_EX register if stall - when(EX_stall) { - ID_EX_REG := ID_EX_REG + when(exStall) { + idExReg := idExReg } // flush the ID_EX register if branch miss, ecall, mret or exception - ID_flush := EX_flush || branchEvaluator.io.out.valid - when(ID_flush) { - ID_EX_REG.valid := false.B + idFlush := exFlush || branchEvaluator.io.out.valid + when(idFlush) { + idExReg.valid := false.B } // START OF EX STAGE @@ -341,193 +343,192 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar io.debugIo.get.vrfMap.get := vecRegFile.io.debug.get } - alu.io.in1 := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.value1, 0.U)(Seq( - Value1.RS1.asUInt -> ID_EX_REG.bits.dataSignals.rs1, - Value1.U_IMM.asUInt -> ID_EX_REG.bits.dataSignals.imm, + alu.io.in1 := MuxLookup(idExReg.bits.ctrlSignals.decode.value1, 0.U)(Seq( + Value1.RS1.asUInt -> idExReg.bits.dataSignals.rs1, + Value1.U_IMM.asUInt -> idExReg.bits.dataSignals.imm, )) - alu.io.in2 := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.value2, 0.U)(Seq( - Value2.RS2.asUInt -> ID_EX_REG.bits.dataSignals.rs2, - Value2.I_IMM.asUInt -> ID_EX_REG.bits.dataSignals.imm, - Value2.S_IMM.asUInt -> ID_EX_REG.bits.dataSignals.imm, - Value2.PC.asUInt -> ID_EX_REG.bits.dataSignals.pc.addr, + alu.io.in2 := MuxLookup(idExReg.bits.ctrlSignals.decode.value2, 0.U)(Seq( + Value2.RS2.asUInt -> idExReg.bits.dataSignals.rs2, + Value2.I_IMM.asUInt -> idExReg.bits.dataSignals.imm, + Value2.S_IMM.asUInt -> idExReg.bits.dataSignals.imm, + Value2.PC.asUInt -> idExReg.bits.dataSignals.pc.addr, )) - alu.io.funct := ID_EX_REG.bits.ctrlSignals.decode + alu.io.funct := idExReg.bits.ctrlSignals.decode branchEvaluator.io.req.bits.ALU_Result := alu.io.out - branchEvaluator.io.req.bits.BranchType := ID_EX_REG.bits.ctrlSignals.decode.branch - branchEvaluator.io.req.bits.destPC := ID_EX_REG.bits.dataSignals.bpDestPc - branchEvaluator.io.req.bits.pc := ID_EX_REG.bits.dataSignals.pc - branchEvaluator.io.req.bits.bp_taken := ID_EX_REG.bits.dataSignals.bpTaken - branchEvaluator.io.req.valid := ID_EX_REG.valid + branchEvaluator.io.req.bits.BranchType := idExReg.bits.ctrlSignals.decode.branch + branchEvaluator.io.req.bits.destPC := idExReg.bits.dataSignals.bpDestPc + branchEvaluator.io.req.bits.pc := idExReg.bits.dataSignals.pc + branchEvaluator.io.req.bits.bp_taken := idExReg.bits.dataSignals.bpTaken + branchEvaluator.io.req.valid := idExReg.valid if (params.useMulDiv) { val multiplier_hasValue = RegInit(false.B) // EXステージに有効な乗算命令があり,かつ乗算器の出力のvalidとreadyが共にtrueで無ければ乗算器に保持するべき情報(現在のEXステージの乗算命令)がある - multiplier_hasValue := ID_EX_REG.bits.ctrlSignals.decode.useMul && ID_EX_REG.valid && !(multiplier.get.io.resp.ready && multiplier.get.io.resp.valid) - multiplier.get.io.req.bits.rs1 := ID_EX_REG.bits.dataSignals.rs1 - multiplier.get.io.req.bits.rs2 := ID_EX_REG.bits.dataSignals.rs2 - multiplier.get.io.req.bits.funct := ID_EX_REG.bits.ctrlSignals.decode + multiplier_hasValue := idExReg.bits.ctrlSignals.decode.useMul && idExReg.valid && !(multiplier.get.io.resp.ready && multiplier.get.io.resp.valid) + multiplier.get.io.req.bits.rs1 := idExReg.bits.dataSignals.rs1 + multiplier.get.io.req.bits.rs2 := idExReg.bits.dataSignals.rs2 + multiplier.get.io.req.bits.funct := idExReg.bits.ctrlSignals.decode // 乗算器に保持するべき情報(現在のEXステージの乗算命令)があればvalidを下げる(乗算器が既に情報を受け取っているため) - multiplier.get.io.req.valid := ID_EX_REG.bits.ctrlSignals.decode.useMul && !multiplier_hasValue && ID_EX_REG.valid && !EX_flush - multiplier.get.io.resp.ready := !(EX_WB_REG.valid && WB_stall) + multiplier.get.io.req.valid := idExReg.bits.ctrlSignals.decode.useMul && !multiplier_hasValue && idExReg.valid && !exFlush + multiplier.get.io.resp.ready := !(exWbReg.valid && wbStall) } // TODO: rs1がx0かつrdが非x0の場合にvlを最大に,rs1・rdともにx0の場合にvlを変更しないように仕様変更 - vecCtrlUnit.io.req.valid := ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst - vecCtrlUnit.io.req.bits.vDecode := ID_EX_REG.bits.vectorCtrlSignals.get - vecCtrlUnit.io.req.bits.rs1_value := ID_EX_REG.bits.dataSignals.rs1 - vecCtrlUnit.io.req.bits.rs2_value := ID_EX_REG.bits.dataSignals.rs2 - vecCtrlUnit.io.req.bits.zimm := ID_EX_REG.bits.dataSignals.zimm - vecCtrlUnit.io.req.bits.uimm := ID_EX_REG.bits.dataSignals.imm + vecCtrlUnit.io.req.valid := idExReg.valid && idExReg.bits.ctrlSignals.decode.vector.get && idExReg.bits.vectorCtrlSignals.get.isConfsetInst + vecCtrlUnit.io.req.bits.vDecode := idExReg.bits.vectorCtrlSignals.get + vecCtrlUnit.io.req.bits.rs1_value := idExReg.bits.dataSignals.rs1 + vecCtrlUnit.io.req.bits.rs2_value := idExReg.bits.dataSignals.rs2 + vecCtrlUnit.io.req.bits.zimm := idExReg.bits.dataSignals.zimm + vecCtrlUnit.io.req.bits.uimm := idExReg.bits.dataSignals.imm val exScalarRes = if (params.useMulDiv) { - Mux(ID_EX_REG.bits.ctrlSignals.decode.useMul, multiplier.get.io.resp.bits, alu.io.out) + Mux(idExReg.bits.ctrlSignals.decode.useMul, multiplier.get.io.resp.bits, alu.io.out) } else { alu.io.out } // placefolder for vec->scalar inst (vcpop.m, vfirst.m, vmv.x.s) - val exVectorRes = Mux(ID_EX_REG.bits.vectorCtrlSignals.get.vecPermutation, vecAluExecUnit(0).io.toExWbReg.bits.dataSignals.exResult, vecCtrlUnit.io.resp.bits.vl) + val exVectorRes = Mux(idExReg.bits.vectorCtrlSignals.get.vecPermutation, vecAluExecUnit(0).io.toExWbReg.bits.dataSignals.exResult, vecCtrlUnit.io.resp.bits.vl) - bypassingUnit.io.EX.in.bits.rd.bits.index := ID_EX_REG.bits.ctrlSignals.rdIndex - bypassingUnit.io.EX.in.bits.rd.bits.value := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeBackSelector, 0.U)(Seq( - WB_SEL.PC4.asUInt -> ID_EX_REG.bits.dataSignals.pc.nextPC, + bypassingUnit.io.EX.in.bits.rd.bits.index := idExReg.bits.ctrlSignals.rdIndex + bypassingUnit.io.EX.in.bits.rd.bits.value := MuxLookup(idExReg.bits.ctrlSignals.decode.writeBackSelector, 0.U)(Seq( + WB_SEL.PC4.asUInt -> idExReg.bits.dataSignals.pc.nextPC, WB_SEL.ARITH.asUInt -> exScalarRes, WB_SEL.VECTOR.asUInt -> exVectorRes, )) - bypassingUnit.io.EX.in.bits.rd.valid := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeBackSelector, false.B)(Seq( + bypassingUnit.io.EX.in.bits.rd.valid := MuxLookup(idExReg.bits.ctrlSignals.decode.writeBackSelector, false.B)(Seq( WB_SEL.PC4.asUInt -> true.B, - WB_SEL.ARITH.asUInt -> (if (params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.useMul || multiplier.get.io.resp.valid else true.B), + WB_SEL.ARITH.asUInt -> (if (params.useMulDiv) !idExReg.bits.ctrlSignals.decode.useMul || multiplier.get.io.resp.valid else true.B), WB_SEL.CSR.asUInt -> false.B, WB_SEL.MEM.asUInt -> false.B, WB_SEL.NONE.asUInt -> false.B, WB_SEL.VECTOR.asUInt -> (if (params.useVector) true.B else false.B) - )) && ID_EX_REG.valid - bypassingUnit.io.EX.in.valid := ID_EX_REG.bits.ctrlSignals.decode.writeToRd && ID_EX_REG.valid - - EX_WB_REG.valid := ID_EX_REG.valid && (!ID_EX_REG.bits.ctrlSignals.decode.memValid || vectorLdstUnit.io.signalIn.ready) && - (if (params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.useMul || multiplier.get.io.resp.valid else true.B) && - (!ID_EX_REG.bits.ctrlSignals.decode.vector.get || ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst || vecExecUnitsToExWbRegValid.reduce(_ || _)) - EX_WB_REG.bits.dataSignals.pc := ID_EX_REG.bits.dataSignals.pc - EX_WB_REG.bits.dataSignals.exResult := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeBackSelector, 0.U)(Seq( + )) && idExReg.valid + bypassingUnit.io.EX.in.valid := idExReg.bits.ctrlSignals.decode.writeToRd && idExReg.valid + + exWbReg.valid := idExReg.valid && (!idExReg.bits.ctrlSignals.decode.memValid || vectorLdstUnit.io.signalIn.ready) && + (if (params.useMulDiv) !idExReg.bits.ctrlSignals.decode.useMul || multiplier.get.io.resp.valid else true.B) && + (!idExReg.bits.ctrlSignals.decode.vector.get || idExReg.bits.vectorCtrlSignals.get.isConfsetInst || vecExecUnitsToExWbRegValid.reduce(_ || _)) + exWbReg.bits.dataSignals.pc := idExReg.bits.dataSignals.pc + exWbReg.bits.dataSignals.exResult := MuxLookup(idExReg.bits.ctrlSignals.decode.writeBackSelector, 0.U)(Seq( WB_SEL.ARITH.asUInt -> exScalarRes, WB_SEL.VECTOR.asUInt -> exVectorRes, )) - EX_WB_REG.bits.dataSignals.datatoCSR := Mux(ID_EX_REG.bits.ctrlSignals.decode.value1 === Value1.RS1.asUInt, ID_EX_REG.bits.dataSignals.rs1, ID_EX_REG.bits.dataSignals.imm) - EX_WB_REG.bits.dataSignals.csrAddr := ID_EX_REG.bits.dataSignals.zimm + exWbReg.bits.dataSignals.datatoCSR := Mux(idExReg.bits.ctrlSignals.decode.value1 === Value1.RS1.asUInt, idExReg.bits.dataSignals.rs1, idExReg.bits.dataSignals.imm) + exWbReg.bits.dataSignals.csrAddr := idExReg.bits.dataSignals.zimm - EX_WB_REG.bits.ctrlSignals := ID_EX_REG.bits.ctrlSignals + exWbReg.bits.ctrlSignals := idExReg.bits.ctrlSignals - EX_WB_REG.bits.exceptionSignals.valid := (ID_EX_REG.valid && ID_EX_REG.bits.exceptionSignals.valid) || (ID_EX_REG.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt) && ID_EX_REG.valid + exWbReg.bits.exceptionSignals.valid := (idExReg.valid && idExReg.bits.exceptionSignals.valid) || (idExReg.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt) && idExReg.valid // only machine-mode ecall and illegal inst is supported now - EX_WB_REG.bits.exceptionSignals.bits := MuxCase(0.U, Seq( + exWbReg.bits.exceptionSignals.bits := MuxCase(0.U, Seq( // if there is already exception before ID, then retain - ID_EX_REG.bits.exceptionSignals.valid -> ID_EX_REG.bits.exceptionSignals.bits, + idExReg.bits.exceptionSignals.valid -> idExReg.bits.exceptionSignals.bits, // else if exception in EX (load/store misaligned or access fault), )) - Mux(ID_EX_REG.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt, 0xb.U(params.xprlen.W), 0.U) + Mux(idExReg.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt, 0xb.U(params.xprlen.W), 0.U) // EX_WB_REGに信号自体がvalidかを覚えさせておく when(vecCtrlUnit.io.resp.valid) { - EX_WB_REG.bits.vectorCsrPorts.get := vecCtrlUnit.io.resp.bits + exWbReg.bits.vectorCsrPorts.get := vecCtrlUnit.io.resp.bits } .otherwise { - EX_WB_REG.bits.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get + exWbReg.bits.vectorCsrPorts.get := exWbReg.bits.vectorCsrPorts.get } - if (params.debug) EX_WB_REG.bits.debug.get := ID_EX_REG.bits.debug.get + if (params.debug) exWbReg.bits.debug.get := idExReg.bits.debug.get // WBステージがvalidかつ破棄できないかつEXステージに有効な値がある場合,またはメモリアクセス命令かつldstUnit.reqがreadyでない,または乗算命令で乗算器がvalidでない // またはベクタ命令実行完了前にスカラ命令がID_EXレジスタにある,またはチェイニング不可能なベクタ命令(構造ハザード・0要素目の値が用意できていないなど) - EX_stall := ID_EX_REG.valid && ((EX_WB_REG.valid && WB_stall) || (if (params.useMulDiv) { - ID_EX_REG.bits.ctrlSignals.decode.useMul && !multiplier.get.io.resp.valid + exStall := idExReg.valid && ((exWbReg.valid && wbStall) || (if (params.useMulDiv) { + idExReg.bits.ctrlSignals.decode.useMul && !multiplier.get.io.resp.valid } else false.B)) // ベクトル命令がEXにある場合,IDがスカラ命令,またはIDのベクトル命令が発行できないならばIDの方でストールさせる // EX_WB_REGのvectorExecNumのデフォルト値 - EX_WB_REG.bits.vectorExecNum.get.valid := false.B - EX_WB_REG.bits.vectorExecNum.get.bits := 0.U + exWbReg.bits.vectorExecNum.get.valid := false.B + exWbReg.bits.vectorExecNum.get.bits := 0.U // リタイアするベクトル命令があればそれでEX_WB_REGを上書き for(d <- vecAluExecUnit) { when(d.io.toExWbReg.valid) { - EX_WB_REG := d.io.toExWbReg - EX_WB_REG.bits.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get + exWbReg := d.io.toExWbReg + exWbReg.bits.vectorCsrPorts.get := exWbReg.bits.vectorCsrPorts.get } } when(vectorLdstUnit.io.toExWbReg.valid) { - EX_WB_REG := vectorLdstUnit.io.toExWbReg - EX_WB_REG.bits.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get + exWbReg := vectorLdstUnit.io.toExWbReg + exWbReg.bits.vectorCsrPorts.get := exWbReg.bits.vectorCsrPorts.get } - when(WB_stall) { - EX_WB_REG := EX_WB_REG - EX_WB_REG.bits.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get + when(wbStall) { + exWbReg := exWbReg + exWbReg.bits.vectorCsrPorts.get := exWbReg.bits.vectorCsrPorts.get } // flush the EX_WB register if ecall, mret or exception - EX_flush := WB_pc_redirect - when(EX_flush) { - EX_WB_REG.valid := false.B + exFlush := wbPcRedirect + when(exFlush) { + exWbReg.valid := false.B } // START OF WB STAGE - // ここの論理がたぶん違う // メモリアクセス命令かつ,ベクトル実行ユニットのベクトル命令がリタイアしない(toExWbRegがvalidでない)かつ,respがvalidでなければストール // 面倒くさいのでメモリ応答は常に1クロックで返ってくることにする - WB_stall := (if(true) false.B else EX_WB_REG.valid && (EX_WB_REG.bits.ctrlSignals.decode.memValid && !vectorLdstUnit.io.vectorResp.toVRF.valid && !vectorLdstUnit.io.scalarResp.valid)) + wbStall := (if(true) false.B else exWbReg.valid && (exWbReg.bits.ctrlSignals.decode.memValid && !vectorLdstUnit.io.vectorResp.toVRF.valid && !vectorLdstUnit.io.scalarResp.valid)) // let's just ignore exception val dmemoryAccessException = if(params.useException) { - EX_WB_REG.bits.ctrlSignals.decode.memValid && vectorLdstUnit.io.scalarResp.valid && vectorLdstUnit.io.scalarResp.bits.exceptionSignals.valid + exWbReg.bits.ctrlSignals.decode.memValid && vectorLdstUnit.io.scalarResp.valid && vectorLdstUnit.io.scalarResp.bits.exceptionSignals.valid } else { false.B } - WB_pc_redirect := EX_WB_REG.valid && (EX_WB_REG.bits.ctrlSignals.decode.branch === Branch.MRET.asUInt - || (if(params.useException) EX_WB_REG.bits.exceptionSignals.valid || dmemoryAccessException else EX_WB_REG.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt)) + wbPcRedirect := exWbReg.valid && (exWbReg.bits.ctrlSignals.decode.branch === Branch.MRET.asUInt + || (if(params.useException) exWbReg.bits.exceptionSignals.valid || dmemoryAccessException else exWbReg.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt)) - when(WB_pc_redirect) { + when(wbPcRedirect) { io.frontend.req.bits.pc := csrUnit.io.resp.data } // 割り込みまたは例外の場合は、PCのみ更新しリタイアしない(命令を破棄) - val WB_inst_can_retire = EX_WB_REG.valid && !(EX_WB_REG.bits.exceptionSignals.valid || dmemoryAccessException) && !WB_stall - rf.io.req.valid := WB_inst_can_retire && EX_WB_REG.bits.ctrlSignals.decode.writeToRd - rf.io.req.bits.data := MuxLookup(EX_WB_REG.bits.ctrlSignals.decode.writeBackSelector, 0.U)(Seq( - WB_SEL.PC4 -> EX_WB_REG.bits.dataSignals.pc.nextPC, - WB_SEL.ARITH -> EX_WB_REG.bits.dataSignals.exResult, + val wbInstCanRetire = exWbReg.valid && !(exWbReg.bits.exceptionSignals.valid || dmemoryAccessException) && !wbStall + rf.io.req.valid := wbInstCanRetire && exWbReg.bits.ctrlSignals.decode.writeToRd + rf.io.req.bits.data := MuxLookup(exWbReg.bits.ctrlSignals.decode.writeBackSelector, 0.U)(Seq( + WB_SEL.PC4 -> exWbReg.bits.dataSignals.pc.nextPC, + WB_SEL.ARITH -> exWbReg.bits.dataSignals.exResult, WB_SEL.CSR -> csrUnit.io.resp.data, WB_SEL.MEM -> vectorLdstUnit.io.scalarResp.bits.data, - WB_SEL.VECTOR -> EX_WB_REG.bits.dataSignals.exResult + WB_SEL.VECTOR -> exWbReg.bits.dataSignals.exResult ).map { case (wb_sel, data) => (wb_sel.asUInt, data) }) - rf.io.req.bits.rd := EX_WB_REG.bits.ctrlSignals.rdIndex + rf.io.req.bits.rd := exWbReg.bits.ctrlSignals.rdIndex - bypassingUnit.io.WB.in.bits.rd.valid := bypassingUnit.io.WB.in.valid && (!EX_WB_REG.bits.ctrlSignals.decode.memRead || vectorLdstUnit.io.scalarResp.valid) - bypassingUnit.io.WB.in.bits.rd.bits.index := EX_WB_REG.bits.ctrlSignals.rdIndex + bypassingUnit.io.WB.in.bits.rd.valid := bypassingUnit.io.WB.in.valid && (!exWbReg.bits.ctrlSignals.decode.memRead || vectorLdstUnit.io.scalarResp.valid) + bypassingUnit.io.WB.in.bits.rd.bits.index := exWbReg.bits.ctrlSignals.rdIndex bypassingUnit.io.WB.in.bits.rd.bits.value := rf.io.req.bits.data - bypassingUnit.io.WB.in.valid := EX_WB_REG.bits.ctrlSignals.decode.writeToRd && WB_inst_can_retire + bypassingUnit.io.WB.in.valid := exWbReg.bits.ctrlSignals.decode.writeToRd && wbInstCanRetire - csrUnit.io.req.valid := EX_WB_REG.valid + csrUnit.io.req.valid := exWbReg.valid // ecallやmretの処理はcsrUnit内で行われる - csrUnit.io.req.bits.funct := EX_WB_REG.bits.ctrlSignals.decode - csrUnit.io.req.bits.data := EX_WB_REG.bits.dataSignals.datatoCSR - csrUnit.io.req.bits.csr_addr := EX_WB_REG.bits.dataSignals.csrAddr + csrUnit.io.req.bits.funct := exWbReg.bits.ctrlSignals.decode + csrUnit.io.req.bits.data := exWbReg.bits.dataSignals.datatoCSR + csrUnit.io.req.bits.csr_addr := exWbReg.bits.dataSignals.csrAddr csrUnit.io.fromCPU.hartid := io.hartid csrUnit.io.fromCPU.cpu_operating := cpuOperating - csrUnit.io.fromCPU.inst_retire := WB_inst_can_retire - csrUnit.io.fromCPU.vectorExecNum.get := EX_WB_REG.bits.vectorExecNum.get - csrUnit.io.exception.valid := (EX_WB_REG.bits.exceptionSignals.valid || dmemoryAccessException) && EX_WB_REG.valid - csrUnit.io.exception.bits.mepc_write := EX_WB_REG.bits.dataSignals.pc.addr - csrUnit.io.exception.bits.mcause_write := Mux(dmemoryAccessException, vectorLdstUnit.io.scalarResp.bits.exceptionSignals.bits, EX_WB_REG.bits.exceptionSignals.bits) + csrUnit.io.fromCPU.inst_retire := wbInstCanRetire + csrUnit.io.fromCPU.vectorExecNum.get := exWbReg.bits.vectorExecNum.get + csrUnit.io.exception.valid := (exWbReg.bits.exceptionSignals.valid || dmemoryAccessException) && exWbReg.valid + csrUnit.io.exception.bits.mepc_write := exWbReg.bits.dataSignals.pc.addr + csrUnit.io.exception.bits.mcause_write := Mux(dmemoryAccessException, vectorLdstUnit.io.scalarResp.bits.exceptionSignals.bits, exWbReg.bits.exceptionSignals.bits) - csrUnit.io.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get + csrUnit.io.vectorCsrPorts.get := exWbReg.bits.vectorCsrPorts.get // EXまたはWBステージにfence, ecall, mretがある - sysInstInPipeline := (ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.isSysInst) || (EX_WB_REG.valid && EX_WB_REG.bits.ctrlSignals.decode.isSysInst) + sysInstInPipeline := (idExReg.valid && idExReg.bits.ctrlSignals.decode.isSysInst) || (exWbReg.valid && exWbReg.bits.ctrlSignals.decode.isSysInst) if (params.debug) { - io.debugIo.get.debugRetired.bits.instruction.bits := EX_WB_REG.bits.debug.get.instruction & Fill(32, EX_WB_REG.valid) - io.debugIo.get.debugRetired.bits.pc.addr := EX_WB_REG.bits.debug.get.pc.addr & Fill(params.xprlen, EX_WB_REG.valid) - io.debugIo.get.debugRetired.valid := WB_inst_can_retire + io.debugIo.get.debugRetired.bits.instruction.bits := exWbReg.bits.debug.get.instruction & Fill(32, exWbReg.valid) + io.debugIo.get.debugRetired.bits.pc.addr := exWbReg.bits.debug.get.pc.addr & Fill(params.xprlen, exWbReg.valid) + io.debugIo.get.debugRetired.valid := wbInstCanRetire io.debugIo.get.debugAbiMap := rf.io.debug_abi_map.get } } diff --git a/src/main/scala/hajime/vectormodules/VectorExecUnit.scala b/src/main/scala/hajime/vectormodules/VectorExecUnit.scala index a66410a..788280f 100644 --- a/src/main/scala/hajime/vectormodules/VectorExecUnit.scala +++ b/src/main/scala/hajime/vectormodules/VectorExecUnit.scala @@ -31,7 +31,7 @@ class VectorExecUnitIO(implicit params: HajimeCoreParams) extends Bundle { val signalIn = Flipped(DecoupledIO(new VectorExecUnitSignalIn())) val readVrf = Flipped(new VecRegFileReadIO()) val dataOut = Output(new VectorExecUnitDataOut()) - val toExWbReg = Output(Valid(new EX_WB_IO())) + val toExWbReg = Output(Valid(new exWbIo())) } /** diff --git a/src/main/scala/hajime/vectormodules/VectorLdstUnit.scala b/src/main/scala/hajime/vectormodules/VectorLdstUnit.scala index 0e71ca5..737217a 100644 --- a/src/main/scala/hajime/vectormodules/VectorLdstUnit.scala +++ b/src/main/scala/hajime/vectormodules/VectorLdstUnit.scala @@ -28,7 +28,7 @@ class VectorLdstUnitIO(implicit params: HajimeCoreParams) extends Bundle { val scalarResp = ValidIO(new LDSTResp()) val vectorResp = Output(new VectorExecUnitDataOut()) val dcache = new AXI4liteIO(addrWidth = params.xprlen, dataWidth = params.xprlen) - val toExWbReg = Output(Valid(new EX_WB_IO())) + val toExWbReg = Output(Valid(new exWbIo())) } // TODO: VtypeのSEWと異なるEEWをillegalにする diff --git a/src/test/scala/hajime/publicmodules/ALUTest.scala b/src/test/scala/hajime/publicmodules/ALUTest.scala deleted file mode 100644 index c032374..0000000 --- a/src/test/scala/hajime/publicmodules/ALUTest.scala +++ /dev/null @@ -1,364 +0,0 @@ -package hajime.publicmodules - -import chisel3._ -import chiseltest._ -import hajime.common.Instructions._ -import hajime.common._ -import org.scalatest.flatspec._ - -/* -class ALUTest extends AnyFlatSpec with ChiselScalatestTester with ScalarOpConstants { - import ContentValid._ - it should "not act sussy" in { - // wtf? I have never curried ALU.apply - test(new ALU()(HajimeCoreParams())).withAnnotations(Seq(WriteVcdAnnotation)) { c => - def instDecode(inst: String): List[Int] = { - inst match { - case "add" => List(ARITHMETIC_FCN.ADDSUB.litValue.toInt, 0, 0) - case "sub" => List(ARITHMETIC_FCN.ADDSUB.litValue.toInt, 1, 0) - case "sll" => List(ARITHMETIC_FCN.SLL.litValue.toInt, 0, 0) - case "slt" => List(ARITHMETIC_FCN.SLT.litValue.toInt, 0, 0) - case "sltu" => List(ARITHMETIC_FCN.SLTU.litValue.toInt, 0, 0) - case "xor" => List(ARITHMETIC_FCN.XOR.litValue.toInt, 0, 0) - case "srl" => List(ARITHMETIC_FCN.SR.litValue.toInt, 0, 0) - case "sra" => List(ARITHMETIC_FCN.SR.litValue.toInt, 1, 0) - case "or" => List(ARITHMETIC_FCN.OR.litValue.toInt, 0, 0) - case "and" => List(ARITHMETIC_FCN.AND.litValue.toInt, 0, 0) - case "addw" => List(ARITHMETIC_FCN.ADDSUB.litValue.toInt, 0, 1) - case "subw" => List(ARITHMETIC_FCN.ADDSUB.litValue.toInt, 1, 1) - case "sllw" => List(ARITHMETIC_FCN.SLL.litValue.toInt, 0, 1) - case "srlw" => List(ARITHMETIC_FCN.SR.litValue.toInt, 0, 1) - case "sraw" => List(ARITHMETIC_FCN.SR.litValue.toInt, 1, 1) - case _ => List(ARITHMETIC_FCN.NONE.litValue.toInt, 0, 0) - } - } - - def DO_TEST(testNum: BigInt, inst: String, out: String, in1: String, in2: String): Unit = { - c.io.in1.poke(in1.U(RISCV_Consts.XLEN.W)) - c.io.in2.poke(in2.U(RISCV_Consts.XLEN.W)) - c.io.funct.arithmeticFunct.poke((instDecode(inst)).head) - c.io.funct.aluFlag.poke((instDecode(inst)(1))) - c.io.funct.op32.poke((instDecode(inst)(2))) - c.io.out.expect(out.U(RISCV_Consts.XLEN.W)) - if(c.io.out.peekInt() == out.U.litValue) println(s"test $testNum for $inst passed") - } - - def TEST_RR_OP(testNum: BigInt, inst: String, out: String, in1: String, in2: String): Unit = { - DO_TEST(testNum, inst, out, in1, in2) - c.clock.step() - } - - TEST_RR_OP(2, "add", "h00000000", "h00000000", "h00000000") - TEST_RR_OP(3, "add", "h00000002", "h00000001", "h00000001") - TEST_RR_OP(4, "add", "h0000000a", "h00000003", "h00000007") - - TEST_RR_OP(5, "add", "hffffffffffff8000", "h0000000000000000", "hffffffffffff8000") - TEST_RR_OP(6, "add", "hffffffff80000000", "hffffffff80000000", "h00000000") - TEST_RR_OP(7, "add", "hffffffff7fff8000", "hffffffff80000000", "hffffffffffff8000") - - TEST_RR_OP(8, "add", "h0000000000007fff", "h0000000000000000", "h0000000000007fff") - TEST_RR_OP(9, "add", "h000000007fffffff", "h000000007fffffff", "h0000000000000000") - TEST_RR_OP(10, "add", "h0000000080007ffe", "h000000007fffffff", "h0000000000007fff") - - TEST_RR_OP(11, "add", "hffffffff80007fff", "hffffffff80000000", "h0000000000007fff") - TEST_RR_OP(12, "add", "h000000007fff7fff", "h000000007fffffff", "hffffffffffff8000") - - TEST_RR_OP(13, "add", "hffffffffffffffff", "h0000000000000000", "hffffffffffffffff") - TEST_RR_OP(14, "add", "h0000000000000000", "hffffffffffffffff", "h0000000000000001") - TEST_RR_OP(15, "add", "hfffffffffffffffe", "hffffffffffffffff", "hffffffffffffffff") - - TEST_RR_OP(16, "add", "h0000000080000000", "h0000000000000001", "h000000007fffffff") - - - TEST_RR_OP(2, "addw", "h00000000", "h00000000", "h00000000") - TEST_RR_OP(3, "addw", "h00000002", "h00000001", "h00000001") - TEST_RR_OP(4, "addw", "h0000000a", "h00000003", "h00000007") - - TEST_RR_OP(5, "addw", "hffffffffffff8000", "h0000000000000000", "hffffffffffff8000") - TEST_RR_OP(6, "addw", "hffffffff80000000", "hffffffff80000000", "h00000000") - TEST_RR_OP(7, "addw", "h000000007fff8000", "hffffffff80000000", "hffffffffffff8000") - - TEST_RR_OP(8, "addw", "h0000000000007fff", "h0000000000000000", "h0000000000007fff") - TEST_RR_OP(9, "addw", "h000000007fffffff", "h000000007fffffff", "h0000000000000000") - TEST_RR_OP(10, "addw", "hffffffff80007ffe", "h000000007fffffff", "h0000000000007fff") - - TEST_RR_OP(11, "addw", "hffffffff80007fff", "hffffffff80000000", "h0000000000007fff") - TEST_RR_OP(12, "addw", "h000000007fff7fff", "h000000007fffffff", "hffffffffffff8000") - - TEST_RR_OP(13, "addw", "hffffffffffffffff", "h0000000000000000", "hffffffffffffffff") - TEST_RR_OP(14, "addw", "h0000000000000000", "hffffffffffffffff", "h0000000000000001") - TEST_RR_OP(15, "addw", "hfffffffffffffffe", "hffffffffffffffff", "hffffffffffffffff") - - TEST_RR_OP(16, "addw", "hffffffff80000000", "h0000000000000001", "h000000007fffffff") - - - TEST_RR_OP(2, "and", "h0f000f00", "hff00ff00", "h0f0f0f0f") - TEST_RR_OP(3, "and", "h00f000f0", "h0ff00ff0", "hf0f0f0f0") - TEST_RR_OP(4, "and", "h000f000f", "h00ff00ff", "h0f0f0f0f") - TEST_RR_OP(5, "and", "hf000f000", "hf00ff00f", "hf0f0f0f0") - - - TEST_RR_OP(2, "or", "hff0fff0f", "hff00ff00", "h0f0f0f0f") - TEST_RR_OP(3, "or", "hfff0fff0", "h0ff00ff0", "hf0f0f0f0") - TEST_RR_OP(4, "or", "h0fff0fff", "h00ff00ff", "h0f0f0f0f") - TEST_RR_OP(5, "or", "hf0fff0ff", "hf00ff00f", "hf0f0f0f0") - - - TEST_RR_OP(2, "sll", "h0000000000000001", "h0000000000000001", "d0") - TEST_RR_OP(3, "sll", "h0000000000000002", "h0000000000000001", "d1"); - TEST_RR_OP(4, "sll", "h0000000000000080", "h0000000000000001", "d7"); - TEST_RR_OP(5, "sll", "h0000000000004000", "h0000000000000001", "d14"); - TEST_RR_OP(6, "sll", "h0000000080000000", "h0000000000000001", "d31"); - - TEST_RR_OP(7, "sll", "hffffffffffffffff", "hffffffffffffffff", "d0"); - TEST_RR_OP(8, "sll", "hfffffffffffffffe", "hffffffffffffffff", "d1"); - TEST_RR_OP(9, "sll", "hffffffffffffff80", "hffffffffffffffff", "d7"); - TEST_RR_OP(10, "sll", "hffffffffffffc000", "hffffffffffffffff", "d14"); - TEST_RR_OP(11, "sll", "hffffffff80000000", "hffffffffffffffff", "d31"); - - TEST_RR_OP(12, "sll", "h0000000021212121", "h0000000021212121", "d0"); - TEST_RR_OP(13, "sll", "h0000000042424242", "h0000000021212121", "d1"); - TEST_RR_OP(14, "sll", "h0000001090909080", "h0000000021212121", "d7"); - TEST_RR_OP(15, "sll", "h0000084848484000", "h0000000021212121", "d14"); - TEST_RR_OP(16, "sll", "h1090909080000000", "h0000000021212121", "d31"); - - TEST_RR_OP(17, "sll", "h0000000021212121", "h0000000021212121", "hffffffffffffffc0"); - TEST_RR_OP(18, "sll", "h0000000042424242", "h0000000021212121", "hffffffffffffffc1"); - TEST_RR_OP(19, "sll", "h0000001090909080", "h0000000021212121", "hffffffffffffffc7"); - TEST_RR_OP(20, "sll", "h0000084848484000", "h0000000021212121", "hffffffffffffffce"); - - TEST_RR_OP(21, "sll", "h8000000000000000", "h0000000021212121", "hffffffffffffffff"); - TEST_RR_OP(50, "sll", "h8000000000000000", "h0000000000000001", "d63"); - TEST_RR_OP(51, "sll", "hffffff8000000000", "hffffffffffffffff", "d39"); - TEST_RR_OP(52, "sll", "h0909080000000000", "h0000000021212121", "d43"); - - - TEST_RR_OP(2, "sllw", "h0000000000000001", "h0000000000000001", "d0"); - TEST_RR_OP(3, "sllw", "h0000000000000002", "h0000000000000001", "d1"); - TEST_RR_OP(4, "sllw", "h0000000000000080", "h0000000000000001", "d7"); - TEST_RR_OP(5, "sllw", "h0000000000004000", "h0000000000000001", "d14"); - TEST_RR_OP(6, "sllw", "hffffffff80000000", "h0000000000000001", "d31"); - - TEST_RR_OP(7, "sllw", "hffffffffffffffff", "hffffffffffffffff", "d0"); - TEST_RR_OP(8, "sllw", "hfffffffffffffffe", "hffffffffffffffff", "d1"); - TEST_RR_OP(9, "sllw", "hffffffffffffff80", "hffffffffffffffff", "d7"); - TEST_RR_OP(10, "sllw", "hffffffffffffc000", "hffffffffffffffff", "d14"); - TEST_RR_OP(11, "sllw", "hffffffff80000000", "hffffffffffffffff", "d31"); - - TEST_RR_OP(12, "sllw", "h0000000021212121", "h0000000021212121", "d0"); - TEST_RR_OP(13, "sllw", "h0000000042424242", "h0000000021212121", "d1"); - TEST_RR_OP(14, "sllw", "hffffffff90909080", "h0000000021212121", "d7"); - TEST_RR_OP(15, "sllw", "h0000000048484000", "h0000000021212121", "d14"); - TEST_RR_OP(16, "sllw", "hffffffff80000000", "h0000000021212121", "d31"); - - TEST_RR_OP(17, "sllw", "h0000000021212121", "h0000000021212121", "hffffffffffffffe0"); - TEST_RR_OP(18, "sllw", "h0000000042424242", "h0000000021212121", "hffffffffffffffe1"); - TEST_RR_OP(19, "sllw", "hffffffff90909080", "h0000000021212121", "hffffffffffffffe7"); - TEST_RR_OP(20, "sllw", "h0000000048484000", "h0000000021212121", "hffffffffffffffee"); - TEST_RR_OP(21, "sllw", "hffffffff80000000", "h0000000021212121", "hffffffffffffffff"); - - TEST_RR_OP(44, "sllw", "h0000000012345678", "hffffffff12345678", "d0"); - TEST_RR_OP(45, "sllw", "h0000000023456780", "hffffffff12345678", "d4"); - TEST_RR_OP(46, "sllw", "hffffffff92345678", "h0000000092345678", "d0"); - TEST_RR_OP(47, "sllw", "hffffffff93456780", "h0000000099345678", "d4"); - - - TEST_RR_OP(2, "slt", "d0", "h0000000000000000", "h0000000000000000") - TEST_RR_OP(3, "slt", "d0", "h0000000000000001", "h0000000000000001") - TEST_RR_OP(4, "slt", "d1", "h0000000000000003", "h0000000000000007") - TEST_RR_OP(5, "slt", "d0", "h0000000000000007", "h0000000000000003") - - TEST_RR_OP(6, "slt", "d0", "h0000000000000000", "hffffffffffff8000") - TEST_RR_OP(7, "slt", "d1", "hffffffff80000000", "h0000000000000000") - TEST_RR_OP(8, "slt", "d1", "hffffffff80000000", "hffffffffffff8000") - - TEST_RR_OP(9, "slt", "d1", "h0000000000000000", "h0000000000007fff") - TEST_RR_OP(10, "slt", "d0", "h000000007fffffff", "h0000000000000000") - TEST_RR_OP(11, "slt", "d0", "h000000007fffffff", "h0000000000007fff") - - TEST_RR_OP(12, "slt", "d1", "hffffffff80000000", "h0000000000007fff") - TEST_RR_OP(13, "slt", "d0", "h000000007fffffff", "hffffffffffff8000") - - TEST_RR_OP(14, "slt", "d0", "h0000000000000000", "hffffffffffffffff") - TEST_RR_OP(15, "slt", "d1", "hffffffffffffffff", "h0000000000000001") - TEST_RR_OP(16, "slt", "d0", "hffffffffffffffff", "hffffffffffffffff") - - - TEST_RR_OP(2, "sltu", "d0", "h00000000", "h00000000") - TEST_RR_OP(3, "sltu", "d0", "h00000001", "h00000001") - TEST_RR_OP(4, "sltu", "d1", "h00000003", "h00000007") - TEST_RR_OP(5, "sltu", "d0", "h00000007", "h00000003") - - TEST_RR_OP(6, "sltu", "d1", "h00000000", "hffff8000") - TEST_RR_OP(7, "sltu", "d0", "h80000000", "h00000000") - TEST_RR_OP(8, "sltu", "d1", "h80000000", "hffff8000") - - TEST_RR_OP(9, "sltu", "d1", "h00000000", "h00007fff") - TEST_RR_OP(10, "sltu", "d0", "h7fffffff", "h00000000") - TEST_RR_OP(11, "sltu", "d0", "h7fffffff", "h00007fff") - - TEST_RR_OP(12, "sltu", "d0", "h80000000", "h00007fff") - TEST_RR_OP(13, "sltu", "d1", "h7fffffff", "hffff8000") - - TEST_RR_OP(14, "sltu", "d1", "h00000000", "hffffffff") - TEST_RR_OP(15, "sltu", "d0", "hffffffff", "h00000001") - TEST_RR_OP(16, "sltu", "d0", "hffffffff", "hffffffff") - - - TEST_RR_OP(2, "sra", "hffffffff80000000", "hffffffff80000000", "d0"); - TEST_RR_OP(3, "sra", "hffffffffc0000000", "hffffffff80000000", "d1"); - TEST_RR_OP(4, "sra", "hffffffffff000000", "hffffffff80000000", "d7"); - TEST_RR_OP(5, "sra", "hfffffffffffe0000", "hffffffff80000000", "d14"); - TEST_RR_OP(6, "sra", "hffffffffffffffff", "hffffffff80000001", "d31"); - - TEST_RR_OP(7, "sra", "h000000007fffffff", "h000000007fffffff", "d0"); - TEST_RR_OP(8, "sra", "h000000003fffffff", "h000000007fffffff", "d1"); - TEST_RR_OP(9, "sra", "h0000000000ffffff", "h000000007fffffff", "d7"); - TEST_RR_OP(10, "sra", "h000000000001ffff", "h000000007fffffff", "d14"); - TEST_RR_OP(11, "sra", "h0000000000000000", "h000000007fffffff", "d31"); - - TEST_RR_OP(12, "sra", "hffffffff81818181", "hffffffff81818181", "d0"); - TEST_RR_OP(13, "sra", "hffffffffc0c0c0c0", "hffffffff81818181", "d1"); - TEST_RR_OP(14, "sra", "hffffffffff030303", "hffffffff81818181", "d7"); - TEST_RR_OP(15, "sra", "hfffffffffffe0606", "hffffffff81818181", "d14"); - TEST_RR_OP(16, "sra", "hffffffffffffffff", "hffffffff81818181", "d31"); - - TEST_RR_OP(17, "sra", "hffffffff81818181", "hffffffff81818181", "hffffffffffffffc0"); - TEST_RR_OP(18, "sra", "hffffffffc0c0c0c0", "hffffffff81818181", "hffffffffffffffc1"); - TEST_RR_OP(19, "sra", "hffffffffff030303", "hffffffff81818181", "hffffffffffffffc7"); - TEST_RR_OP(20, "sra", "hfffffffffffe0606", "hffffffff81818181", "hffffffffffffffce"); - TEST_RR_OP(21, "sra", "hffffffffffffffff", "hffffffff81818181", "hffffffffffffffff"); - - - TEST_RR_OP(2, "sraw", "hffffffff80000000", "hffffffff80000000", "d0"); - TEST_RR_OP(3, "sraw", "hffffffffc0000000", "hffffffff80000000", "d1"); - TEST_RR_OP(4, "sraw", "hffffffffff000000", "hffffffff80000000", "d7"); - TEST_RR_OP(5, "sraw", "hfffffffffffe0000", "hffffffff80000000", "d14"); - TEST_RR_OP(6, "sraw", "hffffffffffffffff", "hffffffff80000001", "d31"); - - TEST_RR_OP(7, "sraw", "h000000007fffffff", "h000000007fffffff", "d0"); - TEST_RR_OP(8, "sraw", "h000000003fffffff", "h000000007fffffff", "d1"); - TEST_RR_OP(9, "sraw", "h0000000000ffffff", "h000000007fffffff", "d7"); - TEST_RR_OP(10, "sraw", "h000000000001ffff", "h000000007fffffff", "d14"); - TEST_RR_OP(11, "sraw", "h0000000000000000", "h000000007fffffff", "d31"); - - TEST_RR_OP(12, "sraw", "hffffffff81818181", "hffffffff81818181", "d0"); - TEST_RR_OP(13, "sraw", "hffffffffc0c0c0c0", "hffffffff81818181", "d1"); - TEST_RR_OP(14, "sraw", "hffffffffff030303", "hffffffff81818181", "d7"); - TEST_RR_OP(15, "sraw", "hfffffffffffe0606", "hffffffff81818181", "d14"); - TEST_RR_OP(16, "sraw", "hffffffffffffffff", "hffffffff81818181", "d31"); - - TEST_RR_OP(17, "sraw", "hffffffff81818181", "hffffffff81818181", "hffffffffffffffe0"); - TEST_RR_OP(18, "sraw", "hffffffffc0c0c0c0", "hffffffff81818181", "hffffffffffffffe1"); - TEST_RR_OP(19, "sraw", "hffffffffff030303", "hffffffff81818181", "hffffffffffffffe7"); - TEST_RR_OP(20, "sraw", "hfffffffffffe0606", "hffffffff81818181", "hffffffffffffffee"); - TEST_RR_OP(21, "sraw", "hffffffffffffffff", "hffffffff81818181", "hffffffffffffffff"); - - TEST_RR_OP(44, "sraw", "h0000000012345678", "hffffffff12345678", "d0"); - TEST_RR_OP(45, "sraw", "h0000000001234567", "hffffffff12345678", "d4"); - TEST_RR_OP(46, "sraw", "hffffffff92345678", "h0000000092345678", "d0"); - TEST_RR_OP(47, "sraw", "hfffffffff9234567", "h0000000092345678", "d4"); - - - TEST_RR_OP(2, "srl", "hffffffff80000000", "hffffffff80000000", "d0") - TEST_RR_OP(3, "srl", "h7fffffffc0000000", "hffffffff80000000", "d1") - TEST_RR_OP(4, "srl", "h01FFFFFFFF000000", "hffffffff80000000", "d7") - TEST_RR_OP(5, "srl", "h0003FFFFFFFE0000", "hffffffff80000000", "d14") - TEST_RR_OP(6, "srl", "h00000001FFFFFFFF", "hffffffff80000001", "d31") - - TEST_RR_OP(2, "srl", "hffffffffffffffff", "hffffffffffffffff", "d0") - TEST_RR_OP(3, "srl", "h7fffffffffffffff", "hffffffffffffffff", "d1") - TEST_RR_OP(4, "srl", "h01ffffffffffffff", "hffffffffffffffff", "d7") - TEST_RR_OP(5, "srl", "h0003ffffffffffff", "hffffffffffffffff", "d14") - TEST_RR_OP(6, "srl", "h00000001ffffffff", "hffffffffffffffff", "d31") - - TEST_RR_OP(2, "srl", "h0000000021212121", "h0000000021212121", "d0") - TEST_RR_OP(3, "srl", "h0000000010909090", "h0000000021212121", "d1") - TEST_RR_OP(4, "srl", "h0000000000424242", "h0000000021212121", "d7") - TEST_RR_OP(5, "srl", "h0000000000008484", "h0000000021212121", "d14") - TEST_RR_OP(6, "srl", "h0000000000000000", "h0000000021212121", "d31") - - TEST_RR_OP(17, "srl", "h0000000021212121", "h0000000021212121", "hffffffffffffffc0") - TEST_RR_OP(18, "srl", "h0000000010909090", "h0000000021212121", "hffffffffffffffc1") - TEST_RR_OP(19, "srl", "h0000000000424242", "h0000000021212121", "hffffffffffffffc7") - TEST_RR_OP(20, "srl", "h0000000000008484", "h0000000021212121", "hffffffffffffffce") - TEST_RR_OP(21, "srl", "h0000000000000000", "h0000000021212121", "hffffffffffffffff") - - - TEST_RR_OP(2, "srlw", "hffffffff80000000", "hffffffff80000000", "d0"); - TEST_RR_OP(3, "srlw", "h0000000040000000", "hffffffff80000000", "d1"); - TEST_RR_OP(4, "srlw", "h0000000001000000", "hffffffff80000000", "d7"); - TEST_RR_OP(5, "srlw", "h0000000000020000", "hffffffff80000000", "d14"); - TEST_RR_OP(6, "srlw", "h0000000000000001", "hffffffff80000001", "d31"); - - TEST_RR_OP(7, "srlw", "hffffffffffffffff", "hffffffffffffffff", "d0"); - TEST_RR_OP(8, "srlw", "h000000007fffffff", "hffffffffffffffff", "d1"); - TEST_RR_OP(9, "srlw", "h0000000001ffffff", "hffffffffffffffff", "d7"); - TEST_RR_OP(10, "srlw", "h000000000003ffff", "hffffffffffffffff", "d14"); - TEST_RR_OP(11, "srlw", "h0000000000000001", "hffffffffffffffff", "d31"); - - TEST_RR_OP(12, "srlw", "h0000000021212121", "h0000000021212121", "d0"); - TEST_RR_OP(13, "srlw", "h0000000010909090", "h0000000021212121", "d1"); - TEST_RR_OP(14, "srlw", "h0000000000424242", "h0000000021212121", "d7"); - TEST_RR_OP(15, "srlw", "h0000000000008484", "h0000000021212121", "d14"); - TEST_RR_OP(16, "srlw", "h0000000000000000", "h0000000021212121", "d31"); - - TEST_RR_OP(17, "srlw", "h0000000021212121", "h0000000021212121", "hffffffffffffffe0"); - TEST_RR_OP(18, "srlw", "h0000000010909090", "h0000000021212121", "hffffffffffffffe1"); - TEST_RR_OP(19, "srlw", "h0000000000424242", "h0000000021212121", "hffffffffffffffe7"); - TEST_RR_OP(20, "srlw", "h0000000000008484", "h0000000021212121", "hffffffffffffffee"); - TEST_RR_OP(21, "srlw", "h0000000000000000", "h0000000021212121", "hffffffffffffffff"); - - TEST_RR_OP(44, "srlw", "h0000000012345678", "hffffffff12345678", "d0"); - TEST_RR_OP(45, "srlw", "h0000000001234567", "hffffffff12345678", "d4"); - TEST_RR_OP(46, "srlw", "hffffffff92345678", "h0000000092345678", "d0"); - TEST_RR_OP(47, "srlw", "h0000000009234567", "h0000000092345678", "d4"); - - TEST_RR_OP(2, "sub", "h0000000000000000", "h0000000000000000", "h0000000000000000") - TEST_RR_OP(3, "sub", "h0000000000000000", "h0000000000000001", "h0000000000000001") - TEST_RR_OP(4, "sub", "hfffffffffffffffc", "h0000000000000003", "h0000000000000007") - - TEST_RR_OP(5, "sub", "h0000000000008000", "h0000000000000000", "hffffffffffff8000") - TEST_RR_OP(6, "sub", "hffffffff80000000", "hffffffff80000000", "h0000000000000000") - TEST_RR_OP(7, "sub", "hffffffff80008000", "hffffffff80000000", "hffffffffffff8000") - - TEST_RR_OP(8, "sub", "hffffffffffff8001", "h0000000000000000", "h0000000000007fff") - TEST_RR_OP(9, "sub", "h000000007fffffff", "h000000007fffffff", "h0000000000000000") - TEST_RR_OP(10, "sub", "h000000007fff8000", "h000000007fffffff", "h0000000000007fff") - - TEST_RR_OP(11, "sub", "hffffffff7fff8001", "hffffffff80000000", "h0000000000007fff") - TEST_RR_OP(12, "sub", "h0000000080007fff", "h000000007fffffff", "hffffffffffff8000") - - TEST_RR_OP(13, "sub", "h0000000000000001", "h0000000000000000", "hffffffffffffffff") - TEST_RR_OP(14, "sub", "hfffffffffffffffe", "hffffffffffffffff", "h0000000000000001") - TEST_RR_OP(15, "sub", "h0000000000000000", "hffffffffffffffff", "hffffffffffffffff") - - - TEST_RR_OP(2, "subw", "h0000000000000000", "h0000000000000000", "h0000000000000000") - TEST_RR_OP(3, "subw", "h0000000000000000", "h0000000000000001", "h0000000000000001") - TEST_RR_OP(4, "subw", "hfffffffffffffffc", "h0000000000000003", "h0000000000000007") - - TEST_RR_OP(5, "subw", "h0000000000008000", "h0000000000000000", "hffffffffffff8000") - TEST_RR_OP(6, "subw", "hffffffff80000000", "hffffffff80000000", "h0000000000000000") - TEST_RR_OP(7, "subw", "hffffffff80008000", "hffffffff80000000", "hffffffffffff8000") - - TEST_RR_OP(8, "subw", "hffffffffffff8001", "h0000000000000000", "h0000000000007fff") - TEST_RR_OP(9, "subw", "h000000007fffffff", "h000000007fffffff", "h0000000000000000") - TEST_RR_OP(10, "subw", "h000000007fff8000", "h000000007fffffff", "h0000000000007fff") - - TEST_RR_OP(11, "subw", "h000000007fff8001", "hffffffff80000000", "h0000000000007fff") - TEST_RR_OP(12, "subw", "hffffffff80007fff", "h000000007fffffff", "hffffffffffff8000") - - TEST_RR_OP(13, "subw", "h0000000000000001", "h0000000000000000", "hffffffffffffffff") - TEST_RR_OP(14, "subw", "hfffffffffffffffe", "hffffffffffffffff", "h0000000000000001") - TEST_RR_OP(15, "subw", "h0000000000000000", "hffffffffffffffff", "hffffffffffffffff") - - - TEST_RR_OP(2, "xor", "hf00ff00f", "hff00ff00", "h0f0f0f0f") - TEST_RR_OP(3, "xor", "hff00ff00", "h0ff00ff0", "hf0f0f0f0") - TEST_RR_OP(4, "xor", "h0ff00ff0", "h00ff00ff", "h0f0f0f0f") - TEST_RR_OP(5, "xor", "h00ff00ff", "hf00ff00f", "hf0f0f0f0") - } - } -} - */ diff --git a/src/test/scala/hajime/publicmodules/Dcache_for_Verilator.scala b/src/test/scala/hajime/publicmodules/DcacheForVerilator.scala similarity index 78% rename from src/test/scala/hajime/publicmodules/Dcache_for_Verilator.scala rename to src/test/scala/hajime/publicmodules/DcacheForVerilator.scala index 37cb218..68ac5cb 100644 --- a/src/test/scala/hajime/publicmodules/Dcache_for_Verilator.scala +++ b/src/test/scala/hajime/publicmodules/DcacheForVerilator.scala @@ -12,7 +12,7 @@ import scala.io._ // 命令キャッシュと異なりマスター側のreadyが下がることは無いので,出力のストールは考えない // TODO: FPGA用に例えばledへの出力を追加する,正常終了フラグや例外終了フラグなど -class Dcache_for_Verilator(dcacheBaseAddr: Int, tohost: Int, memsize: Int = 0x2000) extends Module with ChecksAxiReadResp with ChecksAxiWriteResp{ +class DcacheForVerilator(dcacheBaseAddr: Int, tohost: Int, memsize: Int = 0x2000) extends Module with ChecksAxiReadResp with ChecksAxiWriteResp{ require(memsize % 8 == 0, s"memsize $memsize is not multiple of 8") val io = IO(Flipped(new AXI4liteIO(addrWidth = 64, dataWidth = 64))) @@ -55,17 +55,17 @@ class Dcache_for_Verilator(dcacheBaseAddr: Int, tohost: Int, memsize: Int = 0x20 io.r.valid := RegNext(io.ar.valid && io.ar.ready) // write - val writeData_asVec = Wire(Vec(8, UInt(8.W))) + val writeDataAsVec = Wire(Vec(8, UInt(8.W))) val shiftedData = MuxLookup(internalWriteAddr(2,0), io.w.bits.data)( (0 until 8).map( i => i.U -> (io.w.bits.data << (i*8).U).asUInt ) ) - for((w,i) <- writeData_asVec.zipWithIndex) { + for((w,i) <- writeDataAsVec.zipWithIndex) { w := shiftedData(8*i+7, 8*i) } when(io.aw.valid && io.w.valid && internalWriteAddr < 0x00001FFF.U) { - mem.write(internalWriteAddr.head(61), writeData_asVec, MuxLookup(internalWriteAddr(2,0), io.w.bits.strb)( + mem.write(internalWriteAddr.head(61), writeDataAsVec, MuxLookup(internalWriteAddr(2,0), io.w.bits.strb)( (0 until 8).map( i => i.U -> (io.w.bits.strb << i.U).asUInt(7,0) ) @@ -78,7 +78,7 @@ class Dcache_for_Verilator(dcacheBaseAddr: Int, tohost: Int, memsize: Int = 0x20 io.b.bits.resp := W_OKEY.U } -object Dcache_for_Verilator extends App { - def apply(dcacheBaseAddr: Int = 0x00004000, tohost: Int = 0x10000000, memsize: Int = 0x2000): Dcache_for_Verilator = new Dcache_for_Verilator(dcacheBaseAddr, tohost, memsize) - ChiselStage.emitSystemVerilogFile(Dcache_for_Verilator(dcacheBaseAddr = 0x00004000, tohost = 0x10000000, memsize = 8192), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) +object DcacheForVerilator extends App { + def apply(dcacheBaseAddr: Int = 0x00004000, tohost: Int = 0x10000000, memsize: Int = 0x2000): DcacheForVerilator = new DcacheForVerilator(dcacheBaseAddr, tohost, memsize) + ChiselStage.emitSystemVerilogFile(DcacheForVerilator(dcacheBaseAddr = 0x00004000, tohost = 0x10000000, memsize = 8192), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) } \ No newline at end of file diff --git a/src/test/scala/hajime/publicmodules/Icache_for_Verilator.scala b/src/test/scala/hajime/publicmodules/IcacheForVerilator.scala similarity index 62% rename from src/test/scala/hajime/publicmodules/Icache_for_Verilator.scala rename to src/test/scala/hajime/publicmodules/IcacheForVerilator.scala index 3aa8fcc..b537d80 100644 --- a/src/test/scala/hajime/publicmodules/Icache_for_Verilator.scala +++ b/src/test/scala/hajime/publicmodules/IcacheForVerilator.scala @@ -11,7 +11,7 @@ import org.scalatest.flatspec._ import scala.io._ // Should I check unaligned exception in Core or Cache? -class Icache_for_Verilator(memsize: Int = 0x2000) extends Module { +class IcacheForVerilator(memsize: Int = 0x2000) extends Module { val io = IO(Flipped(new AXI4liteIO(addrWidth = 64, dataWidth = 32))) // AR channel io.ar.ready := true.B @@ -33,50 +33,50 @@ class Icache_for_Verilator(memsize: Int = 0x2000) extends Module { readDataFromMem.data := Cat(mem.read(io.ar.bits.addr.head(62)).reverse) readDataFromMem.resp := 0.U - val r_channel_bits_reg = Reg(chiselTypeOf(io.r.bits)) - val r_channel_valid_reg = Reg(Bool()) - val r_stall = io.r.valid && !io.r.ready - val retain_r_channel = RegNext(r_stall) - when(r_stall) { - r_channel_bits_reg := io.r.bits - r_channel_valid_reg := io.r.valid + val rChannelBitsReg = Reg(chiselTypeOf(io.r.bits)) + val rChannelValidReg = Reg(Bool()) + val rStall = io.r.valid && !io.r.ready + val retainRchannel = RegNext(rStall) + when(rStall) { + rChannelBitsReg := io.r.bits + rChannelValidReg := io.r.valid io.ar.ready := false.B }.otherwise { - r_channel_bits_reg := readDataFromMem - r_channel_valid_reg := RegNext(io.ar.valid && io.ar.ready) + rChannelBitsReg := readDataFromMem + rChannelValidReg := RegNext(io.ar.valid && io.ar.ready) } - io.r.bits := Mux(retain_r_channel, r_channel_bits_reg, readDataFromMem) - io.r.valid := Mux(retain_r_channel, r_channel_valid_reg, RegNext(io.ar.valid && io.ar.ready)) + io.r.bits := Mux(retainRchannel, rChannelBitsReg, readDataFromMem) + io.r.valid := Mux(retainRchannel, rChannelValidReg, RegNext(io.ar.valid && io.ar.ready)) // write io.b.bits.resp := 0.U - val b_valid = RegInit(false.B) - val b_resp = RegInit(0.U(3.W)) - val writeData_asVec = Wire(Vec(4, UInt(8.W))) - for((w,i) <- writeData_asVec.zipWithIndex) { + val bValid = RegInit(false.B) + val bResp = RegInit(0.U(3.W)) + val writeDataAsVec = Wire(Vec(4, UInt(8.W))) + for((w,i) <- writeDataAsVec.zipWithIndex) { w := io.w.bits.data(8*i+7, 8*i) } when(io.aw.valid && io.w.valid) { - mem.write(io.aw.bits.addr.head(62), writeData_asVec, io.w.bits.strb.asBools) - b_valid := true.B - b_resp := 0.U + mem.write(io.aw.bits.addr.head(62), writeDataAsVec, io.w.bits.strb.asBools) + bValid := true.B + bResp := 0.U } .otherwise { - b_valid := false.B - b_resp := 0.U + bValid := false.B + bResp := 0.U } - io.b.valid := b_valid - io.b.bits.resp := b_resp + io.b.valid := bValid + io.b.bits.resp := bResp } -object Icache_for_Verilator extends App { - def apply(memsize: Int = 8192): Icache_for_Verilator = new Icache_for_Verilator(memsize) - ChiselStage.emitSystemVerilogFile(Icache_for_Verilator(memsize = 8192), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) +object IcacheForVerilator extends App { + def apply(memsize: Int = 8192): IcacheForVerilator = new IcacheForVerilator(memsize) + ChiselStage.emitSystemVerilogFile(IcacheForVerilator(memsize = 8192), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) } -class Icache_for_VerilatorSpec extends AnyFlatSpec with ChiselScalatestTester { +class IcacheForVerilatorSpec extends AnyFlatSpec with ChiselScalatestTester { it should "write and read correctly" in { - test(Icache_for_Verilator(memsize = 1024)).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(IcacheForVerilator(memsize = 1024)).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.io.ar.bits.addr.poke(0.U) dut.io.ar.bits.prot.poke(0.U) dut.io.ar.valid.poke(false.B) diff --git a/src/test/scala/hajime/publicmodules/MultiplierTest.scala b/src/test/scala/hajime/publicmodules/MultiplierTest.scala index c7444e3..008770a 100644 --- a/src/test/scala/hajime/publicmodules/MultiplierTest.scala +++ b/src/test/scala/hajime/publicmodules/MultiplierTest.scala @@ -8,11 +8,11 @@ import org.scalatest.flatspec._ import scala.util.Random object Functions { - def generate_Int64RandomHexString(): String = { + def generateInt64RandomHexString(): String = { IndexedSeq.fill(16)(Random.nextInt(16).toHexString).reduce(_ + _) } - def generate_Int64RandomHexString(n: Int): String = { + def generateInt64RandomHexString(n: Int): String = { val number_of_zeros = 16 - n (0 until 16).map(i => if(i < number_of_zeros) "0" else Random.nextInt(16).toHexString).reduce(_ + _) } @@ -25,18 +25,18 @@ import hajime.publicmodules.Functions._ class MultiplierTest extends AnyFlatSpec with ChiselScalatestTester { Random.setSeed(0) - val multiplicand_array = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generate_Int64RandomHexString(), 16)) - // multiplicand_array.foreach(x => println(x)) - val multiplier_array = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generate_Int64RandomHexString(), 16)) - // multiplier_array.foreach(x => println(x)) - val answer_array = (multiplicand_array zip multiplier_array).map{ + val multiplicandArray = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generateInt64RandomHexString(), 16)) + // multiplicandArray.foreach(x => println(x)) + val multiplierArray = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generateInt64RandomHexString(), 16)) + // multiplierArray.foreach(x => println(x)) + val answerArray = (multiplicandArray zip multiplierArray).map{ case (num1, num2) => num1 * num2 } - var result_array: IndexedSeq[BigInt] = IndexedSeq() - var result_tag_array: IndexedSeq[BigInt] = IndexedSeq() + var resultArray: IndexedSeq[BigInt] = IndexedSeq() + var resultTagArray: IndexedSeq[BigInt] = IndexedSeq() it should s"perform pipelined multiplication" in { test(Multiplier(HajimeCoreParams())).withAnnotations(Seq(WriteVcdAnnotation)) { dut => - for (((num1, num2), i) <- (multiplicand_array zip multiplier_array).zipWithIndex) { + for (((num1, num2), i) <- (multiplicandArray zip multiplierArray).zipWithIndex) { dut.io.req.valid.poke(true.B) dut.io.req.bits.multiplicand.bits.poke(num1.U(64.W)) // dut.io.out.bits.multiplicand.signed.poke(false.B) @@ -46,8 +46,8 @@ class MultiplierTest extends AnyFlatSpec with ChiselScalatestTester { dut.io.resp.ready.poke(true.B) dut.clock.step() if(dut.io.resp.valid.peekBoolean()) { - result_array :+= dut.io.resp.bits.result.peekInt() - result_tag_array :+= dut.io.resp.bits.tag.peekInt() + resultArray :+= dut.io.resp.bits.result.peekInt() + resultTagArray :+= dut.io.resp.bits.tag.peekInt() } } dut.io.req.valid.poke(false.B) @@ -56,10 +56,10 @@ class MultiplierTest extends AnyFlatSpec with ChiselScalatestTester { dut.io.req.bits.tag.poke(0.U) while(dut.io.resp.valid.peekBoolean()) { dut.clock.step() - result_array :+= dut.io.resp.bits.result.peekInt() - result_tag_array :+= dut.io.resp.bits.tag.peekInt() + resultArray :+= dut.io.resp.bits.result.peekInt() + resultTagArray :+= dut.io.resp.bits.tag.peekInt() } - result_tag_array.lazyZip(result_array.map(bigIntToString32format)).lazyZip(answer_array.map(bigIntToString32format)).toIndexedSeq.foreach { + resultTagArray.lazyZip(resultArray.map(bigIntToString32format)).lazyZip(answerArray.map(bigIntToString32format)).toIndexedSeq.foreach { case (tag, result, answer) => { println(s"tag: $tag, result: 0x$result, answer: 0x$answer") assert(result == answer) @@ -71,11 +71,11 @@ class MultiplierTest extends AnyFlatSpec with ChiselScalatestTester { class NonPipelinedMultiplierSpec extends AnyFlatSpec with ChiselScalatestTester { Random.setSeed(0) - val multiplicand_array = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generate_Int64RandomHexString(Random.nextInt(16)), 16)) - val multiplier_array = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generate_Int64RandomHexString(Random.nextInt(16)), 16)) + val multiplicandArray = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generateInt64RandomHexString(Random.nextInt(16)), 16)) + val multiplierArray = (0 until HajimeCoreParams().robEntries).map(_ => BigInt("0000000000000000" + generateInt64RandomHexString(Random.nextInt(16)), 16)) it should s"perform multiplication" in { test(NonPipelinedMultiplier(HajimeCoreParams())).withAnnotations(Seq(WriteVcdAnnotation)) { dut => - for((num1, num2, i) <- (multiplicand_array zip multiplier_array).zipWithIndex.map { + for((num1, num2, i) <- (multiplicandArray zip multiplierArray).zipWithIndex.map { case ((num1, num2), i) => (num1, num2, i) }) { dut.io.req.valid.poke(true.B) diff --git a/src/test/scala/hajime/simple4Stage/Core_and_cache.scala b/src/test/scala/hajime/simple4Stage/CoreAndCache.scala similarity index 57% rename from src/test/scala/hajime/simple4Stage/Core_and_cache.scala rename to src/test/scala/hajime/simple4Stage/CoreAndCache.scala index 55ca7a7..f9877ad 100644 --- a/src/test/scala/hajime/simple4Stage/Core_and_cache.scala +++ b/src/test/scala/hajime/simple4Stage/CoreAndCache.scala @@ -8,45 +8,45 @@ import hajime.common._ import hajime.publicmodules._ import hajime.vectormodules.VectorCpu -class Core_and_cache[T <: CpuModule](icache_memsize: Int = 8192, dcache_memsize: Int = 8192, tohost: Int = 0x10000000, useVector: Boolean = false, cpu: Class[T]) extends Module { +class CoreAndCache[T <: CpuModule](iCacheMemsize: Int = 8192, dCacheMemsize: Int = 8192, tohost: Int = 0x10000000, useVector: Boolean = false, cpu: Class[T]) extends Module { implicit val params = HajimeCoreParams(useException = true, useVector = if(cpu == classOf[VectorCpu] && !useVector) throw new Exception("useVector is false") else useVector) val io = IO(new Bundle{ - val reset_vector = Input(UInt(64.W)) + val resetVector = Input(UInt(64.W)) val hartid = Input(UInt(64.W)) val toHost = ValidIO(UInt(64.W)) - val debug_io = Output(new debugIO()) - val icache_initialising = Input(Bool()) - val dcache_initialising = Input(Bool()) - val imem_initialiseAXI = Flipped(new AXI4liteIO(addrWidth = 64, dataWidth = 32)) + val debugIO = Output(new debugIO()) + val iCacheInitialising = Input(Bool()) + val dCacheInitialising = Input(Bool()) + val iMemInitialiseAxi = Flipped(new AXI4liteIO(addrWidth = 64, dataWidth = 32)) val dmem_initialiseAXI = Flipped(new AXI4liteIO(addrWidth = 64, dataWidth = 64)) }) - val core = withReset(io.icache_initialising || io.dcache_initialising || reset.asBool) { + val core = withReset(io.iCacheInitialising || io.dCacheInitialising || reset.asBool) { Module(new Core(cpu)) } - val icache = Module(Icache_for_Verilator(memsize = icache_memsize)) - val dcache = Module(Dcache_for_Verilator(dcacheBaseAddr = 0x00004000, tohost = tohost, memsize = dcache_memsize)) + val icache = Module(IcacheForVerilator(memsize = iCacheMemsize)) + val dcache = Module(DcacheForVerilator(dcacheBaseAddr = 0x00004000, tohost = tohost, memsize = dCacheMemsize)) icache.io := DontCare - io.imem_initialiseAXI := DontCare + io.iMemInitialiseAxi := DontCare core.io.iCacheAxi4Lite := DontCare dcache.io := DontCare io.dmem_initialiseAXI := DontCare core.io.dCacheAxi4Lite := DontCare - when(io.icache_initialising) { - icache.io <> io.imem_initialiseAXI + when(io.iCacheInitialising) { + icache.io <> io.iMemInitialiseAxi core.io.iCacheAxi4Lite.ar.ready := false.B core.io.iCacheAxi4Lite.aw.ready := false.B core.io.iCacheAxi4Lite.w.ready := false.B } .otherwise { icache.io <> core.io.iCacheAxi4Lite - io.imem_initialiseAXI.ar.ready := false.B - io.imem_initialiseAXI.aw.ready := false.B - io.imem_initialiseAXI.w.ready := false.B + io.iMemInitialiseAxi.ar.ready := false.B + io.iMemInitialiseAxi.aw.ready := false.B + io.iMemInitialiseAxi.w.ready := false.B } - when(io.dcache_initialising) { + when(io.dCacheInitialising) { dcache.io <> io.dmem_initialiseAXI core.io.dCacheAxi4Lite.ar.ready := false.B core.io.dCacheAxi4Lite.aw.ready := false.B @@ -58,13 +58,13 @@ class Core_and_cache[T <: CpuModule](icache_memsize: Int = 8192, dcache_memsize: io.dmem_initialiseAXI.w.ready := false.B } - core.io.resetVector := io.reset_vector + core.io.resetVector := io.resetVector core.io.hartid := io.hartid io.toHost := dcache.debug - io.debug_io := core.io.debugIo.get + io.debugIO := core.io.debugIo.get } -object Core_and_cache extends App { - def apply[T <: CpuModule](icache_memsize: Int, dcache_memsize: Int, tohost: Int, useVector: Boolean = false, cpu: Class[T]): Core_and_cache[T] = new Core_and_cache(icache_memsize, dcache_memsize, tohost, useVector, cpu) - ChiselStage.emitSystemVerilogFile(apply(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, useVector = false, classOf[CPU]), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) +object CoreAndCache extends App { + def apply[T <: CpuModule](icache_memsize: Int, dcache_memsize: Int, tohost: Int, useVector: Boolean = false, cpu: Class[T]): CoreAndCache[T] = new CoreAndCache(icache_memsize, dcache_memsize, tohost, useVector, cpu) + ChiselStage.emitSystemVerilogFile(apply(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, useVector = false, classOf[Cpu]), firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS) } diff --git a/src/test/scala/hajime/simple4Stage/CoreTest.scala b/src/test/scala/hajime/simple4Stage/CoreTest.scala index bcf5a7a..5b5b1a3 100644 --- a/src/test/scala/hajime/simple4Stage/CoreTest.scala +++ b/src/test/scala/hajime/simple4Stage/CoreTest.scala @@ -9,12 +9,12 @@ import scala.sys.process._ import scala.collection.parallel.CollectionConverters._ class CoreTest extends AnyFlatSpec with ChiselScalatestTester { - def initialiseImem[T <: CpuModule](filename: String, dut: Core_and_cache[T]): Unit = { - hajime.vectormodules.MemInitializer.initialiseMemWithAxi(filename, dut.io.imem_initialiseAXI, dut.io.icache_initialising, dut.clock, 0) + def initialiseImem[T <: CpuModule](filename: String, dut: CoreAndCache[T]): Unit = { + hajime.vectormodules.MemInitializer.initialiseMemWithAxi(filename, dut.io.iMemInitialiseAxi, dut.io.iCacheInitialising, dut.clock, 0) } - def initialiseDmem[T <: CpuModule](filename: String, dut: Core_and_cache[T]): Unit = { - hajime.vectormodules.MemInitializer.initialiseMemWithAxi(filename, dut.io.dmem_initialiseAXI, dut.io.dcache_initialising, dut.clock, 0x4000) + def initialiseDmem[T <: CpuModule](filename: String, dut: CoreAndCache[T]): Unit = { + hajime.vectormodules.MemInitializer.initialiseMemWithAxi(filename, dut.io.dmem_initialiseAXI, dut.io.dCacheInitialising, dut.clock, 0x4000) } val instList_noDmem = Seq( @@ -37,9 +37,9 @@ class CoreTest extends AnyFlatSpec with ChiselScalatestTester { // 0x0000_0000 ~ 0x0000_1FFF : icache // 0x0000_4000 ~ 0x0000_5FFF : dcache // 0x1000_0000 : tohost - test(new Core_and_cache(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(iCacheMemsize = 8192, dCacheMemsize = 8192, tohost = 0x10000000, cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.clock.setTimeout(1024) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) initialiseImem(s"src/main/resources/rv64ui/${e}_inst.hex", dut) while (dut.io.toHost.peek().litValue == 0) { @@ -65,9 +65,9 @@ class CoreTest extends AnyFlatSpec with ChiselScalatestTester { } for(e <- instList_withDmem) { it should s"pass the test ${e}" in { - test(new Core_and_cache(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(iCacheMemsize = 8192, dCacheMemsize = 8192, tohost = 0x10000000, cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.clock.setTimeout(1024) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) fork { initialiseImem(s"src/main/resources/rv64ui/${e}_inst.hex", dut) @@ -97,9 +97,9 @@ class CoreTest extends AnyFlatSpec with ChiselScalatestTester { } for(e <- instList_multiply) { it should s"pass the test ${e}" in { - test(new Core_and_cache(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(iCacheMemsize = 8192, dCacheMemsize = 8192, tohost = 0x10000000, cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.clock.setTimeout(1024) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) initialiseImem(s"src/main/resources/rv64um/${e}_inst.hex", dut) diff --git a/src/test/scala/hajime/simple4Stage/Core_ApplicationTest.scala b/src/test/scala/hajime/simple4Stage/Core_ApplicationTest.scala index d87f938..24db4a2 100644 --- a/src/test/scala/hajime/simple4Stage/Core_ApplicationTest.scala +++ b/src/test/scala/hajime/simple4Stage/Core_ApplicationTest.scala @@ -6,23 +6,23 @@ import org.scalatest.flatspec._ import hajime.vectormodules.MemInitializer._ object Core_ApplicationTest { - def get_toHostChar[T <: CpuModule](dut: Core_and_cache[T]): Char = { + def get_toHostChar[T <: CpuModule](dut: CoreAndCache[T]): Char = { dut.io.toHost.bits.peekInt().toChar } - def get_toHostValid[T <: CpuModule](dut: Core_and_cache[T]): Boolean = { + def get_toHostValid[T <: CpuModule](dut: CoreAndCache[T]): Boolean = { dut.io.toHost.valid.peekBoolean() } - def executeTest[T <: CpuModule](dut: Core_and_cache[T], testName: String, testType: String): Unit = { + def executeTest[T <: CpuModule](dut: CoreAndCache[T], testName: String, testType: String): Unit = { println(s"test $testName:") fork { - initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_inst.hex", dut.io.imem_initialiseAXI, dut.io.icache_initialising, dut.clock, 0) + initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_inst.hex", dut.io.iMemInitialiseAxi, dut.io.iCacheInitialising, dut.clock, 0) }.fork { - initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_data.hex", dut.io.dmem_initialiseAXI, dut.io.dcache_initialising, dut.clock, 0x4000) + initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_data.hex", dut.io.dmem_initialiseAXI, dut.io.dCacheInitialising, dut.clock, 0x4000) }.join() dut.clock.setTimeout(1048576) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) var toHostWrittenChar: List[Char] = Nil @@ -33,7 +33,7 @@ object Core_ApplicationTest { toHostWrittenChar = toHostWrittenChar :+ get_toHostChar(dut) } } - dut.io.debug_io.debugAbiMap.a0.expect(0.U(64.W)) + dut.io.debugIO.debugAbiMap.a0.expect(0.U(64.W)) toHostWrittenChar.foreach(print) println() } @@ -47,7 +47,7 @@ class Rv64iApplicationTest extends AnyFlatSpec with ChiselScalatestTester { ) for(e <- rv64iTestList) { it should s"execute $e" in { - test(new Core_and_cache(cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "rv64i") } } @@ -60,7 +60,7 @@ class Rv64mApplicationTest extends AnyFlatSpec with ChiselScalatestTester { ) for(e <- rv64mTestList) { it should s"execute $e" in { - test(new Core_and_cache(cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "rv64m") } } @@ -74,7 +74,7 @@ class ExceptionApplicationTest extends AnyFlatSpec with ChiselScalatestTester { ) for(e <- exceptionTestList) { it should s"execute $e" in { - test(new Core_and_cache(cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "exceptions") } } diff --git a/src/test/scala/hajime/simple4Stage/VectorTest.scala b/src/test/scala/hajime/simple4Stage/VectorTest.scala index 101ba7e..ff5ce24 100644 --- a/src/test/scala/hajime/simple4Stage/VectorTest.scala +++ b/src/test/scala/hajime/simple4Stage/VectorTest.scala @@ -10,7 +10,7 @@ class VectorTest extends AnyFlatSpec with ChiselScalatestTester { ) for(e <- vectorTestList) { ignore should s"execute $e" in { - test(new Core_and_cache(useVector = true, cpu = classOf[CPU])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(useVector = true, cpu = classOf[Cpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "vector") } } diff --git a/src/test/scala/hajime/vectormodules/VectorCpuSpec.scala b/src/test/scala/hajime/vectormodules/VectorCpuSpec.scala index a17d43c..f8efc20 100644 --- a/src/test/scala/hajime/vectormodules/VectorCpuSpec.scala +++ b/src/test/scala/hajime/vectormodules/VectorCpuSpec.scala @@ -21,14 +21,14 @@ class VectorCpuSpec extends AnyFlatSpec with ChiselScalatestTester { // 0x0000_0000 ~ 0x0000_1FFF : icache // 0x0000_4000 ~ 0x0000_5FFF : dcache // 0x1000_0000 : tohost - test(new Core_and_cache(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(iCacheMemsize = 8192, dCacheMemsize = 8192, tohost = 0x10000000, useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.clock.setTimeout(1024) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) initialiseMemWithAxi( filename = s"src/main/resources/rv64ui/${e}_inst.hex", - axi = dut.io.imem_initialiseAXI, - initialising = dut.io.icache_initialising, + axi = dut.io.iMemInitialiseAxi, + initialising = dut.io.iCacheInitialising, clock = dut.clock, baseAddr = 0 ) @@ -50,15 +50,15 @@ class VectorCpuSpec extends AnyFlatSpec with ChiselScalatestTester { ) else Nil for (e <- instListWithDmem) { it should s"Vector CPU pass the test ${e}" in { - test(new Core_and_cache(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(iCacheMemsize = 8192, dCacheMemsize = 8192, tohost = 0x10000000, useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.clock.setTimeout(1024) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) fork { initialiseMemWithAxi( filename = s"src/main/resources/rv64ui/${e}_inst.hex", - axi = dut.io.imem_initialiseAXI, - initialising = dut.io.icache_initialising, + axi = dut.io.iMemInitialiseAxi, + initialising = dut.io.iCacheInitialising, clock = dut.clock, baseAddr = 0 ) @@ -66,7 +66,7 @@ class VectorCpuSpec extends AnyFlatSpec with ChiselScalatestTester { initialiseMemWithAxi( filename = s"src/main/resources/rv64ui/${e}_data.hex", axi = dut.io.dmem_initialiseAXI, - initialising = dut.io.dcache_initialising, + initialising = dut.io.dCacheInitialising, clock = dut.clock, baseAddr = 0x4000 ) @@ -86,14 +86,14 @@ class VectorCpuSpec extends AnyFlatSpec with ChiselScalatestTester { ) else Nil for (e <- instListMult) { it should s"Vector CPU pass the test ${e}" in { - test(new Core_and_cache(icache_memsize = 8192, dcache_memsize = 8192, tohost = 0x10000000, useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(iCacheMemsize = 8192, dCacheMemsize = 8192, tohost = 0x10000000, useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => dut.clock.setTimeout(1024) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) initialiseMemWithAxi( filename = s"src/main/resources/rv64um/${e}_inst.hex", - axi = dut.io.imem_initialiseAXI, - initialising = dut.io.icache_initialising, + axi = dut.io.iMemInitialiseAxi, + initialising = dut.io.iCacheInitialising, clock = dut.clock, baseAddr = 0 ) @@ -116,7 +116,7 @@ class Rv64imAppTestForVecCpu extends AnyFlatSpec with ChiselScalatestTester { ) for (e <- rv64iTestList) { it should s"Vector CPU execute $e" in { - test(new Core_and_cache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "rv64i") } } @@ -126,7 +126,7 @@ class Rv64imAppTestForVecCpu extends AnyFlatSpec with ChiselScalatestTester { ) for (e <- rv64mTestList) { it should s"Vector CPU execute $e" in { - test(new Core_and_cache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "rv64m") } } @@ -149,7 +149,7 @@ class Zve64xAppTestForVecCpu extends AnyFlatSpec with ChiselScalatestTester { val zve64xTestList: Seq[String] = ldstTest ++ arithmeticTest ++ applicationTest for (e <- zve64xTestList) { it should s"Vector CPU execute $e" in { - test(new Core_and_cache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => + test(new CoreAndCache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, VerilatorBackendAnnotation)) { dut => executeTest(dut, e, "vector") } } @@ -157,15 +157,15 @@ class Zve64xAppTestForVecCpu extends AnyFlatSpec with ChiselScalatestTester { } class FpgaTestForVecCpu extends AnyFlatSpec with ChiselScalatestTester { - def _executeTest[T <: CpuModule](dut: Core_and_cache[T], testName: String, testType: String): Unit = { + def _executeTest[T <: CpuModule](dut: CoreAndCache[T], testName: String, testType: String): Unit = { println(s"test $testName:") fork { - initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_inst.mem", dut.io.imem_initialiseAXI, dut.io.icache_initialising, dut.clock, 0) + initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_inst.mem", dut.io.iMemInitialiseAxi, dut.io.iCacheInitialising, dut.clock, 0) }.fork { - initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_data.mem", dut.io.dmem_initialiseAXI, dut.io.dcache_initialising, dut.clock, 0x4000) + initialiseMemWithAxi(s"src/main/resources/applications_${testType}/${testName}_data.mem", dut.io.dmem_initialiseAXI, dut.io.dCacheInitialising, dut.clock, 0x4000) }.join() dut.clock.setTimeout(0) - dut.io.reset_vector.poke(0.U) + dut.io.resetVector.poke(0.U) dut.io.hartid.poke(0.U) for(_ <- 0 until 1048576) { @@ -174,7 +174,7 @@ class FpgaTestForVecCpu extends AnyFlatSpec with ChiselScalatestTester { println(dut.io.toHost.bits.peekInt()) } it should "Vector CPU execute matmul for FPGA" in { - test(new Core_and_cache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, IcarusBackendAnnotation)) { dut => + test(new CoreAndCache(useVector = true, cpu = classOf[VectorCpu])).withAnnotations(Seq(WriteVcdAnnotation, IcarusBackendAnnotation)) { dut => _executeTest(dut, "vector_matmul", "fpga") } } diff --git a/src/test/scala/hajime/vectormodules/VectorLdstUnitWithDcache.scala b/src/test/scala/hajime/vectormodules/VectorLdstUnitWithDcache.scala index 11600c3..a515896 100644 --- a/src/test/scala/hajime/vectormodules/VectorLdstUnitWithDcache.scala +++ b/src/test/scala/hajime/vectormodules/VectorLdstUnitWithDcache.scala @@ -17,7 +17,7 @@ class VectorLdstUnitWithDcache(dcache_memsize: Int = 8192, tohost: Int = 0x10000 val readVrf = Flipped(new VecRegFileReadIO()) val scalarResp = ValidIO(new LDSTResp()) val vectorResp = Output(new VectorExecUnitDataOut()) - val toExWbReg = Output(Valid(new EX_WB_IO())) + val toExWbReg = Output(Valid(new exWbIo())) }) val dCacheInitialiseIO = IO(new Bundle { val valid = Input(Bool()) @@ -27,7 +27,7 @@ class VectorLdstUnitWithDcache(dcache_memsize: Int = 8192, tohost: Int = 0x10000 val vecLdstUnit = withReset(dCacheInitialiseIO.valid || reset.asBool) { Module(new VectorLdstUnit()) } - val dCache = Module(new Dcache_for_Verilator(dcacheBaseAddr = 0x00004000, tohost = tohost, memsize = dcache_memsize)) + val dCache = Module(new DcacheForVerilator(dcacheBaseAddr = 0x00004000, tohost = tohost, memsize = dcache_memsize)) dCacheInitialiseIO := DontCare vecLdstUnit.io := DontCare