Skip to content

Commit

Permalink
a
Browse files Browse the repository at this point in the history
  • Loading branch information
HidetaroTanaka committed Nov 6, 2023
1 parent 1b2c315 commit 0488c46
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 120 deletions.
1 change: 1 addition & 0 deletions src/main/scala/hajime/common/HajimeCoreParams.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ case class HajimeCoreParams(
vlen: Int = 256,
vecAluExecUnitNum: Int = 2,
) {
def physicalRegWidth: Int = log2Up(physicalRegFileEntriesFor1Thread)
def robTagWidth: Int = log2Up(robEntries)
def generateDefaultMISA: UInt = {
Cat((xprlen match {
Expand Down
107 changes: 3 additions & 104 deletions src/main/scala/hajime/simple4Stage/Core.scala
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
val csrUnit = Module(new CSRUnit())
csrUnit.io := DontCare
val multiplier = if(params.useMulDiv) Some(Module(new NonPipelinedMultiplierWrap())) else None
val vectorDecoder = if(params.useVector) Some(Module(new VectorDecoder())) else None
val vecCtrlUnit = if(params.useVector) Some(Module(new VecCtrlUnit())) else None
val vecRegFile = if(params.useVector) Some(Module(new VecRegFile(vrfPortNum = 2))) else None
if(params.useMulDiv) multiplier.get.io := DontCare

ldstUnit.io.dcache_axi4lite <> io.dcache_axi4lite
Expand Down Expand Up @@ -257,20 +254,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
bypassingUnit.io.ID.out.rs2_bypassMatchAtWB -> (!bypassingUnit.io.WB.in.bits.rd.valid),
))

if(params.useVector) {
vectorDecoder.get.io.inst := decoded_inst
when(decoder.io.out.valid && decoder.io.out.bits.vector.get) {
ID_EX_REG.bits.vectorCtrlSignals.get := vectorDecoder.get.io.out
}
// 0 -> v0.mask[i]が1ならば書き込み,0ならば書き込まない
// 1 -> マスクなし,全て書き込む
// (マスクを使わないベクタ命令は全てvm=1か?)
ID_EX_REG.bits.vectorDataSignals.get.mask := decoded_inst.bits(25)
ID_EX_REG.bits.vectorDataSignals.get.vs1 := decoded_inst.rs1
ID_EX_REG.bits.vectorDataSignals.get.vs2 := decoded_inst.rs2
ID_EX_REG.bits.vectorDataSignals.get.vd := decoded_inst.rd
}

if(params.debug) {
ID_EX_REG.bits.debug.get.instruction := decoded_inst.bits
ID_EX_REG.bits.debug.get.pc := io.frontend.resp.bits.pc
Expand All @@ -287,44 +270,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
}

// START OF EX STAGE
val idxReg = if(params.useVector) Some(RegInit(0.U(log2Up(params.vlen/8).W))) else None
val EX_WB_idxReg = if(params.useVector) Some(RegNext(idxReg.get)) else None
// TODO: ロードストアユニット内に入れる,他のベクタ実行ユニットも同様
val vecValid = if(params.useVector) Some(RegInit(false.B)) else None
val vecDataReg = if(params.useVector) Some(RegNext(ID_EX_REG.bits.vectorDataSignals.get)) else None
if (params.useVector) {
// ベクタ命令がベクタレジスタに書き込み,かつinst.vmが1またはv0.mask[i]=1ならば書き込み
val vecWriteBack = ID_EX_REG.bits.vectorCtrlSignals.get.vrfWrite && (ID_EX_REG.bits.vectorDataSignals.get.mask || vecRegFile.get.io.readReq(0).resp.vm)
// vsetvli系でないベクタ命令が実行され,かつ最終要素でないならばインクリメント,それ以外ならばリセット
idxReg.get := MuxCase(0.U, Seq(
(ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && !ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst &&
((idxReg.get + 1.U) < EX_WB_REG.bits.vectorCsrPorts.get.vl)) -> (idxReg.get + 1.U)
))
// Mux(!EX_stall && ID_inst_valid && decoder.io.out.bits.vector.get && !vectorDecoder.get.io.out.isConfsetInst, 0.U, idxReg.get + 1.U)
vecValid.get := ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && vecWriteBack

// vecRegFileへの入力
vecRegFile.get.io.readReq(1) := DontCare
vecRegFile.get.io.writeReq(1) := DontCare
vecRegFile.get.io.readReq(0).req.sew := EX_WB_REG.bits.vectorCsrPorts.get.vtype.vsew
vecRegFile.get.io.readReq(0).req.idx := idxReg.get
vecRegFile.get.io.readReq(0).req.vs1 := ID_EX_REG.bits.vectorDataSignals.get.vs1
vecRegFile.get.io.readReq(0).req.vs2 := ID_EX_REG.bits.vectorDataSignals.get.vs2
vecRegFile.get.io.readReq(0).req.vd := ID_EX_REG.bits.vectorDataSignals.get.vd

vecRegFile.get.io.writeReq(0).valid := vecValid.get
vecRegFile.get.io.writeReq(0).bits.vd := vecDataReg.get.vd
vecRegFile.get.io.writeReq(0).bits.vtype := EX_WB_REG.bits.vectorCsrPorts.get.vtype
vecRegFile.get.io.writeReq(0).bits.index := EX_WB_idxReg.get
vecRegFile.get.io.writeReq(0).bits.last := (EX_WB_idxReg.get-1.U) === EX_WB_REG.bits.vectorCsrPorts.get.vl
vecRegFile.get.io.writeReq(0).bits.data := ldstUnit.io.cpu.resp.bits.data
vecRegFile.get.io.writeReq(0).bits.vm := false.B
vecRegFile.get.io.writeReq(0).bits.writeReq := vecValid.get

if(params.debug) {
io.debug_io.get.vrfMap.get := vecRegFile.get.io.debug.get
}
}

alu.io.in1 := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.value1, 0.U)(Seq(
Value1.RS1.asUInt -> ID_EX_REG.bits.dataSignals.rs1,
Expand All @@ -337,18 +282,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
Value2.PC.asUInt -> ID_EX_REG.bits.dataSignals.pc.addr,
))

if (params.useVector) {
// ベクタメモリアクセス命令が有効ならaluへの入力を上書き
// UNIT_STRIDEならrs1+index*elen
when(ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && ID_EX_REG.bits.vectorCtrlSignals.get.mop === MOP.UNIT_STRIDE.asUInt) {
alu.io.in2 := idxReg.get << MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.memory_length, 0.U)(Seq(
MEM_LEN.B.asUInt -> 0.U,
MEM_LEN.H.asUInt -> 1.U,
MEM_LEN.W.asUInt -> 2.U,
MEM_LEN.D.asUInt -> 3.U,
))
}
}
alu.io.funct := ID_EX_REG.bits.ctrlSignals.decode

branch_evaluator.io.req.bits.ALU_Result := alu.io.out
Expand All @@ -370,57 +303,39 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
multiplier.get.io.resp.ready := !(EX_WB_REG.valid && WB_stall)
}

if(params.useVector) {
vecCtrlUnit.get.io.req.valid := ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.vector.get && ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst
vecCtrlUnit.get.io.req.bits.vDecode := ID_EX_REG.bits.vectorCtrlSignals.get
vecCtrlUnit.get.io.req.bits.rs1_value := ID_EX_REG.bits.dataSignals.rs1
vecCtrlUnit.get.io.req.bits.rs2_value := ID_EX_REG.bits.dataSignals.rs2
vecCtrlUnit.get.io.req.bits.zimm := ID_EX_REG.bits.dataSignals.zimm
vecCtrlUnit.get.io.req.bits.uimm := ID_EX_REG.bits.dataSignals.imm
}

val EX_arithmetic_result = if(params.useMulDiv) {
Mux(ID_EX_REG.bits.ctrlSignals.decode.use_MUL, multiplier.get.io.resp.bits, alu.io.out)
} else {
alu.io.out
}

val EX_vector_result = if(params.useVector) Some(vecCtrlUnit.get.io.resp.bits.vl) else None

ldstUnit.io.cpu.req.valid := ID_EX_REG.valid && !EX_flush && (ID_EX_REG.bits.ctrlSignals.decode.memValid || (if(params.useVector) {
// マスク無しまたは要素が有効な場合にのみtrue
ID_EX_REG.bits.vectorDataSignals.get.mask || vecRegFile.get.io.readReq(0).resp.vm
} else true.B))
ldstUnit.io.cpu.req.valid := ID_EX_REG.valid && !EX_flush && (ID_EX_REG.bits.ctrlSignals.decode.memValid)
ldstUnit.io.cpu.req.bits.addr := alu.io.out
ldstUnit.io.cpu.req.bits.data := (if(params.useVector) Mux(ID_EX_REG.bits.vectorCtrlSignals.get.mop === MOP.UNIT_STRIDE.asUInt, vecRegFile.get.io.readReq(0).resp.vdOut, ID_EX_REG.bits.dataSignals.rs2) else ID_EX_REG.bits.dataSignals.rs2)
ldstUnit.io.cpu.req.bits.data := ID_EX_REG.bits.dataSignals.rs2
ldstUnit.io.cpu.req.bits.funct := ID_EX_REG.bits.ctrlSignals.decode

bypassingUnit.io.EX.in.bits.rd.bits.index := ID_EX_REG.bits.ctrlSignals.rd_index
bypassingUnit.io.EX.in.bits.rd.bits.value := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeback_selector, 0.U)(Seq(
WB_SEL.PC4.asUInt -> ID_EX_REG.bits.dataSignals.pc.nextPC,
WB_SEL.ARITH.asUInt -> EX_arithmetic_result,
WB_SEL.VECTOR.asUInt -> (if(params.useVector) EX_vector_result.get else 0.U)
))
bypassingUnit.io.EX.in.bits.rd.valid := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeback_selector, false.B)(Seq(
WB_SEL.PC4.asUInt -> true.B,
WB_SEL.ARITH.asUInt -> (if(params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.use_MUL || multiplier.get.io.resp.valid else true.B),
WB_SEL.CSR.asUInt -> false.B,
WB_SEL.MEM.asUInt -> false.B,
WB_SEL.NONE.asUInt -> false.B,
WB_SEL.VECTOR.asUInt -> (if(params.useVector) true.B else false.B)
)) && ID_EX_REG.valid
bypassingUnit.io.EX.in.valid := ID_EX_REG.bits.ctrlSignals.decode.write_to_rd && ID_EX_REG.valid

// メモリアクセス命令であればldstUnitがreadyである必要があり,
// 乗算命令であればmultiplier.respがvalidである必要がある
// vsetvl系でないベクタ命令ならば最終要素の実行である必要がある(idxReg == vl)
EX_WB_REG.valid := ID_EX_REG.valid && (!ID_EX_REG.bits.ctrlSignals.decode.memValid || ldstUnit.io.cpu.req.ready) &&
(if(params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.use_MUL || multiplier.get.io.resp.valid else true.B) &&
(if(params.useVector) !ID_EX_REG.bits.ctrlSignals.decode.vector.get || ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst || ((idxReg.get + 1.U((idxReg.get.getWidth+1).W)) === EX_WB_REG.bits.vectorCsrPorts.get.vl) else true.B)
(if(params.useMulDiv) !ID_EX_REG.bits.ctrlSignals.decode.use_MUL || multiplier.get.io.resp.valid else true.B)
EX_WB_REG.bits.dataSignals.pc := ID_EX_REG.bits.dataSignals.pc
EX_WB_REG.bits.dataSignals.exResult := MuxLookup(ID_EX_REG.bits.ctrlSignals.decode.writeback_selector, 0.U)(Seq(
WB_SEL.ARITH.asUInt -> EX_arithmetic_result,
WB_SEL.VECTOR.asUInt -> (if(params.useVector) EX_vector_result.get else 0.U),
))
EX_WB_REG.bits.dataSignals.datatoCSR := Mux(ID_EX_REG.bits.ctrlSignals.decode.value1 === Value1.RS1.asUInt, ID_EX_REG.bits.dataSignals.rs1, ID_EX_REG.bits.dataSignals.imm)
EX_WB_REG.bits.dataSignals.csr_addr := ID_EX_REG.bits.dataSignals.zimm
Expand All @@ -436,21 +351,12 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
))
Mux(ID_EX_REG.bits.ctrlSignals.decode.branch === Branch.ECALL.asUInt, 0xb.U(params.xprlen.W), 0.U)

if(params.useVector) {
when(vecCtrlUnit.get.io.resp.valid) {
EX_WB_REG.bits.vectorCsrPorts.get := vecCtrlUnit.get.io.resp.bits
}
EX_WB_REG.bits.vectorExecNum.get := 0.U
}

if(params.debug) EX_WB_REG.bits.debug.get := ID_EX_REG.bits.debug.get

// WBステージがvalidかつ破棄できないかつEXステージに有効な値がある場合,またはメモリアクセス命令かつldstUnit.reqがreadyでない,または乗算命令で乗算器がvalidでない
// またはベクタ命令実行完了前にスカラ命令がID_EXレジスタにある,またはチェイニング不可能なベクタ命令(構造ハザード・0要素目の値が用意できていないなど)
EX_stall := ID_EX_REG.valid && ((EX_WB_REG.valid && WB_stall) || (ID_EX_REG.bits.ctrlSignals.decode.memValid && !ldstUnit.io.cpu.req.ready) || (if(params.useMulDiv) {
ID_EX_REG.bits.ctrlSignals.decode.use_MUL && !multiplier.get.io.resp.valid
} else false.B) || (if(params.useVector) {
ID_EX_REG.bits.ctrlSignals.decode.vector.get && !ID_EX_REG.bits.vectorCtrlSignals.get.isConfsetInst && (idxReg.get < EX_WB_REG.bits.vectorCsrPorts.get.vl-1.U)
} else false.B))

when(WB_stall) {
Expand Down Expand Up @@ -479,7 +385,6 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
WB_SEL.ARITH -> EX_WB_REG.bits.dataSignals.exResult,
WB_SEL.CSR -> csrUnit.io.resp.data,
WB_SEL.MEM -> ldstUnit.io.cpu.resp.bits.data,
WB_SEL.VECTOR -> (if(params.useVector) EX_WB_REG.bits.dataSignals.exResult else 0.U)
).map{
case (wb_sel, data) => (wb_sel.asUInt, data)
})
Expand All @@ -498,16 +403,10 @@ class CPU(implicit params: HajimeCoreParams) extends CpuModule with ScalarOpCons
csrUnit.io.fromCPU.hartid := io.hartid
csrUnit.io.fromCPU.cpu_operating := cpu_operating
csrUnit.io.fromCPU.inst_retire := WB_inst_can_retire
if(params.useVector) {
csrUnit.io.fromCPU.vectorExecNum.get.valid := false.B
csrUnit.io.fromCPU.vectorExecNum.get.bits := DontCare
}
csrUnit.io.exception.valid := (EX_WB_REG.bits.exceptionSignals.valid || dmemoryAccessException) && EX_WB_REG.valid
csrUnit.io.exception.bits.mepc_write := EX_WB_REG.bits.dataSignals.pc.addr
csrUnit.io.exception.bits.mcause_write := Mux(dmemoryAccessException, ldstUnit.io.cpu.resp.bits.exceptionSignals.bits, EX_WB_REG.bits.exceptionSignals.bits)

if(params.useVector) csrUnit.io.vectorCsrPorts.get := EX_WB_REG.bits.vectorCsrPorts.get

// EXまたはWBステージにfence, ecall, mretがある
sysInst_in_pipeline := (ID_EX_REG.valid && ID_EX_REG.bits.ctrlSignals.decode.isSysInst) || (EX_WB_REG.valid && EX_WB_REG.bits.ctrlSignals.decode.isSysInst)

Expand Down
41 changes: 41 additions & 0 deletions src/main/scala/hajime/vectorOoO/Dispatcher.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package hajime.vectorOoO

import chisel3._
import circt.stage.ChiselStage
import chisel3.util._
import hajime.common.BundleInitializer._
import hajime.common._
import hajime.simple4Stage._
import hajime.vectormodules.VectorDecoderResp

/**
 * Data-side payload that accompanies an instruction leaving the dispatcher.
 *
 * The three register fields are `Valid`-wrapped indices whose width is
 * `params.physicalRegWidth`; the two immediate slots are `params.xprlen` bits wide.
 */
class DispatcherDataSignals(implicit params: HajimeCoreParams) extends Bundle {
  /** Program counter of the instruction. */
  val pc = new ProgramCounter()

  /** Renamed rs1 index, with a valid flag. */
  val renamedRs1 = Valid(UInt(params.physicalRegWidth.W))
  /** Renamed rs2 index, with a valid flag. */
  val renamedRs2 = Valid(UInt(params.physicalRegWidth.W))
  /** Renamed rd index, with a valid flag. */
  val renamedRd = Valid(UInt(params.physicalRegWidth.W))

  // Contents of the two immediate slots, per instruction kind:
  //   jalr:     immVal1 = inst[31:20],            immVal2 = pc from RAS
  //   csr:      immVal1 = inst[31:20] (csr addr), immVal2 = inst[4:0]
  //   vsetvli:  immVal1 = inst[30:20]
  //   vsetivli: immVal1 = inst[29:20],            immVal2 = inst[4:0]
  //   vop.vi:                                     immVal2 = inst[4:0]
  val immVal1 = UInt(params.xprlen.W)
  val immVal2 = UInt(params.xprlen.W)
}
/**
 * Everything the dispatcher emits for one instruction: data signals, scalar
 * control signals, an exception record, and two optional groups that exist
 * only when the matching core parameter is enabled.
 */
class DispatcherOutput(implicit params: HajimeCoreParams) extends Bundle {
  val dataSignals = new DispatcherDataSignals()
  val ctrlSignals = new BasicCtrlSignals()
  // Exception record: valid flag plus an xprlen-wide value.
  val exceptionSignals = Valid(UInt(params.xprlen.W))
  // Present only when the vector extension is enabled.
  val vectorCtrlSignals =
    if (params.useVector) {
      Some(new VectorDecoderResp())
    } else {
      None
    }
  // Present only in debug builds.
  val debug =
    if (params.debug) {
      Some(new Debug_Info())
    } else {
      None
    }
}

/**
 * Port bundle for the dispatcher.
 *
 * This is a `Bundle`, not a `Module`: it only declares port-direction fields,
 * so it is meant to be wrapped as `IO(new DispatcherIO())` inside a module,
 * the same way `FrontEndIO` is used by the front end.
 *
 *  - `frontend`:   flipped `FrontEndCpuIO`, i.e. the CPU-side view of the front-end link
 *  - `hartid`:     xprlen-wide input
 *  - `toExecutor`: decoupled (ready/valid) stream of [[DispatcherOutput]]
 */
class DispatcherIO(implicit params: HajimeCoreParams) extends Bundle {
  val frontend = Flipped(new FrontEndCpuIO())
  val hartid = Input(UInt(params.xprlen.W))
  val toExecutor = new DecoupledIO(new DispatcherOutput())
}

/**
 * Placeholder module for the dispatcher in the `hajime.vectorOoO` package.
 * It currently declares no ports and contains no logic.
 * NOTE(review): presumably this will expose a `DispatcherIO` port bundle once implemented — confirm.
 */
class Dispatcher(implicit params: HajimeCoreParams) extends Module {

}
45 changes: 33 additions & 12 deletions src/main/scala/hajime/vectorOoO/FrontEndForOoO.scala
Original file line number Diff line number Diff line change
@@ -1,27 +1,48 @@
package hajime.vectorOoO

import circt.stage.ChiselStage
import chisel3._
import circt.stage.ChiselStage
import chisel3.util._
import hajime.axiIO.AXI4liteIO
import hajime.common.BundleInitializer._
import hajime.common._
import hajime.simple4Stage._
import hajime.common.BundleInitializer._

class FrontEndForOoO(implicit params: HajimeCoreParams) extends Module {
val io = IO(new FrontEndIO())
io := DontCare

val pc_reg = RegInit(Valid(new ProgramCounter()).Init(
_.valid -> true.B,
_.valid -> false.B,
_.bits.addr -> io.reset_vector,
))
val toAxiAR = MuxCase(pc_reg.bits.nextPC, Seq(
io.cpu.req.valid -> io.cpu.req.bits.addr,
// axiがreadyでなければPCを維持
(!io.icache_axi4lite.ar.ready || !io.icache_axi4lite.r.valid || !io.cpu.resp.ready) -> pc_reg.bits.addr
))
// cpuがFrontEndから命令を読み取ればaddr
when(io.cpu.resp.valid && io.cpu.resp.ready) {
pc_reg := io.cpu.req.bits
// PCの更新はCPUが行う
when(io.cpu.req.valid) {
pc_reg := io.cpu.req
}
.otherwise {
pc_reg.valid := false.B
}

io.icache_axi4lite.ar.bits.addr := Mux(io.cpu.req.valid, io.cpu.req.bits.addr, pc_reg.bits.addr)
io.icache_axi4lite.ar.bits.prot := 0.U
io.icache_axi4lite.ar.valid := io.cpu.req.valid || pc_reg.valid

io.cpu.resp.bits.pc := pc_reg.bits
io.cpu.resp.bits.inst.bits := io.icache_axi4lite.r.bits.data
io.cpu.resp.valid := io.icache_axi4lite.r.valid
io.icache_axi4lite.r.ready := io.cpu.resp.ready

val instAccessFault = pc_reg.bits.addr > 0x1FFC.U
val instAddressMisaligned = pc_reg.bits.addr(1, 0) =/= 0.U
io.cpu.resp.bits.exceptionSignals.bits := MuxCase(0.U, Seq(
instAccessFault -> Causes.fetch_access.U,
instAddressMisaligned -> Causes.misaligned_fetch.U,
))
io.cpu.resp.bits.exceptionSignals.valid := instAccessFault || instAddressMisaligned
}

/**
 * Companion object and stand-alone entry point: running it elaborates a
 * `FrontEndForOoO` with default core parameters and emits a SystemVerilog file.
 */
object FrontEndForOoO extends App {
  // Default configuration; picked up implicitly by the elaboration call below.
  implicit val params: HajimeCoreParams = HajimeCoreParams()

  /** Builds a front end for the supplied (implicit) core parameters. */
  def apply(implicit params: HajimeCoreParams): FrontEndForOoO =
    new FrontEndForOoO()(params)

  ChiselStage.emitSystemVerilogFile(
    new FrontEndForOoO(),
    firtoolOpts = COMPILE_CONSTANTS.FIRTOOLOPS
  )
}
4 changes: 0 additions & 4 deletions src/main/scala/hajime/vectormodules/VectorCpu.scala
Original file line number Diff line number Diff line change
Expand Up @@ -293,10 +293,6 @@ class VectorCpu(implicit params: HajimeCoreParams) extends CpuModule with Scalar
when(decoder.io.out.valid && decoder.io.out.bits.vector.get) {
ID_EX_REG.bits.vectorCtrlSignals.get := vectorDecoder.io.out
}
// 0 -> v0.mask[i]が1ならば書き込み,0ならば書き込まない
// 1 -> マスクなし,全て書き込む
// (マスクを使わないベクタ命令は全てvm=1か?)
ID_EX_REG.bits.vectorDataSignals.get.mask := vectorDecoder.io.out.vm
ID_EX_REG.bits.vectorDataSignals.get.vs1 := decoded_inst.rs1
ID_EX_REG.bits.vectorDataSignals.get.vs2 := decoded_inst.rs2
ID_EX_REG.bits.vectorDataSignals.get.vd := decoded_inst.rd
Expand Down

0 comments on commit 0488c46

Please sign in to comment.