diff --git a/ipemu/src/TestBench.scala b/ipemu/src/TestBench.scala index e29782144..8826670d0 100644 --- a/ipemu/src/TestBench.scala +++ b/ipemu/src/TestBench.scala @@ -23,16 +23,21 @@ class TestBenchOM extends Class { t1 := t1In } -class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends RawModule { +class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) + extends RawModule + with ImplicitClock + with ImplicitReset { val omInstance: Instance[TestBenchOM] = Instantiate(new TestBenchOM) - val omType: ClassType = omInstance.toDefinition.getClassType + val omType: ClassType = omInstance.toDefinition.getClassType @public val om: Property[ClassType] = IO(Output(Property[omType.Type]())) om := omInstance.getPropertyReference - val clockGen = Module(new ExtModule with HasExtModuleInline { + lazy val clockGen = Module(new ExtModule with HasExtModuleInline { + override def desiredName = "ClockGen" - setInline(s"$desiredName.sv", + setInline( + s"$desiredName.sv", s"""module $desiredName(output reg clock, output reg reset); | export "DPI-C" function dump_wave; | function dump_wave(input string file); @@ -54,179 +59,214 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends val clock = IO(Output(Bool())) val reset = IO(Output(Bool())) }) + def clock = clockGen.clock.asClock + def reset = clockGen.reset + override def implicitClock = clockGen.clock.asClock + override def implicitReset = clockGen.reset + val dut: Instance[T1] = generator.instance() - val clock: Clock = clockGen.clock.asClock - val reset: Bool = clockGen.reset - val simulationTime: UInt = withClockAndReset(clock, reset)(RegInit(0.U(64.W))) + val simulationTime: UInt = RegInit(0.U(64.W)) simulationTime := simulationTime + 1.U - val dut: T1 = withClockAndReset(clock, reset)(Module(generator.module())) - omInstance.t1In := Property(dut.om.asAnyClassType) + dut.io.clock := clockGen.clock.asClock + dut.io.reset := clockGen.reset + omInstance.t1In := Property(dut.io.om.asAnyClassType) // Instruction Drivers - withClockAndReset(clock, reset) { - // TODO: this initial way cannot happen before reset... - val initFlag = RegInit(false.B) - when(!initFlag) { - initFlag := true.B - printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") - } - val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) - when(watchdog =/= 0.U) { - stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") - } + // TODO: this initial way cannot happen before reset... + val initFlag = RegInit(false.B) + when(!initFlag) { + initFlag := true.B + printf(cf"""{"event":"SimulationStart","cycle":${simulationTime}}\n""") + } + val watchdog = RawUnclockedNonVoidFunctionCall("cosim_watchdog", UInt(8.W))(simulationTime(9, 0) === 0.U) + when(watchdog =/= 0.U) { + stop(cf"""{"event":"SimulationStop","reason": ${watchdog},"cycle":${simulationTime}}\n""") + } - // uint32_t -> svBitVecVal -> reference type with 7 length. - class Issue extends Bundle { - val instruction: UInt = UInt(32.W) - val src1Data: UInt = UInt(32.W) - val src2Data: UInt = UInt(32.W) - // mstatus, vstatus? - val vtype: UInt = UInt(32.W) - val vl: UInt = UInt(32.W) - // vlenb - val vstart: UInt = UInt(32.W) - // vxrm, vxsat are merged to vcsr - val vcsr: UInt = UInt(32.W) - // meta is used to control the simulation. - // 0 is reserved, aka not valid - // 1 is normal, it's a valid instruction - // 2 is fence, it will request - // others are exit, will end the simulation immediately - val meta: UInt = UInt(32.W) - } - class Retire extends Bundle { - val rd: UInt = UInt(32.W) - val data: UInt = UInt(32.W) - val writeRd: UInt = UInt(32.W) - val vxsat: UInt = UInt(32.W) - } - val issue = WireDefault(0.U.asTypeOf(new Issue)) - val fence = RegInit(false.B) - val outstanding = RegInit(0.U(4.W)) - val doIssue: Bool = dut.request.ready && !fence - outstanding := outstanding + (doIssue && (issue.meta === 1.U)) - dut.response.valid - fence := Mux(doIssue, issue.meta === 2.U, fence && !dut.response.valid && !(outstanding === 0.U)) - - issue := RawClockedNonVoidFunctionCall("issue_vector_instruction", new Issue)( - clock, - doIssue, - ) - dut.request.bits.instruction := issue.instruction - dut.request.bits.src1Data := issue.src1Data - dut.request.bits.src2Data := issue.src2Data - dut.csrInterface.vlmul := issue.vtype(2, 0) - dut.csrInterface.vSew := issue.vtype(5, 3) - dut.csrInterface.vta := issue.vtype(6) - dut.csrInterface.vma := issue.vtype(7) - dut.csrInterface.vl := issue.vl - dut.csrInterface.vStart := issue.vstart - dut.csrInterface.vxrm := issue.vcsr(2, 1) - - dut.csrInterface.ignoreException := 0.U - dut.storeBufferClear := true.B - dut.request.valid := issue.meta === 1.U - when(issue.meta =/= 0.U && issue.meta =/= 1.U && issue.meta =/= 2.U) { - stop(cf"""{"event":"SimulationStop","reason": ${issue.meta},"cycle":${simulationTime}}\n""") - } - val retire = Wire(new Retire) - retire.rd := dut.response.bits.rd.bits - retire.data := dut.response.bits.data - retire.writeRd := dut.response.bits.rd.valid - retire.vxsat := dut.response.bits.vxsat - RawClockedVoidFunctionCall("retire_vector_instruction")(clock, dut.response.valid, retire) - val dummy = Wire(Bool()) - dummy := false.B - RawClockedVoidFunctionCall("retire_vector_mem")(clock, dut.response.bits.mem && dut.response.valid, dummy) + // uint32_t -> svBitVecVal -> reference type with 7 length. + class Issue extends Bundle { + val instruction: UInt = UInt(32.W) + val src1Data: UInt = UInt(32.W) + val src2Data: UInt = UInt(32.W) + // mstatus, vstatus? + val vtype: UInt = UInt(32.W) + val vl: UInt = UInt(32.W) + // vlenb + val vstart: UInt = UInt(32.W) + // vxrm, vxsat are merged to vcsr + val vcsr: UInt = UInt(32.W) + // meta is used to control the simulation. + // 0 is reserved, aka not valid + // 1 is normal, it's a valid instruction + // 2 is fence, it will request + // others are exit, will end the simulation immediately + val meta: UInt = UInt(32.W) } + class Retire extends Bundle { + val rd: UInt = UInt(32.W) + val data: UInt = UInt(32.W) + val writeRd: UInt = UInt(32.W) + val vxsat: UInt = UInt(32.W) + } + val issue = WireDefault(0.U.asTypeOf(new Issue)) + val fence = RegInit(false.B) + val outstanding = RegInit(0.U(4.W)) + val doIssue: Bool = dut.io.request.ready && !fence + outstanding := outstanding + (doIssue && (issue.meta === 1.U)) - dut.io.response.valid + fence := Mux(doIssue, issue.meta === 2.U, fence && !dut.io.response.valid && !(outstanding === 0.U)) + + issue := RawClockedNonVoidFunctionCall("issue_vector_instruction", new Issue)( + clock, + doIssue + ) + dut.io.request.bits.instruction := issue.instruction + dut.io.request.bits.src1Data := issue.src1Data + dut.io.request.bits.src2Data := issue.src2Data + dut.io.csrInterface.vlmul := issue.vtype(2, 0) + dut.io.csrInterface.vSew := issue.vtype(5, 3) + dut.io.csrInterface.vta := issue.vtype(6) + dut.io.csrInterface.vma := issue.vtype(7) + dut.io.csrInterface.vl := issue.vl + dut.io.csrInterface.vStart := issue.vstart + dut.io.csrInterface.vxrm := issue.vcsr(2, 1) + + dut.io.csrInterface.ignoreException := 0.U + dut.io.storeBufferClear := true.B + dut.io.request.valid := issue.meta === 1.U + when(issue.meta =/= 0.U && issue.meta =/= 1.U && issue.meta =/= 2.U) { + stop(cf"""{"event":"SimulationStop","reason": ${issue.meta},"cycle":${simulationTime}}\n""") + } + val retire = Wire(new Retire) + retire.rd := dut.io.response.bits.rd.bits + retire.data := dut.io.response.bits.data + retire.writeRd := dut.io.response.bits.rd.valid + retire.vxsat := dut.io.response.bits.vxsat + RawClockedVoidFunctionCall("retire_vector_instruction")(clock, dut.io.response.valid, retire) + val dummy = Wire(Bool()) + dummy := false.B + RawClockedVoidFunctionCall("retire_vector_mem")(clock, dut.io.response.bits.mem && dut.io.response.valid, dummy) // Memory Drivers Seq( - dut.highBandwidthLoadStorePort, - dut.indexedLoadStorePort - ).map(_.viewAs[AXI4RWIrrevocableVerilog]).lazyZip( - Seq("highBandwidthPort", "indexedAccessPort") - ).zipWithIndex.foreach { - case ((bundle: AXI4RWIrrevocableVerilog, channelName: String), index: Int) => - val agent = Module(new AXI4SlaveAgent( - AXI4SlaveAgentParameter( - name= channelName, - axiParameter = bundle.parameter, - outstanding = 4, - readPayloadSize = 1, - writePayloadSize = 1, - ) - )).suggestName(s"axi4_channel${index}_${channelName}") - agent.io.channel match { - case io: AXI4RWIrrevocableVerilog => io <> bundle - } - agent.io.clock := clock - agent.io.reset := reset - agent.io.channelId := index.U - agent.io.gateRead := false.B - agent.io.gateWrite := false.B - } + dut.io.highBandwidthLoadStorePort, + dut.io.indexedLoadStorePort + ).map(_.viewAs[AXI4RWIrrevocableVerilog]) + .lazyZip( + Seq("highBandwidthPort", "indexedAccessPort") + ) + .zipWithIndex + .foreach { + case ((bundle: AXI4RWIrrevocableVerilog, channelName: String), index: Int) => + val agent = Module( + new AXI4SlaveAgent( + AXI4SlaveAgentParameter( + name = channelName, + axiParameter = bundle.parameter, + outstanding = 4, + readPayloadSize = 1, + writePayloadSize = 1 + ) + ) + ).suggestName(s"axi4_channel${index}_${channelName}") + agent.io.channel match { + case io: AXI4RWIrrevocableVerilog => io <> bundle + } + agent.io.clock := clock + agent.io.reset := reset + agent.io.channelId := index.U + agent.io.gateRead := false.B + agent.io.gateWrite := false.B + } // Events for difftest and performance modeling - val laneProbes = dut.laneProbes.zipWithIndex.map{case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") - wire := probe.read(p) - wire + val laneProbes = dut.io.laneProbes.zipWithIndex.map { + case (p, idx) => + val wire = Wire(p.cloneType).suggestName(s"lane${idx}Probe") + wire := probe.read(p) + wire } - val lsuProbe = probe.read(dut.lsuProbe).suggestName("lsuProbe") + val lsuProbe = probe.read(dut.io.lsuProbe).suggestName("lsuProbe") val storeUnitProbe = lsuProbe.storeUnitProbe.suggestName("storeUnitProbe") val otherUnitProbe = lsuProbe.otherUnitProbe.suggestName("otherUnitProbe") - val laneVrfProbes = dut.laneVrfProbes.zipWithIndex.map{ case (p, idx) => - val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") - wire := probe.read(p) - wire + val laneVrfProbes = dut.io.laneVrfProbes.zipWithIndex.map { + case (p, idx) => + val wire = Wire(p.cloneType).suggestName(s"lane${idx}VrfProbe") + wire := probe.read(p) + wire + } + + val t1Probe = probe.read(dut.io.t1Probe) + + // vrf write + laneVrfProbes.zipWithIndex.foreach { + case (lane, i) => + when(lane.valid)( + printf( + cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""" + ) + ) } + // memory write from store unit + when(storeUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${storeUnitProbe.index},"mask":"${storeUnitProbe.mask}%x","data":"${storeUnitProbe.data}%x","address":"${storeUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + // memory write from other unit + when(otherUnitProbe.valid)( + printf( + cf"""{"event":"MemoryWrite","lsu_idx":${otherUnitProbe.index},"mask":"${otherUnitProbe.mask}%x","data":"${otherUnitProbe.data}%x","address":"${otherUnitProbe.address}%x","cycle":${simulationTime}}\n""" + ) + ) + // issue + when(dut.io.request.fire)( + printf(cf"""{"event":"Issue","idx":${t1Probe.instructionCounter},"cycle":${simulationTime}}\n""") + ) + // check rd + when(dut.io.response.bits.rd.valid)( + printf( + cf"""{"event":"CheckRd","data":"${dut.io.response.bits.data}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""" + ) + ) + // lsu enq + when(lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) - val t1Probe = probe.read(dut.t1Probe) - - withClockAndReset(clock, reset) { - // vrf write - laneVrfProbes.zipWithIndex.foreach { case (lane, i) => when(lane.valid)(printf(cf"""{"event":"VrfWrite","issue_idx":${lane.requestInstruction},"vd":${lane.requestVd},"offset":${lane.requestOffset},"mask":"${lane.requestMask}%x","data":"${lane.requestData}%x","lane":$i,"cycle":${simulationTime}}\n""")) } - // memory write from store unit - when(storeUnitProbe.valid)(printf(cf"""{"event":"MemoryWrite","lsu_idx":${storeUnitProbe.index},"mask":"${storeUnitProbe.mask}%x","data":"${storeUnitProbe.data}%x","address":"${storeUnitProbe.address}%x","cycle":${simulationTime}}\n""")) - // memory write from other unit - when(otherUnitProbe.valid)(printf(cf"""{"event":"MemoryWrite","lsu_idx":${otherUnitProbe.index},"mask":"${otherUnitProbe.mask}%x","data":"${otherUnitProbe.data}%x","address":"${otherUnitProbe.address}%x","cycle":${simulationTime}}\n""")) - // issue - when(dut.request.fire)(printf(cf"""{"event":"Issue","idx":${t1Probe.instructionCounter},"cycle":${simulationTime}}\n""")) - // check rd - when(dut.response.bits.rd.valid)(printf(cf"""{"event":"CheckRd","data":"${dut.response.bits.data}%x","issue_idx":${t1Probe.responseCounter},"cycle":${simulationTime}}\n""")) - // lsu enq - when(lsuProbe.reqEnq.orR)(printf(cf"""{"event":"LsuEnq","enq":${lsuProbe.reqEnq},"cycle":${simulationTime}}\n""")) - - // allocate 2 * chainingSize scoreboards - val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * dut.parameter.chainingSize) { _ => RegInit(0.U.asTypeOf(Valid(UInt(16.W))))} - vrfWriteScoreboard.foreach(scoreboard => dontTouch(scoreboard)) - val instructionValid = - (laneProbes.map(laneProbe => laneProbe.instructionValid ## laneProbe.instructionValid) :+ - lsuProbe.lsuInstructionValid :+ t1Probe.instructionValid).reduce(_ | _) - val scoreboardEnq = Mux(t1Probe.instructionIssue, UIntToOH(t1Probe.issueTag), 0.U((2 * dut.parameter.chainingSize).W)) - vrfWriteScoreboard.zipWithIndex.foreach { case (scoreboard, tag) => + // allocate 2 * chainingSize scoreboards + val vrfWriteScoreboard: Seq[Valid[UInt]] = Seq.tabulate(2 * generator.parameter.chainingSize) { _ => + RegInit(0.U.asTypeOf(Valid(UInt(16.W)))) + } + vrfWriteScoreboard.foreach(scoreboard => dontTouch(scoreboard)) + val instructionValid = + (laneProbes.map(laneProbe => laneProbe.instructionValid ## laneProbe.instructionValid) :+ + lsuProbe.lsuInstructionValid :+ t1Probe.instructionValid).reduce(_ | _) + val scoreboardEnq = + Mux(t1Probe.instructionIssue, UIntToOH(t1Probe.issueTag), 0.U((2 * generator.parameter.chainingSize).W)) + vrfWriteScoreboard.zipWithIndex.foreach { + case (scoreboard, tag) => val writeEnq: UInt = VecInit( // vrf write from lane - laneProbes.flatMap(laneProbe => laneProbe.slots.map(slot => - slot.writeTag === tag.U && slot.writeQueueEnq && slot.writeMask.orR - )) ++ laneProbes.flatMap(laneProbe => laneProbe.crossWriteProbe.map(cp => - cp.bits.writeTag === tag.U && cp.valid && cp.bits.writeMask.orR - )) ++ + laneProbes.flatMap(laneProbe => + laneProbe.slots.map(slot => slot.writeTag === tag.U && slot.writeQueueEnq && slot.writeMask.orR) + ) ++ laneProbes.flatMap(laneProbe => + laneProbe.crossWriteProbe.map(cp => cp.bits.writeTag === tag.U && cp.valid && cp.bits.writeMask.orR) + ) ++ // vrf write from lsu lsuProbe.slots.map(slot => slot.dataInstruction === tag.U && slot.writeValid && slot.dataMask.orR) ++ // vrf write from Sequencer Some(t1Probe.writeQueueEnq.bits === tag.U && t1Probe.writeQueueEnq.valid && t1Probe.writeQueueEnqMask.orR) - ).asUInt + ).asUInt // always equal to array index scoreboard.bits := scoreboard.bits + PopCount(writeEnq) - when(scoreboard.valid && !instructionValid(tag)){ - printf(cf"""{"event":"VrfScoreboardReport","count":${scoreboard.bits},"issue_idx":${tag},"cycle":${simulationTime}}\n""") + when(scoreboard.valid && !instructionValid(tag)) { + printf( + cf"""{"event":"VrfScoreboardReport","count":${scoreboard.bits},"issue_idx":${tag},"cycle":${simulationTime}}\n""" + ) scoreboard.valid := false.B } when(scoreboardEnq(tag)) { @@ -234,6 +274,5 @@ class TestBench(generator: SerializableModuleGenerator[T1, T1Parameter]) extends assert(!scoreboard.valid) scoreboard.bits := 0.U } - } } } diff --git a/t1/src/T1.scala b/t1/src/T1.scala index aeb7d07d0..901a11f24 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -4,11 +4,11 @@ package org.chipsalliance.t1.rtl import chisel3._ -import chisel3.experimental.hierarchy.{Instance, Instantiate, instantiable, public} +import chisel3.experimental.hierarchy.{Definition, Instance, Instantiate, instantiable, public} import chisel3.experimental.{SerializableModule, SerializableModuleParameter} import chisel3.probe.{Probe, ProbeValue, define} import chisel3.properties.{AnyClassType, Class, ClassType, Property} -import chisel3.util._ +import chisel3.util.{Decoupled, _} import chisel3.util.experimental.BitSet import org.chipsalliance.rvdecoderdb.Instruction import org.chipsalliance.t1.rtl.decoder.{Decoder, DecoderParam, T1CustomInstruction} @@ -17,6 +17,8 @@ import org.chipsalliance.amba.axi4.bundle.{AXI4BundleParameter, AXI4RWIrrevocabl import org.chipsalliance.t1.rtl.lsu.{LSU, LSUParameter, LSUProbe} import org.chipsalliance.t1.rtl.vrf.{RamType, VRFParam, VRFProbe} +import scala.collection.immutable.{ListMap, SeqMap} + // TODO: this should be a object model. There should 3 object model here: // 1. T1SubsystemOM(T1(OM), MemoryRegion, Cache configuration) // 2. T1(Lane(OM), VLEN, DLEN, uarch parameters, customer IDs(for floorplan);) @@ -283,54 +285,73 @@ class T1Probe(param: T1Parameter) extends Bundle { val responseCounter: UInt = UInt(param.instructionIndexBits.W) } +class T1Interface(parameter: T1Parameter) extends Record { + def clock = elements("clock").asInstanceOf[Clock] + def reset = elements("reset").asInstanceOf[Bool] + /** request from CPU. + * because the interrupt and exception of previous instruction is unpredictable, + * and the `kill` logic in Vector processor is too high, + * thus the request should come from commit stage to avoid any interrupt or excepiton. + */ + def request = elements("request").asInstanceOf[DecoupledIO[VRequest]] + /** response to CPU. */ + def response: ValidIO[VResponse] = elements("response").asInstanceOf[ValidIO[VResponse]] + /** CSR interface from CPU. */ + def csrInterface: CSRInterface = elements("csrInterface").asInstanceOf[CSRInterface] + /** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. */ + def storeBufferClear: Bool = elements("storeBufferClear").asInstanceOf[Bool] + def highBandwidthLoadStorePort: AXI4RWIrrevocable = elements("highBandwidthLoadStorePort").asInstanceOf[AXI4RWIrrevocable] + def indexedLoadStorePort: AXI4RWIrrevocable = elements("indexedLoadStorePort").asInstanceOf[AXI4RWIrrevocable] + def om: Property[ClassType] = elements("om").asInstanceOf[Property[ClassType]] + def lsuProbe: LSUProbe = elements("lsuProbe").asInstanceOf[LSUProbe] + def t1Probe: T1Probe = elements("t1Probe").asInstanceOf[T1Probe] + def laneProbes: Seq[LaneProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}Probe").asInstanceOf[LaneProbe]) + def laneVrfProbes: Seq[VRFProbe] = Seq.tabulate(parameter.laneNumber)(i => elements(s"lane${i}VrfProbe").asInstanceOf[VRFProbe]) + + val elements: SeqMap[String, Data] = SeqMap.from( + Seq( + "clock" -> Input(Clock()), + "reset" -> Input(Bool()), + "request" -> Flipped(Decoupled(new VRequest(parameter.xLen))), + "response" -> Valid(new VResponse(parameter.xLen)), + "csrInterface" -> Input(new CSRInterface(parameter.laneParam.vlMaxBits)), + "storeBufferClear" -> Input(Bool()), + "highBandwidthLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter), + "indexedLoadStorePort" -> new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32)), + "om" -> Output(Property[AnyClassType]()), + "lsuProbe" -> Output(Probe(new LSUProbe(parameter.lsuParameters))), + "t1Probe" -> Output(Probe(new T1Probe(parameter))), + ) ++ + Seq.tabulate(parameter.laneNumber)( + i => s"lane${i}Probe" -> Output(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits))) + ) ++ + Seq.tabulate(parameter.laneNumber)( + i => s"lane${i}VrfProbe" -> Output(Probe(new VRFProbe(parameter.laneParam.vrfParam.regNumBits, parameter.laneParam.vrfOffsetBits, parameter.laneParam.instructionIndexBits, parameter.laneParam.datapathWidth))) + ) + ) +} + /** Top of Vector processor: * couple to Rocket Core; * instantiate LSU, Decoder, Lane, CSR, Instruction Queue. * The logic of [[T1]] contains the Vector Sequencer and Mask Unit. */ -class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Parameter] { +@instantiable +class T1(val parameter: T1Parameter) + extends FixedIORawModule(new T1Interface(parameter)) + with SerializableModule[T1Parameter] + with ImplicitClock + with ImplicitReset { + def implicitClock: Clock = io.clock + def implicitReset: Reset = io.reset + val omInstance: Instance[T1OM] = Instantiate(new T1OM) val omType: ClassType = omInstance.toDefinition.getClassType - @public - val om: Property[ClassType] = IO(Output(Property[omType.Type]())) - om := omInstance.getPropertyReference + io.om := omInstance.getPropertyReference.asAnyClassType omInstance.vlenIn := Property(parameter.vLen) omInstance.dlenIn := Property(parameter.dLen) - /** request from CPU. - * because the interrupt and exception of previous instruction is unpredictable, - * and the `kill` logic in Vector processor is too high, - * thus the request should come from commit stage to avoid any interrupt or excepiton. - */ - @public - val request: DecoupledIO[VRequest] = IO(Flipped(Decoupled(new VRequest(parameter.xLen)))) - /** response to CPU. */ - @public - val response: ValidIO[VResponse] = IO(Valid(new VResponse(parameter.xLen))) - /** CSR interface from CPU. */ - @public - val csrInterface: CSRInterface = IO(Input(new CSRInterface(parameter.laneParam.vlMaxBits))) - /** from CPU LSU, store buffer is cleared, memory can observe memory requests after this is asserted. */ - @public - val storeBufferClear: Bool = IO(Input(Bool())) - @public - val highBandwidthLoadStorePort: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(parameter.axi4BundleParameter)) - @public - val indexedLoadStorePort: AXI4RWIrrevocable = IO(new AXI4RWIrrevocable(parameter.axi4BundleParameter.copy(dataWidth=32))) - // TODO: this is an example of adding a new Probe - @public - val lsuProbe = IO(Probe(new LSUProbe(parameter.lsuParameters))) - @public - val laneProbes = Seq.tabulate(parameter.laneNumber)(laneIdx => IO(Probe(new LaneProbe(parameter.chainingSize, parameter.instructionIndexBits))).suggestName(s"lane${laneIdx}Probe")) - @public - val laneVrfProbes = Seq.tabulate(parameter.laneNumber)(laneIdx => IO(Probe(new VRFProbe( - parameter.laneParam.vrfParam.regNumBits, - parameter.laneParam.vrfOffsetBits, - parameter.laneParam.instructionIndexBits, - parameter.laneParam.datapathWidth - ))).suggestName(s"lane${laneIdx}VrfProbe")) - /** the LSU Module */ val lsu: Instance[LSU] = Instantiate(new LSU(parameter.lsuParameters)) @@ -341,12 +362,12 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // TODO: uarch doc about the order of instructions val instructionCounter: UInt = RegInit(0.U(parameter.instructionIndexBits.W)) val nextInstructionCounter: UInt = instructionCounter + 1.U - when(request.fire) { instructionCounter := nextInstructionCounter } + when(io.request.fire) { instructionCounter := nextInstructionCounter } // todo: handle waw val responseCounter: UInt = RegInit(0.U(parameter.instructionIndexBits.W)) val nextResponseCounter: UInt = responseCounter + 1.U - when(response.fire) { responseCounter := nextResponseCounter } + when(io.response.fire) { responseCounter := nextResponseCounter } // maintained a 1 depth queue for VRequest. // TODO: directly maintain a `ready` signal @@ -356,24 +377,24 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa /** maintain a [[DecoupleIO]] for [[requestReg]]. */ val requestRegDequeue = Wire(Decoupled(new VRequest(parameter.xLen))) // latch instruction, csr, decode result and instruction index to requestReg. - when(request.fire) { + when(io.request.fire) { // The LSU only need to know the instruction, and don't need information from decoder. // Thus we latch the request here, and send it to LSU. - requestReg.bits.request := request.bits + requestReg.bits.request := io.request.bits requestReg.bits.decodeResult := decode.decodeResult - requestReg.bits.csr := csrInterface + requestReg.bits.csr := io.csrInterface requestReg.bits.instructionIndex := instructionCounter // vd === 0 && not store type - requestReg.bits.vdIsV0 := (request.bits.instruction(11, 7) === 0.U) && - (request.bits.instruction(6) || !request.bits.instruction(5)) + requestReg.bits.vdIsV0 := (io.request.bits.instruction(11, 7) === 0.U) && + (io.request.bits.instruction(6) || !io.request.bits.instruction(5)) requestReg.bits.writeByte := Mux( decode.decodeResult(Decoder.red), // Must be smaller than dataPath 1.U, Mux( decode.decodeResult(Decoder.maskDestination), - (csrInterface.vl >> 3).asUInt + csrInterface.vl(2, 0).orR, - csrInterface.vl << (csrInterface.vSew + decode.decodeResult(Decoder.crossWrite)) + (io.csrInterface.vl >> 3).asUInt + io.csrInterface.vl(2, 0).orR, + io.csrInterface.vl << (io.csrInterface.vSew + decode.decodeResult(Decoder.crossWrite)) ) ) } @@ -381,13 +402,13 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // 0 1 -> update to false // 1 0 -> update to true // 1 1 -> don't update - requestReg.valid := Mux(request.fire ^ requestRegDequeue.fire, request.fire, requestReg.valid) + requestReg.valid := Mux(io.request.fire ^ requestRegDequeue.fire, io.request.fire, requestReg.valid) // ready when requestReg is free or it will be free in this cycle. - request.ready := !requestReg.valid || requestRegDequeue.ready + io.request.ready := !requestReg.valid || requestRegDequeue.ready // manually maintain a queue for requestReg. requestRegDequeue.bits := requestReg.bits.request requestRegDequeue.valid := requestReg.valid - decode.decodeInput := request.bits.instruction + decode.decodeInput := io.request.bits.instruction /** alias to [[requestReg.bits.decodeResult]], it is commonly used. */ val decodeResult: DecodeBundle = requestReg.bits.decodeResult @@ -626,7 +647,7 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa control.state.wVRFWrite := true.B } - when(responseCounter === control.record.instructionIndex && response.fire) { + when(responseCounter === control.record.instructionIndex && io.response.fire) { control.state.sCommit := true.B } @@ -684,12 +705,12 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // first type instruction val firstLane = ffo(completedVec.asUInt) val firstLaneIndex: UInt = OHToUInt(firstLane)(log2Ceil(parameter.laneNumber) - 1, 0) - response.bits.rd.valid := lastSlotCommit && decodeResultReg(Decoder.targetRd) - response.bits.rd.bits := vd + io.response.bits.rd.valid := lastSlotCommit && decodeResultReg(Decoder.targetRd) + io.response.bits.rd.bits := vd if (parameter.fpuEnable) { - response.bits.float := decodeResultReg(Decoder.float) + io.response.bits.float := decodeResultReg(Decoder.float) } else { - response.bits.float := false.B + io.response.bits.float := false.B } when(requestRegDequeue.fire) { ffoIndexReg.valid := false.B @@ -1542,15 +1563,17 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa completedVec(index) := lane.laneResponse.bits.ffoSuccess flotReduceValid(index).foreach(d => d := lane.laneResponse.bits.fpReduceValid.get) } - // TODO: add other probes for lane at here. - define(laneProbes(index), lane.probe) - define(laneVrfProbes(index), lane.vrfProbe) lane } + laneVec.zipWithIndex.foreach { case (lane, index) => + define(io.laneProbes(index), lane.probe) + define(io.laneVrfProbes(index), lane.vrfProbe) + } + omInstance.lanesIn := Property(laneVec.map(_.om.asAnyClassType)) - define(lsuProbe, lsu._probe) + define(io.lsuProbe, lsu._probe) dataInWritePipeVec := VecInit(laneVec.map(_.writeQueueValid)) @@ -1606,8 +1629,8 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa } } - highBandwidthLoadStorePort <> lsu.axi4Port - indexedLoadStorePort <> lsu.simpleAccessPorts + io.highBandwidthLoadStorePort <> lsu.axi4Port + io.indexedLoadStorePort <> lsu.simpleAccessPorts // 暂时直接连lsu的写,后续需要处理scheduler的写 vrfWrite.zip(lsu.vrfWritePort).foreach { case (sink, source) => sink <> source } @@ -1658,10 +1681,10 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa // Ensuring commit order inst.record.instructionIndex === responseCounter }) - response.valid := slotCommit.asUInt.orR - response.bits.data := Mux(ffoType, ffoIndexReg.bits, dataResult.bits) - response.bits.vxsat := DontCare - response.bits.mem := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR + io.response.valid := slotCommit.asUInt.orR + io.response.bits.data := Mux(ffoType, ffoIndexReg.bits, dataResult.bits) + io.response.bits.vxsat := DontCare + io.response.bits.mem := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR lastSlotCommit := slotCommit.last } @@ -1688,10 +1711,8 @@ class T1(val parameter: T1Parameter) extends Module with SerializableModule[T1Pa /** * Probes */ - @public - val t1Probe = IO(Output(Probe(new T1Probe(parameter)))) val probeWire = Wire(new T1Probe(parameter)) - define(t1Probe, ProbeValue(probeWire)) + define(io.t1Probe, ProbeValue(probeWire)) probeWire.instructionCounter := instructionCounter probeWire.instructionIssue := requestRegDequeue.fire probeWire.issueTag := requestReg.bits.instructionIndex