Skip to content

Commit

Permalink
[RISCV] Add 32 bit GPR sub-register for Zfinx. (#108336)
Browse files Browse the repository at this point in the history
This patch adds a 32-bit register class for use with Zfinx instructions. This makes them more similar to F instructions and allows us to spill only 32 bits.
    
I've added CodeGenOnly instructions for load/store using GPRF32 as that gave better results than insert_subreg/extract_subreg.
    
Function arguments use this new GPRF32 register class for f32 arguments with Zfinx, eliminating the need to use RISCVISD::FMV* nodes.
    
This is similar to #107446, which adds a 16-bit register class.
  • Loading branch information
topperc authored Oct 2, 2024
1 parent 0004fba commit bc91f3c
Show file tree
Hide file tree
Showing 22 changed files with 900 additions and 359 deletions.
10 changes: 10 additions & 0 deletions llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,8 +485,14 @@ struct RISCVOperand final : public MCParsedAsmOperand {
RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
}

// Returns true when this operand is a register that is a member of the
// GPRF32 register class.
bool isGPRF32() const {
  // Check the operand kind before looking at Reg.RegNum.
  if (Kind != KindTy::Register)
    return false;
  return RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
}

// Predicates for operands flagged with Reg.IsGPRAsFPR. Each one requires the
// operand to belong to the named register class (GPR, GPRF16, GPRF32 or
// GPRPair) and to have the IsGPRAsFPR flag set. The class check is evaluated
// first, so the flag is only read when that check succeeds.
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }

bool isGPRPair() const {
Expand Down Expand Up @@ -1352,6 +1358,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
return Match_Success;
}
if (Kind == MCK_GPRAsFPR32 && Op.isGPRAsFPR()) {
Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_W;
return Match_Success;
}

// There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
// reject them at parsing thinking we should match as GPRPairAsFPR for RV32.
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,19 @@ static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}

// Decodes an encoded register number into the matching GPRF32 register
// (X0_W + RegNo) and appends it to Inst as a register operand.
//
// Returns MCDisassembler::Fail when RegNo is out of range: 32 or above in
// general, or 16 or above when the subtarget has FeatureStdExtE. Address is
// unused.
static DecodeStatus DecodeGPRF32RegisterClass(MCInst &Inst, uint32_t RegNo,
                                              uint64_t Address,
                                              const MCDisassembler *Decoder) {
  const bool HasStdExtE =
      Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);

  // Number of encodable registers for this subtarget.
  const uint32_t RegLimit = HasStdExtE ? 16 : 32;
  if (RegNo >= RegLimit)
    return MCDisassembler::Fail;

  const MCRegister WReg = RISCV::X0_W + RegNo;
  Inst.addOperand(MCOperand::createReg(WReg));
  return MCDisassembler::Success;
}

static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
Expand Down
76 changes: 67 additions & 9 deletions llvm/lib/Target/RISCV/RISCVCallingConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,23 @@ static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
return ArrayRef(ArgIGPRs);
}

// Returns the list of 32-bit GPR sub-registers (X*_W) used for passing
// arguments under the given ABI, in allocation order.
static ArrayRef<MCPhysReg> getArgGPR32s(const RISCVABI::ABI ABI) {
  // ILP32E/LP64E: only X10_W through X15_W are available.
  static const MCPhysReg EAbiArgRegs[] = {RISCV::X10_W, RISCV::X11_W,
                                          RISCV::X12_W, RISCV::X13_W,
                                          RISCV::X14_W, RISCV::X15_W};
  // Every other ILP32*/LP64* ABI: X10_W through X17_W.
  static const MCPhysReg BaseAbiArgRegs[] = {
      RISCV::X10_W, RISCV::X11_W, RISCV::X12_W, RISCV::X13_W,
      RISCV::X14_W, RISCV::X15_W, RISCV::X16_W, RISCV::X17_W};

  switch (ABI) {
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
    return ArrayRef(EAbiArgRegs);
  default:
    return ArrayRef(BaseAbiArgRegs);
  }
}

static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
// The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
// for save-restore libcall, so we don't use them.
Expand Down Expand Up @@ -194,6 +211,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
return ArrayRef(FastCCIGPRs);
}

// Returns the list of 32-bit GPR sub-registers (X*_W) used for passing
// arguments with FastCC under the given ABI, in allocation order.
//
// X5 and X6 are left out because they might be used by the save-restore
// libcall, and X7 is left out because Zicfilp uses it as the label register.
static ArrayRef<MCPhysReg> getFastCCArgGPRF32s(const RISCVABI::ABI ABI) {
  // FastCC with ILP32E/LP64E: only X10_W through X15_W.
  static const MCPhysReg EAbiFastRegs[] = {RISCV::X10_W, RISCV::X11_W,
                                           RISCV::X12_W, RISCV::X13_W,
                                           RISCV::X14_W, RISCV::X15_W};
  // FastCC with every other ABI: X10_W-X17_W followed by X28_W-X31_W.
  static const MCPhysReg BaseAbiFastRegs[] = {
      RISCV::X10_W, RISCV::X11_W, RISCV::X12_W, RISCV::X13_W,
      RISCV::X14_W, RISCV::X15_W, RISCV::X16_W, RISCV::X17_W,
      RISCV::X28_W, RISCV::X29_W, RISCV::X30_W, RISCV::X31_W};

  switch (ABI) {
  case RISCVABI::ABI_ILP32E:
  case RISCVABI::ABI_LP64E:
    return ArrayRef(EAbiFastRegs);
  default:
    return ArrayRef(BaseAbiFastRegs);
  }
}

// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
Expand Down Expand Up @@ -364,11 +401,17 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}

if (ValVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
if (MCRegister Reg = State.AllocateReg(getArgGPR32s(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}

ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);

// Zfinx/Zdinx use GPR without a bitcast when possible.
if ((LocVT == MVT::f32 && XLen == 32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx())) {
// Zdinx use GPR without a bitcast when possible.
if (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx()) {
if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
Expand Down Expand Up @@ -616,10 +659,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}

// Check if there is an available GPRF32 before hitting the stack.
if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF32s(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}

// Check if there is an available GPR before hitting the stack.
if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.is64Bit() &&
Subtarget.hasStdExtZdinx())) {
if (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx()) {
if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
if (LocVT.getSizeInBits() != Subtarget.getXLen()) {
LocVT = XLenVT;
Expand Down Expand Up @@ -723,9 +772,18 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}

if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
Subtarget.is64Bit())) {
if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
static const MCPhysReg GPR32List[] = {
RISCV::X9_W, RISCV::X18_W, RISCV::X19_W, RISCV::X20_W,
RISCV::X21_W, RISCV::X22_W, RISCV::X23_W, RISCV::X24_W,
RISCV::X25_W, RISCV::X26_W, RISCV::X27_W};
if (MCRegister Reg = State.AllocateReg(GPR32List)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}

if (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit()) {
if (MCRegister Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
if (RC && RC->contains(RISCV::X0)) {
X0Reg = RISCV::X0;
} else if (RC && RC->contains(RISCV::X0_W)) {
X0Reg = RISCV::X0_W;
} else if (RC && RC->contains(RISCV::X0_H)) {
X0Reg = RISCV::X0_H;
} else {
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandMV_FPR32INX(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
Expand Down Expand Up @@ -108,6 +110,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMV_FPR16INX:
return expandMV_FPR16INX(MBB, MBBI);
case RISCV::PseudoMV_FPR32INX:
return expandMV_FPR32INX(MBB, MBBI);
case RISCV::PseudoRV32ZdinxSD:
return expandRV32ZdinxStore(MBB, MBBI);
case RISCV::PseudoRV32ZdinxLD:
Expand Down Expand Up @@ -287,6 +291,23 @@ bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB,
return true;
}

// Expands PseudoMV_FPR32INX into `ADDI dst, src, 0`, where dst and src are
// the GPR super-registers whose sub_32 sub-registers are the pseudo's
// operands. The kill state of the source operand is carried over to the new
// instruction. Always returns true.
bool RISCVExpandPseudo::expandMV_FPR32INX(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  const TargetRegisterInfo *RegInfo = STI->getRegisterInfo();
  const DebugLoc Loc = MBBI->getDebugLoc();
  const auto &DstOp = MBBI->getOperand(0);
  const auto &SrcOp = MBBI->getOperand(1);

  // Maps a 32-bit sub-register to the GPR that contains it as sub_32.
  auto WidenToGPR = [RegInfo](Register SubReg) -> Register {
    return RegInfo->getMatchingSuperReg(SubReg, RISCV::sub_32,
                                        &RISCV::GPRRegClass);
  };
  const Register WideDst = WidenToGPR(DstOp.getReg());
  const Register WideSrc = WidenToGPR(SrcOp.getReg());

  BuildMI(MBB, MBBI, Loc, TII->get(RISCV::ADDI), WideDst)
      .addReg(WideSrc, getKillRegState(SrcOp.isKill()))
      .addImm(0);

  // The replacement has been inserted; drop the pseudo last so its operands
  // stay valid while they are being read above.
  MBBI->eraseFromParent();
  return true;
}

// This function expands the PseudoRV32ZdinxSD for storing a double-precision
// floating-point value into memory by generating an equivalent instruction
// sequence for RV32.
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
Res =
CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
} else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
Res =
CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
} else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
Res = CurDAG->getMachineNode(
Opc, DL, VT, Imm,
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
MemBytes = 2;
break;
case RISCV::LW:
case RISCV::LW_INX:
case RISCV::FLW:
case RISCV::LWU:
MemBytes = 4;
Expand Down Expand Up @@ -150,6 +151,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
MemBytes = 2;
break;
case RISCV::SW:
case RISCV::SW_INX:
case RISCV::FSW:
MemBytes = 4;
break;
Expand Down Expand Up @@ -471,6 +473,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}

if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
.addReg(SrcReg,
getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
return;
}

if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
// Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
Expand Down Expand Up @@ -595,6 +604,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::SH_INX;
IsScalableVector = false;
} else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::SW_INX;
IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
Expand Down Expand Up @@ -681,6 +693,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
} else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::LH_INX;
IsScalableVector = false;
} else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::LW_INX;
IsScalableVector = false;
} else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
Expand Down Expand Up @@ -1554,6 +1569,7 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {

switch (Opcode) {
case RISCV::PseudoMV_FPR16INX:
case RISCV::PseudoMV_FPR32INX:
// MV is always compressible to either c.mv or c.li rd, 0.
return STI.hasStdExtCOrZca() ? 2 : 4;
case TargetOpcode::STACKMAP:
Expand Down Expand Up @@ -2614,6 +2630,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::LH_INX:
case RISCV::LHU:
case RISCV::LW:
case RISCV::LW_INX:
case RISCV::LWU:
case RISCV::LD:
case RISCV::FLH:
Expand All @@ -2623,6 +2640,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
case RISCV::SH:
case RISCV::SH_INX:
case RISCV::SW:
case RISCV::SW_INX:
case RISCV::SD:
case RISCV::FSH:
case RISCV::FSW:
Expand Down Expand Up @@ -2692,9 +2710,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
case RISCV::SH_INX:
case RISCV::FSH:
case RISCV::LW:
case RISCV::LW_INX:
case RISCV::LWU:
case RISCV::FLW:
case RISCV::SW:
case RISCV::SW_INX:
case RISCV::FSW:
case RISCV::LD:
case RISCV::FLD:
Expand Down
Loading

0 comments on commit bc91f3c

Please sign in to comment.