diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index fbad7d5d02db6c..78d6d7587160a3 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -485,8 +485,14 @@ struct RISCVOperand final : public MCParsedAsmOperand {
            RISCVMCRegisterClasses[RISCV::GPRF16RegClassID].contains(Reg.RegNum);
   }
 
+  bool isGPRF32() const {
+    return Kind == KindTy::Register &&
+           RISCVMCRegisterClasses[RISCV::GPRF32RegClassID].contains(Reg.RegNum);
+  }
+
   bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
   bool isGPRAsFPR16() const { return isGPRF16() && Reg.IsGPRAsFPR; }
+  bool isGPRAsFPR32() const { return isGPRF32() && Reg.IsGPRAsFPR; }
   bool isGPRPairAsFPR() const { return isGPRPair() && Reg.IsGPRAsFPR; }
 
   bool isGPRPair() const {
@@ -1352,6 +1358,10 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
     Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_H;
     return Match_Success;
   }
+  if (Kind == MCK_GPRAsFPR32 && Op.isGPRAsFPR()) {
+    Op.Reg.RegNum = Reg - RISCV::X0 + RISCV::X0_W;
+    return Match_Success;
+  }
 
   // There are some GPRF64AsFPR instructions that have no RV32 equivalent. We
   // reject them at parsing thinking we should match as GPRPairAsFPR for RV32.
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index c2659a51b02096..7c8206cb44dec2 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -94,6 +94,19 @@ static DecodeStatus DecodeGPRF16RegisterClass(MCInst &Inst, uint32_t RegNo,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeGPRF32RegisterClass(MCInst &Inst, uint32_t RegNo,
+                                              uint64_t Address,
+                                              const MCDisassembler *Decoder) {
+  bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureStdExtE);
+
+  if (RegNo >= 32 || (IsRVE && RegNo >= 16))
+    return MCDisassembler::Fail;
+
+  MCRegister Reg = RISCV::X0_W + RegNo;
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeGPRX1X5RegisterClass(MCInst &Inst, uint32_t RegNo,
                                                uint64_t Address,
                                                const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index d610f0b956027a..d3bfbb0943766e 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -156,6 +156,23 @@ static ArrayRef<MCPhysReg> getArgGPR16s(const RISCVABI::ABI ABI) {
   return ArrayRef(ArgIGPRs);
 }
 
+static ArrayRef<MCPhysReg> getArgGPR32s(const RISCVABI::ABI ABI) {
+  // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
+  // the ILP32E ABI.
+  static const MCPhysReg ArgIGPRs[] = {RISCV::X10_W, RISCV::X11_W, RISCV::X12_W,
+                                       RISCV::X13_W, RISCV::X14_W, RISCV::X15_W,
+                                       RISCV::X16_W, RISCV::X17_W};
+  // The GPRs used for passing arguments in the ILP32E/LP64E ABI.
+  static const MCPhysReg ArgEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
+                                       RISCV::X12_W, RISCV::X13_W,
+                                       RISCV::X14_W, RISCV::X15_W};
+
+  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+    return ArrayRef(ArgEGPRs);
+
+  return ArrayRef(ArgIGPRs);
+}
+
 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
   // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
   // for save-restore libcall, so we don't use them.
@@ -194,6 +211,26 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRF16s(const RISCVABI::ABI ABI) {
   return ArrayRef(FastCCIGPRs);
 }
 
+static ArrayRef<MCPhysReg> getFastCCArgGPRF32s(const RISCVABI::ABI ABI) {
+  // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
+  // for save-restore libcall, so we don't use them.
+  // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
+  static const MCPhysReg FastCCIGPRs[] = {
+      RISCV::X10_W, RISCV::X11_W, RISCV::X12_W, RISCV::X13_W,
+      RISCV::X14_W, RISCV::X15_W, RISCV::X16_W, RISCV::X17_W,
+      RISCV::X28_W, RISCV::X29_W, RISCV::X30_W, RISCV::X31_W};
+
+  // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+  static const MCPhysReg FastCCEGPRs[] = {RISCV::X10_W, RISCV::X11_W,
+                                          RISCV::X12_W, RISCV::X13_W,
+                                          RISCV::X14_W, RISCV::X15_W};
+
+  if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+    return ArrayRef(FastCCEGPRs);
+
+  return ArrayRef(FastCCIGPRs);
+}
+
 // Pass a 2*XLEN argument that has been split into two XLEN values through
 // registers or the stack as necessary.
 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
@@ -364,11 +401,17 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
+  if (ValVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+    if (MCRegister Reg = State.AllocateReg(getArgGPR32s(ABI))) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
   ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
 
-  // Zfinx/Zdinx use GPR without a bitcast when possible.
-  if ((LocVT == MVT::f32 && XLen == 32 && Subtarget.hasStdExtZfinx()) ||
-      (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx())) {
+  // Zdinx uses GPR without a bitcast when possible.
+  if (LocVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx()) {
     if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
@@ -616,10 +659,16 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
+  // Check if there is an available GPRF32 before hitting the stack.
+  if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+    if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRF32s(ABI))) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
   // Check if there is an available GPR before hitting the stack.
-  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
-      (LocVT == MVT::f64 && Subtarget.is64Bit() &&
-       Subtarget.hasStdExtZdinx())) {
+  if (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx()) {
     if (MCRegister Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
       if (LocVT.getSizeInBits() != Subtarget.getXLen()) {
         LocVT = XLenVT;
@@ -723,9 +772,18 @@ bool llvm::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
-  if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
-      (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
-       Subtarget.is64Bit())) {
+  if (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) {
+    static const MCPhysReg GPR32List[] = {
+        RISCV::X9_W,  RISCV::X18_W, RISCV::X19_W, RISCV::X20_W,
+        RISCV::X21_W, RISCV::X22_W, RISCV::X23_W, RISCV::X24_W,
+        RISCV::X25_W, RISCV::X26_W, RISCV::X27_W};
+    if (MCRegister Reg = State.AllocateReg(GPR32List)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() && Subtarget.is64Bit()) {
     if (MCRegister Reg = State.AllocateReg(GPRList)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 713c7a0661defe..d913c0b201a20c 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -97,6 +97,8 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
       const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
       if (RC && RC->contains(RISCV::X0)) {
         X0Reg = RISCV::X0;
+      } else if (RC && RC->contains(RISCV::X0_W)) {
+        X0Reg = RISCV::X0_W;
       } else if (RC && RC->contains(RISCV::X0_H)) {
         X0Reg = RISCV::X0_H;
       } else {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 2501256ca6adf0..5dcec078856ead 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -50,6 +50,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
                          MachineBasicBlock::iterator MBBI, unsigned Opcode);
   bool expandMV_FPR16INX(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI);
+  bool expandMV_FPR32INX(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI);
   bool expandRV32ZdinxStore(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI);
   bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
@@ -108,6 +110,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
   switch (MBBI->getOpcode()) {
   case RISCV::PseudoMV_FPR16INX:
     return expandMV_FPR16INX(MBB, MBBI);
+  case RISCV::PseudoMV_FPR32INX:
+    return expandMV_FPR32INX(MBB, MBBI);
   case RISCV::PseudoRV32ZdinxSD:
     return expandRV32ZdinxStore(MBB, MBBI);
   case RISCV::PseudoRV32ZdinxLD:
@@ -287,6 +291,23 @@ bool RISCVExpandPseudo::expandMV_FPR16INX(MachineBasicBlock &MBB,
   return true;
 }
 
+bool RISCVExpandPseudo::expandMV_FPR32INX(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI) {
+  DebugLoc DL = MBBI->getDebugLoc();
+  const TargetRegisterInfo *TRI = STI->getRegisterInfo();
+  Register DstReg = TRI->getMatchingSuperReg(
+      MBBI->getOperand(0).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
+  Register SrcReg = TRI->getMatchingSuperReg(
+      MBBI->getOperand(1).getReg(), RISCV::sub_32, &RISCV::GPRRegClass);
+
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), DstReg)
+      .addReg(SrcReg, getKillRegState(MBBI->getOperand(1).isKill()))
+      .addImm(0);
+
+  MBBI->eraseFromParent(); // The pseudo instruction is gone now.
+  return true;
+}
+
 // This function expands the PseudoRV32ZdinxSD for storing a double-precision
 // floating-point value into memory by generating an equivalent instruction
 // sequence for RV32.
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 23479c2edf1d91..3e3f3c2eca1468 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -931,6 +931,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
       Res =
           CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
+    } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
+      Res =
+          CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
     } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
       Res = CurDAG->getMachineNode(
           Opc, DL, VT, Imm,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 91503bd7f41f3c..b8539a5d1add14 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -110,6 +110,7 @@ Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
     MemBytes = 2;
     break;
   case RISCV::LW:
+  case RISCV::LW_INX:
   case RISCV::FLW:
   case RISCV::LWU:
     MemBytes = 4;
    break;
@@ -150,6 +151,7 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
     MemBytes = 2;
     break;
   case RISCV::SW:
+  case RISCV::SW_INX:
   case RISCV::FSW:
     MemBytes = 4;
     break;
@@ -471,6 +473,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
+  if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
+        .addReg(SrcReg,
+                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
+    return;
+  }
+
   if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
     // Emit an ADDI for both parts of GPRPair.
     BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
@@ -595,6 +604,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::SH_INX;
     IsScalableVector = false;
+  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
+    Opcode = RISCV::SW_INX;
+    IsScalableVector = false;
   } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::PseudoRV32ZdinxSD;
     IsScalableVector = false;
@@ -681,6 +693,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::LH_INX;
     IsScalableVector = false;
+  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
+    Opcode = RISCV::LW_INX;
+    IsScalableVector = false;
   } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::PseudoRV32ZdinxLD;
     IsScalableVector = false;
@@ -1554,6 +1569,7 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
 
   switch (Opcode) {
   case RISCV::PseudoMV_FPR16INX:
+  case RISCV::PseudoMV_FPR32INX:
     // MV is always compressible to either c.mv or c.li rd, 0.
     return STI.hasStdExtCOrZca() ? 2 : 4;
   case TargetOpcode::STACKMAP:
@@ -2614,6 +2630,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
   case RISCV::LH_INX:
   case RISCV::LHU:
   case RISCV::LW:
+  case RISCV::LW_INX:
   case RISCV::LWU:
   case RISCV::LD:
   case RISCV::FLH:
@@ -2623,6 +2640,7 @@ bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
   case RISCV::SH:
   case RISCV::SH_INX:
   case RISCV::SW:
+  case RISCV::SW_INX:
   case RISCV::SD:
   case RISCV::FSH:
   case RISCV::FSW:
@@ -2692,9 +2710,11 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
   case RISCV::SH_INX:
   case RISCV::FSH:
   case RISCV::LW:
+  case RISCV::LW_INX:
   case RISCV::LWU:
   case RISCV::FLW:
   case RISCV::SW:
+  case RISCV::SW_INX:
   case RISCV::FSW:
   case RISCV::LD:
   case RISCV::FLD:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 3f279b7a58ca68..7d742322b42969 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -331,6 +331,15 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
   let Inst{5} = imm{6};
 }
 
+let isCodeGenOnly = 1 in
+def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>,
+               Sched<[WriteLDW, ReadMemBase]> {
+  bits<7> imm;
+  let Inst{12-10} = imm{5-3};
+  let Inst{6} = imm{2};
+  let Inst{5} = imm{6};
+}
+
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
 def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
@@ -365,6 +374,15 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
   let Inst{5} = imm{6};
 }
 
+let isCodeGenOnly = 1 in
+def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>,
+               Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
+  bits<7> imm;
+  let Inst{12-10} = imm{5-3};
+  let Inst{6} = imm{2};
+  let Inst{5} = imm{6};
+}
+
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
 def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
@@ -517,6 +535,13 @@ def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
   let Inst{3-2} = imm{7-6};
 }
 
+let isCodeGenOnly = 1 in
+def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>,
+                 Sched<[WriteLDW, ReadMemBase]> {
+  let Inst{6-4} = imm{4-2};
+  let Inst{3-2} = imm{7-6};
+}
+
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
 def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
@@ -575,6 +600,13 @@ def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
   let Inst{8-7} = imm{7-6};
 }
 
+let isCodeGenOnly = 1 in
+def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>,
+                 Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
+  let Inst{12-9} = imm{5-2};
+  let Inst{8-7} = imm{7-6};
+}
+
 let DecoderNamespace = "RISCV32Only_",
     Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
 def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
@@ -869,6 +901,10 @@ def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
 let Predicates = [HasStdExtCOrZca] in {
 def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
                   (C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
+                  (C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
 } // Predicates = [HasStdExtCOrZca]
 
 let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -889,6 +925,10 @@ def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
 let Predicates = [HasStdExtCOrZca] in {
 def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
                   (C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
+                  (C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
 } // Predicates = [HasStdExtCOrZca]
 
 let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -992,6 +1032,10 @@ def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
 let Predicates = [HasStdExtCOrZca] in {
 def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
                   (C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
+                  (C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
 } // Predicates = [HasStdExtCOrZca]
 
 let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
@@ -1034,6 +1078,10 @@ def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
 let Predicates = [HasStdExtCOrZca] in {
 def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
                   (C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
+
+let isCompressOnly = true in
+def : CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
+                  (C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
 } // Predicates = [HasStdExtCOrZca]
 
 let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index a00acb372dc2a2..000b7cfedb0f91 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -83,15 +83,14 @@ def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3),
 
 // Zfinx
 
-def GPRAsFPR : AsmOperandClass {
-  let Name = "GPRAsFPR";
+def GPRAsFPR32 : AsmOperandClass {
+  let Name = "GPRAsFPR32";
   let ParserMethod = "parseGPRAsFPR";
   let RenderMethod = "addRegOperands";
 }
 
 def FPR32INX : RegisterOperand<GPRF32> {
-  let ParserMatchClass = GPRAsFPR;
-  let DecoderMethod = "DecodeGPRRegisterClass";
+  let ParserMatchClass = GPRAsFPR32;
 }
 
 // Describes a combination of predicates from F/D/Zfh/Zfhmin or
@@ -306,6 +305,19 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
 def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
 } // Predicates = [HasStdExtF]
 
+let Predicates = [HasStdExtZfinx], isCodeGenOnly = 1 in {
+def LW_INX : Load_ri<0b010, "lw", GPRF32>, Sched<[WriteLDW, ReadMemBase]>;
+def SW_INX : Store_rri<0b010, "sw", GPRF32>,
+             Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+
+// ADDI with GPRF32 register class to use for copy. This should not be used as
+// general ADDI, so the immediate should always be zero.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveReg = 1,
+    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def PseudoMV_FPR32INX : Pseudo<(outs GPRF32:$rd), (ins GPRF32:$rs), []>,
+                        Sched<[WriteIALU, ReadIALU]>;
+}
+
 foreach Ext = FExts in {
 let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32Addend] in {
 defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
@@ -685,12 +697,10 @@ defm Select_FPR32INX : SelectCC_GPR_rrirr<FPR32INX, f32>;
 def PseudoFROUND_S_INX : PseudoFROUND<FPR32INX, f32>;
 
 /// Loads
-def : Pat<(f32 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
-          (COPY_TO_REGCLASS (LW GPR:$rs1, simm12:$imm12), GPRF32)>;
+def : LdPat<load, LW_INX, f32>;
 
 /// Stores
-def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
-          (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
+def : StPat<store, SW_INX, GPRF32, f32>;
 } // Predicates = [HasStdExtZfinx]
 
 let Predicates = [HasStdExtF] in {
@@ -701,8 +711,8 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
 
 let Predicates = [HasStdExtZfinx] in {
 // Moves (no conversion)
-def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
-def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
+def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (EXTRACT_SUBREG GPR:$rs1, sub_32)>;
+def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$rs1, sub_32)>;
 } // Predicates = [HasStdExtZfinx]
 
 let Predicates = [HasStdExtF] in {
@@ -781,8 +791,8 @@ def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, FRM_DYN)>;
 
 let Predicates = [HasStdExtZfinx, IsRV64] in {
 // Moves (no conversion)
-def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (COPY_TO_REGCLASS GPR:$src, GPRF32)>;
-def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (COPY_TO_REGCLASS GPRF32:$src, GPR)>;
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (EXTRACT_SUBREG GPR:$src, sub_32)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (INSERT_SUBREG (XLenVT (IMPLICIT_DEF)), FPR32INX:$src, sub_32)>;
 
 // Use target specific isd nodes to help us remember the result is sign
 // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 5973e5bf2e5252..df5501e37f8313 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -109,7 +109,9 @@ static unsigned log2LdstWidth(unsigned Opcode) {
   case RISCV::SH_INX:
     return 1;
   case RISCV::LW:
+  case RISCV::LW_INX:
   case RISCV::SW:
+  case RISCV::SW_INX:
   case RISCV::FLW:
   case RISCV::FSW:
     return 2;
@@ -136,7 +138,9 @@ static unsigned offsetMask(unsigned Opcode) {
   case RISCV::SH_INX:
     return maskTrailingOnes<unsigned>(1U);
   case RISCV::LW:
+  case RISCV::LW_INX:
  case RISCV::SW:
+  case RISCV::SW_INX:
   case RISCV::FLW:
   case RISCV::FSW:
   case RISCV::LD:
@@ -178,6 +182,7 @@ static int64_t getBaseAdjustForCompression(int64_t Offset, unsigned Opcode) {
 static bool isCompressedReg(Register Reg) {
   return RISCV::GPRCRegClass.contains(Reg) ||
          RISCV::GPRF16CRegClass.contains(Reg) ||
+         RISCV::GPRF32CRegClass.contains(Reg) ||
         RISCV::FPR32CRegClass.contains(Reg) ||
          RISCV::FPR64CRegClass.contains(Reg);
 }
@@ -195,6 +200,7 @@ static bool isCompressibleLoad(const MachineInstr &MI) {
   case RISCV::LHU:
     return STI.hasStdExtZcb();
   case RISCV::LW:
+  case RISCV::LW_INX:
   case RISCV::LD:
     return STI.hasStdExtCOrZca();
   case RISCV::FLW:
@@ -216,6 +222,7 @@ static bool isCompressibleStore(const MachineInstr &MI) {
   case RISCV::SH_INX:
     return STI.hasStdExtZcb();
   case RISCV::SW:
+  case RISCV::SW_INX:
   case RISCV::SD:
     return STI.hasStdExtCOrZca();
   case RISCV::FSW:
@@ -329,6 +336,8 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
     RCToScavenge = &RISCV::GPRCRegClass;
   else if (RISCV::GPRF16RegClass.contains(RegImm.Reg))
     RCToScavenge = &RISCV::GPRF16CRegClass;
+  else if (RISCV::GPRF32RegClass.contains(RegImm.Reg))
+    RCToScavenge = &RISCV::GPRF32CRegClass;
   else if (RISCV::FPR32RegClass.contains(RegImm.Reg))
     RCToScavenge = &RISCV::FPR32CRegClass;
   else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
@@ -424,6 +433,11 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
         BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR16INX),
                 NewReg)
             .addReg(RegImm.Reg);
+      } else if (RISCV::GPRF32RegClass.contains(RegImm.Reg)) {
+        assert(RegImm.Imm == 0);
+        BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR32INX),
+                NewReg)
+            .addReg(RegImm.Reg);
       } else {
         // If we are looking at replacing an FPR register we don't expect to
        // have any offset. The only compressible FP instructions with an offset
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index b3a2877edde4e3..a324deb4e48f5c 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -387,6 +387,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
     case RISCV::LH:
     case RISCV::LH_INX:
     case RISCV::LW:
+    case RISCV::LW_INX:
     case RISCV::LBU:
     case RISCV::LHU:
     case RISCV::LWU:
@@ -398,6 +399,7 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
     case RISCV::SH:
     case RISCV::SH_INX:
     case RISCV::SW:
+    case RISCV::SW_INX:
     case RISCV::SD:
     case RISCV::FSH:
     case RISCV::FSW:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index e3c9ac52d66a35..33363aa8b71830 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -124,41 +124,81 @@ let RegAltNameIndices = [ABIRegAltName] in {
   let SubRegIndices = [sub_16] in {
   let isConstant = true in
-  def X0 : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>, DwarfRegNum<[0]>;
+  def X0_W : RISCVRegWithSubRegs<0, "x0", [X0_H], ["zero"]>;
   let CostPerUse = [0, 1] in {
-  def X1 : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>, DwarfRegNum<[1]>;
-  def X2 : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>, DwarfRegNum<[2]>;
-  def X3 : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>, DwarfRegNum<[3]>;
-  def X4 : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>, DwarfRegNum<[4]>;
-  def X5 : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>, DwarfRegNum<[5]>;
-  def X6 : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>, DwarfRegNum<[6]>;
-  def X7 : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>, DwarfRegNum<[7]>;
+  def X1_W : RISCVRegWithSubRegs<1, "x1", [X1_H], ["ra"]>;
+  def X2_W : RISCVRegWithSubRegs<2, "x2", [X2_H], ["sp"]>;
+  def X3_W : RISCVRegWithSubRegs<3, "x3", [X3_H], ["gp"]>;
+  def X4_W : RISCVRegWithSubRegs<4, "x4", [X4_H], ["tp"]>;
+  def X5_W : RISCVRegWithSubRegs<5, "x5", [X5_H], ["t0"]>;
+  def X6_W : RISCVRegWithSubRegs<6, "x6", [X6_H], ["t1"]>;
+  def X7_W : RISCVRegWithSubRegs<7, "x7", [X7_H], ["t2"]>;
   }
-  def X8 : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>, DwarfRegNum<[8]>;
-  def X9 : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>, DwarfRegNum<[9]>;
-  def X10 : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>, DwarfRegNum<[10]>;
-  def X11 : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>, DwarfRegNum<[11]>;
-  def X12 : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>, DwarfRegNum<[12]>;
-  def X13 : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>, DwarfRegNum<[13]>;
-  def X14 : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>, DwarfRegNum<[14]>;
-  def X15 : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>, DwarfRegNum<[15]>;
+  def X8_W : RISCVRegWithSubRegs<8, "x8", [X8_H], ["s0", "fp"]>;
+  def X9_W : RISCVRegWithSubRegs<9, "x9", [X9_H], ["s1"]>;
+  def X10_W : RISCVRegWithSubRegs<10,"x10", [X10_H], ["a0"]>;
+  def X11_W : RISCVRegWithSubRegs<11,"x11", [X11_H], ["a1"]>;
+  def X12_W : RISCVRegWithSubRegs<12,"x12", [X12_H], ["a2"]>;
+  def X13_W : RISCVRegWithSubRegs<13,"x13", [X13_H], ["a3"]>;
+  def X14_W : RISCVRegWithSubRegs<14,"x14", [X14_H], ["a4"]>;
+  def X15_W : RISCVRegWithSubRegs<15,"x15", [X15_H], ["a5"]>;
   let CostPerUse = [0, 1] in {
-  def X16 : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>, DwarfRegNum<[16]>;
-  def X17 : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>, DwarfRegNum<[17]>;
-  def X18 : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>, DwarfRegNum<[18]>;
-  def X19 : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>, DwarfRegNum<[19]>;
-  def X20 : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>, DwarfRegNum<[20]>;
-  def X21 : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>, DwarfRegNum<[21]>;
-  def X22 : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>, DwarfRegNum<[22]>;
-  def X23 : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>, DwarfRegNum<[23]>;
-  def X24 : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>, DwarfRegNum<[24]>;
-  def X25 : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>, DwarfRegNum<[25]>;
-  def X26 : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>, DwarfRegNum<[26]>;
-  def X27 : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>, DwarfRegNum<[27]>;
-  def X28 : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>, DwarfRegNum<[28]>;
-  def X29 : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>, DwarfRegNum<[29]>;
-  def X30 : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>, DwarfRegNum<[30]>;
-  def X31 : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>, DwarfRegNum<[31]>;
+  def X16_W : RISCVRegWithSubRegs<16,"x16", [X16_H], ["a6"]>;
+  def X17_W : RISCVRegWithSubRegs<17,"x17", [X17_H], ["a7"]>;
+  def X18_W : RISCVRegWithSubRegs<18,"x18", [X18_H], ["s2"]>;
+  def X19_W : RISCVRegWithSubRegs<19,"x19", [X19_H], ["s3"]>;
+  def X20_W : RISCVRegWithSubRegs<20,"x20", [X20_H], ["s4"]>;
+  def X21_W : RISCVRegWithSubRegs<21,"x21", [X21_H], ["s5"]>;
+  def X22_W : RISCVRegWithSubRegs<22,"x22", [X22_H], ["s6"]>;
+  def X23_W : RISCVRegWithSubRegs<23,"x23", [X23_H], ["s7"]>;
+  def X24_W : RISCVRegWithSubRegs<24,"x24", [X24_H], ["s8"]>;
+  def X25_W : RISCVRegWithSubRegs<25,"x25", [X25_H], ["s9"]>;
+  def X26_W : RISCVRegWithSubRegs<26,"x26", [X26_H], ["s10"]>;
+  def X27_W : RISCVRegWithSubRegs<27,"x27", [X27_H], ["s11"]>;
+  def X28_W : RISCVRegWithSubRegs<28,"x28", [X28_H], ["t3"]>;
+  def X29_W : RISCVRegWithSubRegs<29,"x29", [X29_H], ["t4"]>;
+  def X30_W : RISCVRegWithSubRegs<30,"x30", [X30_H], ["t5"]>;
+  def X31_W : RISCVRegWithSubRegs<31,"x31", [X31_H], ["t6"]>;
+  }
+  }
+
+  let SubRegIndices = [sub_32] in {
+  let isConstant = true in
+  def X0 : RISCVRegWithSubRegs<0, "x0", [X0_W], ["zero"]>, DwarfRegNum<[0]>;
+  let CostPerUse = [0, 1] in {
+  def X1 : RISCVRegWithSubRegs<1, "x1", [X1_W], ["ra"]>, DwarfRegNum<[1]>;
+  def X2 : RISCVRegWithSubRegs<2, "x2", [X2_W], ["sp"]>, DwarfRegNum<[2]>;
+  def X3 : RISCVRegWithSubRegs<3, "x3", [X3_W], ["gp"]>, DwarfRegNum<[3]>;
+  def X4 : RISCVRegWithSubRegs<4, "x4", [X4_W], ["tp"]>, DwarfRegNum<[4]>;
+  def X5 : RISCVRegWithSubRegs<5, "x5", [X5_W], ["t0"]>, DwarfRegNum<[5]>;
+  def X6 : RISCVRegWithSubRegs<6, "x6", [X6_W], ["t1"]>, DwarfRegNum<[6]>;
+  def X7 : RISCVRegWithSubRegs<7, "x7", [X7_W], ["t2"]>, DwarfRegNum<[7]>;
+  }
+  def X8 : RISCVRegWithSubRegs<8, "x8", [X8_W], ["s0", "fp"]>, DwarfRegNum<[8]>;
+  def X9 : RISCVRegWithSubRegs<9, "x9", [X9_W], ["s1"]>, DwarfRegNum<[9]>;
+  def X10 : RISCVRegWithSubRegs<10,"x10", [X10_W], ["a0"]>, DwarfRegNum<[10]>;
+  def X11 : RISCVRegWithSubRegs<11,"x11", [X11_W], ["a1"]>, DwarfRegNum<[11]>;
+  def X12 : RISCVRegWithSubRegs<12,"x12", [X12_W], ["a2"]>, DwarfRegNum<[12]>;
+  def X13 : RISCVRegWithSubRegs<13,"x13", [X13_W], ["a3"]>, DwarfRegNum<[13]>;
+  def X14 : RISCVRegWithSubRegs<14,"x14", [X14_W], ["a4"]>, DwarfRegNum<[14]>;
+  def X15 : RISCVRegWithSubRegs<15,"x15", [X15_W], ["a5"]>, DwarfRegNum<[15]>;
+  let CostPerUse = [0, 1] in {
+  def X16 : RISCVRegWithSubRegs<16,"x16", [X16_W], ["a6"]>, DwarfRegNum<[16]>;
+  def X17 : RISCVRegWithSubRegs<17,"x17", [X17_W], ["a7"]>, DwarfRegNum<[17]>;
+  def X18 : RISCVRegWithSubRegs<18,"x18", [X18_W], ["s2"]>, DwarfRegNum<[18]>;
+  def X19 : RISCVRegWithSubRegs<19,"x19", [X19_W], ["s3"]>, DwarfRegNum<[19]>;
+  def X20 : RISCVRegWithSubRegs<20,"x20", [X20_W], ["s4"]>, DwarfRegNum<[20]>;
+  def X21 : RISCVRegWithSubRegs<21,"x21", [X21_W], ["s5"]>, DwarfRegNum<[21]>;
+  def X22 : RISCVRegWithSubRegs<22,"x22", [X22_W], ["s6"]>, DwarfRegNum<[22]>;
+  def X23 : RISCVRegWithSubRegs<23,"x23", [X23_W], ["s7"]>, DwarfRegNum<[23]>;
+  def X24 : RISCVRegWithSubRegs<24,"x24", [X24_W], ["s8"]>, DwarfRegNum<[24]>;
+  def X25 : RISCVRegWithSubRegs<25,"x25", [X25_W], ["s9"]>, DwarfRegNum<[25]>;
+  def X26 : RISCVRegWithSubRegs<26,"x26", [X26_W], ["s10"]>, DwarfRegNum<[26]>;
+  def X27 : RISCVRegWithSubRegs<27,"x27", [X27_W], ["s11"]>, DwarfRegNum<[27]>;
+  def X28 : RISCVRegWithSubRegs<28,"x28", [X28_W], ["t3"]>, DwarfRegNum<[28]>;
+  def X29 : RISCVRegWithSubRegs<29,"x29", [X29_W], ["t4"]>, DwarfRegNum<[29]>;
+  def X30 : RISCVRegWithSubRegs<30,"x30", [X30_W], ["t5"]>, DwarfRegNum<[30]>;
+  def X31 : RISCVRegWithSubRegs<31,"x31", [X31_W], ["t6"]>, DwarfRegNum<[31]>;
   }
   }
 }
@@ -617,9 +657,15 @@ def GPRF16 : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 17),
 def GPRF16C : RISCVRegisterClass<[f16], 16, (add (sequence "X%u_H", 10, 15),
                                                  (sequence "X%u_H", 8, 9))>;
 
-let RegInfos = XLenRI in {
-def GPRF32 : RISCVRegisterClass<[f32], 32, (add GPR)>;
-} // RegInfos = XLenRI
+def GPRF32 : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 17),
+                                                (sequence "X%u_W", 5, 7),
+                                                (sequence "X%u_W", 28, 31),
+                                                (sequence "X%u_W", 8, 9),
+                                                (sequence "X%u_W", 18, 27),
+                                                (sequence "X%u_W", 0, 4))>;
+def GPRF32C : RISCVRegisterClass<[f32], 32, (add (sequence "X%u_W", 10, 15),
+                                                 (sequence "X%u_W", 8, 9))>;
+def GPRF32NoX0 : RISCVRegisterClass<[f32], 32, (sub GPRF32, X0_W)>;
 
 // Dummy zero register for use in the register pair containing X0 (as X1 is
 // not read to or written when the X0 register pair is used).
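
The remaining hunks update auto-generated CodeGen tests for the new register class. As a rough, hedged illustration of the kind of input those tests compile (this example is not part of the patch, and the exact assembly depends on the subtarget), a single-precision Zfinx function keeps its float values in the integer register file, which is exactly what the GPRF32 view of the X registers models:

; Hypothetical standalone example, assuming an llc built with this change:
;   llc -mtriple=riscv32 -mattr=+zfinx example.ll -o -
; With Zfinx there is no separate FP register file, so %a and %b arrive in
; a0/a1 and the fadd.s result is returned in a0.
define float @add_f32(float %a, float %b) nounwind {
  %sum = fadd float %a, %b
  ret float %sum
}
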
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index de5bb8a30db16b..0eefc34ad552a9 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -23,26 +23,34 @@ define half @caller_half(half %x) nounwind { ; ; ZFINX32-LABEL: caller_half: ; ZFINX32: # %bb.0: # %entry +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX32-NEXT: lui a1, 1048560 ; ZFINX32-NEXT: or a0, a0, a1 +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX32-NEXT: tail h ; ; ZFINX64-LABEL: caller_half: ; ZFINX64: # %bb.0: # %entry +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX64-NEXT: lui a1, 1048560 ; ZFINX64-NEXT: or a0, a0, a1 +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX64-NEXT: tail h ; ; ZDINX32-LABEL: caller_half: ; ZDINX32: # %bb.0: # %entry +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX32-NEXT: lui a1, 1048560 ; ZDINX32-NEXT: or a0, a0, a1 +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX32-NEXT: tail h ; ; ZDINX64-LABEL: caller_half: ; ZDINX64: # %bb.0: # %entry +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX64-NEXT: lui a1, 1048560 ; ZDINX64-NEXT: or a0, a0, a1 +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX64-NEXT: tail h entry: %0 = tail call fastcc half @h(half %x) @@ -60,26 +68,34 @@ define internal fastcc half @h(half %x) nounwind { ; ; ZFINX32-LABEL: h: ; ZFINX32: # %bb.0: +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX32-NEXT: lui a1, 1048560 ; ZFINX32-NEXT: or a0, a0, a1 +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX32-NEXT: ret ; ; ZFINX64-LABEL: h: ; ZFINX64: # %bb.0: +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX64-NEXT: lui a1, 1048560 ; ZFINX64-NEXT: or a0, a0, a1 +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: h: ; ZDINX32: # %bb.0: +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX32-NEXT: lui a1, 1048560 ; ZDINX32-NEXT: or a0, a0, a1 +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX32-NEXT: ret ; ; ZDINX64-LABEL: h: ; ZDINX64: # %bb.0: +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX64-NEXT: lui a1, 1048560 ; ZDINX64-NEXT: or a0, a0, a1 +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX64-NEXT: ret ret half %x } @@ -220,24 +236,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind { ; ZFINX32: # %bb.0: ; ZFINX32-NEXT: lui a1, 1048560 ; ZFINX32-NEXT: or a0, a0, a1 +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX32-NEXT: ret ; ; ZFINX64-LABEL: callee_half_32: ; ZFINX64: # %bb.0: ; ZFINX64-NEXT: lui a1, 1048560 ; ZFINX64-NEXT: or a0, a0, a1 +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: callee_half_32: ; ZDINX32: # %bb.0: ; ZDINX32-NEXT: lui a1, 1048560 ; ZDINX32-NEXT: or a0, a0, a1 +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX32-NEXT: ret ; ; ZDINX64-LABEL: callee_half_32: ; ZDINX64: # %bb.0: ; ZDINX64-NEXT: lui a1, 1048560 ; ZDINX64-NEXT: or a0, a0, a1 +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX64-NEXT: ret %B = extractelement <32 x half> %A, i32 0 ret half %B @@ -492,8 +512,10 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: lw 
a4, 92(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: call callee_half_32 +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX32-NEXT: lui a1, 1048560 ; ZFINX32-NEXT: or a0, a0, a1 +; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload ; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload @@ -588,8 +610,10 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: call callee_half_32 +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZFINX64-NEXT: lui a1, 1048560 ; ZFINX64-NEXT: or a0, a0, a1 +; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZFINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload ; ZFINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload @@ -684,8 +708,10 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZDINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: call callee_half_32 +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX32-NEXT: lui a1, 1048560 ; ZDINX32-NEXT: or a0, a0, a1 +; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload ; ZDINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload @@ -780,8 +806,10 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZDINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: call callee_half_32 +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; ZDINX64-NEXT: lui a1, 1048560 ; ZDINX64-NEXT: or a0, a0, a1 +; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; ZDINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload ; ZDINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload @@ -917,79 +945,87 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZHINX64-LABEL: caller_float_32: ; ZHINX64: # %bb.0: -; ZHINX64-NEXT: addi sp, sp, -192 -; ZHINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill -; ZHINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: addi sp, sp, -208 +; ZHINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s8, 128(sp) # 8-byte Folded 
Spill +; ZHINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill +; ZHINX64-NEXT: lw t0, 208(sp) +; ZHINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill +; ZHINX64-NEXT: lw t0, 216(sp) +; ZHINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill ; ZHINX64-NEXT: lw t0, 224(sp) -; ZHINX64-NEXT: lw t1, 232(sp) -; ZHINX64-NEXT: lw t2, 240(sp) -; ZHINX64-NEXT: lw s0, 248(sp) -; ZHINX64-NEXT: lw t3, 256(sp) -; ZHINX64-NEXT: lw t4, 264(sp) -; ZHINX64-NEXT: lw t5, 272(sp) -; ZHINX64-NEXT: lw t6, 280(sp) -; ZHINX64-NEXT: lw s1, 288(sp) -; ZHINX64-NEXT: lw s2, 296(sp) -; ZHINX64-NEXT: lw s3, 304(sp) -; ZHINX64-NEXT: lw s4, 312(sp) -; ZHINX64-NEXT: lw s5, 320(sp) -; ZHINX64-NEXT: lw s6, 328(sp) -; ZHINX64-NEXT: lw s7, 336(sp) -; ZHINX64-NEXT: lw s8, 344(sp) -; ZHINX64-NEXT: lw s9, 352(sp) -; ZHINX64-NEXT: lw s10, 360(sp) -; ZHINX64-NEXT: lw s11, 368(sp) -; ZHINX64-NEXT: lw ra, 376(sp) -; ZHINX64-NEXT: sw ra, 76(sp) -; ZHINX64-NEXT: sw s11, 72(sp) -; ZHINX64-NEXT: sw s10, 68(sp) -; ZHINX64-NEXT: sw s9, 64(sp) -; ZHINX64-NEXT: sw s8, 60(sp) -; ZHINX64-NEXT: sw s7, 56(sp) -; ZHINX64-NEXT: sw s6, 52(sp) -; ZHINX64-NEXT: sw s5, 48(sp) -; ZHINX64-NEXT: sw s4, 44(sp) -; ZHINX64-NEXT: sw s3, 40(sp) -; ZHINX64-NEXT: sw s2, 36(sp) -; ZHINX64-NEXT: sw s1, 32(sp) -; ZHINX64-NEXT: sw t6, 28(sp) -; ZHINX64-NEXT: sw t5, 24(sp) -; ZHINX64-NEXT: sw t4, 20(sp) -; ZHINX64-NEXT: sw t3, 16(sp) -; ZHINX64-NEXT: lw t3, 192(sp) -; ZHINX64-NEXT: lw t4, 200(sp) -; ZHINX64-NEXT: lw t5, 208(sp) -; ZHINX64-NEXT: lw t6, 216(sp) +; ZHINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill +; ZHINX64-NEXT: lw t0, 232(sp) +; ZHINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill +; ZHINX64-NEXT: lw t6, 240(sp) +; ZHINX64-NEXT: lw t5, 248(sp) +; ZHINX64-NEXT: lw t4, 256(sp) +; ZHINX64-NEXT: lw s0, 264(sp) +; ZHINX64-NEXT: lw s1, 272(sp) +; ZHINX64-NEXT: lw s2, 280(sp) +; ZHINX64-NEXT: lw s3, 288(sp) +; ZHINX64-NEXT: lw s4, 296(sp) +; ZHINX64-NEXT: lw s5, 304(sp) +; ZHINX64-NEXT: lw s6, 312(sp) +; ZHINX64-NEXT: lw s7, 320(sp) +; ZHINX64-NEXT: lw s8, 328(sp) +; ZHINX64-NEXT: lw s9, 336(sp) +; ZHINX64-NEXT: lw s10, 344(sp) +; ZHINX64-NEXT: lw s11, 352(sp) +; ZHINX64-NEXT: lw ra, 360(sp) +; ZHINX64-NEXT: lw t3, 368(sp) +; ZHINX64-NEXT: lw t2, 376(sp) +; ZHINX64-NEXT: lw t1, 384(sp) +; ZHINX64-NEXT: lw t0, 392(sp) +; ZHINX64-NEXT: sw t0, 76(sp) +; ZHINX64-NEXT: sw t1, 72(sp) +; ZHINX64-NEXT: sw t2, 68(sp) +; ZHINX64-NEXT: sw t3, 64(sp) +; ZHINX64-NEXT: sw ra, 60(sp) +; ZHINX64-NEXT: sw s11, 56(sp) +; ZHINX64-NEXT: sw s10, 52(sp) +; ZHINX64-NEXT: sw s9, 48(sp) +; ZHINX64-NEXT: sw s8, 44(sp) +; ZHINX64-NEXT: sw s7, 40(sp) +; ZHINX64-NEXT: sw s6, 36(sp) +; ZHINX64-NEXT: sw s5, 32(sp) +; ZHINX64-NEXT: sw s4, 28(sp) +; ZHINX64-NEXT: sw s3, 24(sp) +; ZHINX64-NEXT: sw s2, 20(sp) +; ZHINX64-NEXT: sw s1, 16(sp) ; ZHINX64-NEXT: sw s0, 12(sp) -; ZHINX64-NEXT: sw t2, 8(sp) -; ZHINX64-NEXT: sw t1, 4(sp) -; ZHINX64-NEXT: sw t0, 0(sp) +; ZHINX64-NEXT: sw t4, 8(sp) +; ZHINX64-NEXT: sw t5, 4(sp) +; ZHINX64-NEXT: sw t6, 0(sp) +; ZHINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload +; ZHINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload +; ZHINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload +; ZHINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload ; ZHINX64-NEXT: call callee_float_32 -; ZHINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s2, 160(sp) # 
8-byte Folded Reload -; ZHINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload -; ZHINX64-NEXT: addi sp, sp, 192 +; ZHINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; ZHINX64-NEXT: addi sp, sp, 208 ; ZHINX64-NEXT: ret ; ; ZFINX32-LABEL: caller_float_32: @@ -1079,79 +1115,87 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZFINX64-LABEL: caller_float_32: ; ZFINX64: # %bb.0: -; ZFINX64-NEXT: addi sp, sp, -192 -; ZFINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill -; ZFINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: addi sp, sp, -208 +; ZFINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill +; ZFINX64-NEXT: lw t0, 208(sp) +; ZFINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill +; ZFINX64-NEXT: lw t0, 216(sp) +; ZFINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill ; ZFINX64-NEXT: lw t0, 224(sp) -; ZFINX64-NEXT: lw t1, 232(sp) -; ZFINX64-NEXT: lw t2, 240(sp) -; ZFINX64-NEXT: lw s0, 248(sp) -; ZFINX64-NEXT: lw t3, 256(sp) -; ZFINX64-NEXT: lw t4, 264(sp) -; ZFINX64-NEXT: lw t5, 272(sp) -; ZFINX64-NEXT: lw t6, 280(sp) -; ZFINX64-NEXT: lw s1, 288(sp) -; ZFINX64-NEXT: lw s2, 296(sp) -; ZFINX64-NEXT: lw s3, 304(sp) -; ZFINX64-NEXT: lw s4, 312(sp) -; ZFINX64-NEXT: lw s5, 
320(sp) -; ZFINX64-NEXT: lw s6, 328(sp) -; ZFINX64-NEXT: lw s7, 336(sp) -; ZFINX64-NEXT: lw s8, 344(sp) -; ZFINX64-NEXT: lw s9, 352(sp) -; ZFINX64-NEXT: lw s10, 360(sp) -; ZFINX64-NEXT: lw s11, 368(sp) -; ZFINX64-NEXT: lw ra, 376(sp) -; ZFINX64-NEXT: sw ra, 76(sp) -; ZFINX64-NEXT: sw s11, 72(sp) -; ZFINX64-NEXT: sw s10, 68(sp) -; ZFINX64-NEXT: sw s9, 64(sp) -; ZFINX64-NEXT: sw s8, 60(sp) -; ZFINX64-NEXT: sw s7, 56(sp) -; ZFINX64-NEXT: sw s6, 52(sp) -; ZFINX64-NEXT: sw s5, 48(sp) -; ZFINX64-NEXT: sw s4, 44(sp) -; ZFINX64-NEXT: sw s3, 40(sp) -; ZFINX64-NEXT: sw s2, 36(sp) -; ZFINX64-NEXT: sw s1, 32(sp) -; ZFINX64-NEXT: sw t6, 28(sp) -; ZFINX64-NEXT: sw t5, 24(sp) -; ZFINX64-NEXT: sw t4, 20(sp) -; ZFINX64-NEXT: sw t3, 16(sp) -; ZFINX64-NEXT: lw t3, 192(sp) -; ZFINX64-NEXT: lw t4, 200(sp) -; ZFINX64-NEXT: lw t5, 208(sp) -; ZFINX64-NEXT: lw t6, 216(sp) +; ZFINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill +; ZFINX64-NEXT: lw t0, 232(sp) +; ZFINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill +; ZFINX64-NEXT: lw t6, 240(sp) +; ZFINX64-NEXT: lw t5, 248(sp) +; ZFINX64-NEXT: lw t4, 256(sp) +; ZFINX64-NEXT: lw s0, 264(sp) +; ZFINX64-NEXT: lw s1, 272(sp) +; ZFINX64-NEXT: lw s2, 280(sp) +; ZFINX64-NEXT: lw s3, 288(sp) +; ZFINX64-NEXT: lw s4, 296(sp) +; ZFINX64-NEXT: lw s5, 304(sp) +; ZFINX64-NEXT: lw s6, 312(sp) +; ZFINX64-NEXT: lw s7, 320(sp) +; ZFINX64-NEXT: lw s8, 328(sp) +; ZFINX64-NEXT: lw s9, 336(sp) +; ZFINX64-NEXT: lw s10, 344(sp) +; ZFINX64-NEXT: lw s11, 352(sp) +; ZFINX64-NEXT: lw ra, 360(sp) +; ZFINX64-NEXT: lw t3, 368(sp) +; ZFINX64-NEXT: lw t2, 376(sp) +; ZFINX64-NEXT: lw t1, 384(sp) +; ZFINX64-NEXT: lw t0, 392(sp) +; ZFINX64-NEXT: sw t0, 76(sp) +; ZFINX64-NEXT: sw t1, 72(sp) +; ZFINX64-NEXT: sw t2, 68(sp) +; ZFINX64-NEXT: sw t3, 64(sp) +; ZFINX64-NEXT: sw ra, 60(sp) +; ZFINX64-NEXT: sw s11, 56(sp) +; ZFINX64-NEXT: sw s10, 52(sp) +; ZFINX64-NEXT: sw s9, 48(sp) +; ZFINX64-NEXT: sw s8, 44(sp) +; ZFINX64-NEXT: sw s7, 40(sp) +; ZFINX64-NEXT: sw s6, 36(sp) +; ZFINX64-NEXT: sw s5, 32(sp) +; ZFINX64-NEXT: sw s4, 28(sp) +; ZFINX64-NEXT: sw s3, 24(sp) +; ZFINX64-NEXT: sw s2, 20(sp) +; ZFINX64-NEXT: sw s1, 16(sp) ; ZFINX64-NEXT: sw s0, 12(sp) -; ZFINX64-NEXT: sw t2, 8(sp) -; ZFINX64-NEXT: sw t1, 4(sp) -; ZFINX64-NEXT: sw t0, 0(sp) +; ZFINX64-NEXT: sw t4, 8(sp) +; ZFINX64-NEXT: sw t5, 4(sp) +; ZFINX64-NEXT: sw t6, 0(sp) +; ZFINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload +; ZFINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload +; ZFINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload +; ZFINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload ; ZFINX64-NEXT: call callee_float_32 -; ZFINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload -; ZFINX64-NEXT: addi sp, sp, 192 +; ZFINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s2, 176(sp) # 
8-byte Folded Reload +; ZFINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; ZFINX64-NEXT: addi sp, sp, 208 ; ZFINX64-NEXT: ret ; ; ZDINX32-LABEL: caller_float_32: @@ -1241,79 +1285,87 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ; ZDINX64-LABEL: caller_float_32: ; ZDINX64: # %bb.0: -; ZDINX64-NEXT: addi sp, sp, -192 -; ZDINX64-NEXT: sd ra, 184(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s0, 176(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s1, 168(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s2, 160(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s3, 152(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s4, 144(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s5, 136(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s6, 128(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s7, 120(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s8, 112(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s9, 104(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s10, 96(sp) # 8-byte Folded Spill -; ZDINX64-NEXT: sd s11, 88(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: addi sp, sp, -208 +; ZDINX64-NEXT: sd ra, 200(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s0, 192(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s1, 184(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s2, 176(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s3, 168(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s4, 160(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s5, 152(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s6, 144(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s7, 136(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s8, 128(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill +; ZDINX64-NEXT: lw t0, 208(sp) +; ZDINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill +; ZDINX64-NEXT: lw t0, 216(sp) +; ZDINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill ; ZDINX64-NEXT: lw t0, 224(sp) -; ZDINX64-NEXT: lw t1, 232(sp) -; ZDINX64-NEXT: lw t2, 240(sp) -; ZDINX64-NEXT: lw s0, 248(sp) -; ZDINX64-NEXT: lw t3, 256(sp) -; ZDINX64-NEXT: lw t4, 264(sp) -; ZDINX64-NEXT: lw t5, 272(sp) -; ZDINX64-NEXT: lw t6, 280(sp) -; ZDINX64-NEXT: lw s1, 288(sp) -; ZDINX64-NEXT: lw s2, 296(sp) -; ZDINX64-NEXT: lw s3, 304(sp) -; ZDINX64-NEXT: lw s4, 312(sp) -; ZDINX64-NEXT: lw s5, 320(sp) -; ZDINX64-NEXT: lw s6, 328(sp) -; ZDINX64-NEXT: lw s7, 336(sp) -; ZDINX64-NEXT: lw s8, 344(sp) -; ZDINX64-NEXT: lw s9, 352(sp) -; ZDINX64-NEXT: lw s10, 360(sp) -; ZDINX64-NEXT: lw s11, 368(sp) -; ZDINX64-NEXT: lw ra, 376(sp) -; ZDINX64-NEXT: sw ra, 76(sp) -; ZDINX64-NEXT: sw s11, 72(sp) -; ZDINX64-NEXT: sw s10, 68(sp) -; ZDINX64-NEXT: sw s9, 64(sp) -; ZDINX64-NEXT: sw s8, 60(sp) -; ZDINX64-NEXT: sw s7, 56(sp) -; ZDINX64-NEXT: sw s6, 52(sp) -; ZDINX64-NEXT: sw s5, 48(sp) -; ZDINX64-NEXT: sw s4, 44(sp) -; ZDINX64-NEXT: sw s3, 40(sp) -; ZDINX64-NEXT: sw s2, 36(sp) -; ZDINX64-NEXT: sw s1, 32(sp) -; ZDINX64-NEXT: sw t6, 28(sp) -; ZDINX64-NEXT: sw t5, 24(sp) -; ZDINX64-NEXT: sw t4, 20(sp) -; ZDINX64-NEXT: sw t3, 16(sp) -; ZDINX64-NEXT: lw 
t3, 192(sp) -; ZDINX64-NEXT: lw t4, 200(sp) -; ZDINX64-NEXT: lw t5, 208(sp) -; ZDINX64-NEXT: lw t6, 216(sp) +; ZDINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill +; ZDINX64-NEXT: lw t0, 232(sp) +; ZDINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill +; ZDINX64-NEXT: lw t6, 240(sp) +; ZDINX64-NEXT: lw t5, 248(sp) +; ZDINX64-NEXT: lw t4, 256(sp) +; ZDINX64-NEXT: lw s0, 264(sp) +; ZDINX64-NEXT: lw s1, 272(sp) +; ZDINX64-NEXT: lw s2, 280(sp) +; ZDINX64-NEXT: lw s3, 288(sp) +; ZDINX64-NEXT: lw s4, 296(sp) +; ZDINX64-NEXT: lw s5, 304(sp) +; ZDINX64-NEXT: lw s6, 312(sp) +; ZDINX64-NEXT: lw s7, 320(sp) +; ZDINX64-NEXT: lw s8, 328(sp) +; ZDINX64-NEXT: lw s9, 336(sp) +; ZDINX64-NEXT: lw s10, 344(sp) +; ZDINX64-NEXT: lw s11, 352(sp) +; ZDINX64-NEXT: lw ra, 360(sp) +; ZDINX64-NEXT: lw t3, 368(sp) +; ZDINX64-NEXT: lw t2, 376(sp) +; ZDINX64-NEXT: lw t1, 384(sp) +; ZDINX64-NEXT: lw t0, 392(sp) +; ZDINX64-NEXT: sw t0, 76(sp) +; ZDINX64-NEXT: sw t1, 72(sp) +; ZDINX64-NEXT: sw t2, 68(sp) +; ZDINX64-NEXT: sw t3, 64(sp) +; ZDINX64-NEXT: sw ra, 60(sp) +; ZDINX64-NEXT: sw s11, 56(sp) +; ZDINX64-NEXT: sw s10, 52(sp) +; ZDINX64-NEXT: sw s9, 48(sp) +; ZDINX64-NEXT: sw s8, 44(sp) +; ZDINX64-NEXT: sw s7, 40(sp) +; ZDINX64-NEXT: sw s6, 36(sp) +; ZDINX64-NEXT: sw s5, 32(sp) +; ZDINX64-NEXT: sw s4, 28(sp) +; ZDINX64-NEXT: sw s3, 24(sp) +; ZDINX64-NEXT: sw s2, 20(sp) +; ZDINX64-NEXT: sw s1, 16(sp) ; ZDINX64-NEXT: sw s0, 12(sp) -; ZDINX64-NEXT: sw t2, 8(sp) -; ZDINX64-NEXT: sw t1, 4(sp) -; ZDINX64-NEXT: sw t0, 0(sp) +; ZDINX64-NEXT: sw t4, 8(sp) +; ZDINX64-NEXT: sw t5, 4(sp) +; ZDINX64-NEXT: sw t6, 0(sp) +; ZDINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload +; ZDINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload +; ZDINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload +; ZDINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload ; ZDINX64-NEXT: call callee_float_32 -; ZDINX64-NEXT: ld ra, 184(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s0, 176(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s1, 168(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s2, 160(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s3, 152(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s4, 144(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s5, 136(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s6, 128(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s7, 120(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s8, 112(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s9, 104(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s10, 96(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: ld s11, 88(sp) # 8-byte Folded Reload -; ZDINX64-NEXT: addi sp, sp, 192 +; ZDINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s1, 184(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s2, 176(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s3, 168(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s4, 160(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s5, 152(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s6, 144(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s7, 136(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s8, 128(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s9, 120(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s10, 112(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: ld s11, 104(sp) # 8-byte Folded Reload +; ZDINX64-NEXT: addi sp, sp, 208 ; ZDINX64-NEXT: ret %C = call fastcc float @callee_float_32(<32 x float> %A) ret float %C diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll index 
3f32734db0ba71..bf500d1a2adb39 100644 --- a/llvm/test/CodeGen/RISCV/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/float-arith.ll @@ -706,18 +706,11 @@ define float @fnmadd_s_3(float %a, float %b, float %c) nounwind { ; CHECKIF-NEXT: fneg.s fa0, fa5 ; CHECKIF-NEXT: ret ; -; RV32IZFINX-LABEL: fnmadd_s_3: -; RV32IZFINX: # %bb.0: -; RV32IZFINX-NEXT: fmadd.s a0, a0, a1, a2 -; RV32IZFINX-NEXT: fneg.s a0, a0 -; RV32IZFINX-NEXT: ret -; -; RV64IZFINX-LABEL: fnmadd_s_3: -; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2 -; RV64IZFINX-NEXT: lui a1, 524288 -; RV64IZFINX-NEXT: xor a0, a0, a1 -; RV64IZFINX-NEXT: ret +; CHECKIZFINX-LABEL: fnmadd_s_3: +; CHECKIZFINX: # %bb.0: +; CHECKIZFINX-NEXT: fmadd.s a0, a0, a1, a2 +; CHECKIZFINX-NEXT: fneg.s a0, a0 +; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fnmadd_s_3: ; RV32I: # %bb.0: @@ -761,17 +754,10 @@ define float @fnmadd_nsz(float %a, float %b, float %c) nounwind { ; CHECKIF-NEXT: fnmadd.s fa0, fa0, fa1, fa2 ; CHECKIF-NEXT: ret ; -; RV32IZFINX-LABEL: fnmadd_nsz: -; RV32IZFINX: # %bb.0: -; RV32IZFINX-NEXT: fnmadd.s a0, a0, a1, a2 -; RV32IZFINX-NEXT: ret -; -; RV64IZFINX-LABEL: fnmadd_nsz: -; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: fmadd.s a0, a0, a1, a2 -; RV64IZFINX-NEXT: lui a1, 524288 -; RV64IZFINX-NEXT: xor a0, a0, a1 -; RV64IZFINX-NEXT: ret +; CHECKIZFINX-LABEL: fnmadd_nsz: +; CHECKIZFINX: # %bb.0: +; CHECKIZFINX-NEXT: fnmadd.s a0, a0, a1, a2 +; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fnmadd_nsz: ; RV32I: # %bb.0: @@ -1247,3 +1233,6 @@ define float @fsgnjx_f32(float %x, float %y) nounwind { %mul = fmul float %z, %y ret float %mul } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32IZFINX: {{.*}} +; RV64IZFINX: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll index 2338219687ef75..86f6f079243c26 100644 --- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll @@ -50,8 +50,7 @@ define float @fneg(float %a) nounwind { ; ; RV64IZFINX-LABEL: fneg: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: lui a1, 524288 -; RV64IZFINX-NEXT: xor a0, a0, a1 +; RV64IZFINX-NEXT: fneg.s a0, a0 ; RV64IZFINX-NEXT: ret %1 = fneg float %a ret float %1 @@ -91,8 +90,7 @@ define float @fabs(float %a) nounwind { ; ; RV64IZFINX-LABEL: fabs: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: slli a0, a0, 33 -; RV64IZFINX-NEXT: srli a0, a0, 33 +; RV64IZFINX-NEXT: fabs.s a0, a0 ; RV64IZFINX-NEXT: ret %1 = call float @llvm.fabs.f32(float %a) ret float %1 diff --git a/llvm/test/CodeGen/RISCV/float-frem.ll b/llvm/test/CodeGen/RISCV/float-frem.ll index 651b1b116adc76..31d39a5ab6d6ea 100644 --- a/llvm/test/CodeGen/RISCV/float-frem.ll +++ b/llvm/test/CodeGen/RISCV/float-frem.ll @@ -27,12 +27,7 @@ define float @frem_f32(float %a, float %b) nounwind { ; ; RV64IZFINX-LABEL: frem_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call fmodf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail fmodf ; ; RV32I-LABEL: frem_f32: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll index 69a506cd850f2c..58cbc72e2197c9 100644 --- a/llvm/test/CodeGen/RISCV/float-imm.ll +++ b/llvm/test/CodeGen/RISCV/float-imm.ll @@ -20,12 +20,14 @@ define float @float_imm() nounwind { ; RV32ZFINX: 
# %bb.0: ; RV32ZFINX-NEXT: lui a0, 263313 ; RV32ZFINX-NEXT: addi a0, a0, -37 +; RV32ZFINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; RV32ZFINX-NEXT: ret ; ; RV64ZFINX-LABEL: float_imm: ; RV64ZFINX: # %bb.0: ; RV64ZFINX-NEXT: lui a0, 263313 ; RV64ZFINX-NEXT: addiw a0, a0, -37 +; RV64ZFINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; RV64ZFINX-NEXT: ret ret float 3.14159274101257324218750 } diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll index 52442026dab502..b05eac9c9dee26 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -136,12 +136,7 @@ define float @sin_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: sin_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call sinf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail sinf ; ; RV32I-LABEL: sin_f32: ; RV32I: # %bb.0: @@ -181,12 +176,7 @@ define float @cos_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: cos_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call cosf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail cosf ; ; RV32I-LABEL: cos_f32: ; RV32I: # %bb.0: @@ -327,12 +317,7 @@ define float @pow_f32(float %a, float %b) nounwind { ; ; RV64IZFINX-LABEL: pow_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call powf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail powf ; ; RV32I-LABEL: pow_f32: ; RV32I: # %bb.0: @@ -372,12 +357,7 @@ define float @exp_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: exp_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call expf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail expf ; ; RV32I-LABEL: exp_f32: ; RV32I: # %bb.0: @@ -417,12 +397,7 @@ define float @exp2_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: exp2_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call exp2f -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail exp2f ; ; RV32I-LABEL: exp2_f32: ; RV32I: # %bb.0: @@ -462,12 +437,7 @@ define float @log_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: log_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call logf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail logf ; ; RV32I-LABEL: log_f32: ; RV32I: # %bb.0: @@ -507,12 +477,7 @@ define float @log10_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: log10_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call log10f -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload 
-; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail log10f ; ; RV32I-LABEL: log10_f32: ; RV32I: # %bb.0: @@ -552,12 +517,7 @@ define float @log2_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: log2_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call log2f -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail log2f ; ; RV32I-LABEL: log2_f32: ; RV32I: # %bb.0: @@ -698,8 +658,7 @@ define float @fabs_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: fabs_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: slli a0, a0, 33 -; RV64IZFINX-NEXT: srli a0, a0, 33 +; RV64IZFINX-NEXT: fabs.s a0, a0 ; RV64IZFINX-NEXT: ret ; ; RV32I-LABEL: fabs_f32: @@ -1195,12 +1154,7 @@ define float @nearbyint_f32(float %a) nounwind { ; ; RV64IZFINX-LABEL: nearbyint_f32: ; RV64IZFINX: # %bb.0: -; RV64IZFINX-NEXT: addi sp, sp, -16 -; RV64IZFINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINX-NEXT: call nearbyintf -; RV64IZFINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IZFINX-NEXT: addi sp, sp, 16 -; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: tail nearbyintf ; ; RV32I-LABEL: nearbyint_f32: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 81e29329e71817..18cdb18106f343 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -153,8 +153,8 @@ define half @powi_f16(half %a, i32 %b) nounwind { ; RV64IZHINX: # %bb.0: ; RV64IZHINX-NEXT: addi sp, sp, -16 ; RV64IZHINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZHINX-NEXT: sext.w a1, a1 ; RV64IZHINX-NEXT: fcvt.s.h a0, a0 +; RV64IZHINX-NEXT: sext.w a1, a1 ; RV64IZHINX-NEXT: call __powisf2 ; RV64IZHINX-NEXT: fcvt.h.s a0, a0 ; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -233,8 +233,8 @@ define half @powi_f16(half %a, i32 %b) nounwind { ; RV64IZHINXMIN: # %bb.0: ; RV64IZHINXMIN-NEXT: addi sp, sp, -16 ; RV64IZHINXMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZHINXMIN-NEXT: sext.w a1, a1 ; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 +; RV64IZHINXMIN-NEXT: sext.w a1, a1 ; RV64IZHINXMIN-NEXT: call __powisf2 ; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 ; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/kcfi-mir.ll b/llvm/test/CodeGen/RISCV/kcfi-mir.ll index e478930d59abc5..f35be0564cb25f 100644 --- a/llvm/test/CodeGen/RISCV/kcfi-mir.ll +++ b/llvm/test/CodeGen/RISCV/kcfi-mir.ll @@ -10,7 +10,7 @@ define void @f1(ptr noundef %x) !kcfi_type !1 { ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 ; CHECK-NEXT: SD killed $x1, $x2, 8 :: (store (s64) into %stack.0) ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 - ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_h, implicit killed $x10 { + ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_w, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_w, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_w, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_w, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_w, implicit-def $x30_h, implicit-def $x31, 
implicit-def $x31_w, implicit-def $x31_h, implicit-def dead $x1, implicit-def $x2, implicit-def $x2_w, implicit-def $x2_h, implicit killed $x10 { ; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31 ; CHECK-NEXT: PseudoCALLIndirect killed $x10, csr_ilp32_lp64, implicit-def dead $x1, implicit-def $x2 ; CHECK-NEXT: } @@ -26,7 +26,7 @@ define void @f2(ptr noundef %x) #0 { ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: liveins: $x10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_h, implicit killed $x10, implicit $x2 { + ; CHECK-NEXT: BUNDLE implicit-def $x6, implicit-def $x6_w, implicit-def $x6_h, implicit-def $x7, implicit-def $x7_w, implicit-def $x7_h, implicit-def $x28, implicit-def $x28_w, implicit-def $x28_h, implicit-def $x29, implicit-def $x29_w, implicit-def $x29_h, implicit-def $x30, implicit-def $x30_w, implicit-def $x30_h, implicit-def $x31, implicit-def $x31_w, implicit-def $x31_h, implicit killed $x10, implicit $x2 { ; CHECK-NEXT: KCFI_CHECK $x10, 12345678, implicit-def $x6, implicit-def $x7, implicit-def $x28, implicit-def $x29, implicit-def $x30, implicit-def $x31 ; CHECK-NEXT: PseudoTAILIndirect killed $x10, implicit $x2 ; CHECK-NEXT: } diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll index 442b0cf5b4a856..2c9d640e03a634 100644 --- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll +++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll @@ -62,8 +62,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind { ; RV32IZFINXZDINX-NEXT: call frexpf ; RV32IZFINXZDINX-NEXT: call __truncsfhf2 ; RV32IZFINXZDINX-NEXT: lw a1, 8(sp) +; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV32IZFINXZDINX-NEXT: lui a2, 1048560 ; RV32IZFINXZDINX-NEXT: or a0, a0, a2 +; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret @@ -77,8 +79,10 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind { ; RV64IZFINXZDINX-NEXT: call frexpf ; RV64IZFINXZDINX-NEXT: call __truncsfhf2 ; RV64IZFINXZDINX-NEXT: ld a1, 0(sp) +; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV64IZFINXZDINX-NEXT: lui a2, 1048560 ; RV64IZFINXZDINX-NEXT: or a0, a0, a2 +; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV64IZFINXZDINX-NEXT: ret @@ -157,8 +161,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind { ; RV32IZFINXZDINX-NEXT: addi a1, sp, 8 ; RV32IZFINXZDINX-NEXT: call frexpf ; RV32IZFINXZDINX-NEXT: call __truncsfhf2 +; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV32IZFINXZDINX-NEXT: lui a1, 1048560 ; RV32IZFINXZDINX-NEXT: or a0, a0, a1 +; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; RV32IZFINXZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV32IZFINXZDINX-NEXT: ret @@ -171,8 +177,10 @@ define half @test_frexp_f16_i32_only_use_fract(half %a) nounwind { ; RV64IZFINXZDINX-NEXT: mv a1, sp ; RV64IZFINXZDINX-NEXT: call frexpf ; RV64IZFINXZDINX-NEXT: call __truncsfhf2 +; 
RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV64IZFINXZDINX-NEXT: lui a1, 1048560 ; RV64IZFINXZDINX-NEXT: or a0, a0, a1 +; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10 ; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 ; RV64IZFINXZDINX-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir b/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir new file mode 100644 index 00000000000000..d0223dc5911ad3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/make-compressible-zfinx.mir @@ -0,0 +1,296 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -mtriple=riscv32 -mattr=+c,+zfinx -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s +# RUN: llc -o - %s -mtriple=riscv64 -mattr=+c,+zfinx -simplify-mir \ +# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=CHECK %s + +--- | + + define void @store_common_value_float(ptr %a, ptr %b, ptr %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) #0 { + entry: + store float %j, ptr %a, align 4 + store float %j, ptr %b, align 4 + store float %j, ptr %c, align 4 + ret void + } + + define void @store_common_ptr_float(float %a, float %b, float %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, ptr %p) #0 { + entry: + store volatile float %a, ptr %p, align 4 + store volatile float %b, ptr %p, align 4 + store volatile float %c, ptr %p, align 4 + ret void + } + + define void @load_common_ptr_float(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 { + entry: + %0 = load float, ptr %g, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %g, i32 1 + %1 = load float, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %g, i32 2 + %2 = load float, ptr %arrayidx2, align 4 + tail call void @load_common_ptr_float_1(float %0, float %1, float %2) + ret void + } + + declare void @load_common_ptr_float_1(float, float, float) #0 + + define void @store_large_offset_float(ptr %p, float %a, float %b, float %c, float %d) #0 { + entry: + %0 = getelementptr inbounds float, ptr %p, i32 100 + store volatile float %a, ptr %0, align 4 + %1 = getelementptr inbounds float, ptr %p, i32 101 + store volatile float %b, ptr %1, align 4 + %2 = getelementptr inbounds float, ptr %p, i32 102 + store volatile float %c, ptr %2, align 4 + %3 = getelementptr inbounds float, ptr %p, i32 103 + store volatile float %d, ptr %3, align 4 + ret void + } + + define void @load_large_offset_float(ptr %p) #0 { + entry: + %arrayidx = getelementptr inbounds float, ptr %p, i32 100 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %p, i32 101 + %1 = load float, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %p, i32 102 + %2 = load float, ptr %arrayidx2, align 4 + tail call void @load_large_offset_float_1(float %0, float %1, float %2) + ret void + } + + declare void @load_large_offset_float_1(float, float, float) #0 + + define void @store_common_value_float_no_opt(ptr %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h) #0 { + entry: + store float %h, ptr %a, align 4 + ret void + } + + define void @store_common_ptr_float_no_opt(float %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, ptr %p) #0 { + entry: + store volatile float %a, ptr %p, align 4 + ret void + } + + define float @load_common_ptr_float_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr 
%g) #0 { + entry: + %0 = load float, ptr %g, align 4 + ret float %0 + } + + define void @store_large_offset_float_no_opt(ptr %p, float %a, float %b) #0 { + entry: + %0 = getelementptr inbounds float, ptr %p, i32 100 + store volatile float %a, ptr %0, align 4 + %1 = getelementptr inbounds float, ptr %p, i32 101 + store volatile float %b, ptr %1, align 4 + ret void + } + + define { float, float } @load_large_offset_float_no_opt(ptr %p) #0 { + entry: + %arrayidx = getelementptr inbounds float, ptr %p, i32 100 + %0 = load float, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, ptr %p, i32 101 + %1 = load float, ptr %arrayidx1, align 4 + %2 = insertvalue { float, float } undef, float %0, 0 + %3 = insertvalue { float, float } %2, float %1, 1 + ret { float, float } %3 + } + + attributes #0 = { minsize } + +... +--- +name: store_common_value_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: store_common_value_float + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x13_w = PseudoMV_FPR32INX $x0_w + ; CHECK-NEXT: SW_INX $x13_w, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; CHECK-NEXT: SW_INX $x13_w, killed renamable $x11, 0 :: (store (s32) into %ir.b) + ; CHECK-NEXT: SW_INX killed $x13_w, killed renamable $x12, 0 :: (store (s32) into %ir.c) + ; CHECK-NEXT: PseudoRET + SW_INX $x0_w, killed renamable $x10, 0 :: (store (s32) into %ir.a) + SW_INX $x0_w, killed renamable $x11, 0 :: (store (s32) into %ir.b) + SW_INX killed $x0_w, killed renamable $x12, 0 :: (store (s32) into %ir.c) + PseudoRET + +... +--- +name: store_common_ptr_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10_w, $x11_w, $x12_w, $x16 + + ; CHECK-LABEL: name: store_common_ptr_float + ; CHECK: liveins: $x10_w, $x11_w, $x12_w, $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x13 = ADDI $x16, 0 + ; CHECK-NEXT: SW_INX killed renamable $x10_w, $x13, 0 :: (volatile store (s32) into %ir.p) + ; CHECK-NEXT: SW_INX killed renamable $x11_w, $x13, 0 :: (volatile store (s32) into %ir.p) + ; CHECK-NEXT: SW_INX killed renamable $x12_w, killed $x13, 0 :: (volatile store (s32) into %ir.p) + ; CHECK-NEXT: PseudoRET + SW_INX killed renamable $x10_w, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + SW_INX killed renamable $x11_w, renamable $x16, 0 :: (volatile store (s32) into %ir.p) + SW_INX killed renamable $x12_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + PseudoRET + +... +--- +name: load_common_ptr_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_common_ptr_float + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x13 = ADDI $x16, 0 + ; CHECK-NEXT: renamable $x10_w = LW_INX $x13, 0 :: (load (s32) from %ir.g) + ; CHECK-NEXT: renamable $x11_w = LW_INX $x13, 4 :: (load (s32) from %ir.arrayidx1) + ; CHECK-NEXT: renamable $x12_w = LW_INX killed $x13, 8 :: (load (s32) from %ir.arrayidx2) + ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $x10_w, implicit $x11_w, implicit $x12_w + renamable $x10_w = LW_INX renamable $x16, 0 :: (load (s32) from %ir.g) + renamable $x11_w = LW_INX renamable $x16, 4 :: (load (s32) from %ir.arrayidx1) + renamable $x12_w = LW_INX killed renamable $x16, 8 :: (load (s32) from %ir.arrayidx2) + PseudoTAIL target-flags(riscv-call) @load_common_ptr_float_1, implicit $x2, implicit $x10_w, implicit $x11_w, implicit $x12_w + +... 
+--- +name: store_large_offset_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11_w, $x11_w, $x12_w, $x13_w + + ; CHECK-LABEL: name: store_large_offset_float + ; CHECK: liveins: $x10, $x11_w, $x11_w, $x12_w, $x13_w + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x14 = ADDI $x10, 384 + ; CHECK-NEXT: SW_INX killed renamable $x10_w, $x14, 16 :: (volatile store (s32) into %ir.0) + ; CHECK-NEXT: SW_INX killed renamable $x11_w, $x14, 20 :: (volatile store (s32) into %ir.1) + ; CHECK-NEXT: SW_INX killed renamable $x12_w, $x14, 24 :: (volatile store (s32) into %ir.2) + ; CHECK-NEXT: SW_INX killed renamable $x13_w, killed $x14, 28 :: (volatile store (s32) into %ir.3) + ; CHECK-NEXT: PseudoRET + SW_INX killed renamable $x10_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + SW_INX killed renamable $x11_w, renamable $x10, 404 :: (volatile store (s32) into %ir.1) + SW_INX killed renamable $x12_w, renamable $x10, 408 :: (volatile store (s32) into %ir.2) + SW_INX killed renamable $x13_w, killed renamable $x10, 412 :: (volatile store (s32) into %ir.3) + PseudoRET + +... +--- +name: load_large_offset_float +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: load_large_offset_float + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x14 = ADDI $x10, 384 + ; CHECK-NEXT: renamable $x11_w = LW_INX $x14, 16 :: (load (s32) from %ir.arrayidx) + ; CHECK-NEXT: renamable $x12_w = LW_INX $x14, 20 :: (load (s32) from %ir.arrayidx1) + ; CHECK-NEXT: renamable $x13_w = LW_INX killed $x14, 24 :: (load (s32) from %ir.arrayidx2) + ; CHECK-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $x11_w, implicit $x12_w, implicit $x12_w + renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + renamable $x12_w = LW_INX renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + renamable $x13_w = LW_INX killed renamable $x10, 408 :: (load (s32) from %ir.arrayidx2) + PseudoTAIL target-flags(riscv-call) @load_large_offset_float_1, implicit $x2, implicit $x11_w, implicit $x12_w, implicit $x12_w + +... +--- +name: store_common_value_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x16_w + + ; CHECK-LABEL: name: store_common_value_float_no_opt + ; CHECK: liveins: $x10, $x16_w + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SW_INX killed renamable $x16_w, killed renamable $x10, 0 :: (store (s32) into %ir.a) + ; CHECK-NEXT: PseudoRET + SW_INX killed renamable $x16_w, killed renamable $x10, 0 :: (store (s32) into %ir.a) + PseudoRET + +... +--- +name: store_common_ptr_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16, $x10_w + + ; CHECK-LABEL: name: store_common_ptr_float_no_opt + ; CHECK: liveins: $x16, $x10_w + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SW_INX killed renamable $x10_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + ; CHECK-NEXT: PseudoRET + SW_INX killed renamable $x10_w, killed renamable $x16, 0 :: (volatile store (s32) into %ir.p) + PseudoRET + +... +--- +name: load_common_ptr_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x16 + + ; CHECK-LABEL: name: load_common_ptr_float_no_opt + ; CHECK: liveins: $x16 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10_w = LW_INX killed renamable $x16, 0 :: (load (s32) from %ir.g) + ; CHECK-NEXT: PseudoRET implicit $x10_w + renamable $x10_w = LW_INX killed renamable $x16, 0 :: (load (s32) from %ir.g) + PseudoRET implicit $x10_w + +... 
+--- +name: store_large_offset_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11_w, $x12_w + + ; CHECK-LABEL: name: store_large_offset_float_no_opt + ; CHECK: liveins: $x10, $x11_w, $x12_w + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SW_INX killed renamable $x11_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + ; CHECK-NEXT: SW_INX killed renamable $x12_w, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + ; CHECK-NEXT: PseudoRET + SW_INX killed renamable $x11_w, renamable $x10, 400 :: (volatile store (s32) into %ir.0) + SW_INX killed renamable $x12_w, killed renamable $x10, 404 :: (volatile store (s32) into %ir.1) + PseudoRET + +... +--- +name: load_large_offset_float_no_opt +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + + ; CHECK-LABEL: name: load_large_offset_float_no_opt + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + ; CHECK-NEXT: renamable $x12_w = LW_INX killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + ; CHECK-NEXT: PseudoRET implicit $x11_w, implicit $x12_w + renamable $x11_w = LW_INX renamable $x10, 400 :: (load (s32) from %ir.arrayidx) + renamable $x12_w = LW_INX killed renamable $x10, 404 :: (load (s32) from %ir.arrayidx1) + PseudoRET implicit $x11_w, implicit $x12_w + +...