Skip to content

Commit

Permalink
Merge pull request #34 from schweitzpgi/release_50
Browse files Browse the repository at this point in the history
[not flang] merge LLVM Release 50 changes
  • Loading branch information
sscalpone authored Mar 8, 2018
2 parents c492f2d + a984ae3 commit 4158932
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 134 deletions.
11 changes: 10 additions & 1 deletion lib/Target/X86/X86AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@ static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
unsigned Reg = MO.getReg();
bool EmitPercent = true;

switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
Expand All @@ -358,14 +360,20 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
case 'k': // Print SImode register
Reg = getX86SubSuperRegister(Reg, 32);
break;
case 'V':
EmitPercent = false;
LLVM_FALLTHROUGH;
case 'q':
// Print 64-bit register names if 64-bit integer registers are available.
// Otherwise, print 32-bit register names.
Reg = getX86SubSuperRegister(Reg, P.getSubtarget().is64Bit() ? 64 : 32);
break;
}

O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
if (EmitPercent)
O << '%';

O << X86ATTInstPrinter::getRegisterName(Reg);
return false;
}

Expand Down Expand Up @@ -438,6 +446,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
case 'w': // Print HImode register
case 'k': // Print SImode register
case 'q': // Print DImode register
case 'V': // Print native register without '%'
if (MO.isReg())
return printAsmMRegister(*this, MO, ExtraCode[0], O);
printOperand(*this, MI, OpNo, O);
Expand Down
101 changes: 58 additions & 43 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {

static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
// of the thunks for kernels where they have no easy ability to create
// aliases and are doing non-trivial configuration of the thunk's body. For
// example, the Linux kernel will do boot-time hot patching of the thunk
// bodies and cannot easily export aliases of these to loaded modules.
//
// Note that at any point in the future, we may need to change the semantics
// of how we implement retpolines and at that time will likely change the
// name of the called thunk. Essentially, there is no hard guarantee that
// LLVM will generate calls to specific thunks, we merely make a best-effort
// attempt to help out kernels and other systems where duplicating the
// thunks is costly.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__x86_indirect_thunk_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}

// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case 0:
assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
return Subtarget.useRetpolineExternalThunk()
? "__llvm_external_retpoline_push"
: "__llvm_retpoline_push";
case X86::EAX:
return Subtarget.useRetpolineExternalThunk()
? "__llvm_external_retpoline_eax"
: "__llvm_retpoline_eax";
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
case X86::ECX:
return Subtarget.useRetpolineExternalThunk()
? "__llvm_external_retpoline_ecx"
: "__llvm_retpoline_ecx";
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_ecx";
case X86::EDX:
return Subtarget.useRetpolineExternalThunk()
? "__llvm_external_retpoline_edx"
: "__llvm_retpoline_edx";
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edi";
case X86::R11:
return Subtarget.useRetpolineExternalThunk()
? "__llvm_external_retpoline_r11"
: "__llvm_retpoline_r11";
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}
Expand All @@ -26290,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
// just use R11, but we scan for uses anyway to ensure we don't generate
// incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
// already a register use operand to the call to hold the callee. If none
// are available, push the callee instead. This is less efficient, but is
// necessary for functions using 3 regparms. Such function calls are
// (currently) not eligible for tail call optimization, because there is no
// scratch register available to hold the address of the callee.
// are available, use EDI instead. EDI is chosen because EBX is the PIC base
// register and ESI is the base pointer to realigned stack frames with VLAs.
SmallVector<unsigned, 3> AvailableRegs;
if (Subtarget.is64Bit())
AvailableRegs.push_back(X86::R11);
else
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});

// Zero out any registers that are already used.
for (const auto &MO : MI.operands()) {
Expand All @@ -26316,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
break;
}
}
if (!AvailableReg)
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");

const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);

if (AvailableReg == 0) {
// No register available. Use PUSH. This must not be a tailcall, and this
// must not be x64.
if (Subtarget.is64Bit())
report_fatal_error(
"Cannot make an indirect call on x86-64 using both retpoline and a "
"calling convention that preservers r11");
if (Opc != X86::CALLpcrel32)
report_fatal_error("Cannot make an indirect tail call on x86 using "
"retpoline without a preserved register");
BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
MI.getOperand(0).ChangeToES(Symbol);
MI.setDesc(TII->get(Opc));
} else {
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
MI.getOperand(0).ChangeToES(Symbol);
MI.setDesc(TII->get(Opc));
MachineInstrBuilder(*BB->getParent(), &MI)
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
}
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
MI.getOperand(0).ChangeToES(Symbol);
MI.setDesc(TII->get(Opc));
MachineInstrBuilder(*BB->getParent(), &MI)
.addReg(AvailableReg, RegState::Implicit | RegState::Kill);
return BB;
}

Expand Down
68 changes: 11 additions & 57 deletions lib/Target/X86/X86RetpolineThunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[] = "__llvm_retpoline_eax";
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
static const char EDXThunkName[] = "__llvm_retpoline_edx";
static const char PushThunkName[] = "__llvm_retpoline_push";
static const char EDIThunkName[] = "__llvm_retpoline_edi";

namespace {
class X86RetpolineThunks : public MachineFunctionPass {
Expand Down Expand Up @@ -74,7 +74,6 @@ class X86RetpolineThunks : public MachineFunctionPass {

void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
void insert32BitPushReturnAddrClobber(MachineBasicBlock &MBB);
void populateThunk(MachineFunction &MF, Optional<unsigned> Reg = None);
};

Expand Down Expand Up @@ -127,7 +126,7 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
createThunkFunction(M, R11ThunkName);
else
for (StringRef Name :
{EAXThunkName, ECXThunkName, EDXThunkName, PushThunkName})
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
createThunkFunction(M, Name);
InsertedThunks = true;
return true;
Expand All @@ -151,9 +150,8 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
populateThunk(MF, X86::R11);
} else {
// For 32-bit targets we need to emit a collection of thunks for various
// possible scratch registers as well as a fallback that is used when
// there are no scratch registers and assumes the retpoline target has
// been pushed.
// possible scratch registers as well as a fallback that uses EDI, which is
// normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
Expand All @@ -174,32 +172,18 @@ bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
// movl %edx, (%esp)
// retl
//
// This last one is a bit more special and so needs a little extra
// handling.
// __llvm_retpoline_push:
// calll .Lpush_call_target
// .Lpush_capture_spec:
// pause
// lfence
// jmp .Lpush_capture_spec
// .align 16
// .Lpush_call_target:
// # Clear pause_loop return address.
// addl $4, %esp
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
// pushl 4(%esp) # Push callee
// pushl 4(%esp) # Push RA
// popl 8(%esp) # Pop RA to final RA
// popl (%esp) # Pop callee to next top of stack
// retl # Ret to callee
// __llvm_retpoline_edi:
// ... # Same setup
// movl %edi, (%esp)
// retl
if (MF.getName() == EAXThunkName)
populateThunk(MF, X86::EAX);
else if (MF.getName() == ECXThunkName)
populateThunk(MF, X86::ECX);
else if (MF.getName() == EDXThunkName)
populateThunk(MF, X86::EDX);
else if (MF.getName() == PushThunkName)
populateThunk(MF);
else if (MF.getName() == EDIThunkName)
populateThunk(MF, X86::EDI);
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
Expand Down Expand Up @@ -240,31 +224,6 @@ void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
.addReg(Reg);
}

void X86RetpolineThunks::insert32BitPushReturnAddrClobber(
MachineBasicBlock &MBB) {
// The instruction sequence we use to replace the return address without
// a scratch register is somewhat complicated:
// # Clear capture_spec from return address.
// addl $4, %esp
// # Top of stack words are: Callee, RA. Exchange Callee and RA.
// pushl 4(%esp) # Push callee
// pushl 4(%esp) # Push RA
// popl 8(%esp) # Pop RA to final RA
// popl (%esp) # Pop callee to next top of stack
// retl # Ret to callee
BuildMI(&MBB, DebugLoc(), TII->get(X86::ADD32ri), X86::ESP)
.addReg(X86::ESP)
.addImm(4);
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
false, 4);
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::PUSH32rmm)), X86::ESP,
false, 4);
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
false, 8);
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(X86::POP32rmm)), X86::ESP,
false, 0);
}

void X86RetpolineThunks::populateThunk(MachineFunction &MF,
Optional<unsigned> Reg) {
// Set MF properties. We never use vregs...
Expand Down Expand Up @@ -301,11 +260,6 @@ void X86RetpolineThunks::populateThunk(MachineFunction &MF,
CaptureSpec->addSuccessor(CaptureSpec);

CallTarget->setAlignment(4);
if (Reg) {
insertRegReturnAddrClobber(*CallTarget, *Reg);
} else {
assert(!Is64Bit && "We only support non-reg thunks on 32-bit x86!");
insert32BitPushReturnAddrClobber(*CallTarget);
}
insertRegReturnAddrClobber(*CallTarget, *Reg);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
14 changes: 14 additions & 0 deletions test/CodeGen/X86/inline-asm-modifier-V.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
; RUN: llc < %s -mtriple=i686-- -no-integrated-as | FileCheck -check-prefix=X86 %s
; RUN: llc < %s -mtriple=x86_64-- -no-integrated-as | FileCheck -check-prefix=X64 %s

; If the target does not have 64-bit integer registers, emit 32-bit register
; names.

; X86: call __x86_indirect_thunk_e{{[abcd]}}x
; X64: call __x86_indirect_thunk_r

define void @q_modifier(i32* %p) {
entry:
tail call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
ret void
}
Loading

0 comments on commit 4158932

Please sign in to comment.