Skip to content

Commit

Permalink
Merge pull request #3804 from Sonicadvance1/revert_rclse_drop
Browse files Browse the repository at this point in the history
Revert removing RCLSE
  • Loading branch information
alyssarosenzweig authored Jul 3, 2024
2 parents ba04da8 + 2d617ad commit 02a218c
Show file tree
Hide file tree
Showing 45 changed files with 24,286 additions and 18,112 deletions.
1 change: 1 addition & 0 deletions FEXCore/Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ set (SRCS
Interface/IR/IREmitter.cpp
Interface/IR/PassManager.cpp
Interface/IR/Passes/ConstProp.cpp
Interface/IR/Passes/DeadContextStoreElimination.cpp
Interface/IR/Passes/IRDumperPass.cpp
Interface/IR/Passes/IRValidation.cpp
Interface/IR/Passes/RAValidation.cpp
Expand Down
14 changes: 0 additions & 14 deletions FEXCore/Source/Interface/Core/Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -614,20 +614,6 @@ ContextImpl::GenerateIR(FEXCore::Core::InternalThreadState* Thread, uint64_t Gue
DecodedInfo = &Block.DecodedInstructions[i];
bool IsLocked = DecodedInfo->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;

// Do a partial register cache flush before every instruction. This
// prevents cross-instruction static register caching, while allowing
// context load/stores to be optimized within a block. Theoretically,
// this flush is not required for correctness, all mandatory flushes are
// included in instruction-specific handlers. Instead, this is a blunt
// heuristic to make the register cache less aggressive, as the current
// RA generates bad code in common cases with tied registers otherwise.
//
// However, it makes our exception handling behaviour more predictable.
// It is potentially correctness bearing in that sense, but that is a
// side effect here and (if that behaviour is required) we should handle
// that more explicitly later.
Thread->OpDispatcher->FlushRegisterCache(true);

if (ExtendedDebugInfo || Thread->OpDispatcher->CanHaveSideEffects(TableInfo, DecodedInfo)) {
Thread->OpDispatcher->_GuestOpcode(Block.Entry + BlockInstructionsLength - GuestRIP);
}
Expand Down
2 changes: 1 addition & 1 deletion FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ DEF_OP(ShiftFlags) {

// Set the output outside the branch to avoid needing an extra leg of the
// branch. We specifically do not hardcode the PF register anywhere (relying
// on a tied SRA register instead) to avoid fighting with RA.
// on a tied SRA register instead) to avoid fighting with RA/RCLSE.
if (PFTemp != PFInput) {
mov(ARMEmitter::Size::i64Bit, PFTemp, PFInput);
}
Expand Down
42 changes: 22 additions & 20 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RCX, RIPAfterInst, 8);
}

FlushRegisterCache();
CalculateDeferredFlags();
auto SyscallOp = _Syscall(Arguments[0], Arguments[1], Arguments[2], Arguments[3], Arguments[4], Arguments[5], Arguments[6], DefaultSyscallFlags);

if (OSABI != FEXCore::HLE::SyscallOSABI::OS_HANGOVER &&
Expand Down Expand Up @@ -395,7 +395,7 @@ void OpDispatchBuilder::PUSHOp(OpcodeArgs) {

// Store the new stack pointer
StoreGPRRegister(X86State::REG_RSP, NewSP);
FlushRegisterCache();
CalculateDeferredFlags();
}

void OpDispatchBuilder::PUSHREGOp(OpcodeArgs) {
Expand All @@ -408,7 +408,7 @@ void OpDispatchBuilder::PUSHREGOp(OpcodeArgs) {
auto NewSP = _Push(GPRSize, Size, Src, OldSP);
// Store the new stack pointer
StoreGPRRegister(X86State::REG_RSP, NewSP);
FlushRegisterCache();
CalculateDeferredFlags();
}

void OpDispatchBuilder::PUSHAOp(OpcodeArgs) {
Expand Down Expand Up @@ -458,7 +458,7 @@ void OpDispatchBuilder::PUSHAOp(OpcodeArgs) {

// Store the new stack pointer
StoreGPRRegister(X86State::REG_RSP, NewSP, 4);
FlushRegisterCache();
CalculateDeferredFlags();
}

template<uint32_t SegmentReg>
Expand Down Expand Up @@ -843,7 +843,7 @@ void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) {
Target &= 0xFFFFFFFFU;
}

FlushRegisterCache();
CalculateDeferredFlags();
auto TrueBlock = JumpTargets.find(Target);
auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize);

Expand Down Expand Up @@ -3366,8 +3366,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);

// We'll calculate PF/AF after the loop, so use them as temporaries here.
StoreRegister(Core::CPUState::PF_AS_GREG, false, Src1);
StoreRegister(Core::CPUState::AF_AS_GREG, false, Src2);
_StoreRegister(Src1, Core::CPUState::PF_AS_GREG, GPRClass, CTX->GetGPRSize());
_StoreRegister(Src2, Core::CPUState::AF_AS_GREG, GPRClass, CTX->GetGPRSize());

Ref TailCounter = LoadGPRRegister(X86State::REG_RCX);

Expand Down Expand Up @@ -3406,8 +3406,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
// Make sure to start a new block after ending this one
{
// Grab the sources from the last iteration so we can set flags.
auto Src1 = LoadGPR(Core::CPUState::PF_AS_GREG);
auto Src2 = LoadGPR(Core::CPUState::AF_AS_GREG);
auto Src1 = _LoadRegister(Core::CPUState::PF_AS_GREG, GPRClass, CTX->GetGPRSize());
auto Src2 = _LoadRegister(Core::CPUState::AF_AS_GREG, GPRClass, CTX->GetGPRSize());
GenerateFlags_SUB(Op, Src2, Src1);
}
auto Jump_ = Jump();
Expand Down Expand Up @@ -3996,7 +3996,7 @@ void OpDispatchBuilder::BeginFunction(uint64_t RIP, const fextl::vector<FEXCore:

void OpDispatchBuilder::Finalize() {
// This usually doesn't emit any IR but in the case of hitting the block instruction limit it will
FlushRegisterCache();
CalculateDeferredFlags();
const uint8_t GPRSize = CTX->GetGPRSize();

// Node 0 is invalid node
Expand Down Expand Up @@ -4363,7 +4363,7 @@ Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86T
const auto highIndex = Operand.Data.GPR.HighBits ? 1 : 0;

if (gpr >= FEXCore::X86State::REG_MM_0) {
A.Base = LoadContext(MM0Index + gpr - FEXCore::X86State::REG_MM_0);
A.Base = _LoadContext(OpSize, FPRClass, offsetof(FEXCore::Core::CPUState, mm[gpr - FEXCore::X86State::REG_MM_0]));
} else if (gpr >= FEXCore::X86State::REG_XMM_0) {
const auto gprIndex = gpr - X86State::REG_XMM_0;

Expand Down Expand Up @@ -4400,7 +4400,7 @@ Ref OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8_t Offset
if (Size == -1) {
Size = GPRSize;
}
Ref Reg = LoadGPR(GPR);
Ref Reg = _LoadRegister(GPR, GPRClass, GPRSize);

if ((!AllowUpperGarbage && (Size != GPRSize)) || Offset != 0) {
// Extract the subregister if requested.
Expand All @@ -4414,6 +4414,10 @@ Ref OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8_t Offset
return Reg;
}

Ref OpDispatchBuilder::LoadXMMRegister(uint32_t XMM) {
return _LoadRegister(XMM, FPRClass, GetGuestVectorLength());
}

void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, const Ref Src, int8_t Size, uint8_t Offset) {
const uint8_t GPRSize = CTX->GetGPRSize();
if (Size == -1) {
Expand All @@ -4423,14 +4427,15 @@ void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, const Ref Src, int8_t Siz
Ref Reg = Src;
if (Size != GPRSize || Offset != 0) {
// Need to do an insert if not automatic size or zero offset.
Reg = _Bfi(IR::SizeToOpSize(GPRSize), Size * 8, Offset, LoadGPRRegister(GPR), Src);
Reg = LoadGPRRegister(GPR);
Reg = _Bfi(IR::SizeToOpSize(GPRSize), Size * 8, Offset, Reg, Src);
}

StoreRegister(GPR, false, Reg);
_StoreRegister(Reg, GPR, GPRClass, GPRSize);
}

void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, const Ref Src) {
StoreRegister(XMM, true, Src);
_StoreRegister(Src, XMM, FPRClass, GetGuestVectorLength());
}

Ref OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand,
Expand All @@ -4449,8 +4454,7 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl

const auto gpr = Operand.Data.GPR.GPR;
if (gpr >= FEXCore::X86State::REG_MM_0) {
LOGMAN_THROW_A_FMT(OpSize == 8, "full");
StoreContext(MM0Index + gpr - FEXCore::X86State::REG_MM_0, Src);
_StoreContext(OpSize, Class, Src, offsetof(FEXCore::Core::CPUState, mm[gpr - FEXCore::X86State::REG_MM_0]));
} else if (gpr >= FEXCore::X86State::REG_XMM_0) {
const auto gprIndex = gpr - X86State::REG_XMM_0;
const auto VectorSize = (CTX->HostFeatures.SupportsSVE256 && CTX->HostFeatures.SupportsAVX) ? 32 : 16;
Expand Down Expand Up @@ -4548,8 +4552,6 @@ void OpDispatchBuilder::ResetWorkingList() {
DecodeFailure = false;
ShouldDump = false;
CurrentCodeBlock = nullptr;
RegCache.Written = 0;
RegCache.Cached = 0;
}

void OpDispatchBuilder::UnhandledOp(OpcodeArgs) {
Expand Down Expand Up @@ -4703,7 +4705,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) {
}

// Calculate flags early.
FlushRegisterCache();
CalculateDeferredFlags();

const uint8_t GPRSize = CTX->GetGPRSize();

Expand Down
Loading

0 comments on commit 02a218c

Please sign in to comment.