Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JitArm64_Integer: Carry flag optimizations #13251

Merged
merged 12 commits into from
Jan 6, 2025
Merged
262 changes: 179 additions & 83 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1128,47 +1128,85 @@ void JitArm64::addzex(UGeckoInstruction inst)

int a = inst.RA, d = inst.RD;

switch (js.carryFlag)
{
case CarryFlag::InPPCState:
if (gpr.IsImm(a) && (gpr.GetImm(a) == 0 || HasConstantCarry()))
{
const bool allocate_reg = d == a;
gpr.BindToRegister(d, allocate_reg);
const u32 imm = gpr.GetImm(a);
const bool is_all_ones = imm == 0xFFFFFFFF;

switch (js.carryFlag)
{
auto WA = allocate_reg ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(d));
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), WA);
case CarryFlag::InPPCState:
{
gpr.BindToRegister(d, false);
LDRB(IndexType::Unsigned, gpr.R(d), PPC_REG, PPCSTATE_OFF(xer_ca));
ComputeCarry(false);
break;
}
case CarryFlag::InHostCarry:
{
gpr.BindToRegister(d, false);
CSET(gpr.R(d), CCFlags::CC_CS);
ComputeCarry(false);
break;
}
case CarryFlag::ConstantTrue:
{
gpr.SetImmediate(d, imm + 1);
ComputeCarry(is_all_ones);
break;
}
case CarryFlag::ConstantFalse:
{
gpr.SetImmediate(d, imm);
ComputeCarry(false);
break;
}
}

ComputeCarry();
break;
}
case CarryFlag::InHostCarry:
{
gpr.BindToRegister(d, d == a);
CARRY_IF_NEEDED(ADC, ADCS, gpr.R(d), gpr.R(a), ARM64Reg::WZR);
ComputeCarry();
break;
}
case CarryFlag::ConstantTrue:
{
gpr.BindToRegister(d, d == a);
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), 1);
ComputeCarry();
break;
}
case CarryFlag::ConstantFalse:
else
{
if (d != a)
switch (js.carryFlag)
{
gpr.BindToRegister(d, false);
MOV(gpr.R(d), gpr.R(a));
case CarryFlag::InPPCState:
{
const bool allocate_reg = d == a;
gpr.BindToRegister(d, allocate_reg);

{
auto WA = allocate_reg ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(d));
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), WA);
}

ComputeCarry();
break;
}
case CarryFlag::InHostCarry:
{
gpr.BindToRegister(d, d == a);
CARRY_IF_NEEDED(ADC, ADCS, gpr.R(d), gpr.R(a), ARM64Reg::WZR);
ComputeCarry();
break;
}
case CarryFlag::ConstantTrue:
{
gpr.BindToRegister(d, d == a);
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), 1);
ComputeCarry();
break;
}
case CarryFlag::ConstantFalse:
{
if (d != a)
{
gpr.BindToRegister(d, false);
MOV(gpr.R(d), gpr.R(a));
}

ComputeCarry(false);
break;
}
ComputeCarry(false);
break;
}
}
}

if (inst.Rc)
Expand Down Expand Up @@ -1216,40 +1254,62 @@ void JitArm64::subfex(UGeckoInstruction inst)

if (gpr.IsImm(a) && (mex || gpr.IsImm(b)))
{
u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b);

gpr.BindToRegister(d, false);
const u32 i = gpr.GetImm(a);
const u32 j = mex ? -1 : gpr.GetImm(b);
const u32 imm = ~i + j;
const bool is_zero = imm == 0;
const bool is_all_ones = imm == 0xFFFFFFFF;

switch (js.carryFlag)
{
case CarryFlag::InPPCState:
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d));
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
if (is_zero)
{
LDRB(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_ca));
}
else
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(RD, WA, imm, RD);
}
break;
}
case CarryFlag::InHostCarry:
{
auto WA = gpr.GetScopedReg();
MOVI2R(WA, ~i + j);
ADC(gpr.R(d), WA, ARM64Reg::WZR);
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
if (is_all_ones)
{
// RD = -1 + carry = carry ? 0 : -1
// CSETM sets the destination to -1 if the condition is true, 0
// otherwise. Hence, the condition must be carry clear.
CSETM(RD, CC_CC);
}
else
{
MOVI2R(RD, imm);
ADC(RD, RD, ARM64Reg::WZR);
}
break;
}
case CarryFlag::ConstantTrue:
{
gpr.SetImmediate(d, ~i + j + 1);
gpr.SetImmediate(d, imm + 1);
break;
}
case CarryFlag::ConstantFalse:
{
gpr.SetImmediate(d, ~i + j);
gpr.SetImmediate(d, imm);
break;
}
}

const bool must_have_carry = Interpreter::Helper_Carry(~i, j);
const bool might_have_carry = (~i + j) == 0xFFFFFFFF;
const bool might_have_carry = is_all_ones;

if (must_have_carry)
{
Expand Down Expand Up @@ -1337,39 +1397,49 @@ void JitArm64::subfzex(UGeckoInstruction inst)

int a = inst.RA, d = inst.RD;

gpr.BindToRegister(d, d == a);

switch (js.carryFlag)
if (gpr.IsImm(a) && HasConstantCarry())
{
case CarryFlag::InPPCState:
const u32 imm = ~gpr.GetImm(a);
const u32 carry = js.carryFlag == CarryFlag::ConstantTrue;
gpr.SetImmediate(d, imm + carry);
ComputeCarry(Interpreter::Helper_Carry(imm, carry));
}
else
{
gpr.BindToRegister(d, d == a);

switch (js.carryFlag)
{
case CarryFlag::InPPCState:
{
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
MVN(gpr.R(d), gpr.R(a));
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(d), WA);
}
ComputeCarry();
break;
}
case CarryFlag::InHostCarry:
{
CARRY_IF_NEEDED(SBC, SBCS, gpr.R(d), ARM64Reg::WZR, gpr.R(a));
ComputeCarry();
break;
}
case CarryFlag::ConstantTrue:
{
CARRY_IF_NEEDED(NEG, NEGS, gpr.R(d), gpr.R(a));
ComputeCarry();
break;
}
case CarryFlag::ConstantFalse:
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
MVN(gpr.R(d), gpr.R(a));
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(d), WA);
ComputeCarry(false);
break;
}
}
ComputeCarry();
break;
}
case CarryFlag::InHostCarry:
{
CARRY_IF_NEEDED(SBC, SBCS, gpr.R(d), ARM64Reg::WZR, gpr.R(a));
ComputeCarry();
break;
}
case CarryFlag::ConstantTrue:
{
CARRY_IF_NEEDED(NEG, NEGS, gpr.R(d), gpr.R(a));
ComputeCarry();
break;
}
case CarryFlag::ConstantFalse:
{
MVN(gpr.R(d), gpr.R(a));
ComputeCarry(false);
break;
}
}

if (inst.Rc)
Expand Down Expand Up @@ -1436,40 +1506,66 @@ void JitArm64::addex(UGeckoInstruction inst)

if (gpr.IsImm(a) && (mex || gpr.IsImm(b)))
{
u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b);

gpr.BindToRegister(d, false);
const u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b);
const u32 imm = i + j;
const bool is_zero = imm == 0;
const bool is_all_ones = imm == 0xFFFFFFFF;

switch (js.carryFlag)
{
case CarryFlag::InPPCState:
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(gpr.R(d), WA, i + j, gpr.R(d));
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
if (is_zero)
{
LDRB(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_ca));
}
else
{
auto WA = gpr.GetScopedReg();
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(RD, WA, imm, RD);
}
break;
}
case CarryFlag::InHostCarry:
{
gpr.BindToRegister(d, false);
ARM64Reg RD = gpr.R(d);
MOVI2R(RD, i + j);
ADC(RD, RD, ARM64Reg::WZR);
if (is_zero)
{
// RD = 0 + carry = carry ? 1 : 0
CSET(RD, CC_CS);
}
else if (is_all_ones)
{
// RD = -1 + carry = carry ? 0 : -1
// Note that CSETM sets the destination to -1 if the condition is true,
// and 0 otherwise. Hence, the condition must be carry clear.
CSETM(RD, CC_CC);
}
else
{
MOVI2R(RD, imm);
ADC(RD, RD, ARM64Reg::WZR);
}
break;
}
case CarryFlag::ConstantTrue:
{
gpr.SetImmediate(d, i + j + 1);
gpr.SetImmediate(d, imm + 1);
break;
}
case CarryFlag::ConstantFalse:
{
gpr.SetImmediate(d, i + j);
gpr.SetImmediate(d, imm);
break;
}
}

const bool must_have_carry = Interpreter::Helper_Carry(i, j);
const bool might_have_carry = (i + j) == 0xFFFFFFFF;
const bool might_have_carry = is_all_ones;

if (must_have_carry)
{
Expand Down
4 changes: 2 additions & 2 deletions Source/Core/Core/PowerPC/JitCommon/JitBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ JitBase::~JitBase()
CPUThreadConfigCallback::RemoveConfigChangedCallback(m_registered_config_callback_id);
}

bool JitBase::DoesConfigNeedRefresh()
bool JitBase::DoesConfigNeedRefresh() const
{
return std::ranges::any_of(JIT_SETTINGS, [this](const auto& pair) {
return this->*pair.first != Config::Get(*pair.second);
Expand Down Expand Up @@ -276,7 +276,7 @@ bool JitBase::CanMergeNextInstructions(int count) const
return true;
}

bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op)
bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) const
{
if (jo.fp_exceptions)
return (op->opinfo->flags & FL_FLOAT_EXCEPTION) != 0;
Expand Down
12 changes: 10 additions & 2 deletions Source/Core/Core/PowerPC/JitCommon/JitBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ class JitBase : public CPUCoreBase

static const std::array<std::pair<bool JitBase::*, const Config::Info<bool>*>, 23> JIT_SETTINGS;

bool DoesConfigNeedRefresh();
bool DoesConfigNeedRefresh() const;
void RefreshConfig();

void InitFastmemArena();
Expand All @@ -178,8 +178,16 @@ class JitBase : public CPUCoreBase
void CleanUpAfterStackFault();

bool CanMergeNextInstructions(int count) const;
bool HasConstantCarry() const
{
#ifdef _M_ARM_64
return js.carryFlag == CarryFlag::ConstantTrue || js.carryFlag == CarryFlag::ConstantFalse;
#else
return false;
#endif
}

bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op);
bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) const;

public:
explicit JitBase(Core::System& system);
Expand Down