Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Support containing compares in GT_SELECT for xarch (i.e. start emitting cmov instructions) #81267

Merged
merged 24 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2963b54
JIT: Support containing compares in GT_SELECT for xarch
jakobbotsch Jan 27, 2023
2137d36
Flip ifdef
jakobbotsch Jan 28, 2023
b25d4b0
Run jit-format
jakobbotsch Jan 28, 2023
2f2942c
Improve LSRA building
jakobbotsch Jan 28, 2023
4da299f
Move some code up
jakobbotsch Jan 28, 2023
4ca94e2
Fix compares that were swapped
jakobbotsch Jan 28, 2023
dfa5617
Fix GT_SELECT_HI handling
jakobbotsch Jan 28, 2023
b6b1f41
Handle constraints for compare operands correctly on x86
jakobbotsch Jan 28, 2023
640e59a
Disallow 64-bit GT_SELECT on x86 in another case
jakobbotsch Jan 28, 2023
a91e977
Nit
jakobbotsch Jan 28, 2023
9499a66
Nit
jakobbotsch Jan 28, 2023
ac1c27b
Revise delay free logic, fix some FP conditionals
jakobbotsch Jan 29, 2023
934f719
Fix bad copy paste
jakobbotsch Jan 29, 2023
3fb62ea
Avoid if-conversion for some badly handled cases
jakobbotsch Jan 30, 2023
5a95fd4
Remove leftover code
jakobbotsch Jan 30, 2023
292f5fa
Rename CanSetZeroFlag -> SupportsSettingZeroFlag
jakobbotsch Jan 30, 2023
6f59c5e
Improve preferencing
jakobbotsch Jan 30, 2023
1f5fe5a
Make csel test less fragile
jakobbotsch Jan 30, 2023
5a7a569
Do not if-convert some HW intrinsic cases that the backend handles in…
jakobbotsch Feb 1, 2023
3f56930
Fix linux-x86 build
jakobbotsch Feb 1, 2023
3845b74
Add x64 disasm tests
jakobbotsch Feb 2, 2023
9da5486
Fix test case
jakobbotsch Feb 2, 2023
93ae093
Actually fix test case
jakobbotsch Feb 2, 2023
44a89aa
Update src/coreclr/jit/lowerxarch.cpp
jakobbotsch Feb 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1532,9 +1532,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
#endif // TARGET_XARCH

#ifdef TARGET_ARM64
#if defined(TARGET_ARM64)
static insCflags InsCflagsForCcmp(GenCondition cond);
static insCond JumpKindToInsCond(emitJumpKind condition);
#elif defined(TARGET_XARCH)
static instruction JumpKindToCmov(emitJumpKind condition);
#endif

#ifndef TARGET_LOONGARCH64
Expand Down
13 changes: 10 additions & 3 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1595,6 +1595,14 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeAddress(tree);
}
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
else if (tree->OperIsCompare())
{
// Compares can be contained by SELECT/compare chains.
genConsumeRegs(tree->gtGetOp1());
genConsumeRegs(tree->gtGetOp2());
}
#endif
#ifdef TARGET_ARM64
else if (tree->OperIs(GT_BFIZ))
{
Expand All @@ -1610,10 +1618,9 @@ void CodeGen::genConsumeRegs(GenTree* tree)
assert(cast->isContained());
genConsumeAddress(cast->CastOp());
}
else if (tree->OperIsCompare() || tree->OperIs(GT_AND))
else if (tree->OperIs(GT_AND))
{
// Compares can be contained by a SELECT.
// ANDs and Cmp Compares may be contained in a chain.
// ANDs may be contained in a chain.
genConsumeRegs(tree->gtGetOp1());
genConsumeRegs(tree->gtGetOp2());
}
Expand Down
137 changes: 118 additions & 19 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,6 +1303,46 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
}
}

//------------------------------------------------------------------------
// JumpKindToCmov:
//   Convert an emitJumpKind to the corresponding cmov instruction.
//
// Arguments:
//    condition - the condition
//
// Returns:
//    A cmov instruction.
//
instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
{
    assert((condition >= EJ_NONE) && (condition < EJ_COUNT));

    // Each conditional jump kind maps to the cmov taken under the same
    // flags condition; EJ_NONE and EJ_jmp have no cmov counterpart.
    switch (condition)
    {
        case EJ_NONE:
        case EJ_jmp:
            return INS_none;
        case EJ_jo:
            return INS_cmovo;
        case EJ_jno:
            return INS_cmovno;
        case EJ_jb:
            return INS_cmovb;
        case EJ_jae:
            return INS_cmovae;
        case EJ_je:
            return INS_cmove;
        case EJ_jne:
            return INS_cmovne;
        case EJ_jbe:
            return INS_cmovbe;
        case EJ_ja:
            return INS_cmova;
        case EJ_js:
            return INS_cmovs;
        case EJ_jns:
            return INS_cmovns;
        case EJ_jp:
            return INS_cmovp;
        case EJ_jnp:
            return INS_cmovnp;
        case EJ_jl:
            return INS_cmovl;
        case EJ_jge:
            return INS_cmovge;
        case EJ_jle:
            return INS_cmovle;
        case EJ_jg:
            return INS_cmovg;
        default:
            assert(!"Unexpected emitJumpKind in JumpKindToCmov");
            return INS_none;
    }
}

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node.
//
Expand All @@ -1317,37 +1357,99 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
assert(select->OperIs(GT_SELECT));
#endif

regNumber dstReg = select->GetRegNum();
if (select->OperIs(GT_SELECT))
{
genConsumeReg(select->AsConditional()->gtCond);
genConsumeRegs(select->AsConditional()->gtCond);
}

genConsumeOperands(select);

instruction cmovKind = INS_cmovne;
GenTree* trueVal = select->gtOp1;
GenTree* falseVal = select->gtOp2;
regNumber dstReg = select->GetRegNum();

// If the 'true' operand was allocated the same register as the target
// register then flip it to the false value so we can skip a reg-reg mov.
if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg))
GenTree* trueVal = select->gtOp1;
GenTree* falseVal = select->gtOp2;

GenCondition cc = GenCondition::NE;

if (select->OperIs(GT_SELECT))
{
GenTree* cond = select->AsConditional()->gtCond;
if (cond->isContained())
{
assert(cond->OperIsCompare());
genCodeForCompare(cond->AsOp());
cc = GenCondition::FromRelop(cond);

if (cc.PreferSwap())
{
// genCodeForCompare generated the compare with swapped
// operands because this swap requires fewer branches/cmovs.
cc = GenCondition::Swap(cc);
}
}
else
{
regNumber condReg = cond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
}
}

// The usual codegen will be
// mov targetReg, falseValue
// cmovne targetReg, trueValue
//
// However, if the 'true' operand was allocated the same register as the
// target register then prefer to generate
//
// mov targetReg, trueValue
// cmove targetReg, falseValue
//
// so the first mov is elided.
//
if (falseVal->isUsedFromReg() && (falseVal->GetRegNum() == dstReg))
{
std::swap(trueVal, falseVal);
cmovKind = INS_cmove;
cc = GenCondition::Reverse(cc);
}

if (select->OperIs(GT_SELECT))
// If there is a conflict then swap the condition anyway. LSRA should have
// ensured the other way around has no conflict.
if ((trueVal->gtGetContainedRegMask() & genRegMask(dstReg)) != 0)
{
// TODO-CQ: Support contained relops here.
assert(select->AsConditional()->gtCond->isUsedFromReg());
std::swap(trueVal, falseVal);
cc = GenCondition::Reverse(cc);
}

GenConditionDesc desc = GenConditionDesc::Get(cc);

regNumber condReg = select->AsConditional()->gtCond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
// There may also be a conflict with the falseVal in case this is an AND
// condition. Once again, after swapping there should be no conflict as
// ensured by LSRA.
if ((desc.oper == GT_AND) && (falseVal->gtGetContainedRegMask() & genRegMask(dstReg)) != 0)
{
std::swap(trueVal, falseVal);
cc = GenCondition::Reverse(cc);
desc = GenConditionDesc::Get(cc);
}

inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal);
inst_RV_TT(cmovKind, emitTypeSize(select), dstReg, trueVal);

assert(!trueVal->isContained() || trueVal->isUsedFromMemory());
assert((trueVal->gtGetContainedRegMask() & genRegMask(dstReg)) == 0);
inst_RV_TT(JumpKindToCmov(desc.jumpKind1), emitTypeSize(select), dstReg, trueVal);

if (desc.oper == GT_AND)
{
assert(falseVal->isUsedFromReg());
assert((falseVal->gtGetContainedRegMask() & genRegMask(dstReg)) == 0);
inst_RV_TT(JumpKindToCmov(emitter::emitReverseJumpKind(desc.jumpKind2)), emitTypeSize(select), dstReg,
falseVal);
}
else if (desc.oper == GT_OR)
{
assert(trueVal->isUsedFromReg());
inst_RV_TT(JumpKindToCmov(desc.jumpKind2), emitTypeSize(select), dstReg, trueVal);
}

genProduceReg(select);
}
Expand Down Expand Up @@ -1765,6 +1867,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
case GT_TEST_EQ:
case GT_TEST_NE:
case GT_CMP:
genConsumeOperands(treeNode->AsOp());
genCodeForCompare(treeNode->AsOp());
break;

Expand Down Expand Up @@ -6479,8 +6582,6 @@ void CodeGen::genCompareFloat(GenTree* treeNode)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();

genConsumeOperands(tree);

assert(varTypeIsFloating(op1Type));
assert(op1Type == op2Type);

Expand Down Expand Up @@ -6554,8 +6655,6 @@ void CodeGen::genCompareInt(GenTree* treeNode)
emitter* emit = GetEmitter();
bool canReuseFlags = false;

genConsumeOperands(tree);

assert(!op1->isContainedIntOrIImmed());
assert(!varTypeIsFloating(op2Type));

Expand Down
42 changes: 41 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,7 @@ regMaskTP GenTree::gtGetContainedRegMask()
{
if (!isContained())
{
return gtGetRegMask();
return isUsedFromReg() ? gtGetRegMask() : RBM_NONE;
}

regMaskTP mask = 0;
Expand Down Expand Up @@ -18720,6 +18720,46 @@ bool GenTree::IsArrayAddr(GenTreeArrAddr** pArrAddr)
return false;
}

//------------------------------------------------------------------------
// SupportsSettingZeroFlag: Returns true if this is an arithmetic operation
// whose codegen supports setting the "zero flag" as part of its operation.
//
// Return Value:
// True if so. A false return does not imply that codegen for the node will
// not trash the zero flag.
//
// Remarks:
// For example, for EQ (AND x y) 0, both xarch and arm64 can emit
// instructions that directly set the flags after the 'AND' and thus no
// comparison is needed.
//
// The backend expects any node for which the flags will be consumed to be
// marked with GTF_SET_FLAGS.
//
bool GenTree::SupportsSettingZeroFlag()
{
#if defined(TARGET_XARCH)
if (OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is missing many different cases, particularly those that leave ZF in an undefined state such as imul

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just factoring the code that was already there. I can add a TODO if you want.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(It was factored out of Lowering::OptimizeConstCompare)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. I wonder if its going to cause any problematic behavior if we're depending on this to be accurate.

I could easily see someone checking !CanSetZeroFlag() and then assuming the zero flag can't be overwritten. Inversely I could see someone checking CanSetZeroFlag() and assuming that it will write some 0/1 value and that it is strictly tied to the computed result.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I can rename this. The specific meaning I was intending was that this is a node that supports setting the zero flag if GTF_SET_FLAGS is set. Maybe SupportsSettingZeroFlag() is better?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure Supports fixes the main concern around people thinking !Supports means "does not support" (and therefore will not modify zf).

I don't have a good suggestion for a different name, however.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might just be something we'll have to cover with docs and code review.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed it to SupportsSettingZeroFlag and added some more docs on it

{
return true;
}

#ifdef FEATURE_HW_INTRINSICS
if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic())))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth noting that this isn't necessarily "correct".

lookupIns catches the most common cases, but certainly not all cases particularly when various intrinsics represent complex helpers that are lowered or when the table tracked instruction is INS_invalid due to it requiring additional lookups/handling.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I added another check to the heuristic in if-conversion for the rest of the hw intrinsic cases that this PR doesn't handle yet, so that we don't regress those.

{
return true;
}
#endif
#elif defined(TARGET_ARM64)
if (OperIs(GT_AND, GT_ADD, GT_SUB))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this going to be problematic on Arm64 since we have two different instructions for each of these?

That is one that sets the flags (adds) and one that does not (add)

Copy link
Member Author

@jakobbotsch jakobbotsch Jan 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I think codegen just handles this via gtSetFlags(). Anyway there shouldn't be any behavior differences for arm64 here, this is just factoring this.

{
return true;
}
#endif

return false;
}

//------------------------------------------------------------------------
// Create: Create or retrieve a field sequence for the given field handle.
//
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1985,6 +1985,8 @@ struct GenTree

bool IsArrayAddr(GenTreeArrAddr** pArrAddr);

bool SupportsSettingZeroFlag();

// These are only used for dumping.
// The GetRegNum() is only valid in LIR, but the dumping methods are not easily
// modified to check this.
Expand Down
Loading