From c017de20d8f4e6c7713e7ce0b68c3a9b6b70a1a0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 18 Feb 2023 11:40:43 +0100 Subject: [PATCH 1/9] GT_SELECTCC squashed --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegenxarch.cpp | 66 ++++----- src/coreclr/jit/compiler.h | 3 + src/coreclr/jit/decomposelongs.cpp | 4 +- src/coreclr/jit/gentree.cpp | 17 +++ src/coreclr/jit/gentree.h | 32 ++++- src/coreclr/jit/gtlist.h | 13 +- src/coreclr/jit/gtstructs.h | 1 + src/coreclr/jit/ifconversion.cpp | 98 -------------- src/coreclr/jit/liveness.cpp | 6 + src/coreclr/jit/lower.cpp | 211 +++++++++++++++++++++-------- src/coreclr/jit/lower.h | 1 + src/coreclr/jit/lowerxarch.cpp | 68 ++++------ src/coreclr/jit/lsraxarch.cpp | 66 ++++----- 14 files changed, 307 insertions(+), 281 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index c75ae055e52d7..87310728dd28b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -904,7 +904,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCompareInt(GenTree* treeNode); #ifdef TARGET_XARCH bool genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opType); - GenTreeCC* genTryFindFlagsConsumer(GenTree* flagsProducer); + GenTree* genTryFindFlagsConsumer(GenTree* flagsProducer, GenCondition** condition); #endif #ifdef FEATURE_SIMD diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index eed18f248c73e..595dcb01d7a2b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1363,11 +1363,7 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition) // void CodeGen::genCodeForSelect(GenTreeOp* select) { -#ifdef TARGET_X86 - assert(select->OperIs(GT_SELECT, GT_SELECT_HI)); -#else - assert(select->OperIs(GT_SELECT)); -#endif + assert(select->OperIs(GT_SELECT, GT_SELECTCC)); if (select->OperIs(GT_SELECT)) { @@ -1385,25 +1381,13 @@ void CodeGen::genCodeForSelect(GenTreeOp* select) if (select->OperIs(GT_SELECT)) { - GenTree* cond = select->AsConditional()->gtCond; - if (cond->isContained()) - { - assert(cond->OperIsCompare()); - genCodeForCompare(cond->AsOp()); - cc = GenCondition::FromRelop(cond); - - if (cc.PreferSwap()) - { - // genCodeForCompare generated the compare with swapped - // operands because this swap requires fewer branches/cmovs. 
- cc = GenCondition::Swap(cc); - } - } - else - { - regNumber condReg = cond->GetRegNum(); - GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg); - } + GenTree* cond = select->AsConditional()->gtCond; + regNumber condReg = cond->GetRegNum(); + GetEmitter()->emitIns_R_R(INS_test, emitActualTypeSize(cond), condReg, condReg); + } + else + { + cc = select->AsOpCC()->gtCondition; } // The usual codegen will be @@ -1875,11 +1859,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForSelect(treeNode->AsConditional()); break; -#ifdef TARGET_X86 - case GT_SELECT_HI: + case GT_SELECTCC: genCodeForSelect(treeNode->AsOp()); break; -#endif case GT_RETURNTRAP: genCodeForReturnTrap(treeNode->AsOp()); @@ -6809,8 +6791,9 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT return false; } - GenTreeCC* cc = nullptr; - GenCondition cond; + GenTree* consumer = nullptr; + GenCondition* mutableCond = nullptr; + GenCondition cond; if (tree->OperIsCompare()) { @@ -6818,13 +6801,13 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT } else { - cc = genTryFindFlagsConsumer(tree); - if (cc == nullptr) + consumer = genTryFindFlagsConsumer(tree, &mutableCond); + if (consumer == nullptr) { return false; } - cond = cc->gtCondition; + cond = *mutableCond; } if (GetEmitter()->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(opType), cond)) @@ -6833,11 +6816,12 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT return true; } - if ((cc != nullptr) && GetEmitter()->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(opType), cond)) + if ((mutableCond != nullptr) && + GetEmitter()->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(opType), cond)) { JITDUMP("Not emitting compare due to sign being already set; modifying [%06u] to check sign flag\n", - Compiler::dspTreeID(cc)); - cc->gtCondition = + Compiler::dspTreeID(consumer)); + *mutableCond = (cond.GetCode() == GenCondition::SLT) ? GenCondition(GenCondition::S) : GenCondition(GenCondition::NS); return true; } @@ -6851,11 +6835,12 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT // // Parameters: // producer - the node that produces CPU flags +// cond - [out] the pointer to the condition inside that consumer. // // Returns: // A node that consumes the flags, or nullptr if no such node was found. // -GenTreeCC* CodeGen::genTryFindFlagsConsumer(GenTree* producer) +GenTree* CodeGen::genTryFindFlagsConsumer(GenTree* producer, GenCondition** cond) { assert((producer->gtFlags & GTF_SET_FLAGS) != 0); // We allow skipping some nodes where we know for sure that the flags are @@ -6866,7 +6851,14 @@ GenTreeCC* CodeGen::genTryFindFlagsConsumer(GenTree* producer) { if (candidate->OperIs(GT_JCC, GT_SETCC)) { - return candidate->AsCC(); + *cond = &candidate->AsCC()->gtCondition; + return candidate; + } + + if (candidate->OperIs(GT_SELECTCC)) + { + *cond = &candidate->AsOpCC()->gtCondition; + return candidate; } // The following nodes can be inserted between the compare and the user diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index dd4b883679f8e..70316d599bda8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2414,6 +2414,9 @@ class Compiler // For binary opers. 
GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); + GenTreeCC* gtNewCC(genTreeOps oper, var_types type, GenCondition cond); + GenTreeOpCC* gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2); + GenTreeColon* gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode); GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon); diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 4c746faf98aad..64c5e32855520 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1545,10 +1545,10 @@ GenTree* DecomposeLongs::DecomposeSelect(LIR::Use& use) // Normally GT_SELECT is responsible for evaluating the condition into // flags, but for the "upper half" we treat the lower GT_SELECT similar to - // other flag producing nodes and reuse them. GT_SELECT_HI is the variant + // other flag producing nodes and reuse them. GT_SELECTCC is the variant // that uses existing flags and has no condition as part of it. select->gtFlags |= GTF_SET_FLAGS; - GenTree* hiSelect = m_compiler->gtNewOperNode(GT_SELECT_HI, TYP_INT, hiOp1, hiOp2); + GenTree* hiSelect = m_compiler->gtNewOperCC(GT_SELECTCC, TYP_INT, GenCondition::NE, hiOp1, hiOp2); Range().InsertAfter(select, hiSelect); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 16fe9b2f19cc0..085eabf466188 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -312,6 +312,7 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeCC) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeOpCC) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node @@ -6943,6 +6944,18 @@ GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, return node; } +GenTreeCC* Compiler::gtNewCC(genTreeOps oper, var_types type, GenCondition cond) +{ + GenTreeCC* node = new (this, oper) GenTreeCC(oper, type, cond); + return node; +} + +GenTreeOpCC* Compiler::gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2) +{ + GenTreeOpCC* node = new (this, oper) GenTreeOpCC(oper, type, cond, op1, op2); + return node; +} + GenTreeColon* Compiler::gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode) { return new (this, GT_COLON) GenTreeColon(TYP_INT, elseNode, thenNode); @@ -12134,6 +12147,10 @@ void Compiler::gtDispTree(GenTree* tree, break; } } + else if (tree->OperIs(GT_SELECTCC)) + { + printf(" cond=%s", tree->AsOpCC()->gtCondition.Name()); + } gtDispCommonEndLine(tree); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 243474c42eb92..5ae66a967fe1e 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1683,7 +1683,16 @@ struct GenTree bool OperIsConditionalJump() const { - return (gtOper == GT_JTRUE) || (gtOper == GT_JCMP) || (gtOper == GT_JCC); + return OperIs(GT_JTRUE, GT_JCMP, GT_JCC); + } + + bool OperConsumesFlags() const + { +#if !defined(TARGET_64BIT) + if (OperIs(GT_ADD_HI, GT_SUB_HI)) + return true; +#endif + return OperIs(GT_JCC, GT_SETCC, GT_SELECTCC); } #ifdef DEBUG @@ -8518,12 +8527,11 @@ 
struct GenCondition }; // Represents a GT_JCC or GT_SETCC node. - struct GenTreeCC final : public GenTree { GenCondition gtCondition; - GenTreeCC(genTreeOps oper, GenCondition condition, var_types type = TYP_VOID) + GenTreeCC(genTreeOps oper, var_types type, GenCondition condition) : GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) { assert(OperIs(GT_JCC, GT_SETCC)); @@ -8536,6 +8544,24 @@ struct GenTreeCC final : public GenTree #endif // DEBUGGABLE_GENTREE }; +// Represents a node with two operands and a condition. +struct GenTreeOpCC final : public GenTreeOp +{ + GenCondition gtCondition; + + GenTreeOpCC(genTreeOps oper, var_types type, GenCondition condition, GenTree* op1 = nullptr, GenTree* op2 = nullptr) + : GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) + { + assert(OperIs(GT_SELECTCC)); + } + +#if DEBUGGABLE_GENTREE + GenTreeOpCC() : GenTreeOp() + { + } +#endif // DEBUGGABLE_GENTREE +}; + //------------------------------------------------------------------------ // Deferred inline functions of GenTree -- these need the subtypes above to // be defined already. diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index f6a7b4391b783..eb734e7ec767f 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -185,8 +185,6 @@ GTNODE(SUB_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) GTNODE(LSH_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) GTNODE(RSH_LO , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) -// Variant of SELECT that reuses flags computed by a previous SELECT. -GTNODE(SELECT_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) #endif // !defined(TARGET_64BIT) #ifdef FEATURE_HW_INTRINSICS @@ -234,16 +232,19 @@ GTNODE(CNEG_LT , GenTreeOp ,0,GTK_UNOP|DBK_NOTHIR) // Conditi GTNODE(CMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) // Generate a test instruction; sets the CPU flags according to (a & b) and does not produce a value. GTNODE(TEST , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) +#ifdef TARGET_XARCH +// The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. +GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) +#endif // Makes a comparison and jump if the condition specified. Does not set flags. GTNODE(JCMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) // Checks the condition flags and branch if the condition specified by GenTreeCC::gtCondition is true. GTNODE(JCC , GenTreeCC ,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Checks the condition flags and produces 1 if the condition specified by GenTreeCC::gtCondition is true and 0 otherwise. GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR) -#ifdef TARGET_XARCH -// The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. -GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) -#endif +// Variant of SELECT that reuses flags computed by a previous node with the specified condition. 
+GTNODE(SELECTCC , GenTreeCC ,0,GTK_BINOP|DBK_NOTHIR) + //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index 0321f51a95c00..052a6d09a8812 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -112,6 +112,7 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ) GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP) GTSTRUCT_1(ArrAddr , GT_ARR_ADDR) GTSTRUCT_2(CC , GT_JCC, GT_SETCC) +GTSTRUCT_1(OpCC , GT_SELECTCC) #if defined(TARGET_X86) GTSTRUCT_1(MultiRegOp , GT_MUL_LONG) #elif defined (TARGET_ARM) diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index 01b3fc7b22d31..d7194d36c0df1 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -61,8 +61,6 @@ class OptIfConversionDsc void IfConvertDump(); #endif - bool IsHWIntrinsicCC(GenTree* node); - public: bool optIfConvert(); }; @@ -406,69 +404,6 @@ void OptIfConversionDsc::IfConvertDump() } #endif -#ifdef TARGET_XARCH -//----------------------------------------------------------------------------- -// IsHWIntrinsicCC: -// Check if this is a HW intrinsic node that can be compared efficiently -// against 0. -// -// Returns: -// True if so. -// -// Notes: -// For xarch, we currently skip if-conversion for these cases as the backend can handle them more efficiently -// when they are normal compares. -// -bool OptIfConversionDsc::IsHWIntrinsicCC(GenTree* node) -{ -#ifdef FEATURE_HW_INTRINSICS - if (!node->OperIs(GT_HWINTRINSIC)) - { - return false; - } - - switch (node->AsHWIntrinsic()->GetHWIntrinsicId()) - { - case NI_SSE_CompareScalarOrderedEqual: - case NI_SSE_CompareScalarOrderedNotEqual: - case NI_SSE_CompareScalarOrderedLessThan: - case NI_SSE_CompareScalarOrderedLessThanOrEqual: - case NI_SSE_CompareScalarOrderedGreaterThan: - case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedEqual: - case NI_SSE_CompareScalarUnorderedNotEqual: - case NI_SSE_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE_CompareScalarUnorderedLessThan: - case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedGreaterThan: - case NI_SSE2_CompareScalarOrderedEqual: - case NI_SSE2_CompareScalarOrderedNotEqual: - case NI_SSE2_CompareScalarOrderedLessThan: - case NI_SSE2_CompareScalarOrderedLessThanOrEqual: - case NI_SSE2_CompareScalarOrderedGreaterThan: - case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedEqual: - case NI_SSE2_CompareScalarUnorderedNotEqual: - case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE2_CompareScalarUnorderedLessThan: - case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedGreaterThan: - case NI_SSE41_TestC: - case NI_SSE41_TestZ: - case NI_SSE41_TestNotZAndNotC: - case NI_AVX_TestC: - case NI_AVX_TestZ: - case NI_AVX_TestNotZAndNotC: - return true; - default: - return false; - } -#else - return false; -#endif -} -#endif - //----------------------------------------------------------------------------- // optIfConvert // @@ -718,39 +653,6 @@ bool OptIfConversionDsc::optIfConvert() } } -#ifdef TARGET_XARCH - // Currently the xarch backend does not handle SELECT (EQ/NE (arithmetic op that sets ZF) 0) ... - // as efficiently as JTRUE (EQ/NE (arithmetic op that sets ZF) 0). The support is complicated - // to add due to the destructive nature of xarch instructions. 
- // The exception is for cases that can be transformed into TEST_EQ/TEST_NE. - // TODO-CQ: Fix this. - if (m_cond->OperIs(GT_EQ, GT_NE) && m_cond->gtGetOp2()->IsIntegralConst(0) && - !m_cond->gtGetOp1()->OperIs(GT_AND) && - (m_cond->gtGetOp1()->SupportsSettingZeroFlag() || IsHWIntrinsicCC(m_cond->gtGetOp1()))) - { - JITDUMP("Skipping if-conversion where condition is EQ/NE 0 with operation that sets ZF"); - return false; - } - - // However, in some cases bit tests can emit 'bt' when not going - // through the GT_SELECT path. - if (m_cond->OperIs(GT_EQ, GT_NE) && m_cond->gtGetOp1()->OperIs(GT_AND) && - m_cond->gtGetOp2()->IsIntegralConst(0)) - { - // A bit test that can be transformed into 'bt' will look like - // EQ/NE(AND(x, LSH(1, y)), 0) - - GenTree* andOp1 = m_cond->gtGetOp1()->gtGetOp1(); - GenTree* andOp2 = m_cond->gtGetOp1()->gtGetOp2(); - - if (andOp2->OperIs(GT_LSH) && andOp2->gtGetOp1()->IsIntegralConst(1)) - { - JITDUMP("Skipping if-conversion where condition is amenable to be transformed to BT"); - return false; - } - } -#endif - // Cost to allow for "x = cond ? a + b : c + d". if (thenCost > 7 || elseCost > 7) { diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index e1a3bb4ae84d3..15111d68991ed 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -2225,6 +2225,12 @@ bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange) return GenTree::VisitResult::Continue; }); + if (node->OperIs(GT_SELECTCC, GT_SETCC)) + { + assert((node->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); + node->gtPrev->gtFlags &= ~GTF_SET_FLAGS; + } + blockRange->Remove(node); return true; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8262d78b37dfa..9a4be8835236a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -159,6 +159,11 @@ bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive) const return true; } + if (node->OperConsumesFlags()) + { + return false; + } + m_scratchSideEffects.Clear(); m_scratchSideEffects.AddNode(comp, node); @@ -193,6 +198,21 @@ bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive, GenTree* { assert((node != nullptr) && (endExclusive != nullptr)); + if (ignoreNode == nullptr) + { + return IsInvariantInRange(node, endExclusive); + } + + if ((node->gtNext == endExclusive) || ((node->gtNext == ignoreNode) && (node->gtNext->gtNext == endExclusive))) + { + return true; + } + + if (node->OperConsumesFlags()) + { + return false; + } + m_scratchSideEffects.Clear(); m_scratchSideEffects.AddNode(comp, node); @@ -396,11 +416,9 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_SELECT: return LowerSelect(node->AsConditional()); -#ifdef TARGET_X86 - case GT_SELECT_HI: + case GT_SELECTCC: ContainCheckSelect(node->AsOp()); break; -#endif case GT_JMP: LowerJmpMethod(node); @@ -1179,7 +1197,7 @@ bool Lowering::TryLowerSwitchToBitTest( GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); bitTest->gtFlags |= GTF_SET_FLAGS; - GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition); + GenTreeCC* jcc = comp->gtNewCC(GT_JCC, TYP_VOID, bbSwitchCondition); LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); @@ -3299,58 +3317,9 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) assert(relop->gtNext == jtrue); assert(jtrue->gtNext == nullptr); - GenCondition cond = GenCondition::FromRelop(relop); - bool optimizing = 
comp->opts.OptimizationEnabled(); -#ifdef TARGET_XARCH - // Optimize FP x != x to only check parity flag. This is a common way of - // checking NaN and avoids two branches that we would otherwise emit. - if (optimizing && (cond.GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() && - GenTree::Compare(relopOp1, relopOp2) && IsInvariantInRange(relopOp1, relop) && - IsInvariantInRange(relopOp2, relop)) - { - cond = GenCondition(GenCondition::P); - } -#endif - - // Optimize EQ/NE(op_that_sets_zf, 0) into op_that_sets_zf with GTF_SET_FLAGS. - if (optimizing && relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0) && - relopOp1->SupportsSettingZeroFlag() && IsInvariantInRange(relopOp1, relop)) - { - relopOp1->gtFlags |= GTF_SET_FLAGS; - relopOp1->SetUnusedValue(); - - BlockRange().Remove(relopOp1); - BlockRange().InsertBefore(jtrue, relopOp1); - BlockRange().Remove(relop); - BlockRange().Remove(relopOp2); - } - else - { - relop->gtType = TYP_VOID; - relop->gtFlags |= GTF_SET_FLAGS; - - if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) - { - relop->SetOper(GT_CMP); - - if (cond.PreferSwap()) - { - std::swap(relop->gtOp1, relop->gtOp2); - cond = GenCondition::Swap(cond); - } - } -#ifdef TARGET_XARCH - else if (relop->OperIs(GT_BITTEST_EQ, GT_BITTEST_NE)) - { - relop->SetOper(GT_BT); - } -#endif - else - { - assert(relop->OperIs(GT_TEST_EQ, GT_TEST_NE)); - relop->SetOper(GT_TEST); - } - } + GenCondition cond; + bool lowered = TryLowerConditionToFlagsNode(jtrue, relop, &cond); + assert(lowered); // Should succeed since relop is right before jtrue jtrue->SetOperRaw(GT_JCC); jtrue->AsCC()->gtCondition = cond; @@ -3400,10 +3369,138 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) } } +#ifdef TARGET_XARCH + // Do not transform GT_SELECT with GTF_SET_FLAGS into GT_SELECTCC; this + // node is used by decomposition on x86. + // TODO-CQ: If we allowed multiple nodes to consume the same CPU flags then + // we could do this on x86. We currently disable if-conversion for TYP_LONG + // on 32-bit architectures because of this. + GenCondition selectCond; + if (((select->gtFlags & GTF_SET_FLAGS) == 0) && TryLowerConditionToFlagsNode(select, cond, &selectCond)) + { + select->SetOperRaw(GT_SELECTCC); + GenTreeOpCC* newSelect = select->AsOpCC(); + newSelect->gtCondition = selectCond; + ContainCheckSelect(newSelect); + return newSelect->gtNext; + } +#endif + ContainCheckSelect(select); return select->gtNext; } +//---------------------------------------------------------------------------------------------- +// TryLowerCompareToFlagsNode: Given a node 'parent' that is able to consume +// conditions from CPU flags, try to transform 'condition' into a node that +// produces CPU flags, and reorder it to happen right before 'parent'. +// +// Arguments: +// parent - The parent node that can consume from CPU flags. +// relop - The relop that can be transformed into something that produces CPU flags. +// cond - The condition that makes the compare true. +// +// Return Value: +// True if relop was transformed and is now right before 'parent'; otherwise false. 
+// +bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond) +{ + if (condition->OperIsCompare()) + { + if (!IsInvariantInRange(condition, parent)) + { + return false; + } + + GenTreeOp* relop = condition->AsOp(); + + *cond = GenCondition::FromRelop(relop); + bool optimizing = comp->opts.OptimizationEnabled(); + + GenTree* relopOp1 = relop->gtGetOp1(); + GenTree* relopOp2 = relop->gtGetOp2(); + +#ifdef TARGET_XARCH + // Optimize FP x != x to only check parity flag. This is a common way of + // checking NaN and avoids two branches that we would otherwise emit. + if (optimizing && (cond->GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() && + GenTree::Compare(relopOp1, relopOp2) && IsInvariantInRange(relopOp1, relop) && + IsInvariantInRange(relopOp2, relop)) + { + *cond = GenCondition(GenCondition::P); + } +#endif + + // Optimize EQ/NE(op_that_sets_zf, 0) into op_that_sets_zf with GTF_SET_FLAGS. + if (optimizing && relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0) && + relopOp1->SupportsSettingZeroFlag() && IsInvariantInRange(relopOp1, parent)) + { + relopOp1->gtFlags |= GTF_SET_FLAGS; + relopOp1->SetUnusedValue(); + + BlockRange().Remove(relopOp1); + BlockRange().InsertBefore(parent, relopOp1); + BlockRange().Remove(relop); + BlockRange().Remove(relopOp2); + } + else + { + relop->gtType = TYP_VOID; + relop->gtFlags |= GTF_SET_FLAGS; + + if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) + { + relop->SetOper(GT_CMP); + + if (cond->PreferSwap()) + { + std::swap(relop->gtOp1, relop->gtOp2); + *cond = GenCondition::Swap(*cond); + } + } +#ifdef TARGET_XARCH + else if (relop->OperIs(GT_BITTEST_EQ, GT_BITTEST_NE)) + { + relop->SetOper(GT_BT); + } +#endif + else + { + assert(relop->OperIs(GT_TEST_EQ, GT_TEST_NE)); + relop->SetOper(GT_TEST); + } + + if (relop->gtNext != parent) + { + BlockRange().Remove(relop); + BlockRange().InsertBefore(parent, relop); + } + } + + return true; + } + + // TODO-Cleanup: Avoid creating these SETCC nodes in the first place. + if (condition->OperIs(GT_SETCC)) + { + assert((condition->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); + GenTree* flagsProducer = condition->gtPrev; + if (!IsInvariantInRange(flagsProducer, parent, condition)) + { + return false; + } + + *cond = condition->AsCC()->gtCondition; + + BlockRange().Remove(condition); + BlockRange().Remove(flagsProducer); + BlockRange().InsertBefore(parent, flagsProducer); + return true; + } + + return false; +} + //---------------------------------------------------------------------------------------------- // LowerNodeCC: Lowers a node that produces a boolean value by setting the condition flags. 
// @@ -3483,7 +3580,7 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) if (BlockRange().TryGetUse(relop, &use)) { - cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT); + cc = comp->gtNewCC(GT_SETCC, TYP_INT, condition); BlockRange().InsertAfter(node, cc); use.ReplaceWith(cc); } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index dc462e8220a68..bb88daed053c7 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -137,6 +137,7 @@ class Lowering final : public Phase GenTree* LowerCompare(GenTree* cmp); GenTree* LowerJTrue(GenTreeOp* jtrue); GenTree* LowerSelect(GenTreeConditional* cond); + bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond); GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition); void LowerJmpMethod(GenTree* jmp); void LowerRet(GenTreeUnOp* ret); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1b7054b138130..a3cd206ec471c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5909,52 +5909,40 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // ContainCheckSelect: determine whether the sources of a select should be contained. // // Arguments: -// select - the GT_SELECT or GT_SELECT_HI node. +// select - the GT_SELECT or GT_SELECTCC node. // void Lowering::ContainCheckSelect(GenTreeOp* select) { -#ifdef TARGET_64BIT - assert(select->OperIs(GT_SELECT)); -#else - assert(select->OperIs(GT_SELECT, GT_SELECT_HI)); -#endif + assert(select->OperIs(GT_SELECT, GT_SELECTCC)); - // Disallow containing compares if the flags may be used by follow-up - // nodes, in which case those nodes expect zero/non-zero in the flags. - if (select->OperIs(GT_SELECT) && ((select->gtFlags & GTF_SET_FLAGS) == 0)) + if (select->OperIs(GT_SELECTCC)) { - GenTree* cond = select->AsConditional()->gtCond; - - if (cond->OperIsCompare() && IsSafeToContainMem(select, cond)) - { - MakeSrcContained(select, cond); + GenCondition cc = select->AsOpCC()->gtCondition; - // op1 and op2 are emitted as two separate instructions due to the - // conditional nature of cmov, so both operands can usually be - // contained memory operands. The exception is for compares - // requiring two cmovs, in which case we do not want to incur the - // memory access/address calculation twice. - // - // See the comment in Codegen::GenConditionDesc::map for why these - // comparisons are special and end up requiring the two cmovs. - // - GenCondition cc = GenCondition::FromRelop(cond); - switch (cc.GetCode()) - { - case GenCondition::FEQ: - case GenCondition::FLT: - case GenCondition::FLE: - case GenCondition::FNEU: - case GenCondition::FGEU: - case GenCondition::FGTU: - // Skip containment checking below. - // TODO-CQ: We could allow one of the operands to be a - // contained memory operand, but it requires updating LSRA - // build to take it into account. - return; - default: - break; - } + // op1 and op2 are emitted as two separate instructions due to the + // conditional nature of cmov, so both operands can usually be + // contained memory operands. The exception is for compares + // requiring two cmovs, in which case we do not want to incur the + // memory access/address calculation twice. + // + // See the comment in Codegen::GenConditionDesc::map for why these + // comparisons are special and end up requiring the two cmovs. 
+ // + switch (cc.GetCode()) + { + case GenCondition::FEQ: + case GenCondition::FLT: + case GenCondition::FLE: + case GenCondition::FNEU: + case GenCondition::FGEU: + case GenCondition::FGTU: + // Skip containment checking below. + // TODO-CQ: We could allow one of the operands to be a + // contained memory operand, but it requires updating LSRA + // build to take it into account. + return; + default: + break; } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 815404dd6bcd8..b600b73a0ab77 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -245,12 +245,10 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildSelect(tree->AsConditional()); break; -#ifdef TARGET_X86 - case GT_SELECT_HI: + case GT_SELECTCC: assert(dstCount == 1); srcCount = BuildSelect(tree->AsOp()); break; -#endif case GT_JMP: srcCount = 0; @@ -900,21 +898,11 @@ int LinearScan::BuildSelect(GenTreeOp* select) { int srcCount = 0; - GenCondition cc = GenCondition::NE; if (select->OperIs(GT_SELECT)) { GenTree* cond = select->AsConditional()->gtCond; - if (cond->isContained()) - { - assert(cond->OperIsCompare()); - srcCount += BuildCmpOperands(cond); - cc = GenCondition::FromRelop(cond); - } - else - { - BuildUse(cond); - srcCount++; - } + BuildUse(cond); + srcCount++; } GenTree* trueVal = select->gtOp1; @@ -1012,28 +1000,32 @@ int LinearScan::BuildSelect(GenTreeOp* select) // multiple memory accesses, but we could contain the operand in the 'mov' // instruction with some more care taken for marking things delay reg freed // correctly). - switch (cc.GetCode()) - { - case GenCondition::FEQ: - case GenCondition::FLT: - case GenCondition::FLE: - // Normally these require an 'AND' conditional and cmovs with - // both the true and false values as sources. However, after - // swapping these into an 'OR' conditional the cmovs require - // only the original falseVal, so we need only to mark that as - // delay-reg freed to allow codegen to resolve this. - assert(uncontainedFalseRP != nullptr); - setDelayFree(uncontainedFalseRP); - break; - case GenCondition::FNEU: - case GenCondition::FGEU: - case GenCondition::FGTU: - // These require an 'OR' conditional and only access 'trueVal'. - assert(uncontainedTrueRP != nullptr); - setDelayFree(uncontainedTrueRP); - break; - default: - break; + if (select->OperIs(GT_SELECTCC)) + { + GenCondition cc = select->AsOpCC()->gtCondition; + switch (cc.GetCode()) + { + case GenCondition::FEQ: + case GenCondition::FLT: + case GenCondition::FLE: + // Normally these require an 'AND' conditional and cmovs with + // both the true and false values as sources. However, after + // swapping these into an 'OR' conditional the cmovs require + // only the original falseVal, so we need only to mark that as + // delay-reg freed to allow codegen to resolve this. + assert(uncontainedFalseRP != nullptr); + setDelayFree(uncontainedFalseRP); + break; + case GenCondition::FNEU: + case GenCondition::FGEU: + case GenCondition::FGTU: + // These require an 'OR' conditional and only access 'trueVal'. 
+ assert(uncontainedTrueRP != nullptr); + setDelayFree(uncontainedTrueRP); + break; + default: + break; + } } BuildDef(select); From 415521af15b80526adb4dc4aa6643e943109d5e2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 18 Feb 2023 12:02:48 +0100 Subject: [PATCH 2/9] JIT: Make GTF_SET_FLAGS a LIR flag --- src/coreclr/jit/codegenxarch.cpp | 8 +-- src/coreclr/jit/compiler.cpp | 9 --- src/coreclr/jit/fgopt.cpp | 4 +- src/coreclr/jit/gentree.cpp | 87 ----------------------------- src/coreclr/jit/gentree.h | 8 ++- src/coreclr/jit/lir.cpp | 3 +- src/coreclr/jit/lir.h | 58 ++++++++++++------- src/coreclr/jit/liveness.cpp | 8 +-- src/coreclr/jit/lower.cpp | 12 ++-- src/coreclr/jit/lowerxarch.cpp | 10 ++-- src/coreclr/jit/morph.cpp | 45 ++------------- src/coreclr/jit/optimizer.cpp | 24 -------- src/coreclr/jit/targetamd64.h | 1 - src/coreclr/jit/targetarm.h | 1 - src/coreclr/jit/targetarm64.h | 1 - src/coreclr/jit/targetloongarch64.h | 1 - src/coreclr/jit/targetx86.h | 2 - 17 files changed, 70 insertions(+), 212 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 595dcb01d7a2b..dedc5a6043a20 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -968,7 +968,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) } // now we know there are 3 different operands so attempt to use LEA else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags - && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags()) + && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->ProducesFlags()) { if (op2->isContainedIntOrIImmed()) { @@ -1001,7 +1001,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) // We can skip emitting 'and reg0, -1` if we know that the upper 32bits of 'reg0' are zero'ed. if (compiler->opts.OptimizationEnabled()) { - if ((oper == GT_AND) && (targetType == TYP_INT) && !treeNode->gtSetFlags() && op2->IsIntegralConst(-1) && + if ((oper == GT_AND) && (targetType == TYP_INT) && !treeNode->ProducesFlags() && op2->IsIntegralConst(-1) && emit->AreUpper32BitsZero(targetReg)) { genProduceReg(treeNode); @@ -4534,7 +4534,7 @@ void CodeGen::genCodeForShift(GenTree* tree) { emitAttr size = emitTypeSize(tree); - bool mightOptimizeLsh = tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags(); + bool mightOptimizeLsh = tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->ProducesFlags(); // Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg" if (mightOptimizeLsh && shiftBy->IsIntegralConst(1)) @@ -6842,7 +6842,7 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT // GenTree* CodeGen::genTryFindFlagsConsumer(GenTree* producer, GenCondition** cond) { - assert((producer->gtFlags & GTF_SET_FLAGS) != 0); + assert(producer->ProducesFlags()); // We allow skipping some nodes where we know for sure that the flags are // not consumed. In particular we handle resolution nodes. 
If we see any // other node after the compare (which is an uncommon case, happens diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 2856cc56d6b1e..63a815924403e 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -9842,15 +9842,6 @@ void cTreeFlags(Compiler* comp, GenTree* tree) { chars += printf("[SPILLED_OPER]"); } -#if FEATURE_SET_FLAGS - if (tree->gtFlags & GTF_SET_FLAGS) - { - if ((op != GT_IND) && (op != GT_STOREIND)) - { - chars += printf("[ZSF_SET_FLAGS]"); - } - } -#endif if (tree->gtFlags & GTF_IND_NONFAULTING) { if (tree->OperIsIndirOrArrMetaData()) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 4679a056488b1..ffdc0bd4c7ba7 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -3842,8 +3842,8 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi // For JCC we have an invariant until resolution that the // previous node sets those CPU flags. GenTree* prevNode = jmp->gtPrev; - assert((prevNode != nullptr) && ((prevNode->gtFlags & GTF_SET_FLAGS) != 0)); - prevNode->gtFlags &= ~GTF_SET_FLAGS; + assert((prevNode != nullptr) && prevNode->ProducesFlags()); + prevNode->ClearProducesFlags(); jmpRange = blockRange.GetTreeRange(prevNode, &isClosed, &sideEffects); jmpRange = LIR::ReadOnlyRange(jmpRange.FirstNode(), jmp); } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 085eabf466188..ffb1566758393 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9425,85 +9425,6 @@ void Compiler::gtUpdateNodeSideEffects(GenTree* tree) }); } -bool GenTree::gtSetFlags() const -{ - // - // When FEATURE_SET_FLAGS (TARGET_ARM) is active the method returns true - // when the gtFlags has the flag GTF_SET_FLAGS set - // otherwise the architecture will be have instructions that typically set - // the flags and this method will return true. - // - // Exceptions: GT_IND (load/store) is not allowed to set the flags - // and on XARCH the GT_MUL/GT_DIV and all overflow instructions - // do not set the condition flags - // - // Precondition we have a GTK_SMPOP - // - if (!varTypeIsIntegralOrI(TypeGet()) && (TypeGet() != TYP_VOID)) - { - return false; - } - - if (((gtFlags & GTF_SET_FLAGS) != 0) && (gtOper != GT_IND)) - { - // GTF_SET_FLAGS is not valid on GT_IND and is overlaid with GTF_NONFAULTING_IND - return true; - } - else - { - return false; - } -} - -bool GenTree::gtRequestSetFlags() -{ - bool result = false; - -#if FEATURE_SET_FLAGS - // This method is a Nop unless FEATURE_SET_FLAGS is defined - - // In order to set GTF_SET_FLAGS - // we must have a GTK_SMPOP - // and we have a integer or machine size type (not floating point or TYP_LONG on 32-bit) - // - if (!OperIsSimple()) - return false; - - if (!varTypeIsIntegralOrI(TypeGet())) - return false; - - switch (gtOper) - { - case GT_IND: - case GT_ARR_LENGTH: - case GT_MDARR_LENGTH: - case GT_MDARR_LOWER_BOUND: - // These will turn into simple load from memory instructions - // and we can't force the setting of the flags on load from memory - break; - - case GT_MUL: - case GT_DIV: - // These instructions don't set the flags (on x86/x64) - // - break; - - default: - // Otherwise we can set the flags for this gtOper - // and codegen must set the condition flags. - // - gtFlags |= GTF_SET_FLAGS; - result = true; - break; - } -#endif // FEATURE_SET_FLAGS - - // Codegen for this tree must set the condition flags if - // this method returns true. 
- // - return result; -} - GenTreeUseEdgeIterator::GenTreeUseEdgeIterator() : m_advance(nullptr), m_node(nullptr), m_edge(nullptr), m_statePtr(nullptr), m_state(-1) { @@ -10161,10 +10082,6 @@ void GenTree::SetIndirExceptionFlags(Compiler* comp) (flags & GTF_MAKE_CSE) ? 'H' : '-'); // H is for Hoist this expr printf("%c", (flags & GTF_REVERSE_OPS) ? 'R' : '-'); printf("%c", (flags & GTF_UNSIGNED) ? 'U' : (flags & GTF_BOOLEAN) ? 'B' : '-'); -#if FEATURE_SET_FLAGS - printf("%c", (flags & GTF_SET_FLAGS) ? 'S' : '-'); - ++charsDisplayed; -#endif printf("%c", (flags & GTF_SPILLED) ? 'z' : (flags & GTF_SPILL) ? 'Z' : '-'); return charsDisplayed; @@ -12732,10 +12649,6 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr * // 60 spaces for alignment printf("%-60s", ""); -#if FEATURE_SET_FLAGS - // additional flag enlarges the flag field by one character - printf(" "); -#endif indentStack.Push(operandArc); indentStack.print(); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 5ae66a967fe1e..52da07292e8ab 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -380,8 +380,6 @@ enum GenTreeFlags : unsigned int GTF_NOREG_AT_USE = 0x00000100, // tree node is in memory at the point of use - GTF_SET_FLAGS = 0x00000200, // Requires that codegen for this node set the flags. Use gtSetFlags() to check this flag. - #ifdef TARGET_XARCH GTF_DONT_EXTEND = 0x00000400, // This small-typed tree produces a value with undefined upper bits. Used on x86/x64 as a // lowering optimization and tells the codegen to use instructions like "mov al, [addr]" @@ -1090,6 +1088,11 @@ struct GenTree inline void SetUnusedValue(); inline void ClearUnusedValue(); inline bool IsUnusedValue() const; + // Indicates whether this node produces CPU flags consumed by a subsequent node. + inline void SetProducesFlags(); + inline void ClearProducesFlags(); + inline bool ProducesFlags() const; + // RegOptional indicates that codegen can still generate code even if it isn't allocated a register. inline bool IsRegOptional() const; inline void SetRegOptional(); @@ -2218,7 +2221,6 @@ struct GenTree bool gtOverflow() const; bool gtOverflowEx() const; - bool gtSetFlags() const; bool gtRequestSetFlags(); #ifdef DEBUG diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp index 0d7ab925900ab..da133a43aeb97 100644 --- a/src/coreclr/jit/lir.cpp +++ b/src/coreclr/jit/lir.cpp @@ -1580,8 +1580,7 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const // other code that relies on being able to reach all the operands from a call node. // The argument of a JTRUE doesn't produce a value (just sets a flag). assert(((node->OperGet() == GT_CALL) && def->OperIs(GT_PUTARG_STK)) || - ((node->OperGet() == GT_JTRUE) && (def->TypeGet() == TYP_VOID) && - ((def->gtFlags & GTF_SET_FLAGS) != 0))); + ((node->OperGet() == GT_JTRUE) && (def->TypeGet() == TYP_VOID) && !def->ProducesFlags())); continue; } diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index 2171dcafc01c7..42d0d25a10e22 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -27,19 +27,24 @@ class LIR final { None = 0x00, - Mark = 0x01, // An arbitrary "mark" bit that can be used in place of - // a more expensive data structure when processing a set - // of LIR nodes. See for example `LIR::GetTreeRange`. - - UnusedValue = 0x02, // Set on a node if it produces a value that is not - // subsequently used. Should never be set on nodes - // that return `false` for `GenTree::IsValue`. 
Note - // that this bit should not be assumed to be valid - // at all points during compilation: it is currently - // only computed during target-dependent lowering. - - RegOptional = 0x04, // Set on a node if it produces a value, but does not - // require a register (i.e. it can be used from memory). + // An arbitrary "mark" bit that can be used in place of a more + // expensive data structure when processing a set of LIR nodes. See + // for example `LIR::GetTreeRange`. + Mark = 0x01, + + // Set on a node if it produces a value that is not subsequently + // used. Should never be set on nodes that return `false` for + // `GenTree::IsValue`. + UnusedValue = 0x02, + + // Set on a node if it produces CPU flags that will be consumed by + // a follow-up node. + ProducesFlags = 0x4, + + // Set on a node if it produces a value, but does not require a + // register (i.e. it can be used from memory). See + // IsSafeToMarkRegOptional for more information. + RegOptional = 0x08, }; }; @@ -308,33 +313,48 @@ class LIR final static void InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range); }; -inline void GenTree::SetUnusedValue() +void GenTree::SetUnusedValue() { gtLIRFlags |= LIR::Flags::UnusedValue; ClearContained(); } -inline void GenTree::ClearUnusedValue() +void GenTree::ClearUnusedValue() { gtLIRFlags &= ~LIR::Flags::UnusedValue; } -inline bool GenTree::IsUnusedValue() const +bool GenTree::IsUnusedValue() const { return (gtLIRFlags & LIR::Flags::UnusedValue) != 0; } -inline void GenTree::SetRegOptional() +void GenTree::SetProducesFlags() +{ + gtLIRFlags |= LIR::Flags::ProducesFlags; +} + +void GenTree::ClearProducesFlags() +{ + gtLIRFlags &= ~LIR::Flags::ProducesFlags; +} + +bool GenTree::ProducesFlags() const +{ + return (gtLIRFlags & LIR::Flags::ProducesFlags) != 0; +} + +void GenTree::SetRegOptional() { gtLIRFlags |= LIR::Flags::RegOptional; } -inline void GenTree::ClearRegOptional() +void GenTree::ClearRegOptional() { gtLIRFlags &= ~LIR::Flags::RegOptional; } -inline bool GenTree::IsRegOptional() const +bool GenTree::IsRegOptional() const { return (gtLIRFlags & LIR::Flags::RegOptional) != 0; } diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 15111d68991ed..0bc7789d990ac 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -2215,7 +2215,7 @@ bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange) // (as opposed to side effects of their children). // This default case should never include calls or assignments. assert(!node->OperRequiresAsgFlag() && !node->OperIs(GT_CALL)); - if (!node->gtSetFlags() && !node->OperMayThrow(this)) + if (!node->ProducesFlags() && !node->OperMayThrow(this)) { JITDUMP("Removing dead node:\n"); DISPNODE(node); @@ -2225,10 +2225,10 @@ bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange) return GenTree::VisitResult::Continue; }); - if (node->OperIs(GT_SELECTCC, GT_SETCC)) + if (node->OperConsumesFlags()) { - assert((node->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); - node->gtPrev->gtFlags &= ~GTF_SET_FLAGS; + assert(node->gtPrev->ProducesFlags()); + node->gtPrev->ClearProducesFlags(); } blockRange->Remove(node); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 9a4be8835236a..57b6759c84fe7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1196,7 +1196,7 @@ bool Lowering::TryLowerSwitchToBitTest( var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ? 
TYP_INT : TYP_LONG; GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); - bitTest->gtFlags |= GTF_SET_FLAGS; + bitTest->SetProducesFlags(); GenTreeCC* jcc = comp->gtNewCC(GT_JCC, TYP_VOID, bbSwitchCondition); LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); @@ -3376,7 +3376,7 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) // we could do this on x86. We currently disable if-conversion for TYP_LONG // on 32-bit architectures because of this. GenCondition selectCond; - if (((select->gtFlags & GTF_SET_FLAGS) == 0) && TryLowerConditionToFlagsNode(select, cond, &selectCond)) + if (!select->ProducesFlags() && TryLowerConditionToFlagsNode(select, cond, &selectCond)) { select->SetOperRaw(GT_SELECTCC); GenTreeOpCC* newSelect = select->AsOpCC(); @@ -3435,7 +3435,7 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, if (optimizing && relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0) && relopOp1->SupportsSettingZeroFlag() && IsInvariantInRange(relopOp1, parent)) { - relopOp1->gtFlags |= GTF_SET_FLAGS; + relopOp1->SetProducesFlags(); relopOp1->SetUnusedValue(); BlockRange().Remove(relopOp1); @@ -3446,7 +3446,7 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, else { relop->gtType = TYP_VOID; - relop->gtFlags |= GTF_SET_FLAGS; + relop->SetProducesFlags(); if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) { @@ -3483,7 +3483,7 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, // TODO-Cleanup: Avoid creating these SETCC nodes in the first place. if (condition->OperIs(GT_SETCC)) { - assert((condition->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); + assert(condition->gtPrev->ProducesFlags()); GenTree* flagsProducer = condition->gtPrev; if (!IsInvariantInRange(flagsProducer, parent, condition)) { @@ -3589,7 +3589,7 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) if (cc != nullptr) { - node->gtFlags |= GTF_SET_FLAGS; + node->SetProducesFlags(); } // Remove the chain of EQ/NE(x, 0) relop nodes, if any. 
Note that if a SETCC was diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index a3cd206ec471c..a38a8c52569b4 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -4383,8 +4383,7 @@ GenTree* Lowering::TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode) } // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them - if (((addOp2->gtFlags & GTF_SET_FLAGS) != 0) || ((op2->gtFlags & GTF_SET_FLAGS) != 0) || - ((andNode->gtFlags & GTF_SET_FLAGS) != 0)) + if (addOp2->ProducesFlags() || op2->ProducesFlags() || andNode->ProducesFlags()) { return nullptr; } @@ -4469,7 +4468,7 @@ GenTree* Lowering::TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode) } // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them - if (((opNode->gtFlags & GTF_SET_FLAGS) != 0) || ((negNode->gtFlags & GTF_SET_FLAGS) != 0)) + if (opNode->ProducesFlags() || negNode->ProducesFlags()) { return nullptr; } @@ -4554,7 +4553,7 @@ GenTree* Lowering::TryLowerAndOpToAndNot(GenTreeOp* andNode) } // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them - if (((andNode->gtFlags & GTF_SET_FLAGS) != 0) || ((notNode->gtFlags & GTF_SET_FLAGS) != 0)) + if (andNode->ProducesFlags() || notNode->ProducesFlags()) { return nullptr; } @@ -4640,8 +4639,7 @@ GenTree* Lowering::TryLowerXorOpToGetMaskUpToLowestSetBit(GenTreeOp* xorNode) } // Subsequent nodes may rely on CPU flags set by these nodes in which case we cannot remove them - if (((addOp2->gtFlags & GTF_SET_FLAGS) != 0) || ((op2->gtFlags & GTF_SET_FLAGS) != 0) || - ((xorNode->gtFlags & GTF_SET_FLAGS) != 0)) + if (addOp2->ProducesFlags() || op2->ProducesFlags() || xorNode->ProducesFlags()) { return nullptr; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a34552aa1f197..5f65ea64e824b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -8603,34 +8603,18 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA op1->gtFlags |= GTF_DONT_CSE; break; +#ifdef DEBUG case GT_QMARK: case GT_JTRUE: + assert(op1); - noway_assert(op1); - - if (op1->OperIsCompare()) - { - /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does - not need to materialize the result as a 0 or 1. */ - - /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ - op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); - - // Request that the codegen for op1 sets the condition flags - // when it generates the code for op1. - // - // Codegen for op1 must set the condition flags if - // this method returns true. - // - op1->gtRequestSetFlags(); - } - else + if (!op1->OperIsCompare()) { GenTree* effOp1 = op1->gtEffectiveVal(); - noway_assert((effOp1->gtOper == GT_CNS_INT) && - (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); + assert((effOp1->gtOper == GT_CNS_INT) && (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); } break; +#endif case GT_COLON: if (optLocalAssertionProp) @@ -9430,23 +9414,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA op1 = tree->AsOp()->gtOp1; op2 = tree->gtGetOp2IfPresent(); - // Do we have an integer compare operation? - // - if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet())) - { - // Are we comparing against zero? 
- // - if (op2->IsIntegralConst(0)) - { - // Request that the codegen for op1 sets the condition flags - // when it generates the code for op1. - // - // Codegen for op1 must set the condition flags if - // this method returns true. - // - op1->gtRequestSetFlags(); - } - } /*------------------------------------------------------------------------- * Perform the required oper-specific postorder morphing */ @@ -9690,8 +9657,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA CM_OVF_OP: if (tree->gtOverflow()) { - tree->gtRequestSetFlags(); - // Add the excptn-throwing basic block to jump to on overflow fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 0d2c3c99d58ed..582d0d147ad65 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -9514,30 +9514,6 @@ void OptBoolsDsc::optOptimizeBoolsUpdateTrees() --m_comp->fgReturnCount; } -#if FEATURE_SET_FLAGS - // For comparisons against zero we will have the GTF_SET_FLAGS set - // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree) - // during the CSE phase. - // - // So make sure to clear any GTF_SET_FLAGS bit on these operations - // as they are no longer feeding directly into a comparisons against zero - - // Make sure that the GTF_SET_FLAGS bit is cleared. - // Fix 388436 ARM JitStress WP7 - m_c1->gtFlags &= ~GTF_SET_FLAGS; - m_c2->gtFlags &= ~GTF_SET_FLAGS; - - // The new top level node that we just created does feed directly into - // a comparison against zero, so set the GTF_SET_FLAGS bit so that - // we generate an instruction that sets the flags, which allows us - // to omit the cmp with zero instruction. - - // Request that the codegen for cmpOp1 sets the condition flags - // when it generates the code for cmpOp1. - // - cmpOp1->gtRequestSetFlags(); -#endif - // Recost/rethread the tree if necessary // if (m_comp->fgNodeThreading != NodeThreading::None) diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 64af2659bd592..ecd1f3c5da38f 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -31,7 +31,6 @@ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. - #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). #ifdef UNIX_AMD64_ABI #define FEATURE_IMPLICIT_BYREFS 0 // Support for struct parameters passed via pointers to shadow copies diff --git a/src/coreclr/jit/targetarm.h b/src/coreclr/jit/targetarm.h index 60b01d6063dcb..2c550e07a2ae3 100644 --- a/src/coreclr/jit/targetarm.h +++ b/src/coreclr/jit/targetarm.h @@ -22,7 +22,6 @@ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. 
- #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set #define FEATURE_IMPLICIT_BYREFS 0 // Support for struct parameters passed via pointers to shadow copies #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register (including HFA support) #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register (including passing HFAs) diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index b30c7af7e40d9..220ca554f1350 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -27,7 +27,6 @@ #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. - #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set #define FEATURE_IMPLICIT_BYREFS 1 // Support for struct parameters passed via pointers to shadow copies #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register diff --git a/src/coreclr/jit/targetloongarch64.h b/src/coreclr/jit/targetloongarch64.h index 5d511e73ec3ae..79ac86ddd8325 100644 --- a/src/coreclr/jit/targetloongarch64.h +++ b/src/coreclr/jit/targetloongarch64.h @@ -30,7 +30,6 @@ #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. - #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set #define FEATURE_IMPLICIT_BYREFS 1 // Support for struct parameters passed via pointers to shadow copies #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 09c6b6b0b04ef..673fb09b15f64 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -28,8 +28,6 @@ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls. 
- #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when - // the flags need to be set #define FEATURE_IMPLICIT_BYREFS 0 // Support for struct parameters passed via pointers to shadow copies #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register From 895421d7be4be554895dcc5f3ee7746bdb0c983c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 18 Feb 2023 23:23:31 +0100 Subject: [PATCH 3/9] JIT: Model CPU flag dependencies explicitly in LIR --- src/coreclr/jit/codegen.h | 7 +- src/coreclr/jit/codegenarm64.cpp | 25 +-- src/coreclr/jit/codegenarmarch.cpp | 10 +- src/coreclr/jit/codegenlinear.cpp | 4 +- src/coreclr/jit/codegenxarch.cpp | 69 ++----- src/coreclr/jit/compiler.h | 6 +- src/coreclr/jit/compiler.hpp | 2 +- src/coreclr/jit/decomposelongs.cpp | 46 ++--- src/coreclr/jit/emitarm.cpp | 2 +- src/coreclr/jit/fgopt.cpp | 16 +- src/coreclr/jit/gentree.cpp | 63 +++++-- src/coreclr/jit/gentree.h | 90 +++++++-- src/coreclr/jit/gtlist.h | 16 +- src/coreclr/jit/gtstructs.h | 11 +- src/coreclr/jit/lir.cpp | 286 ++++++++++++++++++++++++----- src/coreclr/jit/lir.h | 33 +++- src/coreclr/jit/liveness.cpp | 6 +- src/coreclr/jit/lower.cpp | 72 ++++---- src/coreclr/jit/lower.h | 4 +- src/coreclr/jit/lowerarmarch.cpp | 40 ++-- src/coreclr/jit/lowerxarch.cpp | 4 +- src/coreclr/jit/lsraxarch.cpp | 2 +- src/coreclr/jit/morph.cpp | 17 +- 23 files changed, 567 insertions(+), 264 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 87310728dd28b..6e6fe10737fb3 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -904,7 +904,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCompareInt(GenTree* treeNode); #ifdef TARGET_XARCH bool genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opType); - GenTree* genTryFindFlagsConsumer(GenTree* flagsProducer, GenCondition** condition); #endif #ifdef FEATURE_SIMD @@ -1202,7 +1201,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(TARGET_ARM64) void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); - void genCodeForCond(GenTreeOp* tree); + void genCodeForCond(GenTreeOpFlagsCC* tree); #endif // TARGET_ARM64 #if defined(FEATURE_EH_FUNCLETS) @@ -1586,8 +1585,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); - void genCodeForJcc(GenTreeCC* tree); - void genCodeForSetcc(GenTreeCC* setcc); + void genCodeForJcc(GenTreeFlagsCC* tree); + void genCodeForSetcc(GenTreeFlagsCC* setcc); #endif // !TARGET_LOONGARCH64 }; diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 17fa38771b05a..6cbfbf7a5ec9b 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2529,7 +2529,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) assert(varTypeIsIntegral(tree)); // These operations cannot set flags - assert((tree->gtFlags & GTF_SET_FLAGS) == 0); + assert(!tree->ProducesFlags()); GenTree* a = op1; GenTree* b = op2->gtGetOp1(); @@ -2576,7 +2576,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) instruction ins = genGetInsForOper(tree->OperGet(), targetType); insOpts opt = 
INS_OPTS_NONE; - if ((tree->gtFlags & GTF_SET_FLAGS) != 0) + if (tree->ProducesFlags()) { // A subset of operations can still set flags @@ -2649,7 +2649,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) instruction ins = genGetInsForOper(tree->OperGet(), targetType); insOpts opt = INS_OPTS_NONE; - if ((tree->gtFlags & GTF_SET_FLAGS) != 0) + if (tree->ProducesFlags()) { // A subset of operations can still set flags @@ -2727,7 +2727,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree) instruction ins = genGetInsForOper(tree->OperGet(), targetType); - if ((tree->gtFlags & GTF_SET_FLAGS) != 0) + if (tree->ProducesFlags()) { switch (oper) { @@ -3378,7 +3378,7 @@ void CodeGen::genCodeForNegNot(GenTree* tree) regNumber targetReg = tree->GetRegNum(); instruction ins = genGetInsForOper(tree->OperGet(), targetType); - if ((tree->gtFlags & GTF_SET_FLAGS) != 0) + if (tree->ProducesFlags()) { switch (tree->OperGet()) { @@ -10355,18 +10355,19 @@ void CodeGen::genCodeForBfiz(GenTreeOp* tree) // Arguments: // tree - conditional op // -void CodeGen::genCodeForCond(GenTreeOp* tree) +void CodeGen::genCodeForCond(GenTreeOpFlagsCC* tree) { - assert(tree->OperIs(GT_CSNEG_MI, GT_CNEG_LT)); - assert(!(tree->gtFlags & GTF_SET_FLAGS)); + assert(tree->OperIs(GT_CSNEG, GT_CNEG)); + assert(!tree->ProducesFlags()); genConsumeOperands(tree); + insCond cond = JumpKindToInsCond(GenConditionDesc::Get(tree->gtCondition).jumpKind1); + switch (tree->OperGet()) { - case GT_CSNEG_MI: + case GT_CSNEG: { - instruction ins = INS_csneg; - insCond cond = INS_COND_MI; + instruction ins = INS_csneg; regNumber dstReg = tree->GetRegNum(); regNumber op1Reg = tree->gtGetOp1()->GetRegNum(); @@ -10376,7 +10377,7 @@ void CodeGen::genCodeForCond(GenTreeOp* tree) break; } - case GT_CNEG_LT: + case GT_CNEG: { instruction ins = INS_cneg; insCond cond = INS_COND_LT; diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index c325e96e576f5..c734d90e1bb68 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -319,9 +319,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForBfiz(treeNode->AsOp()); break; - case GT_CSNEG_MI: - case GT_CNEG_LT: - genCodeForCond(treeNode->AsOp()); + case GT_CSNEG: + case GT_CNEG: + genCodeForCond(treeNode->AsOpFlagsCC()); break; #endif // TARGET_ARM64 @@ -370,11 +370,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) #endif // TARGET_ARM64 case GT_JCC: - genCodeForJcc(treeNode->AsCC()); + genCodeForJcc(treeNode->AsFlagsCC()); break; case GT_SETCC: - genCodeForSetcc(treeNode->AsCC()); + genCodeForSetcc(treeNode->AsFlagsCC()); break; case GT_RETURNTRAP: diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 3051aec3b67f1..6d2eb7429befc 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2595,7 +2595,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) // Arguments: // jcc - The node // -void CodeGen::genCodeForJcc(GenTreeCC* jcc) +void CodeGen::genCodeForJcc(GenTreeFlagsCC* jcc) { assert(compiler->compCurBB->bbJumpKind == BBJ_COND); assert(jcc->OperIs(GT_JCC)); @@ -2638,7 +2638,7 @@ void CodeGen::inst_JCC(GenCondition condition, BasicBlock* target) // Arguments: // setcc - The node // -void CodeGen::genCodeForSetcc(GenTreeCC* setcc) +void CodeGen::genCodeForSetcc(GenTreeFlagsCC* setcc) { assert(setcc->OperIs(GT_SETCC)); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index dedc5a6043a20..7132088519126 100644 --- 
a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1387,7 +1387,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* select) } else { - cc = select->AsOpCC()->gtCondition; + cc = select->AsOpFlagsCC()->gtCondition; } // The usual codegen will be @@ -1848,11 +1848,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; case GT_JCC: - genCodeForJcc(treeNode->AsCC()); + genCodeForJcc(treeNode->AsFlagsCC()); break; case GT_SETCC: - genCodeForSetcc(treeNode->AsCC()); + genCodeForSetcc(treeNode->AsFlagsCC()); break; case GT_SELECT: @@ -6801,8 +6801,22 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT } else { - consumer = genTryFindFlagsConsumer(tree, &mutableCond); - if (consumer == nullptr) + LIR::Use flagsUse; + if (!LIR::AsRange(compiler->compCurBB).TryGetFlagsUse(tree, &flagsUse)) + { + return false; + } + + GenTree* user = flagsUse.User(); + if (user->OperIs(GT_SELECTCC)) + { + mutableCond = &user->AsOpFlagsCC()->gtCondition; + } + else if (user->OperIs(GT_JCC, GT_SETCC)) + { + mutableCond = &user->AsFlagsCC()->gtCondition; + } + else { return false; } @@ -6829,51 +6843,6 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT return false; } -//------------------------------------------------------------------------ -// genTryFindFlagsConsumer: Given a node that produces flags, try to look ahead -// for the node that consumes those flags. -// -// Parameters: -// producer - the node that produces CPU flags -// cond - [out] the pointer to the condition inside that consumer. -// -// Returns: -// A node that consumes the flags, or nullptr if no such node was found. -// -GenTree* CodeGen::genTryFindFlagsConsumer(GenTree* producer, GenCondition** cond) -{ - assert(producer->ProducesFlags()); - // We allow skipping some nodes where we know for sure that the flags are - // not consumed. In particular we handle resolution nodes. If we see any - // other node after the compare (which is an uncommon case, happens - // sometimes with decomposition) then we assume it could consume the flags. - for (GenTree* candidate = producer->gtNext; candidate != nullptr; candidate = candidate->gtNext) - { - if (candidate->OperIs(GT_JCC, GT_SETCC)) - { - *cond = &candidate->AsCC()->gtCondition; - return candidate; - } - - if (candidate->OperIs(GT_SELECTCC)) - { - *cond = &candidate->AsOpCC()->gtCondition; - return candidate; - } - - // The following nodes can be inserted between the compare and the user - // of the flags by resolution. Codegen for these will never modify CPU - // flags. - if (!candidate->OperIs(GT_LCL_VAR, GT_COPY, GT_SWAP)) - { - // For other nodes we do the conservative thing. - return nullptr; - } - } - - return nullptr; -} - #if !defined(TARGET_64BIT) //------------------------------------------------------------------------ // genLongToIntCast: Generate code for long to int casts on x86. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 70316d599bda8..0c22dbcd78f70 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2414,8 +2414,10 @@ class Compiler // For binary opers. 
GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); - GenTreeCC* gtNewCC(genTreeOps oper, var_types type, GenCondition cond); - GenTreeOpCC* gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2); + GenTreeOpFlags* gtNewOperFlags(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, GenTree* opFlags); + GenTreeFlagsCC* gtNewFlagsCC(genTreeOps oper, var_types type, GenTree* opFlags, GenCondition cond); + GenTreeOpFlagsCC* gtNewOperFlagsCC( + genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, GenTree* opFlags, GenCondition cond); GenTreeColon* gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode); GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 94dcf70963a41..077fff70d31e2 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4085,7 +4085,7 @@ void GenTree::VisitOperands(TVisitor visitor) // Standard unary operators #ifdef TARGET_ARM64 - case GT_CNEG_LT: + case GT_CNEG: #endif // TARGET_ARM64 case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 64c5e32855520..c6b0f76a7db69 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -927,25 +927,22 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use) loResult->AsOp()->gtOp1 = loOp1; GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT); + // The zero may modify flags, so keep it before we start the instructions that produce/consume flags. + Range().InsertBefore(loResult, zero); #if defined(TARGET_X86) - GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero); + loResult->SetProducesFlags(); + GenTree* hiAdjust = m_compiler->gtNewOperFlags(GT_ADD_HI, TYP_INT, hiOp1, zero, loResult); GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust); - Range().InsertAfter(loResult, zero, hiAdjust, hiResult); - - loResult->gtFlags |= GTF_SET_FLAGS; + Range().InsertAfter(loResult, hiAdjust, hiResult); #elif defined(TARGET_ARM) - // We tend to use "movs" to load zero to a register, and that sets the flags, so put the - // zero before the loResult, which is setting the flags needed by GT_SUB_HI. 
- GenTree* hiResult = m_compiler->gtNewOperNode(GT_SUB_HI, TYP_INT, zero, hiOp1); - Range().InsertBefore(loResult, zero); + loResult->SetProducesFlags(); + GenTree* hiResult = m_compiler->gtNewOperFlags(GT_SUB_HI, TYP_INT, zero, hiOp1, loResult); Range().InsertAfter(loResult, hiResult); - loResult->gtFlags |= GTF_SET_FLAGS; - #endif return FinalizeDecomposition(use, loResult, hiResult, hiResult); @@ -993,12 +990,12 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use) loResult->AsOp()->gtOp1 = loOp1; loResult->AsOp()->gtOp2 = loOp2; - GenTree* hiResult = new (m_compiler, oper) GenTreeOp(GetHiOper(oper), TYP_INT, hiOp1, hiOp2); - Range().InsertAfter(loResult, hiResult); + GenTree* hiResult; if ((oper == GT_ADD) || (oper == GT_SUB)) { - loResult->gtFlags |= GTF_SET_FLAGS; + loResult->SetProducesFlags(); + hiResult = m_compiler->gtNewOperFlags(GetHiOper(oper), TYP_INT, hiOp1, hiOp2, loResult); if ((loResult->gtFlags & GTF_OVERFLOW) != 0) { @@ -1010,6 +1007,12 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use) hiResult->gtFlags |= GTF_UNSIGNED; } } + else + { + hiResult = m_compiler->gtNewOperNode(GetHiOper(oper), TYP_INT, hiOp1, hiOp2); + } + + Range().InsertAfter(loResult, hiResult); return FinalizeDecomposition(use, loResult, hiResult, hiResult); } @@ -1211,7 +1214,7 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use) // // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything that // feeds the lo operand while there are no side effects) - if ((loOp1->gtFlags & (GTF_ALL_EFFECT | GTF_SET_FLAGS)) == 0) + if (((loOp1->gtFlags & GTF_ALL_EFFECT) == 0) && !loOp1->ProducesFlags()) { Range().Remove(loOp1, true); } @@ -1280,7 +1283,7 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use) // // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything that // feeds the lo operand while there are no side effects) - if ((loOp1->gtFlags & (GTF_ALL_EFFECT | GTF_SET_FLAGS)) == 0) + if (((loOp1->gtFlags & GTF_ALL_EFFECT) == 0) && !loOp1->ProducesFlags()) { Range().Remove(loOp1, true); } @@ -1547,8 +1550,9 @@ GenTree* DecomposeLongs::DecomposeSelect(LIR::Use& use) // flags, but for the "upper half" we treat the lower GT_SELECT similar to // other flag producing nodes and reuse them. GT_SELECTCC is the variant // that uses existing flags and has no condition as part of it. - select->gtFlags |= GTF_SET_FLAGS; - GenTree* hiSelect = m_compiler->gtNewOperCC(GT_SELECTCC, TYP_INT, GenCondition::NE, hiOp1, hiOp2); + select->SetProducesFlags(); + GenTree* hiSelect = + m_compiler->gtNewOperFlagsCC(GT_SELECTCC, TYP_INT, hiOp1, hiOp2, select, GenCondition(GenCondition::NE)); Range().InsertAfter(select, hiSelect); @@ -1882,7 +1886,7 @@ GenTree* DecomposeLongs::OptimizeCastFromDecomposedLong(GenTreeCast* cast, GenTr // TODO-CQ: we could go perform this removal transitively. // See also identical code in shift decomposition. 
- if ((hiSrc->gtFlags & (GTF_ALL_EFFECT | GTF_SET_FLAGS)) == 0) + if (((hiSrc->gtFlags & GTF_ALL_EFFECT) == 0) && !hiSrc->ProducesFlags()) { JITDUMP("Removing the HI part of [%06u] and marking its operands unused:\n", src->gtTreeID); DISPNODE(hiSrc); @@ -1892,6 +1896,7 @@ GenTree* DecomposeLongs::OptimizeCastFromDecomposedLong(GenTreeCast* cast, GenTr { JITDUMP("The HI part of [%06u] has side effects, marking it unused\n", src->gtTreeID); hiSrc->SetUnusedValue(); + hiSrc->ClearProducesFlags(); } JITDUMP("Removing the LONG source:\n"); @@ -2044,19 +2049,14 @@ genTreeOps DecomposeLongs::GetHiOper(genTreeOps oper) { case GT_ADD: return GT_ADD_HI; - break; case GT_SUB: return GT_SUB_HI; - break; case GT_OR: return GT_OR; - break; case GT_AND: return GT_AND; - break; case GT_XOR: return GT_XOR; - break; default: assert(!"GetHiOper called for invalid oper"); return GT_NONE; diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index dc1ea3a71d849..16fffff307f7d 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -8155,7 +8155,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, } } - if (dst->gtSetFlags()) + if (dst->ProducesFlags()) { assert((ins == INS_add) || (ins == INS_adc) || (ins == INS_sub) || (ins == INS_sbc) || (ins == INS_and) || (ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn) || (ins == INS_bic)); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index ffdc0bd4c7ba7..06f83911fb301 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2591,10 +2591,20 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block) bool isClosed; unsigned sideEffects; - LIR::ReadOnlyRange testRange = blockRange.GetTreeRange(test, &isClosed, &sideEffects); + LIR::ReadOnlyRange testRange; + + if (test->OperIs(GT_JCC)) + { + assert(test->gtPrev->ProducesFlags()); + test->gtPrev->ClearProducesFlags(); + testRange = blockRange.GetTreeRange(test->gtPrev, &isClosed, &sideEffects); + testRange = LIR::ReadOnlyRange(testRange.FirstNode(), test); + } + else + { + testRange = blockRange.GetTreeRange(test, &isClosed, &sideEffects); + } - // TODO-LIR: this should really be checking GTF_ALL_EFFECT, but that produces unacceptable - // diffs compared to the existing backend. 
if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0)) { // If the jump and its operands form a contiguous, side-effect-free range, diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ffb1566758393..26539ee972fdb 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -311,8 +311,9 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeCC) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeOpCC) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeFlagsCC) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeOpFlags) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeOpFlagsCC) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node @@ -3455,8 +3456,8 @@ GenTree* Compiler::gtReverseCond(GenTree* tree) } else if (tree->OperIs(GT_JCC, GT_SETCC)) { - GenTreeCC* cc = tree->AsCC(); - cc->gtCondition = GenCondition::Reverse(cc->gtCondition); + GenTreeFlagsCC* cc = tree->AsFlagsCC(); + cc->gtCondition = GenCondition::Reverse(cc->gtCondition); } else if (tree->OperIs(GT_JCMP)) { @@ -6165,7 +6166,7 @@ bool GenTree::TryGetUse(GenTree* operand, GenTree*** pUse) // Standard unary operators #ifdef TARGET_ARM64 - case GT_CNEG_LT: + case GT_CNEG: #endif // TARGET_ARM64 case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: @@ -6944,15 +6945,22 @@ GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, return node; } -GenTreeCC* Compiler::gtNewCC(genTreeOps oper, var_types type, GenCondition cond) +GenTreeOpFlags* Compiler::gtNewOperFlags(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, GenTree* opFlags) { - GenTreeCC* node = new (this, oper) GenTreeCC(oper, type, cond); + GenTreeOpFlags* node = new (this, oper) GenTreeOpFlags(oper, type, op1, op2, opFlags); return node; } -GenTreeOpCC* Compiler::gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2) +GenTreeFlagsCC* Compiler::gtNewFlagsCC(genTreeOps oper, var_types type, GenTree* opFlags, GenCondition cond) { - GenTreeOpCC* node = new (this, oper) GenTreeOpCC(oper, type, cond, op1, op2); + GenTreeFlagsCC* node = new (this, oper) GenTreeFlagsCC(oper, type, opFlags, cond); + return node; +} + +GenTreeOpFlagsCC* Compiler::gtNewOperFlagsCC( + genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, GenTree* opFlags, GenCondition cond) +{ + GenTreeOpFlagsCC* node = new (this, oper) GenTreeOpFlagsCC(oper, type, op1, op2, opFlags, cond); return node; } @@ -9477,7 +9485,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) // Standard unary operators #ifdef TARGET_ARM64 - case GT_CNEG_LT: + case GT_CNEG: #endif // TARGET_ARM64 case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: @@ -10722,14 +10730,24 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_ // two additional characters for alignment. 
if (!hasOperands) { - msgLength += 1; + msgLength++; } - if (tree->IsValue()) + const char* prefix; + if (tree->IsValue() && tree->ProducesFlags()) + prefix = "t/f"; + else if (tree->IsValue()) + prefix = "t"; + else if (tree->ProducesFlags()) + prefix = "f"; + else + prefix = nullptr; + + if (prefix != nullptr) { const size_t bufLength = msgLength - 1; msg = reinterpret_cast(_alloca(bufLength * sizeof(char))); - sprintf_s(const_cast(msg), bufLength, "t%d = %s", tree->gtTreeID, hasOperands ? "" : " "); + sprintf_s(const_cast(msg), bufLength, "%s%d = %s", prefix, tree->gtTreeID, hasOperands ? "" : " "); } } @@ -11708,7 +11726,7 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) case GT_JCC: case GT_SETCC: - printf(" cond=%s", tree->AsCC()->gtCondition.Name()); + printf(" cond=%s", tree->AsFlagsCC()->gtCondition.Name()); break; case GT_JCMP: printf(" cond=%s%s", (tree->gtFlags & GTF_JCMP_TST) ? "TEST_" : "", @@ -12066,7 +12084,7 @@ void Compiler::gtDispTree(GenTree* tree, } else if (tree->OperIs(GT_SELECTCC)) { - printf(" cond=%s", tree->AsOpCC()->gtCondition.Name()); + printf(" cond=%s", tree->AsOpFlagsCC()->gtCondition.Name()); } gtDispCommonEndLine(tree); @@ -12637,8 +12655,8 @@ void Compiler::gtDispTreeRange(LIR::Range& containingRange, GenTree* tree) // void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr */) { - auto displayOperand = [](GenTree* operand, const char* message, IndentInfo operandArc, IndentStack& indentStack, - size_t prefixIndent) { + auto displayOperand = [node](GenTree* operand, const char* message, IndentInfo operandArc, IndentStack& indentStack, + size_t prefixIndent) { assert(operand != nullptr); assert(message != nullptr); @@ -12655,7 +12673,9 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr * indentStack.Pop(); operandArc = IIArc; - printf(" t%-5d %-6s %s\n", operand->gtTreeID, varTypeName(operand->TypeGet()), message); + bool isFlagsUse = operand == node->gtGetOpFlagsIfPresent(); + printf(" %s%-5d %-6s %s\n", isFlagsUse ? "f" : "t", operand->gtTreeID, + isFlagsUse ? "flags" : varTypeName(operand->TypeGet()), message); }; IndentStack indentStack(this); @@ -12748,6 +12768,13 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr * operandArc = IIArc; } + GenTree* opFlags = node->gtGetOpFlagsIfPresent(); + if (opFlags != nullptr) + { + displayOperand(opFlags, "", operandArc, indentStack, prefixIndent); + operandArc = IIArc; + } + // Visit the operator if (prefixMsg != nullptr) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 52da07292e8ab..a2871962981d5 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1088,7 +1088,8 @@ struct GenTree inline void SetUnusedValue(); inline void ClearUnusedValue(); inline bool IsUnusedValue() const; - // Indicates whether this node produces CPU flags consumed by a subsequent node. + // Indicates whether this node produces CPU flags consumed by a subsequent node. Used by codegen also to + // emit variants of instructions that produce CPU flags (e.g. add -> adds on arm64). 
inline void SetProducesFlags(); inline void ClearProducesFlags(); inline bool ProducesFlags() const; @@ -1689,15 +1690,6 @@ struct GenTree return OperIs(GT_JTRUE, GT_JCMP, GT_JCC); } - bool OperConsumesFlags() const - { -#if !defined(TARGET_64BIT) - if (OperIs(GT_ADD_HI, GT_SUB_HI)) - return true; -#endif - return OperIs(GT_JCC, GT_SETCC, GT_SELECTCC); - } - #ifdef DEBUG static const GenTreeDebugOperKind gtDebugOperKindTable[]; @@ -1792,6 +1784,10 @@ struct GenTree // The returned pointer might be nullptr if the node is not binary, or if non-null op2 is not required. inline GenTree* gtGetOp2IfPresent() const; + inline GenTree** gtGetOpFlagsEdgeIfPresent(); + + inline GenTree* gtGetOpFlagsIfPresent() const; + inline GenTree*& Data(); bool TryGetUse(GenTree* operand, GenTree*** pUse); @@ -8528,37 +8524,56 @@ struct GenCondition } }; -// Represents a GT_JCC or GT_SETCC node. -struct GenTreeCC final : public GenTree +// Represents a node with a flags operand and a condition. +struct GenTreeFlagsCC : public GenTree { + GenTree* gtOpFlags; GenCondition gtCondition; - GenTreeCC(genTreeOps oper, var_types type, GenCondition condition) - : GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) + GenTreeFlagsCC(genTreeOps oper, var_types type, GenTree* opFlags, GenCondition condition) + : GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE)), gtOpFlags(opFlags), gtCondition(condition) { assert(OperIs(GT_JCC, GT_SETCC)); } #if DEBUGGABLE_GENTREE - GenTreeCC() : GenTree() + GenTreeFlagsCC() : GenTree() { } #endif // DEBUGGABLE_GENTREE }; -// Represents a node with two operands and a condition. -struct GenTreeOpCC final : public GenTreeOp +// Represents a node with up to two value operands and a flags operand. +struct GenTreeOpFlags : public GenTreeOp +{ + GenTree* gtOpFlags; + + GenTreeOpFlags(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, GenTree* opFlags) + : GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE)), gtOpFlags(opFlags) + { + } + +#if DEBUGGABLE_GENTREE + GenTreeOpFlags() : GenTreeOp() + { + } +#endif // DEBUGGABLE_GENTREE +}; + +// Represents a node with up to two value operands, a flags operand, and a condition. +struct GenTreeOpFlagsCC : public GenTreeOpFlags { GenCondition gtCondition; - GenTreeOpCC(genTreeOps oper, var_types type, GenCondition condition, GenTree* op1 = nullptr, GenTree* op2 = nullptr) - : GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) + GenTreeOpFlagsCC( + genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, GenTree* opFlags, GenCondition condition) + : GenTreeOpFlags(oper, type, op1, op2, opFlags), gtCondition(condition) { assert(OperIs(GT_SELECTCC)); } #if DEBUGGABLE_GENTREE - GenTreeOpCC() : GenTreeOp() + GenTreeOpFlagsCC() : GenTreeOpFlags() { } #endif // DEBUGGABLE_GENTREE @@ -8977,6 +8992,41 @@ inline GenTree* GenTree::gtGetOp2IfPresent() const return op2; } +inline GenTree** GenTree::gtGetOpFlagsEdgeIfPresent() +{ + if (OperIs(GT_SETCC, GT_JCC)) + { + return &AsFlagsCC()->gtOpFlags; + } + + if (OperIs(GT_SELECTCC)) + { + return &AsOpFlagsCC()->gtOpFlags; + } + +#ifdef TARGET_ARM64 + if (OperIs(GT_CSNEG, GT_CNEG)) + { + return &AsOpFlagsCC()->gtOpFlags; + } +#endif + +#ifndef TARGET_64BIT + if (OperIs(GT_ADD_HI, GT_SUB_HI)) + { + return &AsOpFlags()->gtOpFlags; + } +#endif + + return nullptr; +} + +inline GenTree* GenTree::gtGetOpFlagsIfPresent() const +{ + GenTree** ptr = const_cast(this)->gtGetOpFlagsEdgeIfPresent(); + return ptr == nullptr ? 
nullptr : *ptr; +} + inline GenTree*& GenTree::Data() { assert(OperIsStore() && (OperIsIndir() || OperIsLocal())); diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index eb734e7ec767f..89bc61b6036c1 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -171,9 +171,9 @@ GTNODE(LONG , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // to not be modified in phases post-decompose, and operators that return 64-bit // results in one instruction. GTNODE(ADD_LO , GenTreeOp ,1,GTK_BINOP|DBK_NOTHIR) -GTNODE(ADD_HI , GenTreeOp ,1,GTK_BINOP|DBK_NOTHIR) +GTNODE(ADD_HI , GenTreeOpFlags ,1,GTK_BINOP|DBK_NOTHIR) GTNODE(SUB_LO , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) -GTNODE(SUB_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) +GTNODE(SUB_HI , GenTreeOpFlags ,0,GTK_BINOP|DBK_NOTHIR) // The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG // contains the hi and lo parts of three operand shift form where one op will be @@ -220,8 +220,10 @@ GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) #ifdef TARGET_ARM64 GTNODE(BFIZ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero. -GTNODE(CSNEG_MI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select, negate, minus result -GTNODE(CNEG_LT , GenTreeOp ,0,GTK_UNOP|DBK_NOTHIR) // Conditional, negate, signed less than result +// Conditional select negation: value = cond ? op1 : -op2 +GTNODE(CSNEG , GenTreeOpFlagsCC ,0,GTK_BINOP|DBK_NOTHIR) +// Conditional negate: value = cond ? op : -op +GTNODE(CNEG , GenTreeOpFlagsCC ,0,GTK_UNOP|DBK_NOTHIR) #endif //----------------------------------------------------------------------------- @@ -239,11 +241,11 @@ GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHI // Makes a comparison and jump if the condition specified. Does not set flags. GTNODE(JCMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) // Checks the condition flags and branch if the condition specified by GenTreeCC::gtCondition is true. -GTNODE(JCC , GenTreeCC ,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) +GTNODE(JCC , GenTreeFlagsCC ,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Checks the condition flags and produces 1 if the condition specified by GenTreeCC::gtCondition is true and 0 otherwise. -GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR) +GTNODE(SETCC , GenTreeFlagsCC ,0,GTK_LEAF|DBK_NOTHIR) // Variant of SELECT that reuses flags computed by a previous node with the specified condition. 
-GTNODE(SELECTCC , GenTreeCC ,0,GTK_BINOP|DBK_NOTHIR)
+GTNODE(SELECTCC , GenTreeOpFlagsCC ,0,GTK_BINOP|DBK_NOTHIR)

 //-----------------------------------------------------------------------------

diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h
index 052a6d09a8812..5ef7fb1a2be06 100644
--- a/src/coreclr/jit/gtstructs.h
+++ b/src/coreclr/jit/gtstructs.h
@@ -111,8 +111,15 @@ GTSTRUCT_1(HWIntrinsic , GT_HWINTRINSIC)
 GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
 GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
 GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
-GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
-GTSTRUCT_1(OpCC , GT_SELECTCC)
+GTSTRUCT_2(FlagsCC , GT_JCC, GT_SETCC)
+#if !defined(TARGET_64BIT)
+GTSTRUCT_2(OpFlags , GT_ADD_HI, GT_SUB_HI)
+#endif
+#if defined(TARGET_ARM64)
+GTSTRUCT_N(OpFlagsCC , GT_SELECTCC, GT_CSNEG, GT_CNEG)
+#else
+GTSTRUCT_1(OpFlagsCC , GT_SELECTCC)
+#endif
 #if defined(TARGET_X86)
 GTSTRUCT_1(MultiRegOp , GT_MUL_LONG)
 #elif defined (TARGET_ARM)
diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp
index da133a43aeb97..6e9473f05cb09 100644
--- a/src/coreclr/jit/lir.cpp
+++ b/src/coreclr/jit/lir.cpp
@@ -126,8 +126,15 @@ void LIR::Use::AssertIsValid() const
     assert(Def() != nullptr);

     GenTree** useEdge = nullptr;
-    assert(m_user->TryGetUse(Def(), &useEdge));
-    assert(useEdge == m_edge);
+    if (m_user->TryGetUse(Def(), &useEdge))
+    {
+        assert(useEdge == m_edge);
+    }
+    else
+    {
+        GenTree** flagsEdge = m_user->gtGetOpFlagsEdgeIfPresent();
+        assert((flagsEdge != nullptr) && (flagsEdge == m_edge));
+    }
 }

 //------------------------------------------------------------------------
@@ -933,6 +940,12 @@ void LIR::Range::Remove(GenTree* node, bool markOperandsUnused)
             }
             return GenTree::VisitResult::Continue;
         });
+
+        GenTree* opFlags = node->gtGetOpFlagsIfPresent();
+        if (opFlags != nullptr)
+        {
+            opFlags->ClearProducesFlags();
+        }
     }

     GenTree* prev = node->gtPrev;
@@ -1130,6 +1143,39 @@ bool LIR::Range::TryGetUse(GenTree* node, Use* use)
     return false;
 }

+//------------------------------------------------------------------------
+// LIR::Range::TryGetFlagsUse: Try to find the flags use for a given node.
+//
+// Arguments:
+//    node - The node for which to find the corresponding flags use.
+//    use (out) - The use of the corresponding node, if any. Invalid if
+//                this method returns false.
+//
+// Return Value: Returns true if a flags use was found; false otherwise.
+//
+bool LIR::Range::TryGetFlagsUse(GenTree* node, Use* use)
+{
+    assert(node != nullptr);
+    assert(use != nullptr);
+    assert(Contains(node));
+
+    if (node->ProducesFlags() && (node != LastNode()))
+    {
+        for (GenTree* n : ReadOnlyRange(node->gtNext, m_lastNode))
+        {
+            GenTree** flagsEdge = n->gtGetOpFlagsEdgeIfPresent();
+            if ((flagsEdge != nullptr) && (*flagsEdge == node))
+            {
+                *use = Use(*this, flagsEdge, n);
+                return true;
+            }
+        }
+    }
+
+    *use = Use();
+    return false;
+}
+
 //------------------------------------------------------------------------
 // LIR::Range::GetTreeRange: Computes the subrange that includes all nodes
 //                           in the dataflow trees rooted at a particular
@@ -1156,12 +1202,14 @@ bool LIR::Range::TryGetUse(GenTree* node, Use* use)
 //    return firstNode
 //
 // Instead of using a set for the worklist, the implementation uses the
-// `LIR::Mark` bit of the `GenTree::LIRFlags` field to track whether or
+// `LIR::Mark` bits of the `GenTree::LIRFlags` field to track whether or
 // not a node is in the worklist.
 //
-// Note also that this algorithm depends LIR nodes being SDSU, SDSU defs
-// and uses occurring in the same block, and correct dataflow (i.e. defs
-// occurring before uses).
+// Note also that this algorithm depends on LIR nodes being SDSU, SDSU defs and
+// uses occurring in the same block, and correct dataflow (i.e. defs occurring
+// before uses). The exception is for flags nodes that can have two uses (a
+// value use and a flags use). The second mark bit is used to indicate that a
+// node was used as a flags operand.
 //
 // Arguments:
 //    root - The root of the dataflow tree.
@@ -1172,10 +1220,10 @@ bool LIR::Range::TryGetUse(GenTree* node, Use* use)
 // Returns:
 //    The computed subrange.
 //
-LIR::ReadOnlyRange LIR::Range::GetMarkedRange(unsigned markCount,
-                                              GenTree* start,
-                                              bool* isClosed,
-                                              unsigned* sideEffects) const
+LIR::ReadOnlyRange LIR::Range::GetDataFlowRangeWithMarks(unsigned markCount,
+                                                         GenTree* start,
+                                                         bool* isClosed,
+                                                         unsigned* sideEffects) const
 {
     assert(markCount != 0);
     assert(start != nullptr);
@@ -1189,7 +1237,7 @@ LIR::ReadOnlyRange LIR::Range::GetMarkedRange(unsigned markCount,
     GenTree* lastNode = nullptr;
     for (;;)
     {
-        if ((firstNode->gtLIRFlags & LIR::Flags::Mark) != 0)
+        if ((firstNode->gtLIRFlags & (LIR::Flags::Mark | LIR::Flags::Mark2)) != 0)
         {
             if (lastNode == nullptr)
             {
@@ -1203,9 +1251,40 @@ LIR::ReadOnlyRange LIR::Range::GetMarkedRange(unsigned markCount,
                 return GenTree::VisitResult::Continue;
             });

-            // Unmark the node and update `firstNode`
-            firstNode->gtLIRFlags &= ~LIR::Flags::Mark;
-            markCount--;
+            GenTree* opFlags = firstNode->gtGetOpFlagsIfPresent();
+            if (opFlags != nullptr)
+            {
+                opFlags->gtLIRFlags |= LIR::Flags::Mark2;
+            }
+
+            if ((firstNode->gtLIRFlags & LIR::Flags::Mark) == 0)
+            {
+                if (firstNode->IsValue())
+                {
+                    // This node produces a value but we did not see the user.
+                    sawUnmarkedNode = true;
+                }
+            }
+            else
+            {
+                markCount--;
+            }
+
+            if ((firstNode->gtLIRFlags & LIR::Flags::Mark2) == 0)
+            {
+                if (firstNode->ProducesFlags())
+                {
+                    // This node produces flags but we did not see the user.
+ sawUnmarkedNode = true; + } + } + else + { + markCount--; + } + + // Unmark the node + firstNode->gtLIRFlags &= ~(LIR::Flags::Mark | LIR::Flags::Mark2); } else if (lastNode != nullptr) { @@ -1276,10 +1355,10 @@ LIR::ReadOnlyRange LIR::Range::GetTreeRange(GenTree* root, bool* isClosed, unsig assert(root != nullptr); // Mark the root of the tree - const unsigned markCount = 1; - root->gtLIRFlags |= LIR::Flags::Mark; + const unsigned markCount = 2; + root->gtLIRFlags |= LIR::Flags::Mark | LIR::Flags::Mark2; - return GetMarkedRange(markCount, root, isClosed, sideEffects); + return GetDataFlowRangeWithMarks(markCount, root, isClosed, sideEffects); } //------------------------------------------------------------------------ @@ -1312,6 +1391,13 @@ LIR::ReadOnlyRange LIR::Range::GetRangeOfOperandTrees(GenTree* root, bool* isClo return GenTree::VisitResult::Continue; }); + GenTree* flagsOp = root->gtGetOpFlagsIfPresent(); + if (flagsOp != nullptr) + { + flagsOp->gtLIRFlags |= LIR::Flags::Mark; + markCount++; + } + if (markCount == 0) { *isClosed = true; @@ -1319,7 +1405,7 @@ LIR::ReadOnlyRange LIR::Range::GetRangeOfOperandTrees(GenTree* root, bool* isClo return ReadOnlyRange(); } - return GetMarkedRange(markCount, root, isClosed, sideEffects); + return GetDataFlowRangeWithMarks(markCount, root, isClosed, sideEffects); } #ifdef DEBUG @@ -1401,28 +1487,8 @@ class CheckLclVarSemanticsHelper bool found = const_cast(range)->TryGetUse(readNode, &use); GenTree* user = found ? use.User() : nullptr; - for (GenTree* rangeNode : *range) - { - const char* prefix = nullptr; - if (rangeNode == readNode) - { - prefix = "read: "; - } - else if (rangeNode == node) - { - prefix = "write: "; - } - else if (rangeNode == user) - { - prefix = "user: "; - } - else - { - prefix = " "; - } - - compiler->gtDispLIRNode(rangeNode, prefix); - } + LIR::Range::DisplayLIR(compiler, range->FirstNode(), range->LastNode(), readNode, "read: ", + node, "write: ", user, "user: "); assert(!"Write to unaliased local overlaps outstanding read"); break; @@ -1516,6 +1582,85 @@ class CheckLclVarSemanticsHelper ArrayStack*> lclVarReadsMapsCache; }; +void LIR::Range::DisplayLIR(Compiler* compiler, + GenTree* first, + GenTree* last, + GenTree* node1, + const char* prefix1, + GenTree* node2, + const char* prefix2, + GenTree* node3, + const char* prefix3) +{ + assert((first != nullptr) || (last != nullptr)); + if (first == nullptr) + { + first = last; + for (int i = 0; i < 16 && (first->gtPrev != nullptr); i++) + { + first = first->gtPrev; + } + } + else + { + last = first; + for (int i = 0; i < 16 && (last->gtNext != nullptr); i++) + { + last = last->gtNext; + } + } + + size_t prefixLen = 0; + if (prefix1 != nullptr) + { + prefixLen = max(prefixLen, strlen(prefix1)); + } + if (prefix2 != nullptr) + { + prefixLen = max(prefixLen, strlen(prefix2)); + } + if (prefix3 != nullptr) + { + prefixLen = max(prefixLen, strlen(prefix3)); + } + + char spaces[64]; + for (size_t i = 0; i < prefixLen && i < ArrLen(spaces); i++) + spaces[i] = ' '; + spaces[min(ArrLen(spaces) - 1, prefixLen)] = '\0'; + + GenTree* node = first; + while (true) + { + const char* prefix = nullptr; + if (node == node1) + { + prefix = prefix1; + } + else if (node == node2) + { + prefix = prefix2; + } + else if (node == node3) + { + prefix = prefix3; + } + else + { + prefix = spaces; + } + + compiler->gtDispLIRNode(node, prefix); + + if (node == last) + { + break; + } + + node = node->gtNext; + } +} + //------------------------------------------------------------------------ // 
LIR::Range::CheckLIR: Performs a set of correctness checks on the LIR // contained in this range. @@ -1549,7 +1694,9 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const SmallHashTable unusedDefs(compiler->getAllocatorDebugOnly()); - GenTree* prev = nullptr; + GenTree* currentFlagsProducer = nullptr; + GenTree* prev = nullptr; + for (Iterator node = begin(), end = this->end(); node != end; prev = *node, ++node) { // Verify that the node is allowed in LIR. @@ -1612,6 +1759,55 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const } } + GenTree* flagsDef = node->gtGetOpFlagsIfPresent(); + if (flagsDef != nullptr) + { + if (flagsDef != currentFlagsProducer) + { + printf("found flags use of conflicting def\n"); + + DisplayLIR(compiler, nullptr, *node, *node, "flags use: "); + + assert(!"found flags use of conflicting def"); + } + + currentFlagsProducer = nullptr; + } + + if (currentFlagsProducer != nullptr) + { + bool interference = node->ProducesFlags(); + +#ifdef TARGET_XARCH + // We only allow a few interfering nodes for which we know their + // codegen does not modify flags. These occur in practice due to + // decomposition or after resolution. + interference |= + !node->OperIsLocal() && !node->OperIs(GT_COPY, GT_RELOAD, GT_SWAP) && + (!node->OperIs(GT_INTRINSIC) || (node->AsIntrinsic()->gtIntrinsicName != NI_SIMD_UpperRestore)); +#else + // Other platforms are more explicit about which instructions + // modify flags, and we only expect a few kinds of nodes to do that + // unless marked with ProducesFlags that is caught above. + interference |= node->OperIs(GT_CMP, GT_TEST); +#endif + + if (interference) + { + printf("found potentially interfering flags defs\n"); + + DisplayLIR(compiler, currentFlagsProducer, *node, currentFlagsProducer, "flags def 1: ", *node, + "flags def 2: "); + + assert(!"found potentially interfering flags defs"); + } + } + + if (node->ProducesFlags()) + { + currentFlagsProducer = *node; + } + if (node->IsValue()) { bool added = unusedDefs.AddOrUpdate(*node, true); @@ -1621,6 +1817,14 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const assert(prev == m_lastNode); + if (currentFlagsProducer != nullptr) + { + printf("expected to have used CPU flags at the end of the block\n"); + DisplayLIR(compiler, currentFlagsProducer, m_lastNode, currentFlagsProducer, "flags def: "); + + assert(!"expected to have used CPU flags at the end of the block"); + } + // At this point the unusedDefs map should contain only unused values. if (checkUnusedValues) { diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index 42d0d25a10e22..a6b11e7f38f70 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -27,24 +27,25 @@ class LIR final { None = 0x00, - // An arbitrary "mark" bit that can be used in place of a more - // expensive data structure when processing a set of LIR nodes. See - // for example `LIR::GetTreeRange`. - Mark = 0x01, - // Set on a node if it produces a value that is not subsequently // used. Should never be set on nodes that return `false` for // `GenTree::IsValue`. - UnusedValue = 0x02, + UnusedValue = 0x01, // Set on a node if it produces CPU flags that will be consumed by // a follow-up node. - ProducesFlags = 0x4, + ProducesFlags = 0x02, // Set on a node if it produces a value, but does not require a // register (i.e. it can be used from memory). See // IsSafeToMarkRegOptional for more information. 
- RegOptional = 0x08, + RegOptional = 0x04, + + // Arbitrary "mark" bits that can be used in place of a more + // expensive data structure when processing a set of LIR nodes. See + // for example `LIR::GetTreeRange`. + Mark = 0x80, + Mark2 = 0x40, }; }; @@ -246,6 +247,7 @@ class LIR final friend class LIR; friend struct BasicBlock; friend class Rationalizer; + friend class CheckLclVarSemanticsHelper; private: Range(GenTree* firstNode, GenTree* lastNode); @@ -253,7 +255,10 @@ class LIR final Range(const Range& other) = delete; Range& operator=(const Range& other) = delete; - ReadOnlyRange GetMarkedRange(unsigned markCount, GenTree* start, bool* isClosed, unsigned* sideEffects) const; + ReadOnlyRange GetDataFlowRangeWithMarks(unsigned markCount, + GenTree* start, + bool* isClosed, + unsigned* sideEffects) const; void FinishInsertBefore(GenTree* insertionPoint, GenTree* first, GenTree* last); void FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenTree* last); @@ -293,12 +298,22 @@ class LIR final void Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& range); bool TryGetUse(GenTree* node, Use* use); + bool TryGetFlagsUse(GenTree* node, Use* use); ReadOnlyRange GetTreeRange(GenTree* root, bool* isClosed) const; ReadOnlyRange GetTreeRange(GenTree* root, bool* isClosed, unsigned* sideEffects) const; ReadOnlyRange GetRangeOfOperandTrees(GenTree* root, bool* isClosed, unsigned* sideEffects) const; #ifdef DEBUG + static void DisplayLIR(Compiler* compiler, + GenTree* first, + GenTree* last, + GenTree* node1 = nullptr, + const char* prefix1 = nullptr, + GenTree* node2 = nullptr, + const char* prefix2 = nullptr, + GenTree* node3 = nullptr, + const char* prefix3 = nullptr); bool CheckLIR(Compiler* compiler, bool checkUnusedValues = false) const; #endif }; diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 0bc7789d990ac..b1e601a64afc4 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -2225,10 +2225,10 @@ bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange) return GenTree::VisitResult::Continue; }); - if (node->OperConsumesFlags()) + GenTree* opFlags = node->gtGetOpFlagsIfPresent(); + if (opFlags != nullptr) { - assert(node->gtPrev->ProducesFlags()); - node->gtPrev->ClearProducesFlags(); + opFlags->ClearProducesFlags(); } blockRange->Remove(node); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 57b6759c84fe7..40eff54c43a85 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -159,7 +159,7 @@ bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive) const return true; } - if (node->OperConsumesFlags()) + if (node->gtGetOpFlagsIfPresent() != nullptr) { return false; } @@ -208,7 +208,7 @@ bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive, GenTree* return true; } - if (node->OperConsumesFlags()) + if (node->gtGetOpFlagsIfPresent() != nullptr) { return false; } @@ -1197,7 +1197,7 @@ bool Lowering::TryLowerSwitchToBitTest( GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); bitTest->SetProducesFlags(); - GenTreeCC* jcc = comp->gtNewCC(GT_JCC, TYP_VOID, bbSwitchCondition); + GenTreeFlagsCC* jcc = comp->gtNewFlagsCC(GT_JCC, TYP_VOID, bitTest, bbSwitchCondition); LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); @@ -2817,6 +2817,8 @@ GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) } hiCmp = 
comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp); + hiCmp->SetProducesFlags(); + hiCmp->SetUnusedValue(); BlockRange().InsertBefore(cmp, hiCmp); ContainCheckBinary(hiCmp->AsOp()); } @@ -2897,14 +2899,17 @@ GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) } hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2); + hiCmp->SetProducesFlags(); BlockRange().InsertBefore(cmp, hiCmp); ContainCheckCompare(hiCmp->AsOp()); } else { loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2); - loCmp->gtFlags |= GTF_SET_FLAGS; - hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2); + loCmp->SetProducesFlags(); + hiCmp = comp->gtNewOperFlags(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2, loCmp); + hiCmp->SetProducesFlags(); + hiCmp->SetUnusedValue(); BlockRange().InsertBefore(cmp, loCmp, hiCmp); ContainCheckCompare(loCmp->AsOp()); ContainCheckBinary(hiCmp->AsOp()); @@ -2925,12 +2930,6 @@ GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) } } - hiCmp->gtFlags |= GTF_SET_FLAGS; - if (hiCmp->IsValue()) - { - hiCmp->SetUnusedValue(); - } - LIR::Use cmpUse; if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE)) { @@ -2939,14 +2938,16 @@ GenTree* Lowering::DecomposeLongCompare(GenTree* cmp) GenTree* jcc = cmpUse.User(); jcc->AsOp()->gtOp1 = nullptr; jcc->ChangeOper(GT_JCC); - jcc->AsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned()); + jcc->AsFlagsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned()); + jcc->AsFlagsCC()->gtOpFlags = hiCmp; } else { cmp->AsOp()->gtOp1 = nullptr; cmp->AsOp()->gtOp2 = nullptr; cmp->ChangeOper(GT_SETCC); - cmp->AsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned()); + cmp->AsFlagsCC()->gtCondition = GenCondition::FromIntegralRelop(condition, cmp->IsUnsigned()); + cmp->AsFlagsCC()->gtOpFlags = hiCmp; } return cmp->gtNext; @@ -3317,12 +3318,14 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) assert(relop->gtNext == jtrue); assert(jtrue->gtNext == nullptr); + GenTree* opFlags; GenCondition cond; - bool lowered = TryLowerConditionToFlagsNode(jtrue, relop, &cond); + bool lowered = TryLowerConditionToFlagsDef(jtrue, relop, &opFlags, &cond); assert(lowered); // Should succeed since relop is right before jtrue - jtrue->SetOperRaw(GT_JCC); - jtrue->AsCC()->gtCondition = cond; + jtrue->SetOper(GT_JCC); + jtrue->AsFlagsCC()->gtOpFlags = opFlags; + jtrue->AsFlagsCC()->gtCondition = cond; return nullptr; } @@ -3375,12 +3378,14 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) // TODO-CQ: If we allowed multiple nodes to consume the same CPU flags then // we could do this on x86. We currently disable if-conversion for TYP_LONG // on 32-bit architectures because of this. + GenTree* flagsDef; GenCondition selectCond; - if (!select->ProducesFlags() && TryLowerConditionToFlagsNode(select, cond, &selectCond)) + if (!select->ProducesFlags() && TryLowerConditionToFlagsDef(select, cond, &flagsDef, &selectCond)) { - select->SetOperRaw(GT_SELECTCC); - GenTreeOpCC* newSelect = select->AsOpCC(); - newSelect->gtCondition = selectCond; + select->SetOper(GT_SELECTCC); + GenTreeOpFlagsCC* newSelect = select->AsOpFlagsCC(); + newSelect->gtOpFlags = flagsDef; + newSelect->gtCondition = selectCond; ContainCheckSelect(newSelect); return newSelect->gtNext; } @@ -3403,7 +3408,7 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) // Return Value: // True if relop was transformed and is now right before 'parent'; otherwise false. 
// -bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond) +bool Lowering::TryLowerConditionToFlagsDef(GenTree* parent, GenTree* condition, GenTree** flags, GenCondition* cond) { if (condition->OperIsCompare()) { @@ -3442,6 +3447,8 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, BlockRange().InsertBefore(parent, relopOp1); BlockRange().Remove(relop); BlockRange().Remove(relopOp2); + + *flags = relopOp1; } else { @@ -3475,6 +3482,8 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, BlockRange().Remove(relop); BlockRange().InsertBefore(parent, relop); } + + *flags = relop; } return true; @@ -3483,14 +3492,15 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, // TODO-Cleanup: Avoid creating these SETCC nodes in the first place. if (condition->OperIs(GT_SETCC)) { - assert(condition->gtPrev->ProducesFlags()); - GenTree* flagsProducer = condition->gtPrev; + GenTreeFlagsCC* setCC = condition->AsFlagsCC(); + GenTree* flagsProducer = setCC->gtOpFlags; if (!IsInvariantInRange(flagsProducer, parent, condition)) { return false; } - *cond = condition->AsCC()->gtCondition; + *flags = flagsProducer; + *cond = setCC->gtCondition; BlockRange().Remove(condition); BlockRange().Remove(flagsProducer); @@ -3517,7 +3527,7 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, // It's the caller's responsibility to change `node` such that it only // sets the condition flags, without producing a boolean value. // -GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) +GenTreeFlagsCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) { // Skip over a chain of EQ/NE(x, 0) relops. This may be present either // because `node` is not a relop and so it cannot be used directly by a @@ -3551,24 +3561,20 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) } } - GenTreeCC* cc = nullptr; + GenTreeFlagsCC* cc = nullptr; // Next may be null if `node` is not used. In that case we don't need to generate a SETCC node. if (next != nullptr) { if (next->OperIs(GT_JTRUE)) { - // If the instruction immediately following 'relop', i.e. 'next' is a conditional branch, - // it should always have 'relop' as its 'op1'. If it doesn't, then we have improperly - // constructed IL (the setting of a condition code should always immediately precede its - // use, since the JIT doesn't track dataflow for condition codes). Still, if it happens - // it's not our problem, it simply means that `node` is not used and can be removed. 
if (next->AsUnOp()->gtGetOp1() == relop) { assert(relop->OperIsCompare()); next->ChangeOper(GT_JCC); - cc = next->AsCC(); + cc = next->AsFlagsCC(); + cc->gtOpFlags = node; cc->gtCondition = condition; } } @@ -3580,7 +3586,7 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) if (BlockRange().TryGetUse(relop, &use)) { - cc = comp->gtNewCC(GT_SETCC, TYP_INT, condition); + cc = comp->gtNewFlagsCC(GT_SETCC, TYP_INT, node, condition); BlockRange().InsertAfter(node, cc); use.ReplaceWith(cc); } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index bb88daed053c7..66ba2e38d63c6 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -137,8 +137,8 @@ class Lowering final : public Phase GenTree* LowerCompare(GenTree* cmp); GenTree* LowerJTrue(GenTreeOp* jtrue); GenTree* LowerSelect(GenTreeConditional* cond); - bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond); - GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition); + bool TryLowerConditionToFlagsDef(GenTree* parent, GenTree* condition, GenTree** flags, GenCondition* cond); + GenTreeFlagsCC* LowerNodeCC(GenTree* node, GenCondition condition); void LowerJmpMethod(GenTree* jmp); void LowerRet(GenTreeUnOp* ret); void LowerStoreLocCommon(GenTreeLclVarCommon* lclVar); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index ecbfbfdf0a518..6923a9af09385 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -75,7 +75,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const emitAttr attr = emitActualTypeSize(childNode->TypeGet()); emitAttr size = EA_SIZE(attr); #ifdef TARGET_ARM - insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + insFlags flags = parentNode->ProducesFlags() ? 
INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif switch (parentNode->OperGet()) @@ -187,7 +187,7 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co if (!varTypeIsIntegral(childNode)) return false; - if ((childNode->gtFlags & GTF_SET_FLAGS) != 0) + if (childNode->ProducesFlags()) return false; if (childNode->OperMayOverflow() && childNode->gtOverflow()) @@ -203,7 +203,7 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co return false; } - if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + if (parentNode->ProducesFlags()) { // Cannot contain if the parent operation needs to set flags return false; @@ -263,7 +263,7 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co } } - if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + if (parentNode->ProducesFlags()) { // Cannot contain if the parent operation needs to set flags return false; @@ -319,7 +319,7 @@ bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) co } } - if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + if (parentNode->ProducesFlags()) { // Cannot contain if the parent operation needs to set flags return false; @@ -870,12 +870,14 @@ void Lowering::LowerModPow2(GenTree* node) BlockRange().InsertAfter(trueExpr, cnsZero); GenTree* const cmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, dividend2, cnsZero); - cmp->gtFlags |= GTF_SET_FLAGS; + cmp->SetProducesFlags(); BlockRange().InsertAfter(cnsZero, cmp); LowerNode(cmp); - mod->ChangeOper(GT_CNEG_LT); - mod->gtOp1 = trueExpr; + mod->SetOper(GT_CNEG); + mod->gtOp1 = trueExpr; + mod->AsOpFlagsCC()->gtOpFlags = cmp; + mod->AsOpFlagsCC()->gtCondition = GenCondition(GenCondition::SLT); } else { @@ -890,7 +892,7 @@ void Lowering::LowerModPow2(GenTree* node) // csneg reg0, reg1, reg0, mi GenTree* const neg = comp->gtNewOperNode(GT_NEG, type, dividend2); - neg->gtFlags |= GTF_SET_FLAGS; + neg->SetProducesFlags(); BlockRange().InsertAfter(trueExpr, neg); GenTreeIntCon* cns2 = comp->gtNewIconNode(divisorCnsValueMinusOne, type); @@ -900,9 +902,11 @@ void Lowering::LowerModPow2(GenTree* node) BlockRange().InsertAfter(cns2, falseExpr); LowerNode(falseExpr); - mod->ChangeOper(GT_CSNEG_MI); - mod->gtOp1 = trueExpr; - mod->gtOp2 = falseExpr; + mod->SetOper(GT_CSNEG); + mod->gtOp1 = trueExpr; + mod->gtOp2 = falseExpr; + mod->AsOpFlagsCC()->gtOpFlags = neg; + mod->AsOpFlagsCC()->gtCondition = GenCondition(GenCondition::S); } ContainCheckNode(mod); @@ -929,7 +933,7 @@ GenTree* Lowering::LowerAddForPossibleContainment(GenTreeOp* node) if (!varTypeIsIntegral(node)) return nullptr; - if (node->gtFlags & GTF_SET_FLAGS) + if (node->ProducesFlags()) return nullptr; if (node->gtOverflow()) @@ -958,14 +962,14 @@ GenTree* Lowering::LowerAddForPossibleContainment(GenTreeOp* node) c = op1; } - if (mul->OperIs(GT_MUL) && !(mul->gtFlags & GTF_SET_FLAGS) && varTypeIsIntegral(mul) && !mul->gtOverflow() && + if (mul->OperIs(GT_MUL) && !mul->ProducesFlags() && varTypeIsIntegral(mul) && !mul->gtOverflow() && !mul->isContained() && !c->isContained()) { GenTree* a = mul->gtGetOp1(); GenTree* b = mul->gtGetOp2(); // Transform "-a * b + c" to "c - a * b" - if (a->OperIs(GT_NEG) && !(a->gtFlags & GTF_SET_FLAGS) && !b->OperIs(GT_NEG) && !a->isContained() && + if (a->OperIs(GT_NEG) && !a->ProducesFlags() && !b->OperIs(GT_NEG) && !a->isContained() && !a->gtGetOp1()->isContained()) { mul->AsOp()->gtOp1 = a->gtGetOp1(); @@ -979,7 +983,7 @@ GenTree* Lowering::LowerAddForPossibleContainment(GenTreeOp* node) return node->gtNext; } 
// Transform "a * -b + c" to "c - a * b" - else if (b->OperIs(GT_NEG) && !(b->gtFlags & GTF_SET_FLAGS) && !a->OperIs(GT_NEG) && !b->isContained() && + else if (b->OperIs(GT_NEG) && !b->ProducesFlags() && !a->OperIs(GT_NEG) && !b->isContained() && !b->gtGetOp1()->isContained()) { mul->AsOp()->gtOp2 = b->gtGetOp1(); @@ -2513,7 +2517,7 @@ void Lowering::ContainCheckNeg(GenTreeOp* neg) if (!varTypeIsIntegral(neg)) return; - if ((neg->gtFlags & GTF_SET_FLAGS)) + if (neg->ProducesFlags()) return; GenTree* childNode = neg->gtGetOp1(); @@ -2529,7 +2533,7 @@ void Lowering::ContainCheckNeg(GenTreeOp* neg) if (!varTypeIsIntegral(childNode)) return; - if ((childNode->gtFlags & GTF_SET_FLAGS)) + if (childNode->ProducesFlags()) return; if (IsInvariantInRange(childNode, neg)) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index a38a8c52569b4..343befd8b6280 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -861,7 +861,7 @@ void Lowering::LowerCast(GenTree* tree) // void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition) { - GenTreeCC* cc = LowerNodeCC(node, condition); + GenTreeFlagsCC* cc = LowerNodeCC(node, condition); assert(HWIntrinsicInfo::lookupNumArgs(newIntrinsicId) == 2); node->ChangeHWIntrinsicId(newIntrinsicId); @@ -5915,7 +5915,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* select) if (select->OperIs(GT_SELECTCC)) { - GenCondition cc = select->AsOpCC()->gtCondition; + GenCondition cc = select->AsOpFlagsCC()->gtCondition; // op1 and op2 are emitted as two separate instructions due to the // conditional nature of cmov, so both operands can usually be diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index b600b73a0ab77..956c9579599e1 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1002,7 +1002,7 @@ int LinearScan::BuildSelect(GenTreeOp* select) // correctly). if (select->OperIs(GT_SELECTCC)) { - GenCondition cc = select->AsOpCC()->gtCondition; + GenCondition cc = select->AsOpFlagsCC()->gtCondition; switch (cc.GetCode()) { case GenCondition::FEQ: diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 5f65ea64e824b..dc34a89fb5a2c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -8603,18 +8603,25 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA op1->gtFlags |= GTF_DONT_CSE; break; -#ifdef DEBUG case GT_QMARK: case GT_JTRUE: - assert(op1); + noway_assert(op1); + + if (op1->OperIsCompare()) + { + /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does + not need to materialize the result as a 0 or 1. 
*/ - if (!op1->OperIsCompare()) + /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */ + op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE); + } + else { GenTree* effOp1 = op1->gtEffectiveVal(); - assert((effOp1->gtOper == GT_CNS_INT) && (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); + noway_assert((effOp1->gtOper == GT_CNS_INT) && + (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1))); } break; -#endif case GT_COLON: if (optLocalAssertionProp) From 3d41495d1c757c5b9b1eb2b9db25f0c1a6bba321 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 18 Feb 2023 23:41:17 +0100 Subject: [PATCH 4/9] Fix a wrong mark bit in GetRangeOfOperandTrees --- src/coreclr/jit/lir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp index 6e9473f05cb09..b2cc95543e1e5 100644 --- a/src/coreclr/jit/lir.cpp +++ b/src/coreclr/jit/lir.cpp @@ -1394,7 +1394,7 @@ LIR::ReadOnlyRange LIR::Range::GetRangeOfOperandTrees(GenTree* root, bool* isClo GenTree* flagsOp = root->gtGetOpFlagsIfPresent(); if (flagsOp != nullptr) { - flagsOp->gtLIRFlags |= LIR::Flags::Mark; + flagsOp->gtLIRFlags |= LIR::Flags::Mark2; markCount++; } From d29b2d15ac5d01c1e0def1087f7e4456379be991 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 18 Feb 2023 23:41:23 +0100 Subject: [PATCH 5/9] Remove some unnecessary code --- src/coreclr/jit/fgopt.cpp | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 06f83911fb301..e1ae97d2ea768 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2591,19 +2591,7 @@ void Compiler::fgRemoveConditionalJump(BasicBlock* block) bool isClosed; unsigned sideEffects; - LIR::ReadOnlyRange testRange; - - if (test->OperIs(GT_JCC)) - { - assert(test->gtPrev->ProducesFlags()); - test->gtPrev->ClearProducesFlags(); - testRange = blockRange.GetTreeRange(test->gtPrev, &isClosed, &sideEffects); - testRange = LIR::ReadOnlyRange(testRange.FirstNode(), test); - } - else - { - testRange = blockRange.GetTreeRange(test, &isClosed, &sideEffects); - } + LIR::ReadOnlyRange testRange = blockRange.GetTreeRange(test, &isClosed, &sideEffects); if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0)) { @@ -3845,22 +3833,7 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi bool isClosed; unsigned sideEffects; - LIR::ReadOnlyRange jmpRange; - - if (jmp->OperIs(GT_JCC)) - { - // For JCC we have an invariant until resolution that the - // previous node sets those CPU flags. 
-            GenTree* prevNode = jmp->gtPrev;
-            assert((prevNode != nullptr) && prevNode->ProducesFlags());
-            prevNode->ClearProducesFlags();
-            jmpRange = blockRange.GetTreeRange(prevNode, &isClosed, &sideEffects);
-            jmpRange = LIR::ReadOnlyRange(jmpRange.FirstNode(), jmp);
-        }
-        else
-        {
-            jmpRange = blockRange.GetTreeRange(jmp, &isClosed, &sideEffects);
-        }
+        LIR::ReadOnlyRange jmpRange = blockRange.GetTreeRange(jmp, &isClosed, &sideEffects);
 
         if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0))
         {

From c608b04fa578037223abef246b93a005058c8eac Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sat, 18 Feb 2023 23:48:22 +0100
Subject: [PATCH 6/9] Fix build on linux-x86

---
 src/coreclr/jit/lir.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp
index b2cc95543e1e5..8a2f237951b74 100644
--- a/src/coreclr/jit/lir.cpp
+++ b/src/coreclr/jit/lir.cpp
@@ -1782,9 +1782,15 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
                 // We only allow a few interfering nodes for which we know their
                 // codegen does not modify flags. These occur in practice due to
                 // decomposition or after resolution.
-                interference |=
-                    !node->OperIsLocal() && !node->OperIs(GT_COPY, GT_RELOAD, GT_SWAP) &&
-                    (!node->OperIs(GT_INTRINSIC) || (node->AsIntrinsic()->gtIntrinsicName != NI_SIMD_UpperRestore));
+                bool preservesFlags = false;
+                preservesFlags |= node->OperIsLocal();
+                preservesFlags |= node->OperIs(GT_COPY, GT_RELOAD, GT_SWAP);
+
+#ifdef FEATURE_SIMD
+                preservesFlags |=
+                    node->OperIs(GT_INTRINSIC) && node->AsIntrinsic()->gtIntrinsicName == NI_SIMD_UpperRestore;
+#endif
+                interference |= !preservesFlags;
 #else
                 // Other platforms are more explicit about which instructions
                 // modify flags, and we only expect a few kinds of nodes to do that

From eea4eacd99876f16d6a0e4adb1606345539c4bf0 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sun, 19 Feb 2023 00:31:59 +0100
Subject: [PATCH 7/9] Set consumer properly

---
 src/coreclr/jit/codegenxarch.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 7132088519126..5bbadca857cf7 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -6807,14 +6807,14 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT
         return false;
     }
 
-    GenTree* user = flagsUse.User();
-    if (user->OperIs(GT_SELECTCC))
+    consumer = flagsUse.User();
+    if (consumer->OperIs(GT_SELECTCC))
     {
-        mutableCond = &user->AsOpFlagsCC()->gtCondition;
+        mutableCond = &consumer->AsOpFlagsCC()->gtCondition;
     }
-    else if (user->OperIs(GT_JCC, GT_SETCC))
+    else if (consumer->OperIs(GT_JCC, GT_SETCC))
     {
-        mutableCond = &user->AsFlagsCC()->gtCondition;
+        mutableCond = &consumer->AsFlagsCC()->gtCondition;
     }
     else
     {

From a61885792e190dfe4c0d9255612ea2d2b4d60f00 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sun, 19 Feb 2023 01:28:19 +0100
Subject: [PATCH 8/9] Fix GetDataFlowRangeWithMarks markCount

---
 src/coreclr/jit/lir.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp
index 8a2f237951b74..5f80d0db5c18b 100644
--- a/src/coreclr/jit/lir.cpp
+++ b/src/coreclr/jit/lir.cpp
@@ -1255,6 +1255,7 @@ LIR::ReadOnlyRange LIR::Range::GetDataFlowRangeWithMarks(unsigned markCount,
     if (opFlags != nullptr)
     {
         opFlags->gtLIRFlags |= LIR::Flags::Mark2;
+        markCount++;
     }
 
     if ((firstNode->gtLIRFlags & LIR::Flags::Mark) == 0)

From e6491fa555c9961a2811c0ca2dfe5a365befb07f Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sun, 19 Feb 2023 01:28:35 +0100
Subject: [PATCH 9/9] Fix OpFlagsCC gtstructs.h definition

---
 src/coreclr/jit/gtstructs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h
index 5ef7fb1a2be06..a0acf72ac5209 100644
--- a/src/coreclr/jit/gtstructs.h
+++ b/src/coreclr/jit/gtstructs.h
@@ -116,7 +116,7 @@ GTSTRUCT_2(FlagsCC , GT_JCC, GT_SETCC)
 GTSTRUCT_2(OpFlags , GT_ADD_HI, GT_SUB_HI)
 #endif
 #if defined(TARGET_ARM64)
-GTSTRUCT_N(OpFlagsCC , GT_SELECTCC, GT_CSNEG, GT_NEG)
+GTSTRUCT_N(OpFlagsCC , GT_SELECTCC, GT_CSNEG, GT_CNEG)
 #else
GTSTRUCT_1(OpFlagsCC , GT_SELECTCC)
 #endif
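
Note (not part of the patch series): the hunks above consistently pair a flags consumer with the node that defines the flags, through the new gtOpFlags and gtCondition fields on the OpFlagsCC nodes. Below is a minimal sketch of what that shape looks like when a GT_SELECT over a relop is lowered to GT_SELECTCC. TryLowerConditionToFlagsDef, SetOper, AsOpFlagsCC, gtOpFlags and gtCondition are the names introduced by the series; the function name, its exact signature and the surrounding control flow are assumptions for illustration only and do not appear in the patches.

// Illustrative sketch only: lower a GT_SELECT whose condition is a relop into
// GT_SELECTCC, recording the node that defines the CPU flags and the condition
// code under which op1 is chosen. This mirrors the LowerModPow2 changes above,
// which retype GT_CNEG/GT_CSNEG and fill in the same two fields.
GenTree* Lowering::LowerSelectToSelectCCSketch(GenTreeConditional* select)
{
    GenTree*     flagsDef = nullptr;
    GenCondition condition;

    // Assumed to lower 'gtCond' into a flags-producing node, report the matching
    // condition code, and unlink the condition operand's use (the renamed helper
    // in lower.h suggests this contract, but the patches do not show its body).
    if (TryLowerConditionToFlagsDef(select, select->gtCond, &flagsDef, &condition))
    {
        select->SetOper(GT_SELECTCC);
        select->AsOpFlagsCC()->gtOpFlags   = flagsDef;
        select->AsOpFlagsCC()->gtCondition = condition;
    }

    return select->gtNext;
}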