diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index c75ae055e52d7..87310728dd28b 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -904,7 +904,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     void genCompareInt(GenTree* treeNode);
 #ifdef TARGET_XARCH
     bool genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opType);
-    GenTreeCC* genTryFindFlagsConsumer(GenTree* flagsProducer);
+    GenTree* genTryFindFlagsConsumer(GenTree* flagsProducer, GenCondition** condition);
 #endif
 
 #ifdef FEATURE_SIMD
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index eed18f248c73e..348c597450589 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -1356,18 +1356,14 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
 }
 
 //------------------------------------------------------------------------
-// genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node.
+// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
 //
 // Arguments:
 //    select - the node
 //
 void CodeGen::genCodeForSelect(GenTreeOp* select)
 {
-#ifdef TARGET_X86
-    assert(select->OperIs(GT_SELECT, GT_SELECT_HI));
-#else
-    assert(select->OperIs(GT_SELECT));
-#endif
+    assert(select->OperIs(GT_SELECT, GT_SELECTCC));
 
     if (select->OperIs(GT_SELECT))
     {
@@ -1385,25 +1381,13 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
 
     if (select->OperIs(GT_SELECT))
     {
-        GenTree* cond = select->AsConditional()->gtCond;
-        if (cond->isContained())
-        {
-            assert(cond->OperIsCompare());
-            genCodeForCompare(cond->AsOp());
-            cc = GenCondition::FromRelop(cond);
-
-            if (cc.PreferSwap())
-            {
-                // genCodeForCompare generated the compare with swapped
-                // operands because this swap requires fewer branches/cmovs.
- cc = GenCondition::Swap(cc); - } - } - else - { - regNumber condReg = cond->GetRegNum(); - GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg); - } + GenTree* cond = select->AsConditional()->gtCond; + regNumber condReg = cond->GetRegNum(); + GetEmitter()->emitIns_R_R(INS_test, emitActualTypeSize(cond), condReg, condReg); + } + else + { + cc = select->AsOpCC()->gtCondition; } // The usual codegen will be @@ -1875,11 +1859,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForSelect(treeNode->AsConditional()); break; -#ifdef TARGET_X86 - case GT_SELECT_HI: + case GT_SELECTCC: genCodeForSelect(treeNode->AsOp()); break; -#endif case GT_RETURNTRAP: genCodeForReturnTrap(treeNode->AsOp()); @@ -6809,8 +6791,9 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT return false; } - GenTreeCC* cc = nullptr; - GenCondition cond; + GenTree* consumer = nullptr; + GenCondition* mutableCond = nullptr; + GenCondition cond; if (tree->OperIsCompare()) { @@ -6818,13 +6801,13 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT } else { - cc = genTryFindFlagsConsumer(tree); - if (cc == nullptr) + consumer = genTryFindFlagsConsumer(tree, &mutableCond); + if (consumer == nullptr) { return false; } - cond = cc->gtCondition; + cond = *mutableCond; } if (GetEmitter()->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(opType), cond)) @@ -6833,11 +6816,12 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT return true; } - if ((cc != nullptr) && GetEmitter()->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(opType), cond)) + if ((mutableCond != nullptr) && + GetEmitter()->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(opType), cond)) { JITDUMP("Not emitting compare due to sign being already set; modifying [%06u] to check sign flag\n", - Compiler::dspTreeID(cc)); - cc->gtCondition = + Compiler::dspTreeID(consumer)); + *mutableCond = (cond.GetCode() == 
GenCondition::SLT) ? GenCondition(GenCondition::S) : GenCondition(GenCondition::NS); return true; } @@ -6851,11 +6835,12 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT // // Parameters: // producer - the node that produces CPU flags +// cond - [out] the pointer to the condition inside that consumer. // // Returns: // A node that consumes the flags, or nullptr if no such node was found. // -GenTreeCC* CodeGen::genTryFindFlagsConsumer(GenTree* producer) +GenTree* CodeGen::genTryFindFlagsConsumer(GenTree* producer, GenCondition** cond) { assert((producer->gtFlags & GTF_SET_FLAGS) != 0); // We allow skipping some nodes where we know for sure that the flags are @@ -6866,7 +6851,14 @@ GenTreeCC* CodeGen::genTryFindFlagsConsumer(GenTree* producer) { if (candidate->OperIs(GT_JCC, GT_SETCC)) { - return candidate->AsCC(); + *cond = &candidate->AsCC()->gtCondition; + return candidate; + } + + if (candidate->OperIs(GT_SELECTCC)) + { + *cond = &candidate->AsOpCC()->gtCondition; + return candidate; } // The following nodes can be inserted between the compare and the user diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f47760ab8c708..b7eb5c20d69df 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2414,6 +2414,9 @@ class Compiler // For binary opers. 
GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2); + GenTreeCC* gtNewCC(genTreeOps oper, var_types type, GenCondition cond); + GenTreeOpCC* gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2); + GenTreeColon* gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode); GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon); diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 4c746faf98aad..64c5e32855520 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1545,10 +1545,10 @@ GenTree* DecomposeLongs::DecomposeSelect(LIR::Use& use) // Normally GT_SELECT is responsible for evaluating the condition into // flags, but for the "upper half" we treat the lower GT_SELECT similar to - // other flag producing nodes and reuse them. GT_SELECT_HI is the variant + // other flag producing nodes and reuse them. GT_SELECTCC is the variant // that uses existing flags and has no condition as part of it. 
select->gtFlags |= GTF_SET_FLAGS; - GenTree* hiSelect = m_compiler->gtNewOperNode(GT_SELECT_HI, TYP_INT, hiOp1, hiOp2); + GenTree* hiSelect = m_compiler->gtNewOperCC(GT_SELECTCC, TYP_INT, GenCondition::NE, hiOp1, hiOp2); Range().InsertAfter(select, hiSelect); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 16fe9b2f19cc0..085eabf466188 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -312,6 +312,7 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeCC) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeOpCC) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node @@ -6943,6 +6944,18 @@ GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, return node; } +GenTreeCC* Compiler::gtNewCC(genTreeOps oper, var_types type, GenCondition cond) +{ + GenTreeCC* node = new (this, oper) GenTreeCC(oper, type, cond); + return node; +} + +GenTreeOpCC* Compiler::gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2) +{ + GenTreeOpCC* node = new (this, oper) GenTreeOpCC(oper, type, cond, op1, op2); + return node; +} + GenTreeColon* Compiler::gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode) { return new (this, GT_COLON) GenTreeColon(TYP_INT, elseNode, thenNode); @@ -12134,6 +12147,10 @@ void Compiler::gtDispTree(GenTree* tree, break; } } + else if (tree->OperIs(GT_SELECTCC)) + { + printf(" cond=%s", tree->AsOpCC()->gtCondition.Name()); + } gtDispCommonEndLine(tree); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b54f62db5ed07..06ec58d7a3eb5 100644 --- 
a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1686,7 +1686,16 @@ struct GenTree bool OperIsConditionalJump() const { - return (gtOper == GT_JTRUE) || (gtOper == GT_JCMP) || (gtOper == GT_JCC); + return OperIs(GT_JTRUE, GT_JCMP, GT_JCC); + } + + bool OperConsumesFlags() const + { +#if !defined(TARGET_64BIT) + if (OperIs(GT_ADD_HI, GT_SUB_HI)) + return true; +#endif + return OperIs(GT_JCC, GT_SETCC, GT_SELECTCC); } #ifdef DEBUG @@ -8521,12 +8530,11 @@ struct GenCondition }; // Represents a GT_JCC or GT_SETCC node. - struct GenTreeCC final : public GenTree { GenCondition gtCondition; - GenTreeCC(genTreeOps oper, GenCondition condition, var_types type = TYP_VOID) + GenTreeCC(genTreeOps oper, var_types type, GenCondition condition) : GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) { assert(OperIs(GT_JCC, GT_SETCC)); @@ -8539,6 +8547,24 @@ struct GenTreeCC final : public GenTree #endif // DEBUGGABLE_GENTREE }; +// Represents a node with two operands and a condition. +struct GenTreeOpCC final : public GenTreeOp +{ + GenCondition gtCondition; + + GenTreeOpCC(genTreeOps oper, var_types type, GenCondition condition, GenTree* op1 = nullptr, GenTree* op2 = nullptr) + : GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) + { + assert(OperIs(GT_SELECTCC)); + } + +#if DEBUGGABLE_GENTREE + GenTreeOpCC() : GenTreeOp() + { + } +#endif // DEBUGGABLE_GENTREE +}; + //------------------------------------------------------------------------ // Deferred inline functions of GenTree -- these need the subtypes above to // be defined already. 
diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index f6a7b4391b783..eb734e7ec767f 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -185,8 +185,6 @@ GTNODE(SUB_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) GTNODE(LSH_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) GTNODE(RSH_LO , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) -// Variant of SELECT that reuses flags computed by a previous SELECT. -GTNODE(SELECT_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) #endif // !defined(TARGET_64BIT) #ifdef FEATURE_HW_INTRINSICS @@ -234,16 +232,19 @@ GTNODE(CNEG_LT , GenTreeOp ,0,GTK_UNOP|DBK_NOTHIR) // Conditi GTNODE(CMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) // Generate a test instruction; sets the CPU flags according to (a & b) and does not produce a value. GTNODE(TEST , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) +#ifdef TARGET_XARCH +// The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. +GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) +#endif // Makes a comparison and jump if the condition specified. Does not set flags. GTNODE(JCMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR) // Checks the condition flags and branch if the condition specified by GenTreeCC::gtCondition is true. GTNODE(JCC , GenTreeCC ,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Checks the condition flags and produces 1 if the condition specified by GenTreeCC::gtCondition is true and 0 otherwise. GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR) -#ifdef TARGET_XARCH -// The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value. -GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)) -#endif +// Variant of SELECT that reuses flags computed by a previous node with the specified condition. 
+GTNODE(SELECTCC          , GenTreeOpCC        ,0,GTK_BINOP|DBK_NOTHIR)
+
 //-----------------------------------------------------------------------------
 //  Other nodes that look like unary/binary operators:
diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h
index 0321f51a95c00..052a6d09a8812 100644
--- a/src/coreclr/jit/gtstructs.h
+++ b/src/coreclr/jit/gtstructs.h
@@ -112,6 +112,7 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
 GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
 GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
 GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
+GTSTRUCT_1(OpCC , GT_SELECTCC)
 #if defined(TARGET_X86)
 GTSTRUCT_1(MultiRegOp , GT_MUL_LONG)
 #elif defined (TARGET_ARM)
diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp
index 01b3fc7b22d31..d7194d36c0df1 100644
--- a/src/coreclr/jit/ifconversion.cpp
+++ b/src/coreclr/jit/ifconversion.cpp
@@ -61,8 +61,6 @@ class OptIfConversionDsc
     void IfConvertDump();
 #endif
 
-    bool IsHWIntrinsicCC(GenTree* node);
-
 public:
     bool optIfConvert();
 };
@@ -406,69 +404,6 @@ void OptIfConversionDsc::IfConvertDump()
 }
 #endif
 
-#ifdef TARGET_XARCH
-//-----------------------------------------------------------------------------
-// IsHWIntrinsicCC:
-//   Check if this is a HW intrinsic node that can be compared efficiently
-//   against 0.
-//
-// Returns:
-//   True if so.
-//
-// Notes:
-//   For xarch, we currently skip if-conversion for these cases as the backend can handle them more efficiently
-//   when they are normal compares.
-// -bool OptIfConversionDsc::IsHWIntrinsicCC(GenTree* node) -{ -#ifdef FEATURE_HW_INTRINSICS - if (!node->OperIs(GT_HWINTRINSIC)) - { - return false; - } - - switch (node->AsHWIntrinsic()->GetHWIntrinsicId()) - { - case NI_SSE_CompareScalarOrderedEqual: - case NI_SSE_CompareScalarOrderedNotEqual: - case NI_SSE_CompareScalarOrderedLessThan: - case NI_SSE_CompareScalarOrderedLessThanOrEqual: - case NI_SSE_CompareScalarOrderedGreaterThan: - case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedEqual: - case NI_SSE_CompareScalarUnorderedNotEqual: - case NI_SSE_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE_CompareScalarUnorderedLessThan: - case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedGreaterThan: - case NI_SSE2_CompareScalarOrderedEqual: - case NI_SSE2_CompareScalarOrderedNotEqual: - case NI_SSE2_CompareScalarOrderedLessThan: - case NI_SSE2_CompareScalarOrderedLessThanOrEqual: - case NI_SSE2_CompareScalarOrderedGreaterThan: - case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedEqual: - case NI_SSE2_CompareScalarUnorderedNotEqual: - case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE2_CompareScalarUnorderedLessThan: - case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedGreaterThan: - case NI_SSE41_TestC: - case NI_SSE41_TestZ: - case NI_SSE41_TestNotZAndNotC: - case NI_AVX_TestC: - case NI_AVX_TestZ: - case NI_AVX_TestNotZAndNotC: - return true; - default: - return false; - } -#else - return false; -#endif -} -#endif - //----------------------------------------------------------------------------- // optIfConvert // @@ -718,39 +653,6 @@ bool OptIfConversionDsc::optIfConvert() } } -#ifdef TARGET_XARCH - // Currently the xarch backend does not handle SELECT (EQ/NE (arithmetic op that sets ZF) 0) ... - // as efficiently as JTRUE (EQ/NE (arithmetic op that sets ZF) 0). 
The support is complicated - // to add due to the destructive nature of xarch instructions. - // The exception is for cases that can be transformed into TEST_EQ/TEST_NE. - // TODO-CQ: Fix this. - if (m_cond->OperIs(GT_EQ, GT_NE) && m_cond->gtGetOp2()->IsIntegralConst(0) && - !m_cond->gtGetOp1()->OperIs(GT_AND) && - (m_cond->gtGetOp1()->SupportsSettingZeroFlag() || IsHWIntrinsicCC(m_cond->gtGetOp1()))) - { - JITDUMP("Skipping if-conversion where condition is EQ/NE 0 with operation that sets ZF"); - return false; - } - - // However, in some cases bit tests can emit 'bt' when not going - // through the GT_SELECT path. - if (m_cond->OperIs(GT_EQ, GT_NE) && m_cond->gtGetOp1()->OperIs(GT_AND) && - m_cond->gtGetOp2()->IsIntegralConst(0)) - { - // A bit test that can be transformed into 'bt' will look like - // EQ/NE(AND(x, LSH(1, y)), 0) - - GenTree* andOp1 = m_cond->gtGetOp1()->gtGetOp1(); - GenTree* andOp2 = m_cond->gtGetOp1()->gtGetOp2(); - - if (andOp2->OperIs(GT_LSH) && andOp2->gtGetOp1()->IsIntegralConst(1)) - { - JITDUMP("Skipping if-conversion where condition is amenable to be transformed to BT"); - return false; - } - } -#endif - // Cost to allow for "x = cond ? a + b : c + d". 
if (thenCost > 7 || elseCost > 7) { diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index e9293f9962765..863dca578a61a 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -2224,6 +2224,12 @@ bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange) return GenTree::VisitResult::Continue; }); + if (node->OperIs(GT_SELECTCC, GT_SETCC)) + { + assert((node->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); + node->gtPrev->gtFlags &= ~GTF_SET_FLAGS; + } + blockRange->Remove(node); return true; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 7b00b36bab9d4..6963ea1f8560f 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -159,6 +159,11 @@ bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive) const return true; } + if (node->OperConsumesFlags()) + { + return false; + } + m_scratchSideEffects.Clear(); m_scratchSideEffects.AddNode(comp, node); @@ -193,6 +198,21 @@ bool Lowering::IsInvariantInRange(GenTree* node, GenTree* endExclusive, GenTree* { assert((node != nullptr) && (endExclusive != nullptr)); + if (ignoreNode == nullptr) + { + return IsInvariantInRange(node, endExclusive); + } + + if ((node->gtNext == endExclusive) || ((node->gtNext == ignoreNode) && (node->gtNext->gtNext == endExclusive))) + { + return true; + } + + if (node->OperConsumesFlags()) + { + return false; + } + m_scratchSideEffects.Clear(); m_scratchSideEffects.AddNode(comp, node); @@ -396,11 +416,9 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_SELECT: return LowerSelect(node->AsConditional()); -#ifdef TARGET_X86 - case GT_SELECT_HI: + case GT_SELECTCC: ContainCheckSelect(node->AsOp()); break; -#endif case GT_JMP: LowerJmpMethod(node); @@ -1179,7 +1197,7 @@ bool Lowering::TryLowerSwitchToBitTest( GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType); GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue); bitTest->gtFlags |= 
GTF_SET_FLAGS; - GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition); + GenTreeCC* jcc = comp->gtNewCC(GT_JCC, TYP_VOID, bbSwitchCondition); LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc); @@ -3289,20 +3307,8 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) assert(relop->gtNext == jtrue); assert(jtrue->gtNext == nullptr); - GenCondition cond = GenCondition::FromRelop(relop); - bool optimizing = comp->opts.OptimizationEnabled(); -#ifdef TARGET_XARCH - // Optimize FP x != x to only check parity flag. This is a common way of - // checking NaN and avoids two branches that we would otherwise emit. - if (optimizing && (cond.GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() && - GenTree::Compare(relopOp1, relopOp2) && IsInvariantInRange(relopOp1, relop) && - IsInvariantInRange(relopOp2, relop)) - { - cond = GenCondition(GenCondition::P); - } -#endif - #if defined(TARGET_LOONGARCH64) + GenCondition cond = GenCondition::FromRelop(relop); // for LA64's integer compare and condition-branch instructions, // it's very similar to the IL instructions. if (!varTypeIsFloating(relopOp1->TypeGet())) @@ -3336,48 +3342,10 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue) relop->gtFlags |= GTF_SET_FLAGS; assert(relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)); } - #else - - // Optimize EQ/NE(op_that_sets_zf, 0) into op_that_sets_zf with GTF_SET_FLAGS. 
- if (optimizing && relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0) && - relopOp1->SupportsSettingZeroFlag() && IsInvariantInRange(relopOp1, relop)) - { - relopOp1->gtFlags |= GTF_SET_FLAGS; - relopOp1->SetUnusedValue(); - - BlockRange().Remove(relopOp1); - BlockRange().InsertBefore(jtrue, relopOp1); - BlockRange().Remove(relop); - BlockRange().Remove(relopOp2); - } - else - { - relop->gtType = TYP_VOID; - relop->gtFlags |= GTF_SET_FLAGS; - - if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) - { - relop->SetOper(GT_CMP); - - if (cond.PreferSwap()) - { - std::swap(relop->gtOp1, relop->gtOp2); - cond = GenCondition::Swap(cond); - } - } -#ifdef TARGET_XARCH - else if (relop->OperIs(GT_BITTEST_EQ, GT_BITTEST_NE)) - { - relop->SetOper(GT_BT); - } -#endif - else - { - assert(relop->OperIs(GT_TEST_EQ, GT_TEST_NE)); - relop->SetOper(GT_TEST); - } - } + GenCondition cond; + bool lowered = TryLowerConditionToFlagsNode(jtrue, relop, &cond); + assert(lowered); // Should succeed since relop is right before jtrue #endif // TARGET_LOONGARCH64 jtrue->SetOperRaw(GT_JCC); @@ -3428,10 +3396,138 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select) } } +#ifdef TARGET_XARCH + // Do not transform GT_SELECT with GTF_SET_FLAGS into GT_SELECTCC; this + // node is used by decomposition on x86. + // TODO-CQ: If we allowed multiple nodes to consume the same CPU flags then + // we could do this on x86. We currently disable if-conversion for TYP_LONG + // on 32-bit architectures because of this. 
+    GenCondition selectCond;
+    if (((select->gtFlags & GTF_SET_FLAGS) == 0) && TryLowerConditionToFlagsNode(select, cond, &selectCond))
+    {
+        select->SetOperRaw(GT_SELECTCC);
+        GenTreeOpCC* newSelect = select->AsOpCC();
+        newSelect->gtCondition = selectCond;
+        ContainCheckSelect(newSelect);
+        return newSelect->gtNext;
+    }
+#endif
+
     ContainCheckSelect(select);
     return select->gtNext;
 }
 
+//----------------------------------------------------------------------------------------------
+// TryLowerConditionToFlagsNode: Given a node 'parent' that is able to consume
+// conditions from CPU flags, try to transform 'condition' into a node that
+// produces CPU flags, and reorder it to happen right before 'parent'.
+//
+// Arguments:
+//     parent    - The parent node that can consume from CPU flags.
+//     condition - The node that can be transformed into something that produces CPU flags.
+//     cond      - [out] The condition that makes the transformed node true.
+//
+// Return Value:
+//     True if 'condition' was transformed and is now right before 'parent'; otherwise false.
+//
+bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond)
+{
+    if (condition->OperIsCompare())
+    {
+        if (!IsInvariantInRange(condition, parent))
+        {
+            return false;
+        }
+
+        GenTreeOp* relop = condition->AsOp();
+
+        *cond           = GenCondition::FromRelop(relop);
+        bool optimizing = comp->opts.OptimizationEnabled();
+
+        GenTree* relopOp1 = relop->gtGetOp1();
+        GenTree* relopOp2 = relop->gtGetOp2();
+
+#ifdef TARGET_XARCH
+        // Optimize FP x != x to only check parity flag. This is a common way of
+        // checking NaN and avoids two branches that we would otherwise emit.
+        if (optimizing && (cond->GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() &&
+            GenTree::Compare(relopOp1, relopOp2) && IsInvariantInRange(relopOp1, relop) &&
+            IsInvariantInRange(relopOp2, relop))
+        {
+            *cond = GenCondition(GenCondition::P);
+        }
+#endif
+
+        // Optimize EQ/NE(op_that_sets_zf, 0) into op_that_sets_zf with GTF_SET_FLAGS.
+ if (optimizing && relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0) && + relopOp1->SupportsSettingZeroFlag() && IsInvariantInRange(relopOp1, parent)) + { + relopOp1->gtFlags |= GTF_SET_FLAGS; + relopOp1->SetUnusedValue(); + + BlockRange().Remove(relopOp1); + BlockRange().InsertBefore(parent, relopOp1); + BlockRange().Remove(relop); + BlockRange().Remove(relopOp2); + } + else + { + relop->gtType = TYP_VOID; + relop->gtFlags |= GTF_SET_FLAGS; + + if (relop->OperIs(GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT)) + { + relop->SetOper(GT_CMP); + + if (cond->PreferSwap()) + { + std::swap(relop->gtOp1, relop->gtOp2); + *cond = GenCondition::Swap(*cond); + } + } +#ifdef TARGET_XARCH + else if (relop->OperIs(GT_BITTEST_EQ, GT_BITTEST_NE)) + { + relop->SetOper(GT_BT); + } +#endif + else + { + assert(relop->OperIs(GT_TEST_EQ, GT_TEST_NE)); + relop->SetOper(GT_TEST); + } + + if (relop->gtNext != parent) + { + BlockRange().Remove(relop); + BlockRange().InsertBefore(parent, relop); + } + } + + return true; + } + + // TODO-Cleanup: Avoid creating these SETCC nodes in the first place. + if (condition->OperIs(GT_SETCC)) + { + assert((condition->gtPrev->gtFlags & GTF_SET_FLAGS) != 0); + GenTree* flagsProducer = condition->gtPrev; + if (!IsInvariantInRange(flagsProducer, parent, condition)) + { + return false; + } + + *cond = condition->AsCC()->gtCondition; + + BlockRange().Remove(condition); + BlockRange().Remove(flagsProducer); + BlockRange().InsertBefore(parent, flagsProducer); + return true; + } + + return false; +} + //---------------------------------------------------------------------------------------------- // LowerNodeCC: Lowers a node that produces a boolean value by setting the condition flags. 
// @@ -3511,7 +3607,7 @@ GenTreeCC* Lowering::LowerNodeCC(GenTree* node, GenCondition condition) if (BlockRange().TryGetUse(relop, &use)) { - cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT); + cc = comp->gtNewCC(GT_SETCC, TYP_INT, condition); BlockRange().InsertAfter(node, cc); use.ReplaceWith(cc); } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index dc462e8220a68..bb88daed053c7 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -137,6 +137,7 @@ class Lowering final : public Phase GenTree* LowerCompare(GenTree* cmp); GenTree* LowerJTrue(GenTreeOp* jtrue); GenTree* LowerSelect(GenTreeConditional* cond); + bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond); GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition); void LowerJmpMethod(GenTree* jmp); void LowerRet(GenTreeUnOp* ret); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1b7054b138130..a3cd206ec471c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5909,52 +5909,40 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp) // ContainCheckSelect: determine whether the sources of a select should be contained. // // Arguments: -// select - the GT_SELECT or GT_SELECT_HI node. +// select - the GT_SELECT or GT_SELECTCC node. // void Lowering::ContainCheckSelect(GenTreeOp* select) { -#ifdef TARGET_64BIT - assert(select->OperIs(GT_SELECT)); -#else - assert(select->OperIs(GT_SELECT, GT_SELECT_HI)); -#endif + assert(select->OperIs(GT_SELECT, GT_SELECTCC)); - // Disallow containing compares if the flags may be used by follow-up - // nodes, in which case those nodes expect zero/non-zero in the flags. 
- if (select->OperIs(GT_SELECT) && ((select->gtFlags & GTF_SET_FLAGS) == 0)) + if (select->OperIs(GT_SELECTCC)) { - GenTree* cond = select->AsConditional()->gtCond; - - if (cond->OperIsCompare() && IsSafeToContainMem(select, cond)) - { - MakeSrcContained(select, cond); + GenCondition cc = select->AsOpCC()->gtCondition; - // op1 and op2 are emitted as two separate instructions due to the - // conditional nature of cmov, so both operands can usually be - // contained memory operands. The exception is for compares - // requiring two cmovs, in which case we do not want to incur the - // memory access/address calculation twice. - // - // See the comment in Codegen::GenConditionDesc::map for why these - // comparisons are special and end up requiring the two cmovs. - // - GenCondition cc = GenCondition::FromRelop(cond); - switch (cc.GetCode()) - { - case GenCondition::FEQ: - case GenCondition::FLT: - case GenCondition::FLE: - case GenCondition::FNEU: - case GenCondition::FGEU: - case GenCondition::FGTU: - // Skip containment checking below. - // TODO-CQ: We could allow one of the operands to be a - // contained memory operand, but it requires updating LSRA - // build to take it into account. - return; - default: - break; - } + // op1 and op2 are emitted as two separate instructions due to the + // conditional nature of cmov, so both operands can usually be + // contained memory operands. The exception is for compares + // requiring two cmovs, in which case we do not want to incur the + // memory access/address calculation twice. + // + // See the comment in Codegen::GenConditionDesc::map for why these + // comparisons are special and end up requiring the two cmovs. + // + switch (cc.GetCode()) + { + case GenCondition::FEQ: + case GenCondition::FLT: + case GenCondition::FLE: + case GenCondition::FNEU: + case GenCondition::FGEU: + case GenCondition::FGTU: + // Skip containment checking below. 
+ // TODO-CQ: We could allow one of the operands to be a + // contained memory operand, but it requires updating LSRA + // build to take it into account. + return; + default: + break; } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 815404dd6bcd8..b600b73a0ab77 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -245,12 +245,10 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildSelect(tree->AsConditional()); break; -#ifdef TARGET_X86 - case GT_SELECT_HI: + case GT_SELECTCC: assert(dstCount == 1); srcCount = BuildSelect(tree->AsOp()); break; -#endif case GT_JMP: srcCount = 0; @@ -900,21 +898,11 @@ int LinearScan::BuildSelect(GenTreeOp* select) { int srcCount = 0; - GenCondition cc = GenCondition::NE; if (select->OperIs(GT_SELECT)) { GenTree* cond = select->AsConditional()->gtCond; - if (cond->isContained()) - { - assert(cond->OperIsCompare()); - srcCount += BuildCmpOperands(cond); - cc = GenCondition::FromRelop(cond); - } - else - { - BuildUse(cond); - srcCount++; - } + BuildUse(cond); + srcCount++; } GenTree* trueVal = select->gtOp1; @@ -1012,28 +1000,32 @@ int LinearScan::BuildSelect(GenTreeOp* select) // multiple memory accesses, but we could contain the operand in the 'mov' // instruction with some more care taken for marking things delay reg freed // correctly). - switch (cc.GetCode()) - { - case GenCondition::FEQ: - case GenCondition::FLT: - case GenCondition::FLE: - // Normally these require an 'AND' conditional and cmovs with - // both the true and false values as sources. However, after - // swapping these into an 'OR' conditional the cmovs require - // only the original falseVal, so we need only to mark that as - // delay-reg freed to allow codegen to resolve this. 
- assert(uncontainedFalseRP != nullptr); - setDelayFree(uncontainedFalseRP); - break; - case GenCondition::FNEU: - case GenCondition::FGEU: - case GenCondition::FGTU: - // These require an 'OR' conditional and only access 'trueVal'. - assert(uncontainedTrueRP != nullptr); - setDelayFree(uncontainedTrueRP); - break; - default: - break; + if (select->OperIs(GT_SELECTCC)) + { + GenCondition cc = select->AsOpCC()->gtCondition; + switch (cc.GetCode()) + { + case GenCondition::FEQ: + case GenCondition::FLT: + case GenCondition::FLE: + // Normally these require an 'AND' conditional and cmovs with + // both the true and false values as sources. However, after + // swapping these into an 'OR' conditional the cmovs require + // only the original falseVal, so we need only to mark that as + // delay-reg freed to allow codegen to resolve this. + assert(uncontainedFalseRP != nullptr); + setDelayFree(uncontainedFalseRP); + break; + case GenCondition::FNEU: + case GenCondition::FGEU: + case GenCondition::FGTU: + // These require an 'OR' conditional and only access 'trueVal'. + assert(uncontainedTrueRP != nullptr); + setDelayFree(uncontainedTrueRP); + break; + default: + break; + } } BuildDef(select);