diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index aa3fbefad7003..32305a70ab89b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1532,9 +1532,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue); #endif // TARGET_XARCH -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) static insCflags InsCflagsForCcmp(GenCondition cond); static insCond JumpKindToInsCond(emitJumpKind condition); +#elif defined(TARGET_XARCH) + static instruction JumpKindToCmov(emitJumpKind condition); #endif #ifndef TARGET_LOONGARCH64 diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index b510ce4a558dc..30a2c89f399ad 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1595,6 +1595,14 @@ void CodeGen::genConsumeRegs(GenTree* tree) { genConsumeAddress(tree); } +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) + else if (tree->OperIsCompare()) + { + // Compares can be contained by SELECT/compare chains. + genConsumeRegs(tree->gtGetOp1()); + genConsumeRegs(tree->gtGetOp2()); + } +#endif #ifdef TARGET_ARM64 else if (tree->OperIs(GT_BFIZ)) { @@ -1610,10 +1618,9 @@ void CodeGen::genConsumeRegs(GenTree* tree) assert(cast->isContained()); genConsumeAddress(cast->CastOp()); } - else if (tree->OperIsCompare() || tree->OperIs(GT_AND)) + else if (tree->OperIs(GT_AND)) { - // Compares can be contained by a SELECT. - // ANDs and Cmp Compares may be contained in a chain. + // ANDs may be contained in a chain. genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 5c1c26150eae1..9d3a369b92b11 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1303,6 +1303,46 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree) } } +//------------------------------------------------------------------------ +// JumpKindToCmov: +// Convert an emitJumpKind to the corresponding cmov instruction. +// +// Arguments: +// condition - the condition +// +// Returns: +// A cmov instruction. 
+// +instruction CodeGen::JumpKindToCmov(emitJumpKind condition) +{ + static constexpr instruction s_table[EJ_COUNT] = { + INS_none, INS_none, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae, INS_cmove, INS_cmovne, INS_cmovbe, + INS_cmova, INS_cmovs, INS_cmovns, INS_cmovp, INS_cmovnp, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg, + }; + + static_assert_no_msg(s_table[EJ_NONE] == INS_none); + static_assert_no_msg(s_table[EJ_jmp] == INS_none); + static_assert_no_msg(s_table[EJ_jo] == INS_cmovo); + static_assert_no_msg(s_table[EJ_jno] == INS_cmovno); + static_assert_no_msg(s_table[EJ_jb] == INS_cmovb); + static_assert_no_msg(s_table[EJ_jae] == INS_cmovae); + static_assert_no_msg(s_table[EJ_je] == INS_cmove); + static_assert_no_msg(s_table[EJ_jne] == INS_cmovne); + static_assert_no_msg(s_table[EJ_jbe] == INS_cmovbe); + static_assert_no_msg(s_table[EJ_ja] == INS_cmova); + static_assert_no_msg(s_table[EJ_js] == INS_cmovs); + static_assert_no_msg(s_table[EJ_jns] == INS_cmovns); + static_assert_no_msg(s_table[EJ_jp] == INS_cmovp); + static_assert_no_msg(s_table[EJ_jnp] == INS_cmovnp); + static_assert_no_msg(s_table[EJ_jl] == INS_cmovl); + static_assert_no_msg(s_table[EJ_jge] == INS_cmovge); + static_assert_no_msg(s_table[EJ_jle] == INS_cmovle); + static_assert_no_msg(s_table[EJ_jg] == INS_cmovg); + + assert((condition >= EJ_NONE) && (condition < EJ_COUNT)); + return s_table[condition]; +} + //------------------------------------------------------------------------ // genCodeForCompare: Produce code for a GT_SELECT/GT_SELECT_HI node. // @@ -1317,37 +1357,99 @@ void CodeGen::genCodeForSelect(GenTreeOp* select) assert(select->OperIs(GT_SELECT)); #endif - regNumber dstReg = select->GetRegNum(); if (select->OperIs(GT_SELECT)) { - genConsumeReg(select->AsConditional()->gtCond); + genConsumeRegs(select->AsConditional()->gtCond); } genConsumeOperands(select); - instruction cmovKind = INS_cmovne; - GenTree* trueVal = select->gtOp1; - GenTree* falseVal = select->gtOp2; + regNumber dstReg = select->GetRegNum(); - // If the 'true' operand was allocated the same register as the target - // register then flip it to the false value so we can skip a reg-reg mov. - if (trueVal->isUsedFromReg() && (trueVal->GetRegNum() == dstReg)) + GenTree* trueVal = select->gtOp1; + GenTree* falseVal = select->gtOp2; + + GenCondition cc = GenCondition::NE; + + if (select->OperIs(GT_SELECT)) + { + GenTree* cond = select->AsConditional()->gtCond; + if (cond->isContained()) + { + assert(cond->OperIsCompare()); + genCodeForCompare(cond->AsOp()); + cc = GenCondition::FromRelop(cond); + + if (cc.PreferSwap()) + { + // genCodeForCompare generated the compare with swapped + // operands because this swap requires fewer branches/cmovs. + cc = GenCondition::Swap(cc); + } + } + else + { + regNumber condReg = cond->GetRegNum(); + GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg); + } + } + + // The usual codegen will be + // mov targetReg, falseValue + // cmovne targetReg, trueValue + // + // However, if the 'true' operand was allocated the same register as the + // target register then prefer to generate + // + // mov targetReg, trueValue + // cmove targetReg, falseValue + // + // so the first mov is elided. + // + if (falseVal->isUsedFromReg() && (falseVal->GetRegNum() == dstReg)) { std::swap(trueVal, falseVal); - cmovKind = INS_cmove; + cc = GenCondition::Reverse(cc); } - if (select->OperIs(GT_SELECT)) + // If there is a conflict then swap the condition anyway. LSRA should have + // ensured the other way around has no conflict. 
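As a side note on the emission strategy described in the comments above: the mov/cmov pair implements a simple conditional-overwrite dataflow, and swapping the true/false operands is only legal together with reversing the condition. A minimal C++ sketch of that dataflow (illustrative only, not JIT code; the names are made up):

    // What "mov dst, falseVal; cmovne dst, trueVal" computes for
    // SELECT(cond, trueVal, falseVal): cmov writes its destination only when
    // the condition holds, so the unconditional mov of falseVal must come
    // first, and exchanging the operands requires reversing the condition.
    int SelectViaCmov(bool cond, int trueVal, int falseVal)
    {
        int dst = falseVal; // mov    dst, falseVal
        if (cond)           // cmovne dst, trueVal
        {
            dst = trueVal;
        }
        return dst;
    }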
+ if ((trueVal->gtGetContainedRegMask() & genRegMask(dstReg)) != 0) { - // TODO-CQ: Support contained relops here. - assert(select->AsConditional()->gtCond->isUsedFromReg()); + std::swap(trueVal, falseVal); + cc = GenCondition::Reverse(cc); + } + + GenConditionDesc desc = GenConditionDesc::Get(cc); - regNumber condReg = select->AsConditional()->gtCond->GetRegNum(); - GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg); + // There may also be a conflict with the falseVal in case this is an AND + // condition. Once again, after swapping there should be no conflict as + // ensured by LSRA. + if ((desc.oper == GT_AND) && (falseVal->gtGetContainedRegMask() & genRegMask(dstReg)) != 0) + { + std::swap(trueVal, falseVal); + cc = GenCondition::Reverse(cc); + desc = GenConditionDesc::Get(cc); } inst_RV_TT(INS_mov, emitTypeSize(select), dstReg, falseVal); - inst_RV_TT(cmovKind, emitTypeSize(select), dstReg, trueVal); + + assert(!trueVal->isContained() || trueVal->isUsedFromMemory()); + assert((trueVal->gtGetContainedRegMask() & genRegMask(dstReg)) == 0); + inst_RV_TT(JumpKindToCmov(desc.jumpKind1), emitTypeSize(select), dstReg, trueVal); + + if (desc.oper == GT_AND) + { + assert(falseVal->isUsedFromReg()); + assert((falseVal->gtGetContainedRegMask() & genRegMask(dstReg)) == 0); + inst_RV_TT(JumpKindToCmov(emitter::emitReverseJumpKind(desc.jumpKind2)), emitTypeSize(select), dstReg, + falseVal); + } + else if (desc.oper == GT_OR) + { + assert(trueVal->isUsedFromReg()); + inst_RV_TT(JumpKindToCmov(desc.jumpKind2), emitTypeSize(select), dstReg, trueVal); + } genProduceReg(select); } @@ -1765,6 +1867,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_TEST_EQ: case GT_TEST_NE: case GT_CMP: + genConsumeOperands(treeNode->AsOp()); genCodeForCompare(treeNode->AsOp()); break; @@ -6479,8 +6582,6 @@ void CodeGen::genCompareFloat(GenTree* treeNode) var_types op1Type = op1->TypeGet(); var_types op2Type = op2->TypeGet(); - genConsumeOperands(tree); - assert(varTypeIsFloating(op1Type)); assert(op1Type == op2Type); @@ -6554,8 +6655,6 @@ void CodeGen::genCompareInt(GenTree* treeNode) emitter* emit = GetEmitter(); bool canReuseFlags = false; - genConsumeOperands(tree); - assert(!op1->isContainedIntOrIImmed()); assert(!varTypeIsFloating(op2Type)); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 50b22b34037d0..46c8c4709f664 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -1064,7 +1064,7 @@ regMaskTP GenTree::gtGetContainedRegMask() { if (!isContained()) { - return gtGetRegMask(); + return isUsedFromReg() ? gtGetRegMask() : RBM_NONE; } regMaskTP mask = 0; @@ -18720,6 +18720,46 @@ bool GenTree::IsArrayAddr(GenTreeArrAddr** pArrAddr) return false; } +//------------------------------------------------------------------------ +// SupportsSettingZeroFlag: Returns true if this is an arithmetic operation +// whose codegen supports setting the "zero flag" as part of its operation. +// +// Return Value: +// True if so. A false return does not imply that codegen for the node will +// not trash the zero flag. +// +// Remarks: +// For example, for EQ (AND x y) 0, both xarch and arm64 can emit +// instructions that directly set the flags after the 'AND' and thus no +// comparison is needed. +// +// The backend expects any node for which the flags will be consumed to be +// marked with GTF_SET_FLAGS. 
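As a concrete illustration of the zero-flag reuse this helper describes (a hedged sketch of the expected codegen shape, not taken from this change): for a source pattern like the one below, the AND itself sets ZF, so lowering can drop the explicit compare and let the branch or cmov consume the flags directly.

    //     and  eax, edx     ; sets ZF as a by-product
    //     jne  L_NONZERO    ; no separate test/cmp is needed
    bool AndIsZero(int x, int y)
    {
        return (x & y) == 0;
    }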
+// +bool GenTree::SupportsSettingZeroFlag() +{ +#if defined(TARGET_XARCH) + if (OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG)) + { + return true; + } + +#ifdef FEATURE_HW_INTRINSICS + if (OperIs(GT_HWINTRINSIC) && emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(AsHWIntrinsic()))) + { + return true; + } +#endif +#elif defined(TARGET_ARM64) + if (OperIs(GT_AND, GT_ADD, GT_SUB)) + { + return true; + } +#endif + + return false; +} + //------------------------------------------------------------------------ // Create: Create or retrieve a field sequence for the given field handle. // diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ea8731460bf7c..4b807b585c203 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1985,6 +1985,8 @@ struct GenTree bool IsArrayAddr(GenTreeArrAddr** pArrAddr); + bool SupportsSettingZeroFlag(); + // These are only used for dumping. // The GetRegNum() is only valid in LIR, but the dumping methods are not easily // modified to check this. diff --git a/src/coreclr/jit/ifconversion.cpp b/src/coreclr/jit/ifconversion.cpp index 292d915384ae4..bbdded63dbe51 100644 --- a/src/coreclr/jit/ifconversion.cpp +++ b/src/coreclr/jit/ifconversion.cpp @@ -61,6 +61,8 @@ class OptIfConversionDsc void IfConvertDump(); #endif + bool IsHWIntrinsicCC(GenTree* node); + public: bool optIfConvert(); }; @@ -250,6 +252,15 @@ bool OptIfConversionDsc::IfConvertCheckStmts(BasicBlock* fromBlock, IfConvertOpe return false; } +#ifndef TARGET_64BIT + // Disallow 64-bit operands on 32-bit targets as the backend currently cannot + // handle contained relops efficiently after decomposition. + if (varTypeIsLong(tree)) + { + return false; + } +#endif + // Ensure it won't cause any additional side effects. if ((op1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0 || (op2->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) @@ -301,6 +312,15 @@ bool OptIfConversionDsc::IfConvertCheckStmts(BasicBlock* fromBlock, IfConvertOpe return false; } +#ifndef TARGET_64BIT + // Disallow 64-bit operands on 32-bit targets as the backend currently cannot + // handle contained relops efficiently after decomposition. + if (varTypeIsLong(tree)) + { + return false; + } +#endif + // Ensure it won't cause any additional side effects. if ((op1->gtFlags & (GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF)) != 0) { @@ -386,6 +406,69 @@ void OptIfConversionDsc::IfConvertDump() } #endif +#ifdef TARGET_XARCH +//----------------------------------------------------------------------------- +// IsHWIntrinsicCC: +// Check if this is a HW intrinsic node that can be compared efficiently +// against 0. +// +// Returns: +// True if so. +// +// Notes: +// For xarch, we currently skip if-conversion for these cases as the backend can handle them more efficiently +// when they are normal compares. 
+// +bool OptIfConversionDsc::IsHWIntrinsicCC(GenTree* node) +{ +#ifdef FEATURE_HW_INTRINSICS + if (!node->OperIs(GT_HWINTRINSIC)) + { + return false; + } + + switch (node->AsHWIntrinsic()->GetHWIntrinsicId()) + { + case NI_SSE_CompareScalarOrderedEqual: + case NI_SSE_CompareScalarOrderedNotEqual: + case NI_SSE_CompareScalarOrderedLessThan: + case NI_SSE_CompareScalarOrderedLessThanOrEqual: + case NI_SSE_CompareScalarOrderedGreaterThan: + case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: + case NI_SSE_CompareScalarUnorderedEqual: + case NI_SSE_CompareScalarUnorderedNotEqual: + case NI_SSE_CompareScalarUnorderedLessThanOrEqual: + case NI_SSE_CompareScalarUnorderedLessThan: + case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: + case NI_SSE_CompareScalarUnorderedGreaterThan: + case NI_SSE2_CompareScalarOrderedEqual: + case NI_SSE2_CompareScalarOrderedNotEqual: + case NI_SSE2_CompareScalarOrderedLessThan: + case NI_SSE2_CompareScalarOrderedLessThanOrEqual: + case NI_SSE2_CompareScalarOrderedGreaterThan: + case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: + case NI_SSE2_CompareScalarUnorderedEqual: + case NI_SSE2_CompareScalarUnorderedNotEqual: + case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: + case NI_SSE2_CompareScalarUnorderedLessThan: + case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: + case NI_SSE2_CompareScalarUnorderedGreaterThan: + case NI_SSE41_TestC: + case NI_SSE41_TestZ: + case NI_SSE41_TestNotZAndNotC: + case NI_AVX_TestC: + case NI_AVX_TestZ: + case NI_AVX_TestNotZAndNotC: + return true; + default: + return false; + } +#else + return false; +#endif +} +#endif + //----------------------------------------------------------------------------- // optIfConvert // @@ -621,12 +704,6 @@ bool OptIfConversionDsc::optIfConvert() // Put a limit on the original source and destinations. if (!m_comp->compStressCompile(Compiler::STRESS_IF_CONVERSION_COST, 25)) { -#ifdef TARGET_XARCH - // xarch does not support containing relops in GT_SELECT nodes - // currently so only introduce GT_SELECT in stress. - JITDUMP("Skipping if-conversion on xarch\n"); - return false; -#else int thenCost = 0; int elseCost = 0; @@ -650,6 +727,39 @@ bool OptIfConversionDsc::optIfConvert() } } +#ifdef TARGET_XARCH + // Currently the xarch backend does not handle SELECT (EQ/NE (arithmetic op that sets ZF) 0) ... + // as efficiently as JTRUE (EQ/NE (arithmetic op that sets ZF) 0). The support is complicated + // to add due to the destructive nature of xarch instructions. + // The exception is for cases that can be transformed into TEST_EQ/TEST_NE. + // TODO-CQ: Fix this. + if (m_cond->OperIs(GT_EQ, GT_NE) && m_cond->gtGetOp2()->IsIntegralConst(0) && + !m_cond->gtGetOp1()->OperIs(GT_AND) && + (m_cond->gtGetOp1()->SupportsSettingZeroFlag() || IsHWIntrinsicCC(m_cond->gtGetOp1()))) + { + JITDUMP("Skipping if-conversion where condition is EQ/NE 0 with operation that sets ZF"); + return false; + } + + // However, in some cases bit tests can emit 'bt' when not going + // through the GT_SELECT path. 
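For context on the bit-test shape that the check below looks for (a hedged, source-level example; the exact instruction selection is up to lowering): EQ/NE(AND(x, LSH(1, y)), 0) corresponds to code like this, which xarch can turn into a 'bt' plus jb/jae as long as it stays a branch, so if-conversion backs off rather than forcing it through cmov.

    bool BitIsSet(unsigned x, int y)
    {
        // Recognized as EQ/NE(AND(x, LSH(1, y)), 0) in the IR.
        return (x & (1u << y)) != 0;
    }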
+ if (m_cond->OperIs(GT_EQ, GT_NE) && m_cond->gtGetOp1()->OperIs(GT_AND) && + m_cond->gtGetOp2()->IsIntegralConst(0)) + { + // A bit test that can be transformed into 'bt' will look like + // EQ/NE(AND(x, LSH(1, y)), 0) + + GenTree* andOp1 = m_cond->gtGetOp1()->gtGetOp1(); + GenTree* andOp2 = m_cond->gtGetOp1()->gtGetOp2(); + + if (andOp2->OperIs(GT_LSH) && andOp2->gtGetOp1()->IsIntegralConst(1)) + { + JITDUMP("Skipping if-conversion where condition is amenable to be transformed to BT"); + return false; + } + } +#endif + // Cost to allow for "x = cond ? a + b : c + d". if (thenCost > 7 || elseCost > 7) { @@ -657,7 +767,6 @@ bool OptIfConversionDsc::optIfConvert() elseCost); return false; } -#endif } // Get the select node inputs. @@ -673,16 +782,12 @@ bool OptIfConversionDsc::optIfConvert() } else { - // Invert the condition (to help matching condition codes back to CIL). - GenTree* revCond = m_comp->gtReverseCond(m_cond); - assert(m_cond == revCond); // Ensure `gtReverseCond` did not create a new node. - // Duplicate the destination of the Then assignment. assert(m_thenOperation.node->gtGetOp1()->IsLocal()); - selectFalseInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); - selectFalseInput->gtFlags &= GTF_EMPTY; + selectTrueInput = m_comp->gtCloneExpr(m_thenOperation.node->gtGetOp1()); + selectTrueInput->gtFlags &= GTF_EMPTY; - selectTrueInput = m_thenOperation.node->gtGetOp2(); + selectFalseInput = m_thenOperation.node->gtGetOp2(); } // Pick the type as the type of the local, which should always be compatible even for implicit coercions. @@ -777,7 +882,7 @@ PhaseStatus Compiler::optIfConversion() // Currently only enabled on arm64 and under debug on xarch, since we only // do it under stress. CLANG_FORMAT_COMMENT_ANCHOR; -#if defined(TARGET_ARM64) || (defined(TARGET_XARCH) && defined(DEBUG)) +#if defined(TARGET_ARM64) || defined(TARGET_XARCH) // Reverse iterate through the blocks. BasicBlock* block = fgLastBB; while (block != nullptr) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 93df4f3c690bf..50f9148290917 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3108,19 +3108,11 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) // after op1 do not modify the flags so that it is safe to avoid generating a // test instruction. - if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) && -#ifdef TARGET_XARCH - (op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG) -#ifdef FEATURE_HW_INTRINSICS - || (op1->OperIs(GT_HWINTRINSIC) && - emitter::DoesWriteZeroFlag(HWIntrinsicInfo::lookupIns(op1->AsHWIntrinsic()))) -#endif // FEATURE_HW_INTRINSICS - ) -#else // TARGET_ARM64 - op1->OperIs(GT_AND, GT_ADD, GT_SUB) && + if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) && op1->SupportsSettingZeroFlag() +#ifdef TARGET_ARM64 // This happens in order to emit ARM64 'madd' and 'msub' instructions. // We cannot combine 'adds'/'subs' and 'mul'. - !(op1->gtGetOp2()->OperIs(GT_MUL) && op1->gtGetOp2()->isContained()) + && !(op1->gtGetOp2()->OperIs(GT_MUL) && op1->gtGetOp2()->isContained()) #endif ) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 81940f9a90ad9..21837d442f53c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5874,14 +5874,48 @@ void Lowering::ContainCheckSelect(GenTreeOp* select) assert(select->OperIs(GT_SELECT, GT_SELECT_HI)); #endif - // TODO-CQ: Support containing relops here for the GT_SELECT case. 
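For context on the ContainCheckSelect change that follows: because the mov and the cmov are emitted as separate instructions, each of them can usually take a memory operand, so both SELECT operands may end up contained. A hedged illustration of the kind of code this enables (the actual containment decision still depends on the operand shapes):

    struct Pair
    {
        int first;
        int second;
    };

    // For "cond ? p.first : p.second" the backend can emit roughly
    //     mov    eax, dword ptr [rcx+4]   ; falseVal read straight from memory
    //     cmovne eax, dword ptr [rcx]     ; trueVal is a memory operand too
    // except for the float conditions that need two cmovs, where containment
    // is skipped so the memory operand is not accessed twice.
    int SelectField(const Pair& p, bool cond)
    {
        return cond ? p.first : p.second;
    }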
+    // Disallow containing compares if the flags may be used by follow-up
+    // nodes, in which case those nodes expect zero/non-zero in the flags.
+    if (select->OperIs(GT_SELECT) && ((select->gtFlags & GTF_SET_FLAGS) == 0))
+    {
+        GenTree* cond = select->AsConditional()->gtCond;
+
+        if (cond->OperIsCompare() && IsSafeToContainMem(select, cond))
+        {
+            MakeSrcContained(select, cond);
+
+            // op1 and op2 are emitted as two separate instructions due to the
+            // conditional nature of cmov, so both operands can usually be
+            // contained memory operands. The exception is for compares
+            // requiring two cmovs, in which case we do not want to incur the
+            // memory access/address calculation twice.
+            //
+            // See the comment in Codegen::GenConditionDesc::map for why these
+            // comparisons are special and end up requiring the two cmovs.
+            //
+            GenCondition cc = GenCondition::FromRelop(cond);
+            switch (cc.GetCode())
+            {
+                case GenCondition::FEQ:
+                case GenCondition::FLT:
+                case GenCondition::FLE:
+                case GenCondition::FNEU:
+                case GenCondition::FGEU:
+                case GenCondition::FGTU:
+                    // Skip containment checking below.
+                    // TODO-CQ: We could allow one of the operands to be a
+                    // contained memory operand, but it requires updating LSRA
+                    // build to take it into account.
+                    return;
+                default:
+                    break;
+            }
+        }
+    }
 
     GenTree* op1 = select->gtOp1;
     GenTree* op2 = select->gtOp2;
 
-    // op1 and op2 are emitted as two separate instructions due to the
-    // conditional nature of cmov, so both operands can be contained memory
-    // operands.
     unsigned operSize = genTypeSize(select);
     assert((operSize == 4) || (operSize == TARGET_POINTER_SIZE));
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h
index cfbd74487f494..cc75d1174fc40 100644
--- a/src/coreclr/jit/lsra.h
+++ b/src/coreclr/jit/lsra.h
@@ -1878,6 +1878,7 @@ class LinearScan : public LinearScanInterface
     int BuildPutArgReg(GenTreeUnOp* node);
     int BuildCall(GenTreeCall* call);
     int BuildCmp(GenTree* tree);
+    int BuildCmpOperands(GenTree* tree);
     int BuildBlockStore(GenTreeBlk* blkNode);
     int BuildModDiv(GenTree* tree);
     int BuildIntrinsic(GenTree* tree);
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 3908f1998792a..0f1af6103299f 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -3224,12 +3224,18 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
         return BuildOperandUses(hwintrinsic->Op(1), candidates);
     }
 #endif // FEATURE_HW_INTRINSICS
+#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
+    if (node->OperIsCompare())
+    {
+        // Compares can be contained by a SELECT node or in a compare chain.
+        return BuildBinaryUses(node->AsOp(), candidates);
+    }
+#endif
 #ifdef TARGET_ARM64
-    if (node->OperIs(GT_MUL) || node->OperIsCompare() || node->OperIs(GT_AND))
+    if (node->OperIs(GT_MUL) || node->OperIs(GT_AND))
     {
         // MUL can be contained for madd or msub on arm64.
-        // Compares can be contained by a SELECT.
-        // ANDs and Cmp Compares may be contained in a chain.
+        // ANDs may be contained in a chain.
         return BuildBinaryUses(node->AsOp(), candidates);
     }
     if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ))
@@ -4071,24 +4077,47 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree)
 //    tree - The node of interest
 //
 // Return Value:
-//    None.
+//    Number of sources.
// int LinearScan::BuildCmp(GenTree* tree) { assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); - regMaskTP dstCandidates = RBM_NONE; + + int srcCount = BuildCmpOperands(tree); + + if (!tree->TypeIs(TYP_VOID)) + { + regMaskTP dstCandidates = RBM_NONE; + +#ifdef TARGET_X86 + // If the compare is used by a jump, we just need to set the condition codes. If not, then we need + // to store the result into the low byte of a register, which requires the dst be a byteable register. + dstCandidates = allByteRegs(); +#endif + + BuildDef(tree, dstCandidates); + } + return srcCount; +} + +//------------------------------------------------------------------------ +// BuildCmpOperands: Set the register requirements for a compare's operands. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// Number of sources. +// +int LinearScan::BuildCmpOperands(GenTree* tree) +{ + assert(tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_JCMP)); regMaskTP op1Candidates = RBM_NONE; regMaskTP op2Candidates = RBM_NONE; GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); #ifdef TARGET_X86 - // If the compare is used by a jump, we just need to set the condition codes. If not, then we need - // to store the result into the low byte of a register, which requires the dst be a byteable register. - if (tree->TypeGet() != TYP_VOID) - { - dstCandidates = allByteRegs(); - } bool needByteRegs = false; if (varTypeIsByte(tree)) { @@ -4135,9 +4164,5 @@ int LinearScan::BuildCmp(GenTree* tree) int srcCount = BuildOperandUses(op1, op1Candidates); srcCount += BuildOperandUses(op2, op2Candidates); - if (tree->TypeGet() != TYP_VOID) - { - BuildDef(tree, dstCandidates); - } return srcCount; } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index c0fd6030c2880..1e749f4d13893 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -910,41 +910,142 @@ int LinearScan::BuildSelect(GenTreeOp* select) { int srcCount = 0; + GenCondition cc = GenCondition::NE; if (select->OperIs(GT_SELECT)) { - srcCount += BuildOperandUses(select->AsConditional()->gtCond); + GenTree* cond = select->AsConditional()->gtCond; + if (cond->isContained()) + { + assert(cond->OperIsCompare()); + srcCount += BuildCmpOperands(cond); + cc = GenCondition::FromRelop(cond); + } + else + { + BuildUse(cond); + srcCount++; + } } - // cmov family of instructions are special in that they only conditionally - // define the destination register, so when generating code for GT_SELECT - // we normally need to preface it by a move into the destination with one - // of the operands. We can avoid this if one of the operands is already in - // the destination register, so try to prefer that. - // - // Because of the above we also need to set delayRegFree on the intervals - // for contained operands. Otherwise we could pick a target register that - // conflicted with one of those registers. 
- // - if (select->gtOp1->isContained()) + GenTree* trueVal = select->gtOp1; + GenTree* falseVal = select->gtOp2; + + RefPositionIterator op1UsesPrev = refPositions.backPosition(); + assert(op1UsesPrev != refPositions.end()); + + RefPosition* uncontainedTrueRP = nullptr; + if (trueVal->isContained()) { - srcCount += BuildDelayFreeUses(select->gtOp1); + srcCount += BuildOperandUses(trueVal); } else { - tgtPrefUse = BuildUse(select->gtOp1); + tgtPrefUse = uncontainedTrueRP = BuildUse(trueVal); srcCount++; } - if (select->gtOp2->isContained()) + RefPositionIterator op2UsesPrev = refPositions.backPosition(); + + RefPosition* uncontainedFalseRP = nullptr; + if (falseVal->isContained()) { - srcCount += BuildDelayFreeUses(select->gtOp2); + srcCount += BuildOperandUses(falseVal); } else { - tgtPrefUse2 = BuildUse(select->gtOp2); + tgtPrefUse2 = uncontainedFalseRP = BuildUse(falseVal); srcCount++; } + if ((tgtPrefUse != nullptr) && (tgtPrefUse2 != nullptr)) + { + // CQ analysis shows that it's best to always prefer only the 'true' + // val here. + tgtPrefUse2 = nullptr; + } + + // Codegen will emit something like: + // + // mov dstReg, falseVal + // cmov dstReg, trueVal + // + // We need to ensure that dstReg does not interfere with any register that + // appears in the second instruction. At the same time we want to + // preference the dstReg to be the same register as either falseVal/trueVal + // to be able to elide the mov whenever possible. + // + // While we could resolve the situation with either an internal register or + // by marking the uses as delay free unconditionally, this is a node used + // for very basic code patterns, so the logic here tries to be smarter to + // avoid the extra register pressure/potential copies. + // + // We have some flexibility as codegen can swap falseVal/trueVal as needed + // to avoid the conflict by reversing the sense of the cmov. If we can + // guarantee that the dstReg is used only in one of falseVal/trueVal, then + // we are good. + // + // To ensure the above we have some bespoke interference logic here on + // intervals for the ref positions we built above. It marks one of the uses + // as delay freed when it finds interference (almost never). + // + RefPositionIterator op1Use = op1UsesPrev; + while (op1Use != op2UsesPrev) + { + ++op1Use; + + if (op1Use->refType != RefTypeUse) + { + continue; + } + + RefPositionIterator op2Use = op2UsesPrev; + ++op2Use; + while (op2Use != refPositions.end()) + { + if (op2Use->refType == RefTypeUse) + { + if (op1Use->getInterval() == op2Use->getInterval()) + { + setDelayFree(&*op1Use); + break; + } + + ++op2Use; + } + } + } + + // Certain FP conditions are special and require multiple cmovs. These may + // introduce additional uses of either trueVal or falseVal after the first + // mov. In these cases we need additional delay-free marking. We do not + // support any containment for these currently (we do not want to incur + // multiple memory accesses, but we could contain the operand in the 'mov' + // instruction with some more care taken for marking things delay reg freed + // correctly). + switch (cc.GetCode()) + { + case GenCondition::FEQ: + case GenCondition::FLT: + case GenCondition::FLE: + // Normally these require an 'AND' conditional and cmovs with + // both the true and false values as sources. However, after + // swapping these into an 'OR' conditional the cmovs require + // only the original falseVal, so we need only to mark that as + // delay-reg freed to allow codegen to resolve this. 
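To make the two-cmov cases concrete before the handling continues below: after a ucomiss/ucomisd, ZF means equal-or-unordered and PF means unordered, so an ordered float equality cannot be expressed with a single cmov. A hedged C++ emulation of the emitted dataflow (the specific condition codes are assumptions; the structure mirrors the mov + two-cmov sequence genCodeForSelect produces for these conditions):

    int SelectFloatEq(float a, float b, int trueVal, int falseVal)
    {
        // Assumed flags after "ucomiss a, b": pf = unordered, zf = equal or unordered.
        bool pf = (a != a) || (b != b);
        bool zf = pf || (a == b);

        int dst = falseVal; // mov    dst, falseVal
        if (!pf)            // cmovnp dst, trueVal
        {
            dst = trueVal;
        }
        if (!zf)            // cmovne dst, falseVal
        {
            dst = falseVal;
        }
        return dst;         // trueVal only when ordered and equal
    }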
+ assert(uncontainedFalseRP != nullptr); + setDelayFree(uncontainedFalseRP); + break; + case GenCondition::FNEU: + case GenCondition::FGEU: + case GenCondition::FGTU: + // These require an 'OR' conditional and only access 'trueVal'. + assert(uncontainedTrueRP != nullptr); + setDelayFree(uncontainedTrueRP); + break; + default: + break; + } + BuildDef(select); return srcCount; } diff --git a/src/tests/JIT/opt/Compares/compares.cs b/src/tests/JIT/opt/Compares/compares.cs index 9a83ee566f79c..36d41c7821433 100644 --- a/src/tests/JIT/opt/Compares/compares.cs +++ b/src/tests/JIT/opt/Compares/compares.cs @@ -81,8 +81,11 @@ public static void consume(T a1, T a2) {} [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_byte_consume(byte a1, byte a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-FULL-LINE-NEXT: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{eq|ne}} + // + // X64-FULL-LINE: cmov{{ne|e}} {{[a-z0-9]+}}, {{.*}} + if (a1 == a2) { a1 = 10; } consume(a1, a2); } @@ -90,8 +93,11 @@ public static void Eq_byte_consume(byte a1, byte a2) { [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_short_consume(short a1, short a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ne|eq}} + // + // X64-FULL-LINE: cmov{{ne|e}} {{[a-z0-9]+}}, {{.*}} + if (a1 != a2) { a1 = 11; } consume(a1, a2); } @@ -99,8 +105,11 @@ public static void Ne_short_consume(short a1, short a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_int_consume(int a1, int a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{lt|ge}} + // + // X64-FULL-LINE: cmov{{l|ge}} {{[a-z0-9]+}}, {{.*}} + if (a1 < a2) { a1 = 12; } consume(a1, a2); } @@ -108,8 +117,11 @@ public static void Lt_int_consume(int a1, int a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_long_consume(long a1, long a2) { - //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, le + // ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{le|gt}} + // + // X64-FULL-LINE: cmov{{le|g}} {{[a-z0-9]+}}, {{.*}} + if (a1 <= a2) { a1 = 13; } consume(a1, a2); } @@ -117,8 +129,11 @@ public static void Le_long_consume(long a1, long a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Gt_ushort_consume(ushort a1, ushort a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} + // + // X64-FULL-LINE: cmov{{g|le}} {{[a-z0-9]+}}, {{.*}} + if (a1 > a2) { a1 = 14; } consume(a1, a2); } @@ -126,8 +141,11 @@ public static void Gt_ushort_consume(ushort a1, ushort a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Ge_uint_consume(uint a1, uint a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, 
{{w[0-9]+}}, ge + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ge|lt}} + // + // X64-FULL-LINE: cmov{{ae|b}} {{[a-z0-9]+}}, {{.*}} + if (a1 >= a2) { a1 = 15; } consume(a1, a2); } @@ -135,8 +153,11 @@ public static void Ge_uint_consume(uint a1, uint a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_ulong_consume(ulong a1, ulong a2) { - //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, eq + // ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{eq|ne}} + // + // X64-FULL-LINE: cmov{{e|ne}} {{[a-z0-9]+}}, {{.*}} + if (a1 == a2) { a1 = 16; } consume(a1, a2); } @@ -144,8 +165,12 @@ public static void Eq_ulong_consume(ulong a1, ulong a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_float_int_consume(float f1, float f2, int a1, int a2) { - //ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + // ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ne|eq}} + // + // X64-FULL-LINE: cmov{{p|np|ne|e}} {{[a-z0-9]+}}, {{.*}} + // X64-FULL-LINE-NEXT: cmov{{p|np|ne|e}} {{[a-z0-9]+}}, {{.*}} + if (f1 != f2) { a1 = 17; } consume(a1, a2); } @@ -153,8 +178,11 @@ public static void Ne_float_int_consume(float f1, float f2, int a1, int a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_double_long_consume(double f1, double f2, long a1, long a2) { - //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, lt + // ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, {{lt|ge}} + // + // X64-FULL-LINE: cmov{{be|a}} {{[a-z0-9]+}}, {{.*}} + if (f1 < f2) { a1 = 18; } consume(a1, a2); } @@ -162,8 +190,12 @@ public static void Lt_double_long_consume(double f1, double f2, long a1, long a2 [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_double_long_consume(double f1, double f2, long a1, long a2) { - //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, eq + // ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-31]}}, {{x[0-31]}}, {{x[0-31]}}, {{eq|ne}} + // + // X64-FULL-LINE: cmov{{p|np|ne|e}} {{[a-z0-9]+}}, {{.*}} + // X64-FULL-LINE-NEXT: cmov{{p|np|ne|e}} {{[a-z0-9]+}}, {{.*}} + if (f1 == f2) { a1 = 18; } consume(a1, a2); } @@ -171,8 +203,12 @@ public static void Eq_double_long_consume(double f1, double f2, long a1, long a2 [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_double_int_consume(double f1, double f2, int a1, int a2) { - //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + // ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ne|eq}} + // + // X64-FULL-LINE: cmov{{p|np|ne|e}} {{[a-z0-9]+}}, {{.*}} + // X64-FULL-LINE-NEXT: cmov{{p|np|ne|e}} {{[a-z0-9]+}}, {{.*}} + if (f1 != f2) { a1 = 18; } consume(a1, a2); } @@ -182,8 +218,11 @@ public static void Ne_double_int_consume(double f1, double f2, int a1, int a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_else_byte_consume(byte a1, byte a2) { - 
//ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ne|eq}} + // + // X64-FULL-LINE: cmov{{ne|e}} {{[a-z0-9]+}}, {{.*}} + if (a1 != a2) { a1 = 10; } else { a1 = 100; } consume(a1, a2); } @@ -191,8 +230,11 @@ public static void Ne_else_byte_consume(byte a1, byte a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_else_short_consume(short a1, short a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{lt|ge}} + // + // X64-FULL-LINE: cmov{{l|ge}} {{[a-z0-9]+}}, {{.*}} + if (a1 < a2) { a1 = 11; } else { a1 = 101; } consume(a1, a2); } @@ -200,8 +242,11 @@ public static void Lt_else_short_consume(short a1, short a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_else_int_consume(int a1, int a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} + // + // X64-FULL-LINE: cmov{{le|g}} {{[a-z0-9]+}}, {{.*}} + if (a1 <= a2) { a1 = 12; } else { a1 = 102; } consume(a1, a2); } @@ -209,8 +254,11 @@ public static void Le_else_int_consume(int a1, int a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Gt_else_long_consume(long a1, long a2) { - //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, gt + // ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{gt|le}} + // + // X64-FULL-LINE: cmov{{g|le}} {{[a-z0-9]+}}, {{.*}} + if (a1 > a2) { a1 = 13; } else { a1 = 103; } consume(a1, a2); } @@ -218,8 +266,11 @@ public static void Gt_else_long_consume(long a1, long a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Ge_else_ushort_consume(ushort a1, ushort a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ge + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ge|lt}} + // + // X64-FULL-LINE: cmov{{ge|l}} {{[a-z0-9]+}}, {{.*}} + if (a1 >= a2) { a1 = 14; } else { a1 = 104; } consume(a1, a2); } @@ -227,8 +278,11 @@ public static void Ge_else_ushort_consume(ushort a1, ushort a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Eq_else_uint_consume(uint a1, uint a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{eq|ne}} + // + // X64-FULL-LINE: cmov{{e|ne}} {{[a-z0-9]+}}, {{.*}} + if (a1 == a2) { a1 = 15; } else { a1 = 105; } consume(a1, a2); } @@ -236,8 +290,11 @@ public static void Eq_else_uint_consume(uint a1, uint a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Ne_else_ulong_consume(ulong a1, ulong a2) { - //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ne + // ARM64-FULL-LINE: cmp 
{{x[0-9]+}}, {{x[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{ne|eq}} + // + // X64-FULL-LINE: cmov{{ne|e}} {{[a-z0-9]+}}, {{.*}} + if (a1 != a2) { a1 = 16; } else { a1 = 106; } consume(a1, a2); } @@ -245,8 +302,11 @@ public static void Ne_else_ulong_consume(ulong a1, ulong a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Lt_else_float_int_consume(float f1, float f2, int a1, int a2) { - //ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + // ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{lt|ge}} + // + // X64-FULL-LINE: cmov{{be|a}} {{[a-z0-9]+}}, {{.*}} + if (f1 < f2) { a1 = 17; } else { a1 = 107; } consume(a1, a2); } @@ -254,8 +314,11 @@ public static void Lt_else_float_int_consume(float f1, float f2, int a1, int a2) [MethodImpl(MethodImplOptions.NoInlining)] public static void Le_else_double_int_consume(double f1, double f2, int a1, int a2) { - //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + // ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} + // + // X64-FULL-LINE: cmov{{b|ae}} {{[a-z0-9]+}}, {{.*}} + if (f1 <= f2) { a1 = 18; } else { a1 = 108; } consume(a1, a2); } @@ -265,72 +328,99 @@ public static void Le_else_double_int_consume(double f1, double f2, int a1, int [MethodImpl(MethodImplOptions.NoInlining)] public static byte Lt_else_byte_return(byte a1, byte a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lt + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{lt|ge}} + // + // X64-FULL-LINE: cmov{{l|ge}} {{[a-z0-9]+}}, {{.*}} + return (a1 < a2) ? (byte)10 : (byte)100; } [MethodImpl(MethodImplOptions.NoInlining)] public static short Le_else_short_return(short a1, short a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} + // + // X64-FULL-LINE: cmov{{le|g}} {{[a-z0-9]+}}, {{.*}} + return (a1 <= a2) ? (short)11 : (short)101; } [MethodImpl(MethodImplOptions.NoInlining)] public static int Gt_else_int_return(int a1, int a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} + // + // X64-FULL-LINE: cmov{{g|le}} {{[a-z0-9]+}}, {{.*}} + return (a1 > a2) ? (int)12 : (int)102; } [MethodImpl(MethodImplOptions.NoInlining)] public static long Ge_else_long_return(long a1, long a2) { - //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ge + // ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{ge|lt}} + // + // X64-FULL-LINE: cmov{{ge|l}} {{[a-z0-9]+}}, {{.*}} + return (a1 >= a2) ? 
(long)13 : (long)103; } [MethodImpl(MethodImplOptions.NoInlining)] public static ushort Eq_else_ushort_return(ushort a1, ushort a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, eq + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{eq|ne}} + // + // X64-FULL-LINE: cmov{{e|ne}} {{[a-z0-9]+}}, {{.*}} + return (a1 == a2) ? (ushort)14 : (ushort)104; } [MethodImpl(MethodImplOptions.NoInlining)] public static uint Ne_else_uint_return(uint a1, uint a2) { - //ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ne + // ARM64-FULL-LINE: cmp {{w[0-9]+}}, {{w[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{ne|eq}} + // + // X64-FULL-LINE: cmov{{e|ne}} {{[a-z0-9]+}}, {{.*}} + return (a1 != a2) ? (uint)15 : (uint)105; } [MethodImpl(MethodImplOptions.NoInlining)] public static ulong Lt_else_ulong_return(ulong a1, ulong a2) { - //ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lt + // ARM64-FULL-LINE: cmp {{x[0-9]+}}, {{x[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{lt|ge}} + // + // X64-FULL-LINE: cmov{{b|ae}} {{[a-z0-9]+}}, {{.*}} + return (a1 < a2) ? (ulong)16 : (ulong)106; } [MethodImpl(MethodImplOptions.NoInlining)] public static int Le_else_float_int_return(float a1, float a2) { - //ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, le + // ARM64-FULL-LINE: fcmp {{s[0-9]+}}, {{s[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{le|gt}} + // + // X64-FULL-LINE: cmov{{b|ae}} {{[a-z0-9]+}}, {{.*}} + return (a1 <= a2) ? 17 : 107; } [MethodImpl(MethodImplOptions.NoInlining)] public static int Gt_else_double_int_return(double a1, double a2) { - //ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} - //ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, gt + // ARM64-FULL-LINE: fcmp {{d[0-9]+}}, {{d[0-9]+}} + // ARM64-NEXT-FULL-LINE: csel {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{gt|le}} + // + // X64-FULL-LINE: cmov{{be|a}} {{[a-z0-9]+}}, {{.*}} + return (a1 > a2) ? 18 : 108; }