Skip to content

Commit

Permalink
Support Math.max/min for floating points w.r.t java spec
Browse files Browse the repository at this point in the history
- spearate evaluators for J9 vs OMR to support differing behaviour (OMR
  complies with IEEE_754, while J9 returns the first NaN (if present)
- +0.0 compares as strictly greater than -0.0

Signed-off-by: Matthew Hall <[email protected]>
  • Loading branch information
matthewhall2 committed Nov 7, 2024
1 parent d3f8d48 commit 1d5132b
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 87 deletions.
2 changes: 1 addition & 1 deletion runtime/compiler/codegen/J9CodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
SavesNonVolatileGPRsForGC = 0x00000800,
SupportsInlineVectorizedMismatch = 0x00001000,
SupportsInlineVectorizedHashCode = 0x00002000,
SupportsInlineMath_MaxMin_FD = 0x00002000,
SupportsInlineMath_MaxMin_FD = 0x00004000,
};

flags32_t _j9Flags;
Expand Down
3 changes: 2 additions & 1 deletion runtime/compiler/optimizer/J9RecognizedCallTransformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1366,11 +1366,12 @@ bool J9::RecognizedCallTransformer::isInlineable(TR::TreeTop* treetop)
case TR::java_lang_Math_min_I:
case TR::java_lang_Math_max_L:
case TR::java_lang_Math_min_L:
return !comp()->getOption(TR_DisableMaxMinOptimization);
case TR::java_lang_Math_max_F:
case TR::java_lang_Math_min_F:
case TR::java_lang_Math_max_D:
case TR::java_lang_Math_min_D:
return !comp()->getOption(TR_DisableMaxMinOptimization);
return !comp()->getOption(TR_DisableMaxMinOptimization) && cg()->getSupportsInlineMath_MaxMin_FD();
case TR::java_lang_Math_multiplyHigh:
return cg()->getSupportsLMulHigh();
case TR::java_lang_StringUTF16_toBytes:
Expand Down
20 changes: 12 additions & 8 deletions runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ J9::Z::CodeGenerator::initialize()
cg->setSupportsInlineEncodeASCII();
}

static bool disableInlineMath_MaxMin_FD = feGetEnv("TR_disableInlineMath_MaxMin_FD") != NULL;
static bool disableInlineMath_MaxMin_FD = feGetEnv("TR_disableInlineMaxMin") != NULL;
if (!disableInlineMath_MaxMin_FD)
{
cg->setSupportsInlineMath_MaxMin_FD();
Expand Down Expand Up @@ -4124,20 +4124,24 @@ J9::Z::CodeGenerator::inlineDirectCall(
}
}

if (!comp->getOption(TR_DisableSIMDDoubleMaxMin) && cg->getSupportsVectorRegisters())
{
switch (methodSymbol->getRecognizedMethod())
{
if (!self()->comp()->getOption(TR_DisableMaxMinOptimization) && cg->getSupportsInlineMath_MaxMin_FD()) {
switch (methodSymbol->getRecognizedMethod()) {
case TR::java_lang_Math_max_D:
resultReg = TR::TreeEvaluator::inlineDoubleMax(node, cg);
resultReg = J9::Z::TreeEvaluator::dmaxEvaluator(node, cg);
return true;
case TR::java_lang_Math_min_D:
resultReg = TR::TreeEvaluator::inlineDoubleMin(node, cg);
resultReg = J9::Z::TreeEvaluator::dminEvaluator(node, cg);
return true;
case TR::java_lang_Math_max_F:
resultReg = J9::Z::TreeEvaluator::fmaxEvaluator(node, cg);
return true;
case TR::java_lang_Math_min_F:
resultReg = J9::Z::TreeEvaluator::fminEvaluator(node, cg);
return true;
default:
break;
}
}
}

switch (methodSymbol->getRecognizedMethod())
{
Expand Down
110 changes: 35 additions & 75 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,76 +906,48 @@ allocateWriteBarrierInternalPointerRegister(TR::CodeGenerator * cg, TR::Node * s
}


extern TR::Register *
doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
TR::Register *
J9::Z::TreeEvaluator::dmaxEvaluator(TR::Node * node, TR::CodeGenerator * cg)
{
TR_ASSERT(node->getNumChildren() >= 1 || node->getNumChildren() <= 2, "node has incorrect number of children");

/* ===================== Allocating Registers ===================== */

TR::Register * v16 = cg->allocateRegister(TR_VRF);
TR::Register * v17 = cg->allocateRegister(TR_VRF);
TR::Register * v18 = cg->allocateRegister(TR_VRF);

/* ===================== Generating instructions ===================== */

/* ====== LD FPR0,16(GPR5) Load a ====== */
TR::Register * v0 = cg->fprClobberEvaluate(node->getFirstChild());

/* ====== LD FPR2, 0(GPR5) Load b ====== */
TR::Register * v2 = cg->evaluate(node->getSecondChild());

/* ====== WFTCIDB V16,V0,X'F' a == NaN ====== */
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v16, v0, 0xF, 8, 3);

/* ====== For Max: WFCHE V17,V0,V2 Compare a >= b ====== */
if(isMaxOp)
if (cg->getSupportsVectorRegisters())
{
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v0, v2, 0, 8, 3);
cg->generateDebugCounter("z13/simd/doubleMax", 1, TR::DebugCounter::Free);
return OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
/* ====== For Min: WFCHE V17,V0,V2 Compare a <= b ====== */
else
return OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}

TR::Register *
J9::Z::TreeEvaluator::dminEvaluator(TR::Node * node, TR::CodeGenerator * cg)
{
if (cg->getSupportsVectorRegisters())
{
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v2, v0, 0, 8, 3);
cg->generateDebugCounter("z13/simd/doubleMin", 1, TR::DebugCounter::Free);
return OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
return OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}

/* ====== VO V16,V16,V17 (a >= b) || (a == NaN) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VO, node, v16, v16, v17, 0, 0, 0);

/* ====== For Max: WFTCIDB V17,V0,X'800' a == +0 ====== */
if(isMaxOp)
{
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x800, 8, 3);
}
/* ====== For Min: WFTCIDB V17,V0,X'400' a == -0 ====== */
else
{
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x400, 8, 3);
}
/* ====== WFTCIDB V18,V2,X'C00' b == 0 ====== */
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v18, v2, 0xC00, 8, 3);

/* ====== VN V17,V17,V18 (a == -0) && (b == 0) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, v17, v17, v18, 0, 0, 0);

/* ====== VO V16,V16,V17 (a >= b) || (a == NaN) || ((a == -0) && (b == 0)) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VO, node, v16, v16, v17, 0, 0, 0);

/* ====== VSEL V0,V0,V2,V16 ====== */
generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, v0, v0, v2, v16);

/* ===================== Deallocating Registers ===================== */
cg->stopUsingRegister(v2);
cg->stopUsingRegister(v16);
cg->stopUsingRegister(v17);
cg->stopUsingRegister(v18);

node->setRegister(v0);

cg->decReferenceCount(node->getFirstChild());
cg->decReferenceCount(node->getSecondChild());
TR::Register *
J9::Z::TreeEvaluator::fmaxEvaluator(TR::Node * node, TR::CodeGenerator * cg)
{
if (cg->getSupportsVectorRegisters())
{
cg->generateDebugCounter("z13/simd/floatMax", 1, TR::DebugCounter::Free);
return OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
return OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}

return node->getRegister();
TR::Register *
J9::Z::TreeEvaluator::fminEvaluator(TR::Node * node, TR::CodeGenerator * cg)
{
if (cg->getSupportsVectorRegisters())
{
cg->generateDebugCounter("z13/simd/floatMin", 1, TR::DebugCounter::Free);
return OMR::Z::TreeEvaluator::fpMinMaxVectorHelper(node, cg);
}
return OMR::Z::TreeEvaluator::xmaxxminHelper(node, cg);
}

TR::Register*
Expand Down Expand Up @@ -2945,19 +2917,7 @@ J9::Z::TreeEvaluator::toLowerIntrinsic(TR::Node *node, TR::CodeGenerator *cg, bo
return caseConversionHelper(node, cg, false, isCompressedString);
}

TR::Register*
J9::Z::TreeEvaluator::inlineDoubleMax(TR::Node *node, TR::CodeGenerator *cg)
{
cg->generateDebugCounter("z13/simd/doubleMax", 1, TR::DebugCounter::Free);
return doubleMaxMinHelper(node, cg, true);
}

TR::Register*
J9::Z::TreeEvaluator::inlineDoubleMin(TR::Node *node, TR::CodeGenerator *cg)
{
cg->generateDebugCounter("z13/simd/doubleMin", 1, TR::DebugCounter::Free);
return doubleMaxMinHelper(node, cg, false);
}

TR::Register *
J9::Z::TreeEvaluator::inlineMathFma(TR::Node *node, TR::CodeGenerator *cg)
Expand Down
6 changes: 4 additions & 2 deletions runtime/compiler/z/codegen/J9TreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,10 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator
*/
static TR::Register *inlineVectorizedStringIndexOf(TR::Node *node, TR::CodeGenerator *cg, bool isCompressed);
static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *cg, bool isLatin1);
static TR::Register *inlineDoubleMax(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *inlineDoubleMin(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *fminEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *dminEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *fmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *dmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *inlineMathFma(TR::Node *node, TR::CodeGenerator *cg);

/* This Evaluator generates the SIMD routine for methods
Expand Down

0 comments on commit 1d5132b

Please sign in to comment.