From c875d1733e4345cb44744bdbf5a38ab7920a798e Mon Sep 17 00:00:00 2001 From: Shubham Verma Date: Wed, 30 Oct 2024 10:13:12 -0400 Subject: [PATCH] Fix loop strider reassociation and hoisting logic for off-heap To prevent double hoisting of dataAddr pointer, limit reassociation and hoisting criteria to include only those that have dataAddr pointer as their first child. The offset node doesn't rely on array header size when using dataAddr pointer to get to data elements so no need for reassociation; we can use the original offset node with newly hoisted array load node. Signed-off-by: Shubham Verma --- compiler/optimizer/InductionVariable.cpp | 56 +++++++++++------------- compiler/optimizer/OMRTransformUtil.cpp | 16 +++---- compiler/optimizer/OMRTransformUtil.hpp | 3 +- 3 files changed, 36 insertions(+), 39 deletions(-) diff --git a/compiler/optimizer/InductionVariable.cpp b/compiler/optimizer/InductionVariable.cpp index b03dd4fdf24..5ce2a73ccba 100644 --- a/compiler/optimizer/InductionVariable.cpp +++ b/compiler/optimizer/InductionVariable.cpp @@ -71,6 +71,7 @@ #include "optimizer/LoopCanonicalizer.hpp" #include "optimizer/VPConstraint.hpp" #include "ras/Debug.hpp" +#include "optimizer/TransformUtil.hpp" #define OPT_DETAILS "O^O INDUCTION VARIABLE ANALYSIS: " @@ -926,22 +927,8 @@ int32_t TR_LoopStrider::detectCanonicalizedPredictableLoops(TR_Structure *loopSt traceMsg(comp(), "Found an reassociated induction var chance in %s\n", comp()->signature()); TR::SymbolReference *origAuto = symRefTab->getSymRef(j); - - TR::Node *arrayRefNode; - int32_t hdrSize = (int32_t)TR::Compiler->om.contiguousArrayHeaderSizeInBytes(); - if (usingAladd) - { - TR::Node *constantNode = TR::Node::create(byteCodeInfoNode, TR::lconst); - //constantNode->setLongInt(16); - constantNode->setLongInt((int64_t)hdrSize); - arrayRefNode = TR::Node::create(TR::aladd, 2, - TR::Node::createWithSymRef(byteCodeInfoNode, - TR::aload, 0, origAuto), constantNode); - } - else - arrayRefNode = TR::Node::create(TR::aiadd, 2, - TR::Node::createWithSymRef(byteCodeInfoNode, TR::aload, 0, origAuto), - TR::Node::create(byteCodeInfoNode, TR::iconst, 0, hdrSize)); + TR::Node *newArrayObjNode = TR::Node::createWithSymRef(byteCodeInfoNode, TR::aload, 0, origAuto); + TR::Node *arrayRefNode = TR::TransformUtil::generateArrayElementAddressTrees(comp(), newArrayObjNode, NULL, byteCodeInfoNode); arrayRefNode->setIsInternalPointer(true); if (!origAuto->getSymbol()->isInternalPointer()) @@ -3397,6 +3384,20 @@ bool TR_LoopStrider::reassociateAndHoistComputations(TR::Block *loopInvariantBlo int32_t originalInternalPointerSymbol = 0; TR::Node *originalNode = NULL; TR::Node *pinningArrayNode = NULL; + /* Tree structures eligible for reassociation and hoisting + non off-heap mode: + aladd (internal pointer) + array_obj (pinning array pointer) + add/sub offset + index + header_size/-header_size + + off-heap mode: + aladd (internal pointer) + contiguousArrayDataAddrFieldSymbol (dataAddrPointer, internal pointer) + array_obj (pinning array pointer) + mul/shift/integer offset + */ if (cg()->supportsInternalPointers() && reassociateAndHoistNonPacked() && node->isInternalPointer() && !node->isDataAddrPointer()) { @@ -3405,6 +3406,8 @@ bool TR_LoopStrider::reassociateAndHoistComputations(TR::Block *loopInvariantBlo else pinningArrayNode = node->getFirstChild(); + dumpOptDetails(comp(), "%s: Using %p as pinningArrayNode for internal pointer node %p\n", OPT_DETAILS, pinningArrayNode, node); + if (pinningArrayNode->getOpCode().isLoadVar() && pinningArrayNode->getSymbolReference()->getSymbol()->isAutoOrParm() && _neverWritten->get(pinningArrayNode->getSymbolReference()->getReferenceNumber())) @@ -3652,10 +3655,12 @@ bool TR_LoopStrider::reassociateAndHoistComputations(TR::Block *loopInvariantBlo } #ifdef J9_PROJECT_SPECIFIC - // For OffHeap runs, the array access trees don't include headerSize addition - // Offset tree opcode is lmul/lshl to calculate offset from the index node. - if (TR::Compiler->om.isOffHeapAllocationEnabled() && - (node->getOpCodeValue() == TR::lmul || node->getOpCodeValue() == TR::lshl)) + /* For OffHeap runs, the array access trees don't include headerSize addition. + * Offset tree opcode can be either mul/shift or a number, if array stride is 1. + * If first child of originalNode is not dataAddr pointer we have already + * hoisted the array aload, no need to do it again. + */ + if (TR::Compiler->om.isOffHeapAllocationEnabled() && originalNode && originalNode->getFirstChild()->isDataAddrPointer()) { if ((isInternalPointer && (comp()->getSymRefTab()->getNumInternalPointers() < maxInternalPointers())) && @@ -3709,20 +3714,11 @@ bool TR_LoopStrider::reassociateAndHoistComputations(TR::Block *loopInvariantBlo TR::SymbolReference *internalPointerSymRef = (*_reassociatedAutos)[originalInternalPointerSymbol]; originalNode->getFirstChild()->recursivelyDecReferenceCount(); - node->decReferenceCount(); - _reassociatedNodes.add(node); - - if (node->getReferenceCount() == 0) - { - node->getFirstChild()->decReferenceCount(); - node->getSecondChild()->decReferenceCount(); - } TR::Node *newLoad = TR::Node::createWithSymRef(node, TR::aload, 0, internalPointerSymRef); newLoad->setLocalIndex(~0); originalNode->setAndIncChild(0, newLoad); - originalNode->setAndIncChild(1, node->getFirstChild()); - reassociatedComputation = true; + dumpOptDetails(comp(), "%s: Replaced array load in %p with %p\n", OPT_DETAILS, originalNode, newLoad); } } #endif /* J9_PROJECT_SPECIFIC */ diff --git a/compiler/optimizer/OMRTransformUtil.cpp b/compiler/optimizer/OMRTransformUtil.cpp index 9e279042b51..76087c2c843 100644 --- a/compiler/optimizer/OMRTransformUtil.cpp +++ b/compiler/optimizer/OMRTransformUtil.cpp @@ -454,7 +454,7 @@ OMR::TransformUtil::generateDataAddrLoadTrees(TR::Compilation *comp, TR::Node *a #endif /* OMR_GC_SPARSE_HEAP_ALLOCATION */ TR::Node * -OMR::TransformUtil::generateArrayElementAddressTrees(TR::Compilation *comp, TR::Node *arrayNode, TR::Node *offsetNode) +OMR::TransformUtil::generateArrayElementAddressTrees(TR::Compilation *comp, TR::Node *arrayNode, TR::Node *offsetNode, TR::Node *originatingByteCodeNode) { TR::Node *arrayAddressNode = NULL; TR::Node *totalOffsetNode = NULL; @@ -468,24 +468,24 @@ OMR::TransformUtil::generateArrayElementAddressTrees(TR::Compilation *comp, TR:: { arrayAddressNode = generateDataAddrLoadTrees(comp, arrayNode); if (offsetNode) - arrayAddressNode = TR::Node::create(TR::aladd, 2, arrayAddressNode, offsetNode); + arrayAddressNode = TR::Node::create(originatingByteCodeNode, TR::aladd, 2, arrayAddressNode, offsetNode); } else if (comp->target().is64Bit()) #else if (comp->target().is64Bit()) #endif /* OMR_GC_SPARSE_HEAP_ALLOCATION */ { - totalOffsetNode = TR::Node::lconst(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()); + totalOffsetNode = TR::Node::lconst(originatingByteCodeNode, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()); if (offsetNode) - totalOffsetNode = TR::Node::create(TR::ladd, 2, offsetNode, totalOffsetNode); - arrayAddressNode = TR::Node::create(TR::aladd, 2, arrayNode, totalOffsetNode); + totalOffsetNode = TR::Node::create(originatingByteCodeNode, TR::ladd, 2, offsetNode, totalOffsetNode); + arrayAddressNode = TR::Node::create(originatingByteCodeNode, TR::aladd, 2, arrayNode, totalOffsetNode); } else { - totalOffsetNode = TR::Node::iconst(static_cast(TR::Compiler->om.contiguousArrayHeaderSizeInBytes())); + totalOffsetNode = TR::Node::iconst(originatingByteCodeNode, static_cast(TR::Compiler->om.contiguousArrayHeaderSizeInBytes())); if (offsetNode) - totalOffsetNode = TR::Node::create(TR::iadd, 2, offsetNode, totalOffsetNode); - arrayAddressNode = TR::Node::create(TR::aiadd, 2, arrayNode, totalOffsetNode); + totalOffsetNode = TR::Node::create(originatingByteCodeNode, TR::iadd, 2, offsetNode, totalOffsetNode); + arrayAddressNode = TR::Node::create(originatingByteCodeNode, TR::aiadd, 2, arrayNode, totalOffsetNode); } return arrayAddressNode; diff --git a/compiler/optimizer/OMRTransformUtil.hpp b/compiler/optimizer/OMRTransformUtil.hpp index c6b27032077..8e218717a47 100644 --- a/compiler/optimizer/OMRTransformUtil.hpp +++ b/compiler/optimizer/OMRTransformUtil.hpp @@ -226,7 +226,8 @@ class OMR_EXTENSIBLE TransformUtil static TR::Node *generateArrayElementAddressTrees( TR::Compilation *comp, TR::Node *arrayNode, - TR::Node *offsetNode = NULL); + TR::Node *offsetNode = NULL, + TR::Node *originatingByteCodeNode = NULL); /** * \brief