From 9b9aeafbecbdae09dbcad7110d9d38ee04138d5b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 2 Nov 2022 21:18:41 -0700 Subject: [PATCH] JIT: Enable phi-based jump threading when SSA updates aren't needed (#77748) Leverage the new SSA accounting to look for cases of phi-based jump threading that will not require SSA updates. In particular cases where the phis are all locally consumed. Also update documentation on the SSA checker implementation (which aims to ensure that the SSA accounting we're relying on here is accurate). --- src/coreclr/jit/compiler.h | 9 ++-- src/coreclr/jit/fgdiagnostic.cpp | 67 ++++++++++++++++++++----- src/coreclr/jit/redundantbranchopts.cpp | 64 +++++++++++++++++++---- 3 files changed, 113 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8461c90aeacb3..70a924ec48410 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -217,12 +217,15 @@ class LclSsaVarDsc GenTreeOp* m_asg = nullptr; // The SSA number associated with the previous definition for partial (GTF_USEASG) defs. unsigned m_useDefSsaNum = SsaConfig::RESERVED_SSA_NUM; - // Number of uses of this SSA def (may be an over-estimate). Includes phi args uses. + // Number of uses of this SSA def (may be an over-estimate). + // May not be accurate for for promoted fields. unsigned short m_numUses = 0; // True if there may be phi args uses of this def - // (false implies all uses are non-phi) + // May not be accurate for for promoted fields. + // (false implies all uses are non-phi). bool m_hasPhiUse = false; - // True if there may be uses of the def in a different block + // True if there may be uses of the def in a different block. + // May not be accurate for for promoted fields. bool m_hasGlobalUse = false; public: diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 790178f51f35f..4b5c7d709b247 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3583,12 +3583,40 @@ void Compiler::fgDebugCheckNodesUniqueness() } } -// SsaCheckWalker keeps data that is necessary to check if -// we are properly updating SSA uses and defs. +//------------------------------------------------------------------------------ +// SsaCheckVisitor: build and maintain state about SSA uses in the IR +// +// Expects to be invoked on each root expression in each basic block that +// SSA renames (note SSA will not rename defs and uses in unreachable blocks) +// and all blocks created after SSA was built (identified by bbID). +// +// Maintains a hash table keyed by (lclNum, ssaNum) that tracks information +// about that SSA lifetime. This information is updated by each SSA use and +// def seen in the trees via ProcessUses and ProcessDefs. +// +// We can spot certain errors during collection, if local occurrences either +// unexpectedy lack or have SSA numbers. +// +// Once collection is done, DoChecks() verifies that the collected information +// is soundly approximated by the data stored in the LclSsaVarDsc entries. +// +// In particular the properties claimed for an SSA lifetime via its +// LclSsaVarDsc must be accurate or an over-estimate. We tolerate over-estimates +// as there is no good mechanism in the jit for keeping track when bits of IR +// are deleted, so over time the number and kind of uses indicated in the +// LclSsaVarDsc may show more uses and more different kinds of uses then actually +// remain in the IR. +// +// One important caveat is that for promoted locals there may be implicit uses +// (via the parent var) that do not get numbered by SSA. Neither the LclSsaVarDsc +// nor the IR will track these implicit uses. So the checking done below will +// only catch anomalies in the defs or in the explicit uses. // class SsaCheckVisitor : public GenTreeVisitor { private: + // Hash key for tracking per-SSA lifetime info + // struct SsaKey { private: @@ -3624,6 +3652,8 @@ class SsaCheckVisitor : public GenTreeVisitor } }; + // Per-SSA lifetime info + // struct SsaInfo { private: @@ -3949,6 +3979,8 @@ class SsaCheckVisitor : public GenTreeVisitor { for (unsigned lclNum = 0; lclNum < m_compiler->lvaCount; lclNum++) { + // Check each local in SSA + // LclVarDsc* const varDsc = m_compiler->lvaGetDesc(lclNum); if (!varDsc->lvInSsa) @@ -3956,6 +3988,8 @@ class SsaCheckVisitor : public GenTreeVisitor continue; } + // Check each SSA lifetime of that local + // const SsaDefArray& ssaDefs = varDsc->lvPerSsaData; for (unsigned i = 0; i < ssaDefs.GetCount(); i++) @@ -3963,16 +3997,21 @@ class SsaCheckVisitor : public GenTreeVisitor LclSsaVarDsc* const ssaVarDsc = ssaDefs.GetSsaDefByIndex(i); const unsigned ssaNum = ssaDefs.GetSsaNum(ssaVarDsc); + // Find the SSA info we gathered for this lifetime via the IR walk + // SsaKey key(lclNum, ssaNum); SsaInfo* ssaInfo = nullptr; if (!m_infoMap.Lookup(key, &ssaInfo)) { - // Possibly there are no more references to this local. + // IR has no information about this lifetime. + // Possibly there are no more references. + // continue; } - // VarDsc block should be accurate. + // Now cross-check the gathered ssaInfo vs the LclSsaVarDsc. + // LclSsaVarDsc should have the correct def block // BasicBlock* const ssaInfoDefBlock = ssaInfo->GetDefBlock(); BasicBlock* const ssaVarDscDefBlock = ssaVarDsc->GetBlock(); @@ -3999,7 +4038,7 @@ class SsaCheckVisitor : public GenTreeVisitor unsigned const ssaInfoUses = ssaInfo->GetNumUses(); unsigned const ssaVarDscUses = ssaVarDsc->GetNumUses(); - // VarDsc use count must be accurate or an over-estimate + // LclSsaVarDsc use count must be accurate or an over-estimate // if (ssaInfoUses > ssaVarDscUses) { @@ -4015,7 +4054,7 @@ class SsaCheckVisitor : public GenTreeVisitor ssaVarDscUses); } - // HasPhiUse use must be accurate or an over-estimate + // LclSsaVarDsc HasPhiUse use must be accurate or an over-estimate // if (ssaInfo->HasPhiUse() && !ssaVarDsc->HasPhiUse()) { @@ -4027,7 +4066,7 @@ class SsaCheckVisitor : public GenTreeVisitor JITDUMP("[info] HasPhiUse overestimated for V%02u.%u\n", lclNum, ssaNum); } - // HasGlobalUse use must be accurate or an over-estimate + // LclSsaVarDsc HasGlobalUse use must be accurate or an over-estimate // if (ssaInfo->HasGlobalUse() && !ssaVarDsc->HasGlobalUse()) { @@ -4058,9 +4097,9 @@ class SsaCheckVisitor : public GenTreeVisitor // * There is at most one SSA def for a given SSA num, and it is in the expected block. // * Operands that should have SSA numbers have them // * Operands that should not have SSA numbers do not have them -// * The number of SSA uses is accurate or an over-estimate -// * The local/global bit is properly set or an over-estimate -// * The has phi use bit is properly set or an over-estimate +// * GetNumUses is accurate or an over-estimate +// * HasGlobalUse is properly set or an over-estimate +// * HasPhiUse is properly set or an over-estimate // // Todo: // * Try and sanity check PHIs @@ -4077,6 +4116,7 @@ void Compiler::fgDebugCheckSsa() assert(fgDomsComputed); // This class visits the flow graph the same way the SSA builder does. + // In particular it may skip over blocks that SSA did not rename. // class SsaCheckDomTreeVisitor : public DomTreeVisitor { @@ -4099,13 +4139,13 @@ void Compiler::fgDebugCheckSsa() } }; - // Visit the blocks SSA modified + // Visit the blocks that SSA intially renamed // SsaCheckVisitor scv(this); SsaCheckDomTreeVisitor visitor(this, scv); visitor.WalkTree(); - // Also visit any blocks added afterwards + // Also visit any blocks added after SSA was built // for (BasicBlock* const block : Blocks()) { @@ -4115,7 +4155,8 @@ void Compiler::fgDebugCheckSsa() } } - // Cross-check the information gathered from IR against the info in the SSA table + // Cross-check the information gathered from IR against the info + // in the LclSsaVarDscs. // scv.DoChecks(); diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index 87b604464c9ae..55443f60a1546 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -657,12 +657,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) // We were unable to determine the relop value via dominance checks. // See if we can jump thread via phi disambiguation. // - // optJumpThreadPhi disabled as it is exposing problems with stale SSA. - // See issue #76636 and related. - // - // return optJumpThreadPhi(block, tree, treeNormVN); - - return false; + return optJumpThreadPhi(block, tree, treeNormVN); } // Be conservative if there is an exception effect and we're in an EH region @@ -815,21 +810,68 @@ bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const dom // Since flow is going to bypass block, make sure there // is nothing in block that can cause a side effect. // - // Note we neglect PHI assignments. This reflects a general lack of - // SSA update abilities in the jit. We really should update any uses - // of PHIs defined here with the corresponding PHI input operand. + // For non-PHI RBO, we neglect PHI assignments. This can leave SSA + // in an incorrect state but so far it has not yet caused problems. // - // TODO: if block has side effects, for those predecessors that are + // For PHI-based RBO we need to be more cautious and insist that + // any PHI is locally consumed, so that if we bypass the block we + // don't need to make SSA updates. + // + // TODO: handle blocks with side effects. For those predecessors that are // favorable (ones that don't reach block via a critical edge), consider // duplicating block's IR into the predecessor. This is the jump threading // analog of the optimization we encourage via fgOptimizeUncondBranchToSimpleCond. // Statement* const lastStmt = block->lastStmt(); + bool const isPhiRBO = (domBlock == nullptr); - for (Statement* const stmt : block->NonPhiStatements()) + for (Statement* const stmt : block->Statements()) { GenTree* const tree = stmt->GetRootNode(); + // If we are doing PHI-based RBO then all local PHIs must be locally consumed. + // + if (stmt->IsPhiDefnStmt()) + { + if (isPhiRBO) + { + GenTreeLclVarCommon* const phiDef = tree->AsOp()->gtGetOp1()->AsLclVarCommon(); + unsigned const lclNum = phiDef->GetLclNum(); + unsigned const ssaNum = phiDef->GetSsaNum(); + LclVarDsc* const varDsc = lvaGetDesc(lclNum); + + // We do not put implicit uses of promoted local fields into SSA. + // So assume the worst here, that there is some implicit use of this ssa + // def we don't know about. + // + if (varDsc->lvIsStructField) + { + JITDUMP(FMT_BB " has phi for promoted field V%02u.%u; no phi-based threading\n", block->bbNum, + lclNum, ssaNum); + return false; + } + + LclSsaVarDsc* const ssaVarDsc = varDsc->GetPerSsaData(ssaNum); + + // Bypassing a global use might require SSA updates. + // Note a phi use is ok if it's local (self loop) + // + if (ssaVarDsc->HasGlobalUse()) + { + JITDUMP(FMT_BB " has global phi for V%02u.%u; no phi-based threading\n", block->bbNum, lclNum, + ssaNum); + return false; + } + } + + // We are either not doing PHI-based RBO or this PHI won't cause + // problems. Carry on. + // + continue; + } + + // This is a "real" statement. + // // We can ignore exception side effects in the jump tree. // // They are covered by the exception effects in the dominating compare.