Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/llvm/llvm-project into fix/…
Browse files Browse the repository at this point in the history
…100394
  • Loading branch information
a-tarasyuk committed Oct 8, 2024
2 parents 9bf91e6 + 04a8bff commit 3e3bc91
Show file tree
Hide file tree
Showing 763 changed files with 30,156 additions and 17,332 deletions.
61 changes: 61 additions & 0 deletions bolt/include/bolt/Passes/ContinuityStats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass checks how well the BOLT input profile satisfies the following
// "CFG continuity" property of a perfect profile:
//
// Each positive-execution-count block in the function's CFG
// should be *reachable* from a positive-execution-count function
// entry block through a positive-execution-count path.
//
// More specifically, for each of the hottest 1000 functions, the pass
// calculates the function's fraction of basic block execution counts
// that is *unreachable*. It then reports the 95th percentile of the
// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
// The smaller the reported value is, the better the BOLT profile
// satisfies the CFG continuity property.
//
// The default value of 1000 above can be changed via the hidden BOLT option
// `-num-functions-for-continuity-check=[N]`.
// If more detailed stats are needed, `-v=1` can be used: the hottest N
// functions will be grouped into 5 equally-sized buckets, from the hottest
// to the coldest; for each bucket, various summary statistics of the
// distribution of the unreachable fractions and the raw unreachable execution
// counts will be reported.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_CONTINUITYSTATS_H
#define BOLT_PASSES_CONTINUITYSTATS_H

#include "bolt/Passes/BinaryPasses.h"
#include <vector>

namespace llvm {

class raw_ostream;

namespace bolt {
class BinaryContext;

/// Compute and report to the user the function CFG continuity quality, i.e.
/// how well the input profile satisfies the "CFG continuity" property
/// described in the header comment of this file.
class PrintContinuityStats : public BinaryFunctionPass {
public:
/// Construct the pass, forwarding \p PrintPass to the BinaryFunctionPass
/// base-class constructor.
explicit PrintContinuityStats(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}

/// Return true if \p BF should be taken into account by this pass.
/// Defined out of line.
bool shouldOptimize(const BinaryFunction &BF) const override;
/// Return the fixed name of this pass, "continuity-stats".
const char *getName() const override { return "continuity-stats"; }
/// Always returns false, regardless of the function passed in.
bool shouldPrint(const BinaryFunction &) const override { return false; }
/// Run the pass over the functions of \p BC. Defined out of line.
Error runOnFunctions(BinaryContext &BC) override;
};

} // namespace bolt
} // namespace llvm

#endif // BOLT_PASSES_CONTINUITYSTATS_H
7 changes: 4 additions & 3 deletions bolt/lib/Passes/ADRRelaxationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,14 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
continue;
}

// Don't relax adr if it points to the same function and it is not split
// and BF initial size is < 1MB.
// Don't relax ADR if it points to the same function and is in the main
// fragment and BF initial size is < 1MB.
const unsigned OneMB = 0x100000;
if (BF.getSize() < OneMB) {
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
if (TargetBF == &BF && !BF.isSplit())
if (TargetBF == &BF && !BB.isSplit())
continue;

// No relaxation needed if ADR references a basic block in the same
// fragment.
if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Passes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTPasses
PatchEntries.cpp
PettisAndHansen.cpp
PLTCall.cpp
ContinuityStats.cpp
RegAnalysis.cpp
RegReAssign.cpp
ReorderAlgorithm.cpp
Expand Down
250 changes: 250 additions & 0 deletions bolt/lib/Passes/ContinuityStats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the continuity stats calculation pass.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ContinuityStats.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include <queue>
#include <unordered_map>
#include <unordered_set>

#define DEBUG_TYPE "bolt-opts"

using namespace llvm;
using namespace bolt;

namespace opts {
// Verbosity level; only declared here (extern), the definition lives
// elsewhere. This file compares it against 1 and 2 to decide how much
// detail to print.
extern cl::opt<unsigned> Verbosity;
// Hidden option `-num-functions-for-continuity-check`, default 1000: the
// number of hottest functions the continuity statistics are aggregated over.
cl::opt<unsigned> NumFunctionsForContinuityCheck(
"num-functions-for-continuity-check",
cl::desc("number of hottest functions to print aggregated "
"CFG discontinuity stats of."),
cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace {
// List of non-owning pointers to the functions under consideration.
using FunctionListType = std::vector<const BinaryFunction *>;
// Iterator into a FunctionListType; used to describe sub-ranges of the list.
using function_iterator = FunctionListType::iterator;

/// Print order statistics of \p values to \p OS, one per line: MAX, the
/// TOP 1%, 5%, 10%, 20%, 50% and 80% cut-offs, and MIN.
///
/// \p values is sorted in place (ascending). When \p Fraction is true each
/// value is multiplied by 100 and printed as a percentage with two decimals;
/// otherwise the value is streamed as is. Nothing is printed for an empty
/// vector.
template <typename T>
void printDistribution(raw_ostream &OS, std::vector<T> &values,
                       bool Fraction = false) {
  if (values.empty())
    return;

  // Ascending order: the minimum sits at index 0 and the maximum at the back,
  // so the "TOP P%" cut-off is read at index size * (1 - P / 100).
  std::sort(values.begin(), values.end());

  // Labels are padded with spaces up to this width before the colon.
  const size_t LabelWidth = 9;

  auto emitRow = [&](std::string Label, double TopPercent) {
    // P == 0 would index one past the end, so MAX is pinned to the last slot.
    const int Index = TopPercent == 0
                          ? int(values.size() - 1)
                          : int(values.size() * (1.0 - TopPercent / 100));
    OS << " " << Label << std::string(LabelWidth - Label.length(), ' ')
       << ": ";
    if (Fraction)
      OS << format("%.2lf%%", values[Index] * 100);
    else
      OS << values[Index];
    OS << "\n";
  };

  emitRow("MAX", 0);
  const int TopPercents[] = {1, 5, 10, 20, 50, 80};
  for (const int TopPercent : TopPercents)
    emitRow("TOP " + std::to_string(TopPercent) + "%", TopPercent);
  emitRow("MIN", 100);
}

/// Report how well the profiles of \p Functions satisfy CFG continuity.
///
/// Given a perfect profile, every positive-execution-count BB should be
/// connected to an entry of the function through a positive-execution-count
/// directed path in the control flow graph. For every function with more than
/// one basic block this computes:
///   - the number of positive-count BBs that are NOT reachable that way,
///   - the sum of the execution counts (ECs) of those BBs, and
///   - that sum as a fraction of the function's total BB execution count.
///
/// One line with the 95th percentile of the fractions is always written to
/// \p OS; with opts::Verbosity >= 1 the distributions of all three quantities
/// follow. Nothing is written when no function has more than one block.
void printCFGContinuityStats(raw_ostream &OS,
                             iterator_range<function_iterator> &Functions) {
  std::vector<size_t> NumUnreachables;
  std::vector<size_t> SumECUnreachables;
  std::vector<double> FractionECUnreachables;

  for (const BinaryFunction *Function : Functions) {
    // Single-block functions cannot exhibit a discontinuity.
    if (Function->size() <= 1)
      continue;

    // Compute the sum of all BB execution counts (ECs).
    size_t NumPosECBBs = 0;
    size_t SumAllBBEC = 0;
    for (const BinaryBasicBlock &BB : *Function) {
      const size_t BBEC = BB.getKnownExecutionCount();
      NumPosECBBs += BBEC > 0 ? 1 : 0;
      SumAllBBEC += BBEC;
    }

    // Perform BFS on subgraph of CFG induced by positive weight edges,
    // seeded with every entry block that itself has a positive count.
    std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
    std::unordered_set<unsigned> Visited;
    std::queue<unsigned> Queue;
    for (const BinaryBasicBlock &BB : *Function) {
      // Make sure BB.getIndex() is not already in IndexToBB.
      assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
      IndexToBB[BB.getIndex()] = &BB;
      if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
        Queue.push(BB.getIndex());
        Visited.insert(BB.getIndex());
      }
    }
    while (!Queue.empty()) {
      const unsigned BBIndex = Queue.front();
      const BinaryBasicBlock *BB = IndexToBB[BBIndex];
      Queue.pop();
      // Branch info is walked in lock step with the successor list.
      auto SuccBIIter = BB->branch_info_begin();
      for (const BinaryBasicBlock *Succ : BB->successors()) {
        const uint64_t Count = SuccBIIter->Count;
        ++SuccBIIter;
        // Only edges with a known, positive count are followed.
        if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
          continue;
        if (Visited.insert(Succ->getIndex()).second)
          Queue.push(Succ->getIndex());
      }
    }

    // Sum the execution counts of the reachable BBs, and count how many of
    // them have a positive count. A block reached through a positive-count
    // edge may itself have a zero count; it must not be subtracted from the
    // number of positive-count blocks, or the difference below would be too
    // small and could even wrap around.
    size_t NumReachablePosECBBs = 0;
    size_t SumReachableBBEC = 0;
    for (const unsigned BBIndex : Visited) {
      const size_t BBEC = IndexToBB[BBIndex]->getKnownExecutionCount();
      NumReachablePosECBBs += BBEC > 0 ? 1 : 0;
      SumReachableBBEC += BBEC;
    }

    const size_t NumPosECBBsUnreachableFromEntry =
        NumPosECBBs - NumReachablePosECBBs;
    const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
    // With no executed block at all there is nothing that could be
    // unreachable. Report 0 instead of 0/0 = NaN, which would break the
    // ordering required by the sorts below.
    const double FractionECUnreachable =
        SumAllBBEC == 0
            ? 0.0
            : static_cast<double>(SumUnreachableBBEC) / SumAllBBEC;

    if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
      OS << "Non-trivial CFG discontinuity observed in function "
         << Function->getPrintName() << "\n";
      LLVM_DEBUG(Function->dump());
    }

    NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
    SumECUnreachables.push_back(SumUnreachableBBEC);
    FractionECUnreachables.push_back(FractionECUnreachable);
  }

  if (FractionECUnreachables.empty())
    return;

  // Ascending sort, so index size * 0.95 holds the 95th percentile. The index
  // is always in range because the vector is non-empty here.
  std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
  const size_t Rank =
      static_cast<size_t>(FractionECUnreachables.size() * 0.95);
  OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
               FractionECUnreachables[Rank] * 100);

  if (opts::Verbosity >= 1) {
    OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
       << "distribution of NUM(unreachable POS BBs) among all focal "
          "functions\n";
    printDistribution(OS, NumUnreachables);

    OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
          "functions\n";
    printDistribution(OS, SumECUnreachables);

    OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
          "POS BBs))] among all focal functions\n";
    printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
  }
}

/// Print the CFG continuity report for the hottest functions.
///
/// \p ValidFunctions is sorted in place by decreasing known execution count.
/// The overall stats are printed for its first
/// min(\p NumTopFunctions, ValidFunctions.size()) entries. If
/// opts::Verbosity >= 1 and at least 5 functions were selected, the same
/// stats are then printed for each of 5 consecutive buckets of size
/// (selected / 5), from the hottest to the coldest.
void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
              size_t NumTopFunctions) {
  // Hottest functions first.
  auto IsHotter = [&](const BinaryFunction *A, const BinaryFunction *B) {
    return A->getKnownExecutionCount() > B->getKnownExecutionCount();
  };
  llvm::sort(ValidFunctions, IsHotter);

  const size_t NumSelected = std::min(NumTopFunctions, ValidFunctions.size());
  const function_iterator First = ValidFunctions.begin();

  iterator_range<function_iterator> Hottest(First, First + NumSelected);
  BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
                      NumSelected);
  printCFGContinuityStats(BC.outs(), Hottest);

  // The bucketed breakdown is printed only on request and only when every
  // bucket gets at least one function.
  if (opts::Verbosity < 1 || NumSelected < 5)
    return;

  const size_t BucketSize = NumSelected / 5;
  BC.outs() << format(
      "Detailed stats for 5 buckets, each with %zu functions:\n", BucketSize);

  for (size_t Bucket = 0; Bucket != 5; ++Bucket) {
    const size_t Begin = Bucket * BucketSize;
    const size_t End = Begin + BucketSize;

    // The list is sorted by decreasing count, so the bucket's first entry is
    // its hottest function and its last entry the coldest.
    const size_t HottestCount = ValidFunctions[Begin]->getKnownExecutionCount();
    const size_t ColdestCount =
        ValidFunctions[End - 1]->getKnownExecutionCount();

    BC.outs() << format("----------------\n| Bucket %zu: "
                        "|\n----------------\n",
                        Bucket + 1)
              << format("execution counts of the %zu functions in the bucket: "
                        "%zu-%zu\n",
                        End - Begin, ColdestCount, HottestCount);

    iterator_range<function_iterator> InBucket(First + Begin, First + End);
    printCFGContinuityStats(BC.outs(), InBucket);
  }
}
} // namespace

/// A function is considered by this pass only if it is non-empty, has a valid
/// profile, and is accepted by the base-class filter (checked in that order).
bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
  const bool HasUsableProfile = !BF.empty() && BF.hasValidProfile();
  return HasUsableProfile && BinaryFunctionPass::shouldOptimize(BF);
}

/// Collect every function of \p BC accepted by shouldOptimize() and print the
/// continuity report for the hottest opts::NumFunctionsForContinuityCheck of
/// them. Nothing is printed when no function qualifies or the option is 0.
/// Always returns success.
Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
  FunctionListType ValidFunctions;
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Candidate = BFI.second;
    if (!PrintContinuityStats::shouldOptimize(Candidate))
      continue;
    ValidFunctions.push_back(&Candidate);
  }

  const unsigned NumRequested = opts::NumFunctionsForContinuityCheck;
  if (NumRequested != 0 && !ValidFunctions.empty())
    printAll(BC, ValidFunctions, NumRequested);

  return Error::success();
}
3 changes: 3 additions & 0 deletions bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Passes/ContinuityStats.h"
#include "bolt/Passes/FixRISCVCallsPass.h"
#include "bolt/Passes/FixRelaxationPass.h"
#include "bolt/Passes/FrameOptimizer.h"
Expand Down Expand Up @@ -373,6 +374,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
if (opts::PrintProfileStats)
Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));

Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));

Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));

Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
Expand Down
Loading

0 comments on commit 3e3bc91

Please sign in to comment.