-
Notifications
You must be signed in to change notification settings - Fork 11.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[nfc][ctx_prof] Efficient profile traversal and update #110052
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesThis optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are Full diff: https://github.com/llvm/llvm-project/pull/110052.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 0a9543f037eb58..be040d5eca5f31 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -35,7 +35,7 @@ class PGOContextualProfile {
uint32_t NextCounterIndex = 0;
uint32_t NextCallsiteIndex = 0;
const std::string Name;
-
+ PGOCtxProfContext Index;
FunctionInfo(StringRef Name) : Name(Name) {}
};
std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
@@ -50,6 +50,8 @@ class PGOContextualProfile {
// its state piecemeal.
PGOContextualProfile() = default;
+ void initIndex();
+
public:
PGOContextualProfile(const PGOContextualProfile &) = delete;
PGOContextualProfile(PGOContextualProfile &&) = default;
@@ -94,7 +96,7 @@ class PGOContextualProfile {
using ConstVisitor = function_ref<void(const PGOCtxProfContext &)>;
using Visitor = function_ref<void(PGOCtxProfContext &)>;
- void update(Visitor, const Function *F = nullptr);
+ void update(Visitor, const Function &F);
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index a00c21ddc7d7a1..e121a4e436d2bc 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -22,6 +22,60 @@
#include <map>
namespace llvm {
+class PGOContextualProfile;
+class PGOCtxProfContext;
+
+namespace internal {
+// When we traverse the contextual profile, we typically want to visit contexts
+// pertaining to a specific function. To avoid traversing the whole tree, we
+// want to keep a per-function list - which will be in preorder - of that
+// function's contexts. This happens in PGOContextualProfile. For memory use
+// efficiency, we want to make PGOCtxProfContext an intrusive double-linked list
+// node. We need to handle the cases where PGOCtxProfContext nodes are moved and
+// deleted: in both cases, we need to update the index (==list). We can do that
+// directly from the node in the list, without knowing who the "parent" of the
+// list is. That makes the ADT ilist overkill here. Finally, IndexNode is meant
+// to be an implementation detail of PGOCtxProfContext, and the only reason it's
+// factored out is to avoid implementing move semantics for all its members.
+template <class T> class IndexNode {
+ // This class' members are intentionally private - it's a convenience
+ // implementation detail.
+ friend class ::llvm::PGOCtxProfContext;
+ friend class ::llvm::PGOContextualProfile;
+
+ T *Previous = nullptr;
+ T *Next = nullptr;
+
+ ~IndexNode() {
+ if (Next)
+ Next->Previous = Previous;
+ if (Previous)
+ Previous->Next = Next;
+ }
+
+ T *self() { return reinterpret_cast<T *>(this); }
+
+ IndexNode(const IndexNode &Other) = delete;
+
+ IndexNode(IndexNode &&Other) {
+ // Copy the neighbor info
+ Next = Other.Next;
+ Previous = Other.Previous;
+
+ // Update the neighbors to point to this object
+ if (Other.Next)
+ Other.Next->Previous = self();
+ if (Other.Previous)
+ Other.Previous->Next = self();
+
+ // Make sure the dtor is a noop
+ Other.Next = nullptr;
+ Other.Previous = nullptr;
+ }
+ IndexNode() = default;
+};
+} // namespace internal
+
/// A node (context) in the loaded contextual profile, suitable for mutation
/// during IPO passes. We generally expect a fraction of counters and
/// callsites to be populated. We continue to model counters as vectors, but
@@ -29,13 +83,15 @@ namespace llvm {
/// there is a small number of indirect targets (usually, 1 for direct calls);
/// but potentially a large number of callsites, and, as inlining progresses,
/// the callsite count of a caller will grow.
-class PGOCtxProfContext final {
+class PGOCtxProfContext final : public internal::IndexNode<PGOCtxProfContext> {
public:
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;
private:
friend class PGOCtxProfileReader;
+ friend class PGOContextualProfile;
+
GlobalValue::GUID GUID = 0;
SmallVector<uint64_t, 16> Counters;
CallsiteMapTy Callsites;
@@ -47,11 +103,15 @@ class PGOCtxProfContext final {
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
SmallVectorImpl<uint64_t> &&Counters);
+ // Create a bogus context object, used for anchoring the index double linked
+ // list - see IndexNode
+ PGOCtxProfContext() = default;
+
public:
PGOCtxProfContext(const PGOCtxProfContext &) = delete;
PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete;
PGOCtxProfContext(PGOCtxProfContext &&) = default;
- PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default;
+ PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete;
GlobalValue::GUID guid() const { return GUID; }
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 873277cf51d6b9..50f5b6b32b435e 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -185,6 +185,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
// If we made it this far, the Result is valid - which we mark by setting
// .Profiles.
Result.Profiles = std::move(*MaybeCtx);
+ Result.initIndex();
return Result;
}
@@ -266,11 +267,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
- function_ref<void(ProfTy &)> Visitor,
- GlobalValue::GUID Match = 0) {
+ function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
- if (!Match || Ctx.guid() == Match)
- Visitor(Ctx);
+ Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
@@ -279,16 +278,44 @@ static void preorderVisit(ProfilesTy &Profiles,
Traverser(P);
}
-void PGOContextualProfile::update(Visitor V, const Function *F) {
- GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
+void PGOContextualProfile::initIndex() {
+ // Initialize the head of the index list for each function. We don't need it
+ // after this point.
+ DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
+ for (auto &FI : FuncInfo)
+ InsertionPoints[FI.first] = &FI.second.Index;
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
- *Profiles, V, G);
+ *Profiles, [&](PGOCtxProfContext &Ctx) {
+ auto InsertIt = InsertionPoints.find(Ctx.guid());
+ if (InsertIt == InsertionPoints.end())
+ return;
+ // Insert at the end of the list. Since we traverse in preorder, it
+ // means that when we iterate the list from the beginning, we'd
+ // encounter the contexts in the order we would have, should we have
+ // performed a full preorder traversal.
+ InsertIt->second->Next = &Ctx;
+ Ctx.Previous = InsertIt->second;
+ InsertIt->second = &Ctx;
+ });
+}
+
+void PGOContextualProfile::update(Visitor V, const Function &F) {
+ assert(isFunctionKnown(F));
+ GlobalValue::GUID G = getDefinedFunctionGUID(F);
+ for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
+ Node = Node->Next)
+ V(*Node);
}
void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
- GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
- preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
- const PGOCtxProfContext>(*Profiles, V, G);
+ if (!F)
+ preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+ const PGOCtxProfContext>(*Profiles, V);
+ assert(isFunctionKnown(*F));
+ GlobalValue::GUID G = getDefinedFunctionGUID(*F);
+ for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
+ Node = Node->Next)
+ V(*Node);
}
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 3d2fa226ff15b9..4ead5cdcf29c01 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
Ctx.counters()[IndirectID] = IndirectCount;
};
- CtxProf.update(ProfileUpdater, &Caller);
+ CtxProf.update(ProfileUpdater, Caller);
return &DirectCall;
}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 25a01558165da0..671b0d0822a5d9 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
assert(Deleted);
(void)Deleted;
};
- CtxProf.update(Updater, &Caller);
+ CtxProf.update(Updater, Caller);
return Ret;
}
|
@llvm/pr-subscribers-llvm-analysis Author: Mircea Trofin (mtrofin) ChangesThis optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are Full diff: https://github.com/llvm/llvm-project/pull/110052.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 0a9543f037eb58..be040d5eca5f31 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -35,7 +35,7 @@ class PGOContextualProfile {
uint32_t NextCounterIndex = 0;
uint32_t NextCallsiteIndex = 0;
const std::string Name;
-
+ PGOCtxProfContext Index;
FunctionInfo(StringRef Name) : Name(Name) {}
};
std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
@@ -50,6 +50,8 @@ class PGOContextualProfile {
// its state piecemeal.
PGOContextualProfile() = default;
+ void initIndex();
+
public:
PGOContextualProfile(const PGOContextualProfile &) = delete;
PGOContextualProfile(PGOContextualProfile &&) = default;
@@ -94,7 +96,7 @@ class PGOContextualProfile {
using ConstVisitor = function_ref<void(const PGOCtxProfContext &)>;
using Visitor = function_ref<void(PGOCtxProfContext &)>;
- void update(Visitor, const Function *F = nullptr);
+ void update(Visitor, const Function &F);
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
index a00c21ddc7d7a1..e121a4e436d2bc 100644
--- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
+++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h
@@ -22,6 +22,60 @@
#include <map>
namespace llvm {
+class PGOContextualProfile;
+class PGOCtxProfContext;
+
+namespace internal {
+// When we traverse the contextual profile, we typically want to visit contexts
+// pertaining to a specific function. To avoid traversing the whole tree, we
+// want to keep a per-function list - which will be in preorder - of that
+// function's contexts. This happens in PGOContextualProfile. For memory use
+// efficiency, we want to make PGOCtxProfContext an intrusive double-linked list
+// node. We need to handle the cases where PGOCtxProfContext nodes are moved and
+// deleted: in both cases, we need to update the index (==list). We can do that
+// directly from the node in the list, without knowing who the "parent" of the
+// list is. That makes the ADT ilist overkill here. Finally, IndexNode is meant
+// to be an implementation detail of PGOCtxProfContext, and the only reason it's
+// factored out is to avoid implementing move semantics for all its members.
+template <class T> class IndexNode {
+ // This class' members are intentionally private - it's a convenience
+ // implementation detail.
+ friend class ::llvm::PGOCtxProfContext;
+ friend class ::llvm::PGOContextualProfile;
+
+ T *Previous = nullptr;
+ T *Next = nullptr;
+
+ ~IndexNode() {
+ if (Next)
+ Next->Previous = Previous;
+ if (Previous)
+ Previous->Next = Next;
+ }
+
+ T *self() { return reinterpret_cast<T *>(this); }
+
+ IndexNode(const IndexNode &Other) = delete;
+
+ IndexNode(IndexNode &&Other) {
+ // Copy the neighbor info
+ Next = Other.Next;
+ Previous = Other.Previous;
+
+ // Update the neighbors to point to this object
+ if (Other.Next)
+ Other.Next->Previous = self();
+ if (Other.Previous)
+ Other.Previous->Next = self();
+
+ // Make sure the dtor is a noop
+ Other.Next = nullptr;
+ Other.Previous = nullptr;
+ }
+ IndexNode() = default;
+};
+} // namespace internal
+
/// A node (context) in the loaded contextual profile, suitable for mutation
/// during IPO passes. We generally expect a fraction of counters and
/// callsites to be populated. We continue to model counters as vectors, but
@@ -29,13 +83,15 @@ namespace llvm {
/// there is a small number of indirect targets (usually, 1 for direct calls);
/// but potentially a large number of callsites, and, as inlining progresses,
/// the callsite count of a caller will grow.
-class PGOCtxProfContext final {
+class PGOCtxProfContext final : public internal::IndexNode<PGOCtxProfContext> {
public:
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;
private:
friend class PGOCtxProfileReader;
+ friend class PGOContextualProfile;
+
GlobalValue::GUID GUID = 0;
SmallVector<uint64_t, 16> Counters;
CallsiteMapTy Callsites;
@@ -47,11 +103,15 @@ class PGOCtxProfContext final {
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
SmallVectorImpl<uint64_t> &&Counters);
+ // Create a bogus context object, used for anchoring the index double linked
+ // list - see IndexNode
+ PGOCtxProfContext() = default;
+
public:
PGOCtxProfContext(const PGOCtxProfContext &) = delete;
PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete;
PGOCtxProfContext(PGOCtxProfContext &&) = default;
- PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default;
+ PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete;
GlobalValue::GUID guid() const { return GUID; }
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 873277cf51d6b9..50f5b6b32b435e 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -185,6 +185,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
// If we made it this far, the Result is valid - which we mark by setting
// .Profiles.
Result.Profiles = std::move(*MaybeCtx);
+ Result.initIndex();
return Result;
}
@@ -266,11 +267,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
- function_ref<void(ProfTy &)> Visitor,
- GlobalValue::GUID Match = 0) {
+ function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
- if (!Match || Ctx.guid() == Match)
- Visitor(Ctx);
+ Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
@@ -279,16 +278,44 @@ static void preorderVisit(ProfilesTy &Profiles,
Traverser(P);
}
-void PGOContextualProfile::update(Visitor V, const Function *F) {
- GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
+void PGOContextualProfile::initIndex() {
+ // Initialize the head of the index list for each function. We don't need it
+ // after this point.
+ DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
+ for (auto &FI : FuncInfo)
+ InsertionPoints[FI.first] = &FI.second.Index;
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
- *Profiles, V, G);
+ *Profiles, [&](PGOCtxProfContext &Ctx) {
+ auto InsertIt = InsertionPoints.find(Ctx.guid());
+ if (InsertIt == InsertionPoints.end())
+ return;
+ // Insert at the end of the list. Since we traverse in preorder, it
+ // means that when we iterate the list from the beginning, we'd
+ // encounter the contexts in the order we would have, should we have
+ // performed a full preorder traversal.
+ InsertIt->second->Next = &Ctx;
+ Ctx.Previous = InsertIt->second;
+ InsertIt->second = &Ctx;
+ });
+}
+
+void PGOContextualProfile::update(Visitor V, const Function &F) {
+ assert(isFunctionKnown(F));
+ GlobalValue::GUID G = getDefinedFunctionGUID(F);
+ for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
+ Node = Node->Next)
+ V(*Node);
}
void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
- GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
- preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
- const PGOCtxProfContext>(*Profiles, V, G);
+ if (!F)
+ preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+ const PGOCtxProfContext>(*Profiles, V);
+ assert(isFunctionKnown(*F));
+ GlobalValue::GUID G = getDefinedFunctionGUID(*F);
+ for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
+ Node = Node->Next)
+ V(*Node);
}
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 3d2fa226ff15b9..4ead5cdcf29c01 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
Ctx.counters()[IndirectID] = IndirectCount;
};
- CtxProf.update(ProfileUpdater, &Caller);
+ CtxProf.update(ProfileUpdater, Caller);
return &DirectCall;
}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 25a01558165da0..671b0d0822a5d9 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
assert(Deleted);
(void)Deleted;
};
- CtxProf.update(Updater, &Caller);
+ CtxProf.update(Updater, Caller);
return Ret;
}
|
23b4013
to
6331517
Compare
6331517
to
94f2f75
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are `std::move`-d or deleted.
This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are `std::move`-d or deleted.
This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are `std::move`-d or deleted.
This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are
std::move
d or deleted.