Skip to content

Commit

Permalink
[InstrPGO] Instrument sampling profile based cold function
Browse files Browse the repository at this point in the history
  • Loading branch information
wlei-llvm committed Sep 27, 2024
1 parent f404207 commit 36a747b
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 1 deletion.
6 changes: 6 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1784,6 +1784,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
PosFlag<SetTrue, [], [ClangOption, CC1Option],
"Emit extra debug info to make sample profile more accurate">,
NegFlag<SetFalse>>;
// Bare-flag form of -fprofile-sample-cold-function: takes no value. Paired
// with the Joined '=' form, which supplies an explicit file.
// NOTE(review): the help text names "default.profraw", while the driver code
// that forwards this flag falls back to "default_%m.profraw" -- confirm which
// name is intended and align the two.
def fprofile_sample_cold_function : Flag<["-"], "fprofile-sample-cold-function">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
HelpText<"Generate instrumented code to cold functions guided by sampling-based profile into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
// '=' form of -fprofile-sample-cold-function: the text joined after '=' is
// the option's value, shown as <file> in the help output.
def fprofile_sample_cold_function_EQ : Joined<["-"], "fprofile-sample-cold-function=">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<file>">,
HelpText<"Generate instrumented code to cold functions guided by sampling-based profile into <file> (overridden by LLVM_PROFILE_FILE env var)">;
def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
Expand Down
4 changes: 3 additions & 1 deletion clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -889,7 +889,9 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
Args.hasArg(options::OPT_fprofile_instr_generate) ||
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
Args.hasArg(options::OPT_fcreate_profile) ||
Args.hasArg(options::OPT_forder_file_instrumentation);
Args.hasArg(options::OPT_forder_file_instrumentation) ||
Args.hasArg(options::OPT_fprofile_sample_cold_function) ||
Args.hasArg(options::OPT_fprofile_sample_cold_function_EQ);
}

bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) {
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,18 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
}
}

// Translate -fprofile-sample-cold-function[=<file>] into the corresponding
// -mllvm option. Only the last occurrence of either spelling is honoured.
if (const Arg *ColdFuncArg = Args.getLastArg(
        options::OPT_fprofile_sample_cold_function,
        options::OPT_fprofile_sample_cold_function_EQ)) {
  // The bare flag carries no value, so fall back to the default file name
  // pattern; the '=' form supplies the path explicitly.
  const bool HasExplicitPath = ColdFuncArg->getOption().matches(
      options::OPT_fprofile_sample_cold_function_EQ);
  SmallString<128> ProfilePath(HasExplicitPath ? ColdFuncArg->getValue()
                                               : "default_%m.profraw");
  CmdArgs.push_back("-mllvm");
  CmdArgs.push_back(Args.MakeArgString(
      Twine("--instrument-sample-cold-function-path=") + ProfilePath));
}

Arg *PGOGenArg = nullptr;
if (PGOGenerateArg) {
assert(!CSPGOGenerateArg);
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,13 @@ static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));

// Hidden option carrying the file path used when instrumenting cold functions
// under sampling PGO. The module simplification pipeline checks whether it
// was given on the command line and forwards its value to addPGOInstrPasses.
static cl::opt<std::string> InstrumentSampleColdFuncPath(
    "instrument-sample-cold-function-path", cl::Hidden,
    cl::desc("File path for instrumenting sampling PGO guided cold functions"),
    cl::init(""));

extern cl::opt<std::string> UseCtxProfile;
extern cl::opt<bool> InstrumentColdFunction;

namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
Expand Down Expand Up @@ -1119,6 +1125,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// removed.
MPM.addPass(
PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));

// If a sample-cold-function instrumentation path was given on the command
// line, additionally schedule instrumentation-based profile generation. This
// is skipped in the ThinLTO post-link phase.
if (InstrumentSampleColdFuncPath.getNumOccurrences() &&
    Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
  assert(!InstrumentSampleColdFuncPath.empty() &&
         "File path is required for instrumentation generation");
  // Turn on the global cold-function filter so that the instrumentation pass
  // only instruments functions that pass its cold-function check.
  InstrumentColdFunction = true;
  addPreInlinerPasses(MPM, Level, Phase);
  addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
                    /* IsCS */ false, /* AtomicCounterUpdate */ false,
                    InstrumentSampleColdFuncPath, "", PGOOpt->FS);
}
}

// Try to perform OpenMP specific optimizations on the module. This is a
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,16 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
cl::desc("Do not instrument functions with the number of critical edges "
" greater than this threshold."));

// When set, PGO instrumentation generation is limited to cold functions (see
// the entry-count check in skipPGOGen). Deliberately not 'static': the pass
// pipeline builder declares it 'extern' and sets it to true when a
// sample-cold-function instrumentation path is supplied.
cl::opt<bool> InstrumentColdFunction(
    "instrument-cold-function", cl::init(false), cl::Hidden,
    cl::desc("Instrument cold functions (currently only used under sampling "
             "PGO pipeline)"));

// Upper bound on a function's profiled entry count for it to still be
// instrumented under --instrument-cold-function. The default of 0 means only
// functions whose recorded entry count is exactly zero are instrumented.
static cl::opt<uint64_t> InstrumentColdFuncMaxEntryCount(
    "instrument-cold-function-max-entry-count", cl::Hidden, cl::init(0),
    cl::desc("When using --instrument-cold-function, skip instrumenting the "
             "function whose entry count is above the given value"));

extern cl::opt<unsigned> MaxNumVTableAnnotations;

namespace llvm {
Expand Down Expand Up @@ -1891,6 +1901,10 @@ static bool skipPGOGen(const Function &F) {
return true;
if (F.getInstructionCount() < PGOFunctionSizeThreshold)
return true;
// In cold-function mode, skip any function that either has no entry count
// attached or whose entry count exceeds the configured maximum.
if (InstrumentColdFunction) {
  const auto EntryCount = F.getEntryCount();
  if (!EntryCount ||
      EntryCount->getCount() > InstrumentColdFuncMaxEntryCount)
    return true;
}
return false;
}

Expand Down
24 changes: 24 additions & 0 deletions llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function -S | FileCheck --check-prefixes=COLD %s
; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function -instrument-cold-function-max-entry-count=1 -S | FileCheck --check-prefixes=ENTRY-COUNT %s

; COLD: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
; COLD-NOT: __profn_main

; ENTRY-COUNT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
; ENTRY-COUNT: call void @llvm.instrprof.increment(ptr @__profn_main, i64 [[#]], i32 1, i32 0)

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; @foo carries an entry count of 0 (metadata !0), so it is never above the
; max-entry-count threshold; both RUN configurations expect it to be
; instrumented.
define void @foo() !prof !0 {
entry:
ret void
}

; @main carries an entry count of 1 (metadata !1). With the default threshold
; of 0 it is above the limit and must not be instrumented; with
; -instrument-cold-function-max-entry-count=1 it is within the limit and is
; expected to be instrumented.
define i32 @main() !prof !1 {
entry:
ret i32 0
}

!0 = !{!"function_entry_count", i64 0}
!1 = !{!"function_entry_count", i64 1}

0 comments on commit 36a747b

Please sign in to comment.