Skip to content

Commit

Permalink
Support TID (#2970)
Browse files Browse the repository at this point in the history
Bump to our fork of opentelemetry-ebpf-profiler that supports thread IDs, and add the thread ID as a label.

Previously we were just getting the `comm` of the first thread we saw a sample for and saving that as the entire process's `comm`. This PR saves that value as `thread_name` instead, and reads the main thread's comm (i.e. the process name) from procfs, saving that as `comm`.
  • Loading branch information
umanwizard authored Aug 14, 2024
1 parent d4a5bb8 commit af563bc
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 9 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ Using relabeling the following labels can be attached to profiles:
* `__meta_process_executable_stripped`: Whether the executable of the process being profiled is stripped from debuginfo.
* `__meta_system_kernel_release`: The kernel release of the system.
* `__meta_system_kernel_machine`: The kernel machine of the system (typically the architecture).
* `__meta_thread_comm`: The command name of the thread being profiled.
* `__meta_thread_id`: The thread ID (TID) of the thread being profiled.
* `__meta_agent_revision`: The revision of the agent.
* `__meta_kubernetes_namespace`: The namespace of the pod the process is running in.
* `__meta_kubernetes_pod_name`: The name of the pod the process is running in.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,4 @@ require (
sigs.k8s.io/yaml v1.3.0 // indirect
)

replace github.com/elastic/otel-profiling-agent => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20240808105213-c3f10480766d
replace github.com/elastic/otel-profiling-agent => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20240813170341-36f636a73dce
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaL
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20240808105213-c3f10480766d h1:7AqUWM0Lz1S50+RAizKiizuCXrrku3NZeqryMP/ze/M=
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20240808105213-c3f10480766d/go.mod h1:rwZuUJiuz1l/5OBIgLLlwwSy6t42I601X1InwD489U4=
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20240813170341-36f636a73dce h1:Su3TOz8QFn6hPpAFUcAtMy/UuJl5JovEbA7h13arXmk=
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20240813170341-36f636a73dce/go.mod h1:rwZuUJiuz1l/5OBIgLLlwwSy6t42I601X1InwD489U4=
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=
github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
Expand Down
7 changes: 7 additions & 0 deletions reporter/metadata/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,13 @@ func (pmp *processMetadataProvider) AddMetadata(pid util.PID, lb *labels.Builder
}
lb.Set("__meta_process_cmdline", strings.Join(cmdline, " "))

comm, err := p.comm()
if err != nil {
log.Debugf("Failed to get comm for PID %d: %v", pid, err)
return
}
lb.Set("comm", comm)

cgroup, err := p.cgroup()
if err != nil {
log.Debugf("Failed to get cgroups for PID %d: %v", pid, err)
Expand Down
19 changes: 11 additions & 8 deletions reporter/parca_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ type ParcaReporter struct {
// executables stores metadata for executables.
executables *lru.SyncedLRU[libpf.FileID, metadata.ExecInfo]

// labels stores labels about the process.
// labels stores labels about the thread.
labels *lru.SyncedLRU[util.PID, labelRetrievalResult]

// frames maps frame information to its source location.
Expand Down Expand Up @@ -139,7 +139,7 @@ func (r *ParcaReporter) SupportsReportTraceEvent() bool { return true }

// ReportTraceEvent enqueues reported trace events for the ParcaReporter.
func (r *ParcaReporter) ReportTraceEvent(trace *libpf.Trace,
timestamp libpf.UnixTime64, comm, _ string, pid util.PID) {
timestamp libpf.UnixTime64, comm, _ string, pid, tid util.PID) {

// This is an LRU so we need to check every time if the stack is already
// known, as it might have been evicted.
Expand All @@ -151,7 +151,9 @@ func (r *ParcaReporter) ReportTraceEvent(trace *libpf.Trace,
})
}

labelRetrievalResult := r.labelsForPID(pid, comm)
labelRetrievalResult := r.labelsForTID(tid, pid, comm)


if !labelRetrievalResult.keep {
log.Debugf("Skipping trace event for PID %d, as it was filtered out by relabeling", pid)
return
Expand Down Expand Up @@ -182,14 +184,15 @@ func (r *ParcaReporter) addMetadataForPID(pid util.PID, lb *labels.Builder) {
}
}

func (r *ParcaReporter) labelsForPID(pid util.PID, comm string) labelRetrievalResult {
if labels, exists := r.labels.Get(pid); exists {
func (r *ParcaReporter) labelsForTID(tid, pid util.PID, comm string) labelRetrievalResult {
if labels, exists := r.labels.Get(tid); exists {
return labels
}

lb := &labels.Builder{}
lb.Set("node", r.nodeName)
lb.Set("comm", comm)
lb.Set("__meta_thread_comm", comm)
lb.Set("__meta_thread_id", fmt.Sprint(tid))
r.addMetadataForPID(pid, lb)

keep := relabel.ProcessBuilder(lb, r.relabelConfigs...)
Expand All @@ -206,7 +209,7 @@ func (r *ParcaReporter) labelsForPID(pid util.PID, comm string) labelRetrievalRe
labels: lb.Labels(),
keep: keep,
}
r.labels.Add(pid, res)
r.labels.Add(tid, res)
return res
}

Expand All @@ -215,7 +218,7 @@ func (r *ParcaReporter) ReportFramesForTrace(_ *libpf.Trace) {}

// ReportCountForTrace is a NOP for ParcaReporter.
func (r *ParcaReporter) ReportCountForTrace(_ libpf.TraceHash, _ libpf.UnixTime64,
_ uint16, _ string, _ string, _ util.PID) {
_ uint16, _ string, _ string, _, _ util.PID) {
}

// ReportFallbackSymbol enqueues a fallback symbol for reporting, for a given frame.
Expand Down

0 comments on commit af563bc

Please sign in to comment.