From 0fd814092876c258578aaa1c89cdaff0968cecc4 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Wed, 21 Aug 2024 11:18:13 +0200 Subject: [PATCH 1/7] Timeline Allow timestamps to be added to the samples as labels. With pprof this implies duplicating the samples. --- interpreter/php/decode_arm64.go | 2 +- reporter/datadog_reporter.go | 321 +++++++++++++++++++------------- 2 files changed, 193 insertions(+), 130 deletions(-) diff --git a/interpreter/php/decode_arm64.go b/interpreter/php/decode_arm64.go index 39f9444..f03fd31 100644 --- a/interpreter/php/decode_arm64.go +++ b/interpreter/php/decode_arm64.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" - "github.com/elastic/otel-profiling-agent/libpf" ah "github.com/elastic/otel-profiling-agent/armhelpers" + "github.com/elastic/otel-profiling-agent/libpf" aa "golang.org/x/arch/arm64/arm64asm" ) diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index 5e2e619..250172f 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -15,6 +15,7 @@ import ( "os" "path" "runtime" + "strconv" "strings" "time" @@ -360,6 +361,184 @@ func (r *DatadogReporter) reportProfile(ctx context.Context) error { return err } +func copySample(sample *pprofile.Sample) *pprofile.Sample { + // Create a new Sample struct + newSample := &pprofile.Sample{} + + if sample.Location != nil { + newSample.Location = append([]*pprofile.Location{}, sample.Location...) + } + + if sample.Value != nil { + newSample.Value = append([]int64{}, sample.Value...) + } + + if sample.Label != nil { + newSample.Label = make(map[string][]string) + for k, v := range sample.Label { + newSample.Label[k] = append([]string{}, v...) + } + } + + if sample.NumLabel != nil { + newSample.NumLabel = make(map[string][]int64) + for k, v := range sample.NumLabel { + newSample.NumLabel[k] = append([]int64{}, v...) + } + } + + if sample.NumUnit != nil { + newSample.NumUnit = make(map[string][]string) + for k, v := range sample.NumUnit { + newSample.NumUnit[k] = append([]string{}, v...) + } + } + + return newSample +} + +func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofile.Profile, traceKey traceAndMetaKey, + traceInfo *traceFramesCounts, ts uint64, fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, + frameIDtoFunction map[libpf.FrameID]*pprofile.Function, + funcMap map[funcInfo]*pprofile.Function, aggregateAllTimestamps bool) { + const unknownStr = "UNKNOWN" + // Walk every frame of the trace. + for i := range traceInfo.frameTypes { + loc := createPProfLocation(profile, uint64(traceInfo.linenos[i])) + + switch frameKind := traceInfo.frameTypes[i]; frameKind { + case libpf.NativeFrame: + // As native frames are resolved in the backend, we use Mapping to + // report these frames. + + if tmpMapping, exists := fileIDtoMapping[traceInfo.files[i]]; exists { + loc.Mapping = tmpMapping + } else { + executionInfo, exists := r.executables.Get(traceInfo.files[i]) + + // Next step: Select a proper default value, + // if the name of the executable is not known yet. + var fileName = unknownStr + var buildID = traceInfo.files[i].StringNoQuotes() + if exists { + fileName = executionInfo.fileName + if executionInfo.buildID != "" { + buildID = executionInfo.buildID + } + } + + tmpMapping := createPprofMapping(profile, uint64(traceInfo.linenos[i]), + fileName, buildID) + fileIDtoMapping[traceInfo.files[i]] = tmpMapping + loc.Mapping = tmpMapping + } + line := pprofile.Line{Function: createPprofFunctionEntry(funcMap, profile, "", + loc.Mapping.File)} + loc.Line = append(loc.Line, line) + case libpf.KernelFrame: + // Reconstruct frameID + frameID := libpf.NewFrameID(traceInfo.files[i], traceInfo.linenos[i]) + // Store Kernel frame information as Line message: + line := pprofile.Line{} + + if tmpFunction, exists := frameIDtoFunction[frameID]; exists { + line.Function = tmpFunction + } else { + symbol, exists := r.fallbackSymbols.Get(frameID) + if !exists { + // TODO: choose a proper default value if the kernel symbol was not + // reported yet. + symbol = unknownStr + } + line.Function = createPprofFunctionEntry( + funcMap, profile, symbol, "") + } + loc.Line = append(loc.Line, line) + + // To be compliant with the protocol generate a dummy mapping entry. + loc.Mapping = getDummyMapping(fileIDtoMapping, profile, + traceInfo.files[i]) + + case libpf.AbortFrame: + // Next step: Figure out how the OTLP protocol + // could handle artificial frames, like AbortFrame, + // that are not originate from a native or interpreted + // program. + default: + // Store interpreted frame information as Line message: + line := pprofile.Line{} + + fileIDInfoLock, exists := r.frames.Get(traceInfo.files[i]) + if !exists { + // At this point, we do not have enough information for the frame. + // Therefore, we report a dummy entry and use the interpreter as filename. + line.Function = createPprofFunctionEntry(funcMap, profile, + "UNREPORTED", frameKind.String()) + } else { + fileIDInfo := fileIDInfoLock.RLock() + si, exists := (*fileIDInfo)[traceInfo.linenos[i]] + if !exists { + // At this point, we do not have enough information for the frame. + // Therefore, we report a dummy entry and use the interpreter as filename. + // To differentiate this case with the case where no information about + // the file ID is available at all, we use a different name for reported + // function. + line.Function = createPprofFunctionEntry(funcMap, profile, + "UNRESOLVED", frameKind.String()) + } else { + line.Line = int64(si.lineNumber) + + line.Function = createPprofFunctionEntry(funcMap, profile, + si.functionName, si.filePath) + } + fileIDInfoLock.RUnlock(&fileIDInfo) + } + loc.Line = append(loc.Line, line) + + // To be compliant with the protocol generate a dummy mapping entry. + loc.Mapping = getDummyMapping(fileIDtoMapping, profile, traceInfo.files[i]) + } + sample.Location = append(sample.Location, loc) + } + + execPath, _ := r.execPathes.Get(traceKey.pid) + + // Check if the last frame is a kernel frame. + if len(traceInfo.frameTypes) > 0 && + traceInfo.frameTypes[len(traceInfo.frameTypes)-1] == libpf.KernelFrame { + // If the last frame is a kernel frame, we need to add a dummy + // location with the kernel as the function name. + execPath = "kernel" + } + + if execPath != "" { + base := path.Base(execPath) + loc := createPProfLocation(profile, 0) + m := createPprofFunctionEntry(funcMap, profile, base, execPath) + loc.Line = append(loc.Line, pprofile.Line{Function: m}) + sample.Location = append(sample.Location, loc) + } + + sample.Label = make(map[string][]string) + addTraceLabels(sample.Label, traceKey) + if aggregateAllTimestamps { + // Aggregate all timestamps into one sample + count := len(traceInfo.timestamps) + sample.Value = append(sample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) + profile.Sample = append(profile.Sample, sample) + } else { + // Process each timestamp separately + for _, ts := range traceInfo.timestamps { + individualSample := copySample(sample) + individualSample.Label["end_timestamp_ns"] = []string{strconv.FormatUint(ts, 10)} + count := 1 + // we can append the value as we should not have set this part yet + individualSample.Value = append(individualSample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) + profile.Sample = append(profile.Sample, individualSample) + } + } +} + // getPprofProfile returns a pprof profile containing all collected samples up to this moment. func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, startTS uint64, endTS uint64) { @@ -369,10 +548,11 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, delete(*traceEvents, key) } r.traceEvents.WUnlock(&traceEvents) - + aggregateAllTimestamps := false // todo timeline flag here numSamples := len(samples) - - const unknownStr = "UNKNOWN" + if !aggregateAllTimestamps { + numSamples = numSamples * 4 + } // funcMap is a temporary helper that will build the Function array // in profile and make sure information is deduplicated. @@ -392,7 +572,6 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, totalSampleCount := 0 for traceKey, traceInfo := range samples { - sample := &pprofile.Sample{} for _, ts := range traceInfo.timestamps { if ts < startTS || startTS == 0 { @@ -403,131 +582,11 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, endTS = ts } } - - // Walk every frame of the trace. - for i := range traceInfo.frameTypes { - loc := createPProfLocation(profile, uint64(traceInfo.linenos[i])) - - switch frameKind := traceInfo.frameTypes[i]; frameKind { - case libpf.NativeFrame: - // As native frames are resolved in the backend, we use Mapping to - // report these frames. - - if tmpMapping, exists := fileIDtoMapping[traceInfo.files[i]]; exists { - loc.Mapping = tmpMapping - } else { - executionInfo, exists := r.executables.Get(traceInfo.files[i]) - - // Next step: Select a proper default value, - // if the name of the executable is not known yet. - var fileName = unknownStr - var buildID = traceInfo.files[i].StringNoQuotes() - if exists { - fileName = executionInfo.fileName - if executionInfo.buildID != "" { - buildID = executionInfo.buildID - } - } - - tmpMapping := createPprofMapping(profile, uint64(traceInfo.linenos[i]), - fileName, buildID) - fileIDtoMapping[traceInfo.files[i]] = tmpMapping - loc.Mapping = tmpMapping - } - line := pprofile.Line{Function: createPprofFunctionEntry(funcMap, profile, "", - loc.Mapping.File)} - loc.Line = append(loc.Line, line) - case libpf.KernelFrame: - // Reconstruct frameID - frameID := libpf.NewFrameID(traceInfo.files[i], traceInfo.linenos[i]) - // Store Kernel frame information as Line message: - line := pprofile.Line{} - - if tmpFunction, exists := frameIDtoFunction[frameID]; exists { - line.Function = tmpFunction - } else { - symbol, exists := r.fallbackSymbols.Get(frameID) - if !exists { - // TODO: choose a proper default value if the kernel symbol was not - // reported yet. - symbol = unknownStr - } - line.Function = createPprofFunctionEntry( - funcMap, profile, symbol, "") - } - loc.Line = append(loc.Line, line) - - // To be compliant with the protocol generate a dummy mapping entry. - loc.Mapping = getDummyMapping(fileIDtoMapping, profile, - traceInfo.files[i]) - - case libpf.AbortFrame: - // Next step: Figure out how the OTLP protocol - // could handle artificial frames, like AbortFrame, - // that are not originate from a native or interpreted - // program. - default: - // Store interpreted frame information as Line message: - line := pprofile.Line{} - - fileIDInfoLock, exists := r.frames.Get(traceInfo.files[i]) - if !exists { - // At this point, we do not have enough information for the frame. - // Therefore, we report a dummy entry and use the interpreter as filename. - line.Function = createPprofFunctionEntry(funcMap, profile, - "UNREPORTED", frameKind.String()) - } else { - fileIDInfo := fileIDInfoLock.RLock() - si, exists := (*fileIDInfo)[traceInfo.linenos[i]] - if !exists { - // At this point, we do not have enough information for the frame. - // Therefore, we report a dummy entry and use the interpreter as filename. - // To differentiate this case with the case where no information about - // the file ID is available at all, we use a different name for reported - // function. - line.Function = createPprofFunctionEntry(funcMap, profile, - "UNRESOLVED", frameKind.String()) - } else { - line.Line = int64(si.lineNumber) - - line.Function = createPprofFunctionEntry(funcMap, profile, - si.functionName, si.filePath) - } - fileIDInfoLock.RUnlock(&fileIDInfo) - } - loc.Line = append(loc.Line, line) - - // To be compliant with the protocol generate a dummy mapping entry. - loc.Mapping = getDummyMapping(fileIDtoMapping, profile, traceInfo.files[i]) - } - sample.Location = append(sample.Location, loc) - } - - execPath, _ := r.execPathes.Get(traceKey.pid) - - // Check if the last frame is a kernel frame. - if len(traceInfo.frameTypes) > 0 && - traceInfo.frameTypes[len(traceInfo.frameTypes)-1] == libpf.KernelFrame { - // If the last frame is a kernel frame, we need to add a dummy - // location with the kernel as the function name. - execPath = "kernel" - } - - if execPath != "" { - base := path.Base(execPath) - loc := createPProfLocation(profile, 0) - m := createPprofFunctionEntry(funcMap, profile, base, execPath) - loc.Line = append(loc.Line, pprofile.Line{Function: m}) - sample.Location = append(sample.Location, loc) - } - - sample.Label = make(map[string][]string) - addTraceLabels(sample.Label, traceKey) - - count := int64(len(traceInfo.timestamps)) - sample.Value = append(sample.Value, count, count*int64(r.samplingPeriod)) - profile.Sample = append(profile.Sample, sample) - totalSampleCount += len(traceInfo.timestamps) + sample := &pprofile.Sample{} + count := len(traceInfo.timestamps) + r.processSample(sample, profile, traceKey, traceInfo, 0, fileIDtoMapping, + frameIDtoFunction, funcMap, aggregateAllTimestamps) + totalSampleCount += count } log.Infof("Reporting pprof profile with %d samples from %v to %v", totalSampleCount, startTS, endTS) @@ -596,6 +655,10 @@ func addTraceLabels(labels map[string][]string, k traceAndMetaKey) { // This is also consistent with ddprof. labels["thread id"] = append(labels["thread id"], fmt.Sprintf("%d", k.tid)) } + + if k.comm != "" { + labels["comm"] = append(labels["comm"], k.comm) + } } // getDummyMappingIndex inserts or looks up a dummy entry for interpreted FileIDs. From a127bf12bd7ed7b4f19cb1881f3222bd43271b06 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 22 Aug 2024 17:59:15 +0200 Subject: [PATCH 2/7] Timeline - Add a timeline flag - Ensure process name is added as a label (to help with grouping) --- cli_flags.go | 2 ++ main.go | 1 + reporter/config.go | 3 +++ reporter/datadog_reporter.go | 37 ++++++++++++++++++++---------------- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/cli_flags.go b/cli_flags.go index 03574bc..80afb50 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -99,6 +99,7 @@ var ( argProbabilisticInterval time.Duration argPprofAddr string argSaveCPUProfile bool + argTimeline bool // "internal" flag variables. // Flag variables that are configured in "internal" builds will have to be assigned @@ -161,6 +162,7 @@ func parseArgs() error { saveCPUProfileHelp) fs.IntVar(&argSamplesPerSecond, "samples-per-second", defaultArgSamplesPerSecond, samplesPerSecondHelp) + fs.BoolVar(&argTimeline, "timeline", false, "Enable timeline feature.") fs.Usage = func() { fs.PrintDefaults() diff --git a/main.go b/main.go index 3ab9bd4..f9aa63e 100644 --- a/main.go +++ b/main.go @@ -315,6 +315,7 @@ func mainWithExitCode() exitCode { FallbackSymbolsMaxQueue: 1024, DisableTLS: argDisableTLS, MaxGRPCRetries: 5, + Timeline: argTimeline, SamplesPerSecond: conf.SamplesPerSecond, SaveCPUProfile: argSaveCPUProfile, Times: times, diff --git a/reporter/config.go b/reporter/config.go index 5b42e47..337bd69 100644 --- a/reporter/config.go +++ b/reporter/config.go @@ -51,6 +51,9 @@ type Config struct { SaveCPUProfile bool + // Whether to include timestamps on samples for the timeline feature + Timeline bool + Times Times // gRPCInterceptor is the client gRPC interceptor, e.g., for sending gRPC metadata. diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index 250172f..c52aeba 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -50,6 +50,8 @@ type DatadogReporter struct { agentAddr string + timeline bool + samplingPeriod uint64 saveCPUProfile bool @@ -256,6 +258,7 @@ func StartDatadog(mainCtx context.Context, cfg *Config) (Reporter, error) { agentAddr: cfg.CollAgentAddr, samplingPeriod: 1000000000 / uint64(cfg.SamplesPerSecond), saveCPUProfile: cfg.SaveCPUProfile, + timeline: cfg.Timeline, fallbackSymbols: fallbackSymbols, executables: executables, frames: frames, @@ -398,9 +401,9 @@ func copySample(sample *pprofile.Sample) *pprofile.Sample { } func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofile.Profile, traceKey traceAndMetaKey, - traceInfo *traceFramesCounts, ts uint64, fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, + traceInfo *traceFramesCounts, fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, frameIDtoFunction map[libpf.FrameID]*pprofile.Function, - funcMap map[funcInfo]*pprofile.Function, aggregateAllTimestamps bool) { + funcMap map[funcInfo]*pprofile.Function) { const unknownStr = "UNKNOWN" // Walk every frame of the trace. for i := range traceInfo.frameTypes { @@ -502,7 +505,10 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi } execPath, _ := r.execPathes.Get(traceKey.pid) - + baseExec := path.Base(execPath) + if baseExec == "." || baseExec == "/" { + baseExec = execPath // avoids kernel being transformed in . + } // Check if the last frame is a kernel frame. if len(traceInfo.frameTypes) > 0 && traceInfo.frameTypes[len(traceInfo.frameTypes)-1] == libpf.KernelFrame { @@ -512,16 +518,15 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi } if execPath != "" { - base := path.Base(execPath) loc := createPProfLocation(profile, 0) - m := createPprofFunctionEntry(funcMap, profile, base, execPath) + m := createPprofFunctionEntry(funcMap, profile, baseExec, execPath) loc.Line = append(loc.Line, pprofile.Line{Function: m}) sample.Location = append(sample.Location, loc) } sample.Label = make(map[string][]string) - addTraceLabels(sample.Label, traceKey) - if aggregateAllTimestamps { + addTraceLabels(sample.Label, traceKey, baseExec) + if !r.timeline { // Aggregate all timestamps into one sample count := len(traceInfo.timestamps) sample.Value = append(sample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) @@ -533,7 +538,8 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi individualSample.Label["end_timestamp_ns"] = []string{strconv.FormatUint(ts, 10)} count := 1 // we can append the value as we should not have set this part yet - individualSample.Value = append(individualSample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) + individualSample.Value = append(individualSample.Value, int64(count), + int64(count)*int64(r.samplingPeriod)) profile.Sample = append(profile.Sample, individualSample) } } @@ -548,10 +554,9 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, delete(*traceEvents, key) } r.traceEvents.WUnlock(&traceEvents) - aggregateAllTimestamps := false // todo timeline flag here numSamples := len(samples) - if !aggregateAllTimestamps { - numSamples = numSamples * 4 + if r.timeline { + numSamples *= 4 } // funcMap is a temporary helper that will build the Function array @@ -584,8 +589,8 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, } sample := &pprofile.Sample{} count := len(traceInfo.timestamps) - r.processSample(sample, profile, traceKey, traceInfo, 0, fileIDtoMapping, - frameIDtoFunction, funcMap, aggregateAllTimestamps) + r.processSample(sample, profile, traceKey, traceInfo, fileIDtoMapping, + frameIDtoFunction, funcMap) totalSampleCount += count } log.Infof("Reporting pprof profile with %d samples from %v to %v", @@ -624,7 +629,7 @@ func createPprofFunctionEntry(funcMap map[funcInfo]*pprofile.Function, } //nolint:gocritic -func addTraceLabels(labels map[string][]string, k traceAndMetaKey) { +func addTraceLabels(labels map[string][]string, k traceAndMetaKey, baseExec string) { if k.comm != "" { labels["thread_name"] = append(labels["thread_name"], k.comm) } @@ -656,8 +661,8 @@ func addTraceLabels(labels map[string][]string, k traceAndMetaKey) { labels["thread id"] = append(labels["thread id"], fmt.Sprintf("%d", k.tid)) } - if k.comm != "" { - labels["comm"] = append(labels["comm"], k.comm) + if baseExec != "" { + labels["process_name"] = append(labels["process_name"], baseExec) } } From ea5531d894149f4c83117f0f1cae3af747cdf1f9 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 26 Aug 2024 18:41:34 +0200 Subject: [PATCH 3/7] Timeline avoid duplicating samples Add the list of timestamps to the sample's labels --- reporter/datadog_reporter.go | 58 +++++------------------------------- 1 file changed, 8 insertions(+), 50 deletions(-) diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index c52aeba..1638ccd 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -364,42 +364,6 @@ func (r *DatadogReporter) reportProfile(ctx context.Context) error { return err } -func copySample(sample *pprofile.Sample) *pprofile.Sample { - // Create a new Sample struct - newSample := &pprofile.Sample{} - - if sample.Location != nil { - newSample.Location = append([]*pprofile.Location{}, sample.Location...) - } - - if sample.Value != nil { - newSample.Value = append([]int64{}, sample.Value...) - } - - if sample.Label != nil { - newSample.Label = make(map[string][]string) - for k, v := range sample.Label { - newSample.Label[k] = append([]string{}, v...) - } - } - - if sample.NumLabel != nil { - newSample.NumLabel = make(map[string][]int64) - for k, v := range sample.NumLabel { - newSample.NumLabel[k] = append([]int64{}, v...) - } - } - - if sample.NumUnit != nil { - newSample.NumUnit = make(map[string][]string) - for k, v := range sample.NumUnit { - newSample.NumUnit[k] = append([]string{}, v...) - } - } - - return newSample -} - func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofile.Profile, traceKey traceAndMetaKey, traceInfo *traceFramesCounts, fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, frameIDtoFunction map[libpf.FrameID]*pprofile.Function, @@ -526,23 +490,17 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi sample.Label = make(map[string][]string) addTraceLabels(sample.Label, traceKey, baseExec) - if !r.timeline { - // Aggregate all timestamps into one sample - count := len(traceInfo.timestamps) - sample.Value = append(sample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) - profile.Sample = append(profile.Sample, sample) - } else { - // Process each timestamp separately + if r.timeline { + timestamps := make([]string, 0, len(traceInfo.timestamps)) for _, ts := range traceInfo.timestamps { - individualSample := copySample(sample) - individualSample.Label["end_timestamp_ns"] = []string{strconv.FormatUint(ts, 10)} - count := 1 - // we can append the value as we should not have set this part yet - individualSample.Value = append(individualSample.Value, int64(count), - int64(count)*int64(r.samplingPeriod)) - profile.Sample = append(profile.Sample, individualSample) + timestamps = append(timestamps, strconv.FormatUint(ts, 10)) } + // Assign all timestamps as a single label entry + sample.Label["end_timestamp_ns"] = timestamps } + count := len(traceInfo.timestamps) + sample.Value = append(sample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) + profile.Sample = append(profile.Sample, sample) } // getPprofProfile returns a pprof profile containing all collected samples up to this moment. From d88ef1d8e18d4f2fe7b4e5040d1bcbe208c1c646 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Sep 2024 12:10:41 +0200 Subject: [PATCH 4/7] Timeline support - Minor adjustements --- reporter/datadog_reporter.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index 1638ccd..98f555a 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -469,10 +469,6 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi } execPath, _ := r.execPathes.Get(traceKey.pid) - baseExec := path.Base(execPath) - if baseExec == "." || baseExec == "/" { - baseExec = execPath // avoids kernel being transformed in . - } // Check if the last frame is a kernel frame. if len(traceInfo.frameTypes) > 0 && traceInfo.frameTypes[len(traceInfo.frameTypes)-1] == libpf.KernelFrame { @@ -480,6 +476,7 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi // location with the kernel as the function name. execPath = "kernel" } + baseExec := path.Base(execPath) if execPath != "" { loc := createPProfLocation(profile, 0) @@ -489,15 +486,11 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi } sample.Label = make(map[string][]string) - addTraceLabels(sample.Label, traceKey, baseExec) + var timestamps []uint64 = nil if r.timeline { - timestamps := make([]string, 0, len(traceInfo.timestamps)) - for _, ts := range traceInfo.timestamps { - timestamps = append(timestamps, strconv.FormatUint(ts, 10)) - } - // Assign all timestamps as a single label entry - sample.Label["end_timestamp_ns"] = timestamps + timestamps = traceInfo.timestamps } + addTraceLabels(sample.Label, traceKey, baseExec, timestamps) count := len(traceInfo.timestamps) sample.Value = append(sample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) profile.Sample = append(profile.Sample, sample) @@ -513,9 +506,6 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, } r.traceEvents.WUnlock(&traceEvents) numSamples := len(samples) - if r.timeline { - numSamples *= 4 - } // funcMap is a temporary helper that will build the Function array // in profile and make sure information is deduplicated. @@ -587,7 +577,8 @@ func createPprofFunctionEntry(funcMap map[funcInfo]*pprofile.Function, } //nolint:gocritic -func addTraceLabels(labels map[string][]string, k traceAndMetaKey, baseExec string) { +func addTraceLabels(labels map[string][]string, k traceAndMetaKey, + baseExec string, timestamps []uint64) { if k.comm != "" { labels["thread_name"] = append(labels["thread_name"], k.comm) } @@ -622,6 +613,15 @@ func addTraceLabels(labels map[string][]string, k traceAndMetaKey, baseExec stri if baseExec != "" { labels["process_name"] = append(labels["process_name"], baseExec) } + + if timestamps != nil && len(timestamps) > 0 { + timestampStrs := make([]string, 0, len(timestamps)) + for _, ts := range timestamps { + timestampStrs = append(timestampStrs, strconv.FormatUint(ts, 10)) + } + // Assign all timestamps as a single label entry + labels["end_timestamp_ns"] = timestampStrs + } } // getDummyMappingIndex inserts or looks up a dummy entry for interpreted FileIDs. From 9f115598a90dfe55814d3d18e3e87d05aea7cdd1 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Tue, 3 Sep 2024 14:28:44 +0200 Subject: [PATCH 5/7] Timeline - minor fixup Avoid a redundant nil check --- reporter/datadog_reporter.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index 98f555a..c8e4659 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -364,8 +364,9 @@ func (r *DatadogReporter) reportProfile(ctx context.Context) error { return err } -func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofile.Profile, traceKey traceAndMetaKey, - traceInfo *traceFramesCounts, fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, +func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofile.Profile, + traceKey *traceAndMetaKey, traceInfo *traceFramesCounts, + fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, frameIDtoFunction map[libpf.FrameID]*pprofile.Function, funcMap map[funcInfo]*pprofile.Function) { const unknownStr = "UNKNOWN" @@ -486,7 +487,7 @@ func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofi } sample.Label = make(map[string][]string) - var timestamps []uint64 = nil + var timestamps []uint64 if r.timeline { timestamps = traceInfo.timestamps } @@ -525,7 +526,6 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, totalSampleCount := 0 for traceKey, traceInfo := range samples { - for _, ts := range traceInfo.timestamps { if ts < startTS || startTS == 0 { startTS = ts @@ -537,7 +537,7 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, } sample := &pprofile.Sample{} count := len(traceInfo.timestamps) - r.processSample(sample, profile, traceKey, traceInfo, fileIDtoMapping, + r.processSample(sample, profile, &traceKey, traceInfo, fileIDtoMapping, frameIDtoFunction, funcMap) totalSampleCount += count } @@ -576,13 +576,11 @@ func createPprofFunctionEntry(funcMap map[funcInfo]*pprofile.Function, return function } -//nolint:gocritic -func addTraceLabels(labels map[string][]string, k traceAndMetaKey, +func addTraceLabels(labels map[string][]string, k *traceAndMetaKey, baseExec string, timestamps []uint64) { if k.comm != "" { labels["thread_name"] = append(labels["thread_name"], k.comm) } - if k.podName != "" { labels["pod_name"] = append(labels["pod_name"], k.podName) } @@ -614,7 +612,7 @@ func addTraceLabels(labels map[string][]string, k traceAndMetaKey, labels["process_name"] = append(labels["process_name"], baseExec) } - if timestamps != nil && len(timestamps) > 0 { + if len(timestamps) > 0 { timestampStrs := make([]string, 0, len(timestamps)) for _, ts := range timestamps { timestampStrs = append(timestampStrs, strconv.FormatUint(ts, 10)) From c69b23b1a2978b0fe6bf86cd821d396c1cebadf0 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 5 Sep 2024 14:35:56 +0200 Subject: [PATCH 6/7] Timeline - revert extraction of the loop on samples --- interpreter/php/decode_arm64.go | 2 +- reporter/datadog_reporter.go | 272 ++++++++++++++++---------------- 2 files changed, 135 insertions(+), 139 deletions(-) diff --git a/interpreter/php/decode_arm64.go b/interpreter/php/decode_arm64.go index f03fd31..39f9444 100644 --- a/interpreter/php/decode_arm64.go +++ b/interpreter/php/decode_arm64.go @@ -12,8 +12,8 @@ import ( "errors" "fmt" - ah "github.com/elastic/otel-profiling-agent/armhelpers" "github.com/elastic/otel-profiling-agent/libpf" + ah "github.com/elastic/otel-profiling-agent/armhelpers" aa "golang.org/x/arch/arm64/arm64asm" ) diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index c8e4659..90fb1aa 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -364,139 +364,6 @@ func (r *DatadogReporter) reportProfile(ctx context.Context) error { return err } -func (r *DatadogReporter) processSample(sample *pprofile.Sample, profile *pprofile.Profile, - traceKey *traceAndMetaKey, traceInfo *traceFramesCounts, - fileIDtoMapping map[libpf.FileID]*pprofile.Mapping, - frameIDtoFunction map[libpf.FrameID]*pprofile.Function, - funcMap map[funcInfo]*pprofile.Function) { - const unknownStr = "UNKNOWN" - // Walk every frame of the trace. - for i := range traceInfo.frameTypes { - loc := createPProfLocation(profile, uint64(traceInfo.linenos[i])) - - switch frameKind := traceInfo.frameTypes[i]; frameKind { - case libpf.NativeFrame: - // As native frames are resolved in the backend, we use Mapping to - // report these frames. - - if tmpMapping, exists := fileIDtoMapping[traceInfo.files[i]]; exists { - loc.Mapping = tmpMapping - } else { - executionInfo, exists := r.executables.Get(traceInfo.files[i]) - - // Next step: Select a proper default value, - // if the name of the executable is not known yet. - var fileName = unknownStr - var buildID = traceInfo.files[i].StringNoQuotes() - if exists { - fileName = executionInfo.fileName - if executionInfo.buildID != "" { - buildID = executionInfo.buildID - } - } - - tmpMapping := createPprofMapping(profile, uint64(traceInfo.linenos[i]), - fileName, buildID) - fileIDtoMapping[traceInfo.files[i]] = tmpMapping - loc.Mapping = tmpMapping - } - line := pprofile.Line{Function: createPprofFunctionEntry(funcMap, profile, "", - loc.Mapping.File)} - loc.Line = append(loc.Line, line) - case libpf.KernelFrame: - // Reconstruct frameID - frameID := libpf.NewFrameID(traceInfo.files[i], traceInfo.linenos[i]) - // Store Kernel frame information as Line message: - line := pprofile.Line{} - - if tmpFunction, exists := frameIDtoFunction[frameID]; exists { - line.Function = tmpFunction - } else { - symbol, exists := r.fallbackSymbols.Get(frameID) - if !exists { - // TODO: choose a proper default value if the kernel symbol was not - // reported yet. - symbol = unknownStr - } - line.Function = createPprofFunctionEntry( - funcMap, profile, symbol, "") - } - loc.Line = append(loc.Line, line) - - // To be compliant with the protocol generate a dummy mapping entry. - loc.Mapping = getDummyMapping(fileIDtoMapping, profile, - traceInfo.files[i]) - - case libpf.AbortFrame: - // Next step: Figure out how the OTLP protocol - // could handle artificial frames, like AbortFrame, - // that are not originate from a native or interpreted - // program. - default: - // Store interpreted frame information as Line message: - line := pprofile.Line{} - - fileIDInfoLock, exists := r.frames.Get(traceInfo.files[i]) - if !exists { - // At this point, we do not have enough information for the frame. - // Therefore, we report a dummy entry and use the interpreter as filename. - line.Function = createPprofFunctionEntry(funcMap, profile, - "UNREPORTED", frameKind.String()) - } else { - fileIDInfo := fileIDInfoLock.RLock() - si, exists := (*fileIDInfo)[traceInfo.linenos[i]] - if !exists { - // At this point, we do not have enough information for the frame. - // Therefore, we report a dummy entry and use the interpreter as filename. - // To differentiate this case with the case where no information about - // the file ID is available at all, we use a different name for reported - // function. - line.Function = createPprofFunctionEntry(funcMap, profile, - "UNRESOLVED", frameKind.String()) - } else { - line.Line = int64(si.lineNumber) - - line.Function = createPprofFunctionEntry(funcMap, profile, - si.functionName, si.filePath) - } - fileIDInfoLock.RUnlock(&fileIDInfo) - } - loc.Line = append(loc.Line, line) - - // To be compliant with the protocol generate a dummy mapping entry. - loc.Mapping = getDummyMapping(fileIDtoMapping, profile, traceInfo.files[i]) - } - sample.Location = append(sample.Location, loc) - } - - execPath, _ := r.execPathes.Get(traceKey.pid) - // Check if the last frame is a kernel frame. - if len(traceInfo.frameTypes) > 0 && - traceInfo.frameTypes[len(traceInfo.frameTypes)-1] == libpf.KernelFrame { - // If the last frame is a kernel frame, we need to add a dummy - // location with the kernel as the function name. - execPath = "kernel" - } - baseExec := path.Base(execPath) - - if execPath != "" { - loc := createPProfLocation(profile, 0) - m := createPprofFunctionEntry(funcMap, profile, baseExec, execPath) - loc.Line = append(loc.Line, pprofile.Line{Function: m}) - sample.Location = append(sample.Location, loc) - } - - sample.Label = make(map[string][]string) - var timestamps []uint64 - if r.timeline { - timestamps = traceInfo.timestamps - } - addTraceLabels(sample.Label, traceKey, baseExec, timestamps) - count := len(traceInfo.timestamps) - sample.Value = append(sample.Value, int64(count), int64(count)*int64(r.samplingPeriod)) - profile.Sample = append(profile.Sample, sample) -} - // getPprofProfile returns a pprof profile containing all collected samples up to this moment. func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, startTS uint64, endTS uint64) { @@ -506,8 +373,11 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, delete(*traceEvents, key) } r.traceEvents.WUnlock(&traceEvents) + numSamples := len(samples) + const unknownStr = "UNKNOWN" + // funcMap is a temporary helper that will build the Function array // in profile and make sure information is deduplicated. funcMap := make(map[funcInfo]*pprofile.Function) @@ -526,6 +396,8 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, totalSampleCount := 0 for traceKey, traceInfo := range samples { + sample := &pprofile.Sample{} + for _, ts := range traceInfo.timestamps { if ts < startTS || startTS == 0 { startTS = ts @@ -535,11 +407,135 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, endTS = ts } } - sample := &pprofile.Sample{} - count := len(traceInfo.timestamps) - r.processSample(sample, profile, &traceKey, traceInfo, fileIDtoMapping, - frameIDtoFunction, funcMap) - totalSampleCount += count + + // Walk every frame of the trace. + for i := range traceInfo.frameTypes { + loc := createPProfLocation(profile, uint64(traceInfo.linenos[i])) + + switch frameKind := traceInfo.frameTypes[i]; frameKind { + case libpf.NativeFrame: + // As native frames are resolved in the backend, we use Mapping to + // report these frames. + + if tmpMapping, exists := fileIDtoMapping[traceInfo.files[i]]; exists { + loc.Mapping = tmpMapping + } else { + executionInfo, exists := r.executables.Get(traceInfo.files[i]) + + // Next step: Select a proper default value, + // if the name of the executable is not known yet. + var fileName = unknownStr + var buildID = traceInfo.files[i].StringNoQuotes() + if exists { + fileName = executionInfo.fileName + if executionInfo.buildID != "" { + buildID = executionInfo.buildID + } + } + + tmpMapping := createPprofMapping(profile, uint64(traceInfo.linenos[i]), + fileName, buildID) + fileIDtoMapping[traceInfo.files[i]] = tmpMapping + loc.Mapping = tmpMapping + } + line := pprofile.Line{Function: createPprofFunctionEntry(funcMap, profile, "", + loc.Mapping.File)} + loc.Line = append(loc.Line, line) + case libpf.KernelFrame: + // Reconstruct frameID + frameID := libpf.NewFrameID(traceInfo.files[i], traceInfo.linenos[i]) + // Store Kernel frame information as Line message: + line := pprofile.Line{} + + if tmpFunction, exists := frameIDtoFunction[frameID]; exists { + line.Function = tmpFunction + } else { + symbol, exists := r.fallbackSymbols.Get(frameID) + if !exists { + // TODO: choose a proper default value if the kernel symbol was not + // reported yet. + symbol = unknownStr + } + line.Function = createPprofFunctionEntry( + funcMap, profile, symbol, "") + } + loc.Line = append(loc.Line, line) + + // To be compliant with the protocol generate a dummy mapping entry. + loc.Mapping = getDummyMapping(fileIDtoMapping, profile, + traceInfo.files[i]) + + case libpf.AbortFrame: + // Next step: Figure out how the OTLP protocol + // could handle artificial frames, like AbortFrame, + // that are not originate from a native or interpreted + // program. + default: + // Store interpreted frame information as Line message: + line := pprofile.Line{} + + fileIDInfoLock, exists := r.frames.Get(traceInfo.files[i]) + if !exists { + // At this point, we do not have enough information for the frame. + // Therefore, we report a dummy entry and use the interpreter as filename. + line.Function = createPprofFunctionEntry(funcMap, profile, + "UNREPORTED", frameKind.String()) + } else { + fileIDInfo := fileIDInfoLock.RLock() + si, exists := (*fileIDInfo)[traceInfo.linenos[i]] + if !exists { + // At this point, we do not have enough information for the frame. + // Therefore, we report a dummy entry and use the interpreter as filename. + // To differentiate this case with the case where no information about + // the file ID is available at all, we use a different name for reported + // function. + line.Function = createPprofFunctionEntry(funcMap, profile, + "UNRESOLVED", frameKind.String()) + } else { + line.Line = int64(si.lineNumber) + + line.Function = createPprofFunctionEntry(funcMap, profile, + si.functionName, si.filePath) + } + fileIDInfoLock.RUnlock(&fileIDInfo) + } + loc.Line = append(loc.Line, line) + + // To be compliant with the protocol generate a dummy mapping entry. + loc.Mapping = getDummyMapping(fileIDtoMapping, profile, traceInfo.files[i]) + } + sample.Location = append(sample.Location, loc) + } + + execPath, _ := r.execPathes.Get(traceKey.pid) + + // Check if the last frame is a kernel frame. + if len(traceInfo.frameTypes) > 0 && + traceInfo.frameTypes[len(traceInfo.frameTypes)-1] == libpf.KernelFrame { + // If the last frame is a kernel frame, we need to add a dummy + // location with the kernel as the function name. + execPath = "kernel" + } + baseExec := path.Base(execPath) + + if execPath != "" { + loc := createPProfLocation(profile, 0) + m := createPprofFunctionEntry(funcMap, profile, baseExec, execPath) + loc.Line = append(loc.Line, pprofile.Line{Function: m}) + sample.Location = append(sample.Location, loc) + } + + sample.Label = make(map[string][]string) + var timestamps []uint64 + if r.timeline { + timestamps = traceInfo.timestamps + } + addTraceLabels(sample.Label, &traceKey, baseExec, timestamps) + + count := int64(len(traceInfo.timestamps)) + sample.Value = append(sample.Value, count, count*int64(r.samplingPeriod)) + profile.Sample = append(profile.Sample, sample) + totalSampleCount += len(traceInfo.timestamps) } log.Infof("Reporting pprof profile with %d samples from %v to %v", totalSampleCount, startTS, endTS) From 9ca49c0acc6ed2ff75c9141d2ac7927b52af3756 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Thu, 5 Sep 2024 14:45:03 +0200 Subject: [PATCH 7/7] Timeline - Silence linter on memory aliasing --- reporter/datadog_reporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reporter/datadog_reporter.go b/reporter/datadog_reporter.go index 90fb1aa..1259a08 100644 --- a/reporter/datadog_reporter.go +++ b/reporter/datadog_reporter.go @@ -530,7 +530,7 @@ func (r *DatadogReporter) getPprofProfile() (profile *pprofile.Profile, if r.timeline { timestamps = traceInfo.timestamps } - addTraceLabels(sample.Label, &traceKey, baseExec, timestamps) + addTraceLabels(sample.Label, &traceKey, baseExec, timestamps) //nolint:gomemory count := int64(len(traceInfo.timestamps)) sample.Value = append(sample.Value, count, count*int64(r.samplingPeriod))