-
Notifications
You must be signed in to change notification settings - Fork 382
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a metric to provide per-event missed events
Example: $ curl localhost:2112/metrics 2> /dev/null | grep 'missed_events_total\|ringbuf_perf_event_lost_total\|ringbuf_queue_lost_total\|msg_op_total\|ringbuf_queue_received_total' tetragon_missed_events_total{event="clone_sent"} 323 tetragon_missed_events_total{event="data_failed"} 927 tetragon_missed_events_total{event="data_sent"} 616 tetragon_missed_events_total{event="exec_sent"} 323 tetragon_missed_events_total{event="exit_sent"} 321 tetragon_missed_events_total{event="kprobe_sent"} 52 tetragon_missed_events_total{event="total_failed"} 927 tetragon_missed_events_total{event="total_sent"} 1635 tetragon_msg_op_total{msg_op="13"} 52 tetragon_msg_op_total{msg_op="23"} 323 tetragon_msg_op_total{msg_op="24"} 616 tetragon_msg_op_total{msg_op="5"} 323 tetragon_msg_op_total{msg_op="7"} 321 tetragon_ringbuf_perf_event_lost_total 927 tetragon_ringbuf_queue_lost_total 0 tetragon_ringbuf_queue_received_total 1635 This PR adds an eBPF map collector for getting metrics directly from a map. This map records the return value of every perf_event_output call (i.e. whether the call succeeded or failed). This lets us determine missed events per type. The tetragon_missed_events_total metric exposes this information. Using the previous example, we can see that we lost 927 events from the user-space perspective (tetragon_ringbuf_perf_event_lost_total). This is the same as tetragon_missed_events_total{event="total_failed"} gathered from the kernel. All of these missed events are data events (tetragon_missed_events_total{event="data_failed"}). The total number of events received from the user-space perspective is tetragon_ringbuf_queue_received_total, while from the kernel perspective it is tetragon_missed_events_total{event="total_sent"}. Because we have seen cases where tetragon_missed_events_total{event="total_failed"} is not the same as tetragon_ringbuf_perf_event_lost_total, we also provide, for each event type, the number of events that were sent successfully and the number that failed to be sent.
Signed-off-by: Anastasios Papagiannis <[email protected]>
- Loading branch information
Showing
16 changed files
with
174 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// Copyright Authors of Tetragon | ||
package bpfmetrics | ||
|
||
import ( | ||
"fmt" | ||
"path/filepath" | ||
|
||
"github.com/cilium/ebpf" | ||
"github.com/cilium/tetragon/pkg/api/processapi" | ||
"github.com/cilium/tetragon/pkg/metrics/eventmetrics" | ||
"github.com/cilium/tetragon/pkg/option" | ||
"github.com/prometheus/client_golang/prometheus" | ||
) | ||
|
||
var metrics = map[int]string{ | ||
processapi.MetricExec: "exec", | ||
processapi.MetricClone: "clone", | ||
processapi.MetricExit: "exit", | ||
processapi.MetricData: "data", | ||
processapi.MetricCgroup: "cgroup", | ||
processapi.MetricLoader: "loader", | ||
processapi.MetricTracepoint: "tracepoint", | ||
processapi.MetricKprobe: "kprobe", | ||
processapi.MetricUprobe: "uprobe", | ||
} | ||
|
||
// bpfCollector implements prometheus.Collector. It collects metrics directly from BPF maps. | ||
type bpfCollector struct{} | ||
|
||
func NewBPFCollector() prometheus.Collector { | ||
return &bpfCollector{} | ||
} | ||
|
||
func (c *bpfCollector) Describe(ch chan<- *prometheus.Desc) { | ||
ch <- eventmetrics.MissedEvents.Desc() | ||
} | ||
|
||
func (c *bpfCollector) Collect(ch chan<- prometheus.Metric) { | ||
mapHandle, err := ebpf.LoadPinnedMap(filepath.Join(option.Config.MapDir, "tg_stats_map"), nil) | ||
if err != nil { | ||
return | ||
} | ||
defer mapHandle.Close() | ||
|
||
var zero uint32 | ||
var allCpuValue []processapi.KernelStats | ||
if err := mapHandle.Lookup(zero, &allCpuValue); err != nil { | ||
return | ||
} | ||
|
||
sum := processapi.KernelStats{} | ||
for _, val := range allCpuValue { | ||
for i := 0; i < processapi.MetricMaxValues; i++ { | ||
sum.Sent[i] += val.Sent[i] | ||
sum.SentFailed[i] += val.SentFailed[i] | ||
} | ||
} | ||
|
||
for i := 0; i < processapi.MetricMaxValues; i++ { | ||
if sum.Sent[i] > 0 { | ||
ch <- eventmetrics.MissedEvents.MustMetric(float64(sum.Sent[i]), fmt.Sprintf("%s_sent", metrics[i])) | ||
} | ||
if sum.SentFailed[i] > 0 { | ||
ch <- eventmetrics.MissedEvents.MustMetric(float64(sum.SentFailed[i]), fmt.Sprintf("%s_failed", metrics[i])) | ||
} | ||
} | ||
|
||
var totalSent uint64 | ||
for i := 0; i < processapi.MetricMaxValues; i++ { | ||
totalSent += sum.Sent[i] | ||
} | ||
if totalSent > 0 { | ||
ch <- eventmetrics.MissedEvents.MustMetric(float64(totalSent), "total_sent") | ||
} | ||
|
||
var totalSentFailed uint64 | ||
for i := 0; i < processapi.MetricMaxValues; i++ { | ||
totalSentFailed += sum.SentFailed[i] | ||
} | ||
if totalSentFailed > 0 { | ||
ch <- eventmetrics.MissedEvents.MustMetric(float64(totalSentFailed), "total_failed") | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters