From a87a9e2cb7139fdd7e1017bd0cfdf547bab550ea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 6 Jan 2024 21:47:23 +0000 Subject: [PATCH] tetragon: Add missed stats to kprobemetrics package Add tetragon_missed_link_probes_total and tetragon_missed_prog_probes_total metrics to kprobemetrics package and logic to store and collect missed stats. The missed stats are supported for kprobes and stored per function and policy name, like for generic kprobes: tetragon_missed_link_probes_total{attach="__x64_sys_close",policy="syswritefollowfdpsswd"} 453 tetragon_missed_link_probes_total{attach="__x64_sys_write",policy="syswritefollowfdpsswd"} 455 tetragon_missed_link_probes_total{attach="fd_install",policy="syswritefollowfdpsswd"} 451 and multi kprobes: tetragon_missed_link_probes_total{attach="kprobe_multi (3 functions)",policy="syswritefollowfdpsswd"} 41 Signed-off-by: Jiri Olsa --- docs/content/en/docs/reference/metrics.md | 18 +++++ pkg/metrics/kprobemetrics/collector.go | 93 +++++++++++++++++++++++ pkg/metrics/kprobemetrics/missed.go | 23 ++++++ pkg/metrics/metricsconfig/initmetrics.go | 2 + 4 files changed, 136 insertions(+) create mode 100644 pkg/metrics/kprobemetrics/collector.go create mode 100644 pkg/metrics/kprobemetrics/missed.go diff --git a/docs/content/en/docs/reference/metrics.md b/docs/content/en/docs/reference/metrics.md index ff0ee4cc92c..56621ff5a6a 100644 --- a/docs/content/en/docs/reference/metrics.md +++ b/docs/content/en/docs/reference/metrics.md @@ -181,6 +181,24 @@ The total number of Tetragon events per type that are failed to sent from the ke | ----- | ------ | | `msg_op` | `11, 13, 14, 15, 23, 24, 25, 26, 5, 7` | +### `tetragon_missed_link_probes_total` + +The total number of Tetragon probe missed by link. + +| label | values | +| ----- | ------ | +| `attach` | `attach` | +| `policy` | `policy` | + +### `tetragon_missed_prog_probes_total` + +The total number of Tetragon probe missed by program.
+ +| label | values | +| ----- | ------ | +| `attach` | `attach` | +| `policy` | `policy` | + ### `tetragon_msg_op_total` The total number of times we encounter a given message opcode. For internal use only. diff --git a/pkg/metrics/kprobemetrics/collector.go b/pkg/metrics/kprobemetrics/collector.go new file mode 100644 index 00000000000..ee987a299f5 --- /dev/null +++ b/pkg/metrics/kprobemetrics/collector.go @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package kprobemetrics + +import ( + "github.com/cilium/ebpf/link" + "github.com/cilium/tetragon/pkg/sensors" + "github.com/cilium/tetragon/pkg/sensors/program" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/sys/unix" +) + +// bpfCollector implements prometheus.Collector. It collects metrics directly from BPF maps. +type bpfCollector struct{} + +func NewBPFCollector() prometheus.Collector { + return &bpfCollector{} +} + +func (c *bpfCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- MissedLink.Desc() + ch <- MissedProg.Desc() +} + +func collectLink(ch chan<- prometheus.Metric, load *program.Program) { + if load.Link == nil { + return + } + + info, err := load.Link.Info() + if err != nil { + return + } + + missed := uint64(0) + + switch info.Type { + case link.PerfEventType: + pevent := info.PerfEvent() + switch pevent.Type { + case unix.BPF_PERF_EVENT_KPROBE, unix.BPF_PERF_EVENT_KRETPROBE: + kprobe := pevent.Kprobe() + missed, _ = kprobe.Missed() + } + case link.KprobeMultiType: + kmulti := info.KprobeMulti() + missed, _ = kmulti.Missed() + default: + } + + ch <- MissedLink.MustMetric(float64(missed), load.Policy, load.Attach) +} + +func collectProg(ch chan<- prometheus.Metric, load *program.Program) { + info, err := load.Prog.Info() + if err != nil { + return + } + + missed, _ := info.RecursionMisses() + ch <- MissedProg.MustMetric(float64(missed), load.Policy, load.Attach) +} + +func (c *bpfCollector) Collect(ch chan<- prometheus.Metric) { + 
allPrograms := sensors.AllPrograms() + for _, prog := range allPrograms { + collectLink(ch, prog) + collectProg(ch, prog) + } +} + +// bpfZeroCollector implements prometheus.Collector. It collects "zero" metrics. +// It's intended to be used when BPF metrics are not collected, but we still want +// Prometheus metrics to be exposed. +type bpfZeroCollector struct { + bpfCollector +} + +func NewBPFZeroCollector() prometheus.Collector { + return &bpfZeroCollector{ + bpfCollector: bpfCollector{}, + } +} + +func (c *bpfZeroCollector) Describe(ch chan<- *prometheus.Desc) { + c.bpfCollector.Describe(ch) +} + +func (c *bpfZeroCollector) Collect(ch chan<- prometheus.Metric) { + ch <- MissedLink.MustMetric(0, "policy", "attach") + ch <- MissedProg.MustMetric(0, "policy", "attach") +} diff --git a/pkg/metrics/kprobemetrics/missed.go b/pkg/metrics/kprobemetrics/missed.go new file mode 100644 index 00000000000..2c4bd36350f --- /dev/null +++ b/pkg/metrics/kprobemetrics/missed.go @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon + +package kprobemetrics + +import ( + "github.com/cilium/tetragon/pkg/metrics" + "github.com/cilium/tetragon/pkg/metrics/consts" + "github.com/prometheus/client_golang/prometheus" +) + +var ( + MissedLink = metrics.NewBPFCounter(prometheus.NewDesc( + prometheus.BuildFQName(consts.MetricsNamespace, "", "missed_link_probes_total"), + "The total number of Tetragon probe missed by link.", + []string{"policy", "attach"}, nil, + )) + MissedProg = metrics.NewBPFCounter(prometheus.NewDesc( + prometheus.BuildFQName(consts.MetricsNamespace, "", "missed_prog_probes_total"), + "The total number of Tetragon probe missed by program.", + []string{"policy", "attach"}, nil, + )) +) diff --git a/pkg/metrics/metricsconfig/initmetrics.go b/pkg/metrics/metricsconfig/initmetrics.go index 9157b8b8b45..c57e0bdd4bd 100644 --- a/pkg/metrics/metricsconfig/initmetrics.go +++ b/pkg/metrics/metricsconfig/initmetrics.go @@ -61,6 +61,7 @@ func 
initAllHealthMetrics(registry *prometheus.Registry) { // register custom collectors registry.MustRegister(observer.NewBPFCollector()) registry.MustRegister(eventmetrics.NewBPFCollector()) + registry.MustRegister(kprobemetrics.NewBPFCollector()) } func InitHealthMetricsForDocs(registry *prometheus.Registry) { @@ -72,6 +73,7 @@ func InitHealthMetricsForDocs(registry *prometheus.Registry) { // register custom zero collectors registry.MustRegister(observer.NewBPFZeroCollector()) registry.MustRegister(eventmetrics.NewBPFZeroCollector()) + registry.MustRegister(kprobemetrics.NewBPFZeroCollector()) } func initResourcesMetrics(registry *prometheus.Registry) {