Skip to content

Commit

Permalink
tetragon: Add missed stats to kprobemetrics package
Browse files Browse the repository at this point in the history
Add metrics for missed runs at the program and link level to the kprobemetrics
package, along with the logic to store and collect the missed stats.

The missed stats are supported for all programs and kprobe/kprobe-multi links.
They are stored per 'attach name' and 'policy name'.

For programs (not just kprobes):

  tetragon_missed_prog_probes_total{attach="__x64_sys_linkat",policy="sys-linkat-passwd"} 68
  tetragon_missed_prog_probes_total{attach="acct_process",policy="__base__"} 60
  tetragon_missed_prog_probes_total{attach="sched/sched_process_exec",policy="__base__"} 64
  tetragon_missed_prog_probes_total{attach="security_bprm_committing_creds",policy="__base__"} 66
  tetragon_missed_prog_probes_total{attach="wake_up_new_task",policy="__base__"} 62

For kprobe and kprobe-multi links:

  tetragon_missed_link_probes_total{attach="__x64_sys_linkat",policy="sys-linkat-passwd"} 45
  tetragon_missed_link_probes_total{attach="acct_process",policy="__base__"} 39
  tetragon_missed_link_probes_total{attach="security_bprm_committing_creds",policy="__base__"} 43
  tetragon_missed_link_probes_total{attach="wake_up_new_task",policy="__base__"} 41

  tetragon_missed_prog_probes_total{attach="acct_process",policy="__base__"} 40
  tetragon_missed_prog_probes_total{attach="kprobe_multi (1 functions)",policy="sys-linkat-passwd"} 48
  tetragon_missed_prog_probes_total{attach="sched/sched_process_exec",policy="__base__"} 44
  tetragon_missed_prog_probes_total{attach="security_bprm_committing_creds",policy="__base__"} 46
  tetragon_missed_prog_probes_total{attach="wake_up_new_task",policy="__base__"} 42

Note: the healthMetrics group is now created as not constrained, so that it
can carry the new metrics. This will be addressed in the future by adding a
debug metrics group.

Signed-off-by: Jiri Olsa <[email protected]>
  • Loading branch information
olsajiri committed Aug 6, 2024
1 parent 262e2ee commit 5ea36d1
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 1 deletion.
18 changes: 18 additions & 0 deletions docs/content/en/docs/reference/metrics.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

83 changes: 83 additions & 0 deletions pkg/metrics/kprobemetrics/collector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package kprobemetrics

import (
"github.com/cilium/ebpf/link"
"github.com/cilium/tetragon/pkg/bpf"
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/sensors"
"github.com/cilium/tetragon/pkg/sensors/program"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/sys/unix"
)

// NewBPFCollector builds the Prometheus collector for the missed-probe
// metrics (MissedLink and MissedProg). It hands metrics.NewCustomCollector
// two callbacks: collect and collectForDocs.
func NewBPFCollector() prometheus.Collector {
	missedMetrics := metrics.CustomMetrics{MissedLink, MissedProg}
	return metrics.NewCustomCollector(missedMetrics, collect, collectForDocs)
}

func collectLink(ch chan<- prometheus.Metric, load *program.Program) {
if load.Link == nil {
return
}

info, err := load.Link.Info()
if err != nil {
return
}

missed := uint64(0)

switch info.Type {
case link.PerfEventType:
if !bpf.HasMissedStatsPerfEvent() {
return
}
pevent := info.PerfEvent()
switch pevent.Type {
case unix.BPF_PERF_EVENT_KPROBE, unix.BPF_PERF_EVENT_KRETPROBE:
kprobe := pevent.Kprobe()
missed, _ = kprobe.Missed()
}
case link.KprobeMultiType:
if !bpf.HasMissedStatsKprobeMulti() {
return
}
kmulti := info.KprobeMulti()
missed, _ = kmulti.Missed()
default:
}

ch <- MissedLink.MustMetric(float64(missed), load.Policy, load.Attach)
}

func collectProg(ch chan<- prometheus.Metric, load *program.Program) {
info, err := load.Prog.Info()
if err != nil {
return
}

missed, _ := info.RecursionMisses()
ch <- MissedProg.MustMetric(float64(missed), load.Policy, load.Attach)
}

func collect(ch chan<- prometheus.Metric) {
allPrograms := sensors.AllPrograms()
for _, prog := range allPrograms {
collectLink(ch, prog)
collectProg(ch, prog)
}
}

// collectForDocs sends one zero-valued sample for each missed-probe metric,
// using the same example label values that the metric definitions declare.
func collectForDocs(ch chan<- prometheus.Metric) {
	const (
		examplePolicy = "monitor_panic"
		exampleAttach = "sys_panic"
	)

	ch <- MissedLink.MustMetric(0, examplePolicy, exampleAttach)
	ch <- MissedProg.MustMetric(0, examplePolicy, exampleAttach)
}
29 changes: 29 additions & 0 deletions pkg/metrics/kprobemetrics/missed.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package kprobemetrics

import (
"github.com/cilium/tetragon/pkg/metrics"
"github.com/cilium/tetragon/pkg/metrics/consts"
)

var (
	// MissedLink is the counter of probe runs missed at the link level,
	// labelled by policy and attach name.
	MissedLink = metrics.MustNewCustomCounter(
		metrics.NewOpts(
			consts.MetricsNamespace,
			"",
			"missed_link_probes_total",
			"The total number of Tetragon probe missed by link.",
			nil,
			nil,
			[]metrics.UnconstrainedLabel{
				{Name: "policy", ExampleValue: "monitor_panic"},
				{Name: "attach", ExampleValue: "sys_panic"},
			},
		),
	)

	// MissedProg is the counter of probe runs missed at the program level,
	// labelled by policy and attach name.
	MissedProg = metrics.MustNewCustomCounter(
		metrics.NewOpts(
			consts.MetricsNamespace,
			"",
			"missed_prog_probes_total",
			"The total number of Tetragon probe missed by program.",
			nil,
			nil,
			[]metrics.UnconstrainedLabel{
				{Name: "policy", ExampleValue: "monitor_panic"},
				{Name: "attach", ExampleValue: "sys_panic"},
			},
		),
	)
)
4 changes: 3 additions & 1 deletion pkg/metricsconfig/healthmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ var (

// GetHealthGroup returns the shared health metrics group, building it inside
// healthMetricsOnce.Do the first time it is called.
func GetHealthGroup() metrics.Group {
healthMetricsOnce.Do(func() {
// NOTE(review): this excerpt is a rendered diff hunk without +/- markers;
// the next line appears to be the pre-change version and the line after it
// its replacement.
healthMetrics = metrics.NewMetricsGroup(true)
// Per the commit description, the group is now created as not constrained
// so that it can carry the new missed-probe metrics.
healthMetrics = metrics.NewMetricsGroup(false)
})
return healthMetrics
}
Expand Down Expand Up @@ -102,4 +102,6 @@ func registerHealthMetrics(group metrics.Group) {
group.MustRegister(policystatemetrics.NewPolicyStateCollector())
// gRPC metrics
group.MustRegister(grpcmetrics.NewServerMetrics())
// missed metrics
group.MustRegister(kprobemetrics.NewBPFCollector())
}

0 comments on commit 5ea36d1

Please sign in to comment.