Skip to content

Commit

Permalink
Stop reporting process cache evictions and misses as errors
Browse files Browse the repository at this point in the history
Stop reporting tetragon_errors_total metric with "type" label values:
process_cache_miss_on_get
process_cache_evicted
process_cache_miss_on_remove

Evictions and misses are not errors; they are a normal part of a cache's life:
sometimes you hit, sometimes you miss, and sometimes you have to evict a process.

They are now reported by more intuitive metrics:
tetragon_process_cache_evictions_total
tetragon_process_cache_misses_total{operation=~"get|remove"}

Signed-off-by: Anna Kapuscinska <[email protected]>
  • Loading branch information
lambdanis committed Sep 1, 2024
1 parent 4077c53 commit c7354b0
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 15 deletions.
3 changes: 3 additions & 0 deletions contrib/upgrade-notes/latest.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,6 @@ tetragon:
* `tetragon_ringbuf_perf_event_lost_total` -> `tetragon_observer_ringbuf_events_lost_total`
* `tetragon_ringbuf_queue_received_total` -> `tetragon_observer_ringbuf_queue_events_received_total`
* `tetragon_ringbuf_queue_lost_total` -> `tetragon_observer_ringbuf_queue_events_lost_total`
* `tetragon_errors_total{type="process_cache_evicted"}` metric is replaced by `tetragon_process_cache_evictions_total`.
* `tetragon_errors_total{type=~"process_cache_miss_on_get|process_cache_miss_on_remove"}` metrics are replaced by
`tetragon_process_cache_misses_total{operation=~"get|remove"}`.
2 changes: 1 addition & 1 deletion docs/content/en/docs/reference/metrics.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 1 addition & 10 deletions pkg/metrics/errormetrics/errormetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,8 @@ import (
type ErrorType int

const (
// Process not found on get() call.
ProcessCacheMissOnGet ErrorType = iota
// Process evicted from the cache.
ProcessCacheEvicted
// Process not found on remove() call.
ProcessCacheMissOnRemove
// Tid and Pid mismatch that could affect BPF and user space caching logic
ProcessPidTidMismatch
ProcessPidTidMismatch ErrorType = iota
// An event is missing process info.
EventMissingProcessInfo
// An error occurred in an event handler.
Expand All @@ -37,9 +31,6 @@ const (
)

var errorTypeLabelValues = map[ErrorType]string{
ProcessCacheMissOnGet: "process_cache_miss_on_get",
ProcessCacheEvicted: "process_cache_evicted",
ProcessCacheMissOnRemove: "process_cache_miss_on_remove",
ProcessPidTidMismatch: "process_pid_tid_mismatch",
EventMissingProcessInfo: "event_missing_process_info",
HandlerError: "handler_error",
Expand Down
4 changes: 0 additions & 4 deletions pkg/process/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (

"github.com/cilium/tetragon/api/v1/tetragon"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
lru "github.com/hashicorp/golang-lru/v2"
)

Expand Down Expand Up @@ -132,7 +131,6 @@ func NewCache(
lruCache, err := lru.NewWithEvict(
processCacheSize,
func(_ string, _ *ProcessInternal) {
errormetrics.ErrorTotalInc(errormetrics.ProcessCacheEvicted)
processCacheEvictions.Inc()
},
)
Expand All @@ -151,7 +149,6 @@ func (pc *Cache) get(processID string) (*ProcessInternal, error) {
process, ok := pc.cache.Get(processID)
if !ok {
logger.GetLogger().WithField("id in event", processID).Debug("process not found in cache")
errormetrics.ErrorTotalInc(errormetrics.ProcessCacheMissOnGet)
processCacheMisses.WithLabelValues("get").Inc()
return nil, fmt.Errorf("invalid entry for process ID: %s", processID)
}
Expand All @@ -173,7 +170,6 @@ func (pc *Cache) remove(process *tetragon.Process) bool {
if present {
processCacheTotal.Dec()
} else {
errormetrics.ErrorTotalInc(errormetrics.ProcessCacheMissOnRemove)
processCacheMisses.WithLabelValues("remove").Inc()
}
return present
Expand Down

0 comments on commit c7354b0

Please sign in to comment.