Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix policyfilter metrics #2282

Merged
merged 3 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/content/en/docs/reference/metrics.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 30 additions & 10 deletions pkg/metrics/policyfiltermetrics/policyfiltermetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,32 +44,52 @@ func (s Operation) String() string {
return operationLabelValues[s]
}

// OperationErr classifies the error outcome of a policy filter operation.
// Its String form is used as a metric label value.
type OperationErr int

const (
	// NoErr is the classification used when no error occurred.
	NoErr OperationErr = iota
	// GenericErr is the classification for errors without a more specific one.
	GenericErr
	// PodNamespaceConflictErr is the classification for pod namespace conflicts.
	PodNamespaceConflictErr
)

// operationErrLabels maps every known OperationErr to its label value.
var operationErrLabels = map[OperationErr]string{
	NoErr:                   "",
	GenericErr:              "generic-error",
	PodNamespaceConflictErr: "pod-namespace-conflict",
}

// String returns the label value for s.
//
// A value that is missing from operationErrLabels is reported with the
// GenericErr label: a plain map lookup would yield "", which is the NoErr
// label, and would make an unknown error code indistinguishable from success.
func (s OperationErr) String() string {
	if label, ok := operationErrLabels[s]; ok {
		return label
	}
	return operationErrLabels[GenericErr]
}

var (
PolicyFilterOpMetrics = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "policyfilter_metrics_total",
Help: "Policy filter metrics. For internal use only.",
ConstLabels: nil,
}, []string{"subsys", "op"})
}, []string{"subsys", "op", "error"})
)

// InitMetrics registers PolicyFilterOpMetrics with the given registry and then
// touches label combinations of the counter with Add(0), which leaves their
// values at zero.
//
// NOTE(review): the four explicit calls below pass two label values while the
// nested loops pass three; confirm both agree with the label list that
// PolicyFilterOpMetrics is declared with.
func InitMetrics(registry *prometheus.Registry) {
registry.MustRegister(PolicyFilterOpMetrics)

// Initialize metrics with labels
PolicyFilterOpMetrics.WithLabelValues(RTHooksSubsys.String(), AddContainerOperation.String()).Add(0)
PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), AddPodOperation.String()).Add(0)
PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), UpdatePodOperation.String()).Add(0)
PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), DeletePodOperation.String()).Add(0)
// Visit every combination of the values held in subsysLabelValues,
// operationLabelValues and operationErrLabels.
for _, subsys := range subsysLabelValues {
for _, op := range operationLabelValues {
for _, err := range operationErrLabels {
PolicyFilterOpMetrics.WithLabelValues(
subsys, op, err,
).Add(0)
}
}
}

// NOTES:
// * error, error_type, type - standardize on a label
// * Don't confuse op in policyfilter_metrics_total with ops.OpCode
// * Rename policyfilter_metrics_total to get rid of _metrics?
}

func OpInc(subsys Subsys, op Operation) {
PolicyFilterOpMetrics.WithLabelValues(
subsys.String(), op.String(),
).Inc()
// OpInc adds one to the PolicyFilterOpMetrics counter selected by the string
// forms of subsys and op together with the err label value.
func OpInc(subsys Subsys, op Operation, err string) {
	counter := PolicyFilterOpMetrics.WithLabelValues(
		subsys.String(),
		op.String(),
		err,
	)
	counter.Inc()
}
34 changes: 34 additions & 0 deletions pkg/policyfilter/error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package policyfilter

import (
	"errors"
	"fmt"

	"github.com/cilium/tetragon/pkg/metrics/policyfiltermetrics"
)

// podNamespaceConflictErr is returned when a pod is seen with a namespace that
// differs from the one already recorded for it: even if a pod changes, its
// namespace is expected to stay the same.
type podNamespaceConflictErr struct {
	podID PodID
	oldNs string
	newNs string
}

// Error formats the pod id together with the old and new namespaces.
func (e *podNamespaceConflictErr) Error() string {
	const layout = "conflicting namespaces for pod with id '%s': old='%s' vs new='%s'"
	id := e.podID.String()
	return fmt.Sprintf(layout, id, e.oldNs, e.newNs)
}

// ErrorLabel returns an error label with a small cardinality so it can be used
// in metrics.
//
// A nil err yields the NoErr label. An err that is, or wraps, a
// *podNamespaceConflictErr yields the PodNamespaceConflictErr label; errors.As
// is used instead of a type switch so that a conflict error wrapped with extra
// context (for example via fmt.Errorf with %w) is still recognized. Any other
// error yields the GenericErr label.
func ErrorLabel(err error) string {
	if err == nil {
		return policyfiltermetrics.NoErr.String()
	}
	var nsConflict *podNamespaceConflictErr
	if errors.As(err, &nsConflict) {
		return policyfiltermetrics.PodNamespaceConflictErr.String()
	}
	return policyfiltermetrics.GenericErr.String()
}
16 changes: 16 additions & 0 deletions pkg/policyfilter/error_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

package policyfilter

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestErrorLabel(t *testing.T) {
var err error = &podNamespaceConflictErr{PodID{}, "foo", "lala"}
require.Equal(t, "", ErrorLabel(nil))
require.Equal(t, "pod-namespace-conflict", ErrorLabel(err))
}
2 changes: 1 addition & 1 deletion pkg/policyfilter/rthooks/rthooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func createContainerHook(_ context.Context, arg *rthooks.CreateContainerArg) err
if err := pfState.AddPodContainer(policyfilter.PodID(podID), namespace, pod.Labels, containerID, cgid); err != nil {
log.WithError(err).Warn("failed to update policy filter, aborting hook.")
}
policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation)
policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation, policyfilter.ErrorLabel(err))

return nil
}
16 changes: 8 additions & 8 deletions pkg/policyfilter/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ func (m *state) updatePodHandler(pod *v1.Pod) error {
"pod-id": podID,
"container-ids": containerIDs,
"namespace": namespace,
}).Warn("policyfilter: UpdatePod failed")
}).Warn("policyfilter, UpdatePod failed")
return err
}

Expand All @@ -299,17 +299,17 @@ func (m *state) getPodEventHandlers() cache.ResourceEventHandlerFuncs {
logger.GetLogger().Warn("policyfilter, add-pod handler: unexpected object type: %T", pod)
return
}
m.updatePodHandler(pod)
policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.AddPodOperation)
err := m.updatePodHandler(pod)
policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.AddPodOperation, ErrorLabel(err))
},
UpdateFunc: func(_, newObj interface{}) {
pod, ok := newObj.(*v1.Pod)
if !ok {
logger.GetLogger().Warn("policyfilter, update-pod handler: unexpected object type(s): new:%T", pod)
return
}
m.updatePodHandler(pod)
policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.UpdatePodOperation)
err := m.updatePodHandler(pod)
policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.UpdatePodOperation, ErrorLabel(err))
},
DeleteFunc: func(obj interface{}) {
// Remove all containers for this pod
Expand All @@ -332,7 +332,7 @@ func (m *state) getPodEventHandlers() cache.ResourceEventHandlerFuncs {
"namespace": namespace,
}).Warn("policyfilter, delete-pod handler: DelPod failed")
}
policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.DeletePodOperation)
policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.DeletePodOperation, ErrorLabel(err))
},
}
}
Expand Down Expand Up @@ -587,7 +587,7 @@ func (m *state) AddPodContainer(podID PodID, namespace string, podLabels labels.
}).Info("AddPodContainer: added pod")
} else if pod.namespace != namespace {
// sanity check: old and new namespace should match
return fmt.Errorf("conflicting namespaces for pod with id %s: old='%s' vs new='%s'", podID, pod.namespace, namespace)
return &podNamespaceConflictErr{podID: podID, oldNs: pod.namespace, newNs: namespace}
}

m.addPodContainers(pod, []string{containerID}, []CgroupID{cgID})
Expand Down Expand Up @@ -788,7 +788,7 @@ func (m *state) UpdatePod(podID PodID, namespace string, podLabels labels.Labels
dlog.Info("UpdatePod: added pod")
} else if pod.namespace != namespace {
// sanity check: old and new namespace should match
return fmt.Errorf("conflicting namespaces for pod with id %s: old='%s' vs new='%s'", podID, pod.namespace, namespace)
return &podNamespaceConflictErr{podID: podID, oldNs: pod.namespace, newNs: namespace}
}

// labels changed: check if there are policies ads that:
Expand Down
Loading