diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml index efd760113..a385fa677 100644 --- a/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml +++ b/bindata/manifests/metrics-exporter/metrics-prometheus-rule.yaml @@ -11,28 +11,28 @@ spec: interval: 30s rules: - expr: | - sriov_vf_tx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_packets - expr: | - sriov_vf_rx_packets * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_packets * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_packets - expr: | - sriov_vf_tx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_bytes - expr: | - sriov_vf_rx_bytes * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_bytes * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_bytes - expr: | - sriov_vf_tx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_tx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_tx_dropped - expr: | - sriov_vf_rx_dropped * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_dropped * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_dropped - expr: | - sriov_vf_rx_broadcast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_broadcast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_broadcast - expr: | - sriov_vf_rx_multicast * on (pciAddr) group_left(pod,namespace,dev_type) sriov_kubepoddevice + sriov_vf_rx_multicast * on (pciAddr,node) group_left(pod,namespace,dev_type) sriov_kubepoddevice record: network:sriov_vf_rx_multicast {{ end }} diff --git a/bindata/manifests/metrics-exporter/metrics-prometheus.yaml b/bindata/manifests/metrics-exporter/metrics-prometheus.yaml index 45ae7adbf..d1772a554 100644 --- a/bindata/manifests/metrics-exporter/metrics-prometheus.yaml +++ b/bindata/manifests/metrics-exporter/metrics-prometheus.yaml @@ -12,6 +12,17 @@ spec: bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token" scheme: "https" honorLabels: true + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_endpoint_node_name + targetLabel: node + - action: labeldrop + regex: pod + - action: labeldrop + regex: container + - action: labeldrop + regex: namespace tlsConfig: serverName: sriov-network-metrics-exporter-service.{{.Namespace}}.svc caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index 0092fcdad..cb65aaf50 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -191,9 +191,9 @@ export DEV_MODE=TRUE export CLUSTER_HAS_EMULATED_PF=TRUE export OPERATOR_LEADER_ELECTION_ENABLE=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_ENABLED=true -export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULE=true +export METRICS_EXPORTER_PROMETHEUS_DEPLOY_RULES=true export METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_SERVICE_ACCOUNT:-"prometheus-k8s"} -export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshfit-monitoring"} +export METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE=${METRICS_EXPORTER_PROMETHEUS_OPERATOR_NAMESPACE:-"openshift-monitoring"} export SRIOV_NETWORK_OPERATOR_IMAGE="$registry/$NAMESPACE/sriov-network-operator:latest" export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="$registry/$NAMESPACE/sriov-network-config-daemon:latest" diff --git a/test/conformance/tests/test_exporter_metrics.go b/test/conformance/tests/test_exporter_metrics.go index 804432f04..96bf792b5 100644 --- a/test/conformance/tests/test_exporter_metrics.go +++ b/test/conformance/tests/test_exporter_metrics.go @@ -19,21 +19,18 @@ import ( "github.com/prometheus/common/model" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) -var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { +var _ = Describe("[sriov] Metrics Exporter", Ordered, ContinueOnFailure, func() { var node string var nic *sriovv1.InterfaceExt BeforeAll(func() { - if cluster.VirtualCluster() { - Skip("IGB driver does not support VF statistics") - } - err := namespaces.Create(namespaces.Test, clients) Expect(err).ToNot(HaveOccurred()) @@ -73,6 +70,9 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { }) It("collects metrics regarding receiving traffic via VF", func() { + if cluster.VirtualCluster() { + Skip("IGB driver does not support VF statistics") + } pod := createTestPod(node, []string{"test-me-network"}) DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) @@ -98,27 +98,76 @@ var _ = Describe("[sriov] Metrics Exporter", Ordered, func() { Expect(finalRxPackets).Should(BeNumerically(">", initialRxPackets)) }) - It("PrometheusRule should provide namespaced metrics", func() { - pod := createTestPod(node, []string{"test-me-network"}) - DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + Context("When Prometheus operator is available", func() { + BeforeEach(func() { + _, err := clients.ServiceMonitors(operatorNamespace).List(context.Background(), metav1.ListOptions{}) + if k8serrors.IsNotFound(err) { + Skip("Prometheus operator not available in the cluster") + } + }) - namespacedMetricNames := []string{ - "network:sriov_vf_rx_bytes", - "network:sriov_vf_tx_bytes", - "network:sriov_vf_rx_packets", - "network:sriov_vf_tx_packets", - "network:sriov_vf_rx_dropped", - "network:sriov_vf_tx_dropped", - "network:sriov_vf_rx_broadcast", - "network:sriov_vf_rx_multicast", - } + It("PrometheusRule should provide namespaced metrics", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + namespacedMetricNames := []string{ + "network:sriov_vf_rx_bytes", + "network:sriov_vf_tx_bytes", + "network:sriov_vf_rx_packets", + "network:sriov_vf_tx_packets", + "network:sriov_vf_rx_dropped", + "network:sriov_vf_tx_dropped", + "network:sriov_vf_rx_broadcast", + "network:sriov_vf_rx_multicast", + } - Eventually(func(g Gomega) { - for _, metricName := range namespacedMetricNames { - values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) - g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + Eventually(func(g Gomega) { + for _, metricName := range namespacedMetricNames { + values := runPromQLQuery(fmt.Sprintf(`%s{namespace="%s",pod="%s"}`, metricName, pod.Namespace, pod.Name)) + g.Expect(values).ToNot(BeEmpty(), "no value for metric %s", metricName) + } + }, "90s", "1s").Should(Succeed()) + }) + + It("Metrics should have the correct labels", func() { + pod := createTestPod(node, []string{"test-me-network"}) + DeferCleanup(namespaces.CleanPods, namespaces.Test, clients) + + metricsName := []string{ + "sriov_vf_rx_bytes", + "sriov_vf_tx_bytes", + "sriov_vf_rx_packets", + "sriov_vf_tx_packets", + "sriov_vf_rx_dropped", + "sriov_vf_tx_dropped", + "sriov_vf_rx_broadcast", + "sriov_vf_rx_multicast", } - }, "40s", "1s").Should(Succeed()) + + Eventually(func(g Gomega) { + for _, metricName := range metricsName { + samples := runPromQLQuery(metricName) + g.Expect(samples).ToNot(BeEmpty(), "no value for metric %s", metricName) + g.Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKey(model.LabelName("node")), + HaveKey(model.LabelName("pf")), + HaveKey(model.LabelName("vf")), + )) + } + }, "90s", "1s").Should(Succeed()) + + // sriov_kubepoddevice has a different sets of label than statistics metrics + samples := runPromQLQuery(fmt.Sprintf(`sriov_kubepoddevice{namespace="%s",pod="%s"}`, pod.Namespace, pod.Name)) + Expect(samples).ToNot(BeEmpty(), "no value for metric sriov_kubepoddevice") + Expect(samples[0].Metric).To(And( + HaveKey(model.LabelName("pciAddr")), + HaveKeyWithValue(model.LabelName("node"), model.LabelValue(pod.Spec.NodeName)), + HaveKeyWithValue(model.LabelName("dev_type"), model.LabelValue("openshift.io/metricsResource")), + HaveKeyWithValue(model.LabelName("namespace"), model.LabelValue(pod.Namespace)), + HaveKeyWithValue(model.LabelName("pod"), model.LabelValue(pod.Name)), + )) + }) }) }) diff --git a/test/util/k8sreporter/reporter.go b/test/util/k8sreporter/reporter.go index 5a3405a91..13baac0aa 100644 --- a/test/util/k8sreporter/reporter.go +++ b/test/util/k8sreporter/reporter.go @@ -10,6 +10,9 @@ import ( sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + rbacv1 "k8s.io/api/rbac/v1" ) func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { @@ -18,6 +21,17 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { if err != nil { return err } + + err = monitoringv1.AddToScheme(s) + if err != nil { + return err + } + + err = rbacv1.AddToScheme(s) + if err != nil { + return err + } + return nil } @@ -38,6 +52,8 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { return true case multusNamespace != "" && ns == multusNamespace: return true + case ns == "openshift-monitoring": + return true } return false } @@ -47,6 +63,10 @@ func New(reportPath string) (*kniK8sReporter.KubernetesReporter, error) { {Cr: &sriovv1.SriovNetworkNodePolicyList{}}, {Cr: &sriovv1.SriovNetworkList{}}, {Cr: &sriovv1.SriovOperatorConfigList{}}, + {Cr: &monitoringv1.ServiceMonitorList{}, Namespace: &operatorNamespace}, + {Cr: &monitoringv1.PrometheusRuleList{}, Namespace: &operatorNamespace}, + {Cr: &rbacv1.RoleList{}, Namespace: &operatorNamespace}, + {Cr: &rbacv1.RoleBindingList{}, Namespace: &operatorNamespace}, } err := os.Mkdir(reportPath, 0755)