From ac8a330bfe1bb47b8168e4d982814f4e63a9bdf3 Mon Sep 17 00:00:00 2001 From: Israel Blancas Date: Mon, 28 Oct 2024 13:37:47 +0100 Subject: [PATCH 1/3] Create ServiceMonitor for operator metrics programmatically Signed-off-by: Israel Blancas --- .chloggen/3370-create-dynamic-sm.yaml | 19 +++ ...emetry-operator.clusterserviceversion.yaml | 2 +- ...er-manager-metrics-service_v1_service.yaml | 2 + ...nitoring.coreos.com_v1_prometheusrule.yaml | 24 +++ ...eus_rbac.authorization.k8s.io_v1_role.yaml | 15 ++ ...c.authorization.k8s.io_v1_rolebinding.yaml | 12 ++ ...emetry-operator.clusterserviceversion.yaml | 15 +- config/default/kustomization.yaml | 2 - config/overlays/openshift/kustomization.yaml | 4 + config/overlays/openshift/manager-patch.yaml | 2 +- .../manager_auth_proxy_tls_patch.yaml | 29 ++++ .../openshift/metrics_service_tls_patch.yaml | 7 + config/prometheus/kustomization.yaml | 2 - config/prometheus/monitor.yaml | 26 ---- internal/operator-metrics/metrics.go | 142 ++++++++++++++++++ internal/operator-metrics/metrics_test.go | 103 +++++++++++++ main.go | 11 ++ 17 files changed, 383 insertions(+), 34 deletions(-) create mode 100755 .chloggen/3370-create-dynamic-sm.yaml create mode 100644 bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml create mode 100644 bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml create mode 100644 bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml create mode 100644 config/overlays/openshift/manager_auth_proxy_tls_patch.yaml create mode 100644 config/overlays/openshift/metrics_service_tls_patch.yaml delete mode 100644 config/prometheus/kustomization.yaml delete mode 100644 config/prometheus/monitor.yaml create mode 100644 internal/operator-metrics/metrics.go create mode 100644 internal/operator-metrics/metrics_test.go diff --git a/.chloggen/3370-create-dynamic-sm.yaml 
b/.chloggen/3370-create-dynamic-sm.yaml new file mode 100755 index 0000000000..4f6264744a --- /dev/null +++ b/.chloggen/3370-create-dynamic-sm.yaml @@ -0,0 +1,19 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: operator + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Programmatically create the `ServiceMonitor` for the operator metrics endpoint, ensuring correct namespace handling and dynamic configuration. + +# One or more tracking issues related to the change +issues: [3370] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + Previously, the `ServiceMonitor` was created statically from a manifest file, causing failures when the + operator was deployed in a non-default namespace. This enhancement ensures automatic adjustment of the + `serverName` and seamless metrics scraping. 
diff --git a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml index cf2f802a05..c15aa26a49 100644 --- a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml +++ b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml @@ -99,7 +99,7 @@ metadata: categories: Logging & Tracing,Monitoring certified: "false" containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator - createdAt: "2024-10-16T10:10:50Z" + createdAt: "2024-10-28T12:33:36Z" description: Provides the OpenTelemetry components, including the Collector operators.operatorframework.io/builder: operator-sdk-v1.29.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 diff --git a/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml b/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml index 66b0879b4d..a57cc212d5 100644 --- a/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml +++ b/bundle/openshift/manifests/opentelemetry-operator-controller-manager-metrics-service_v1_service.yaml @@ -1,6 +1,8 @@ apiVersion: v1 kind: Service metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics creationTimestamp: null labels: app.kubernetes.io/name: opentelemetry-operator diff --git a/bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml b/bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml new file mode 100644 index 0000000000..88441a6cad --- /dev/null +++ b/bundle/openshift/manifests/opentelemetry-operator-prometheus-rules_monitoring.coreos.com_v1_prometheusrule.yaml @@ -0,0 +1,24 @@ +apiVersion: monitoring.coreos.com/v1 
+kind: PrometheusRule +metadata: + labels: + app.kubernetes.io/managed-by: operator-lifecycle-manager + app.kubernetes.io/name: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry-operator + name: opentelemetry-operator-prometheus-rules +spec: + groups: + - name: opentelemetry-operator-monitoring.rules + rules: + - expr: sum by (type) (opentelemetry_collector_receivers) + record: type:opentelemetry_collector_receivers:sum + - expr: sum by (type) (opentelemetry_collector_exporters) + record: type:opentelemetry_collector_exporters:sum + - expr: sum by (type) (opentelemetry_collector_processors) + record: type:opentelemetry_collector_processors:sum + - expr: sum by (type) (opentelemetry_collector_extensions) + record: type:opentelemetry_collector_extensions:sum + - expr: sum by (type) (opentelemetry_collector_connectors) + record: type:opentelemetry_collector_connectors:sum + - expr: sum by (type) (opentelemetry_collector_info) + record: type:opentelemetry_collector_info:sum \ No newline at end of file diff --git a/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml new file mode 100644 index 0000000000..9895de1183 --- /dev/null +++ b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: opentelemetry-operator-prometheus +rules: +- apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch diff --git a/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml new file mode 100644 index 0000000000..db617726d5 --- /dev/null +++ 
b/bundle/openshift/manifests/opentelemetry-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: opentelemetry-operator-prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: opentelemetry-operator-prometheus +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: openshift-monitoring diff --git a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml index 8ab2219366..88078811fb 100644 --- a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml +++ b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml @@ -99,7 +99,7 @@ metadata: categories: Logging & Tracing,Monitoring certified: "false" containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator - createdAt: "2024-10-16T10:10:50Z" + createdAt: "2024-10-28T12:33:40Z" description: Provides the OpenTelemetry components, including the Collector operators.operatorframework.io/builder: operator-sdk-v1.29.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 @@ -477,9 +477,9 @@ spec: - --zap-time-encoding=rfc3339nano - --enable-nginx-instrumentation=true - --enable-go-instrumentation=true - - --enable-multi-instrumentation=true - --openshift-create-dashboard=true - --feature-gates=+operator.observability.prometheus + - --enable-cr-metrics=true env: - name: SERVICE_ACCOUNT_NAME valueFrom: @@ -516,6 +516,10 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 + - --tls-cert-file=/var/run/tls/server/tls.crt + - --tls-private-key-file=/var/run/tls/server/tls.key + - 
--tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256 + - --tls-min-version=VersionTLS12 image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: @@ -529,9 +533,16 @@ spec: requests: cpu: 5m memory: 64Mi + volumeMounts: + - mountPath: /var/run/tls/server + name: opentelemetry-operator-metrics-cert serviceAccountName: opentelemetry-operator-controller-manager terminationGracePeriodSeconds: 10 volumes: + - name: opentelemetry-operator-metrics-cert + secret: + defaultMode: 420 + secretName: opentelemetry-operator-metrics - name: cert secret: defaultMode: 420 diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index b5d04b59ae..2475c8ee5b 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -18,8 +18,6 @@ bases: - ../manager - ../webhook - ../certmanager -# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus patchesStrategicMerge: # Protect the /metrics endpoint by putting it behind auth. 
diff --git a/config/overlays/openshift/kustomization.yaml b/config/overlays/openshift/kustomization.yaml index ddd0d3b29b..dd5b4300d0 100644 --- a/config/overlays/openshift/kustomization.yaml +++ b/config/overlays/openshift/kustomization.yaml @@ -8,3 +8,7 @@ patches: kind: Deployment name: controller-manager path: manager-patch.yaml + +patchesStrategicMerge: +- metrics_service_tls_patch.yaml +- manager_auth_proxy_tls_patch.yaml \ No newline at end of file diff --git a/config/overlays/openshift/manager-patch.yaml b/config/overlays/openshift/manager-patch.yaml index 2fb76bd889..57b097ca29 100644 --- a/config/overlays/openshift/manager-patch.yaml +++ b/config/overlays/openshift/manager-patch.yaml @@ -7,6 +7,6 @@ - --zap-time-encoding=rfc3339nano - --enable-nginx-instrumentation=true - '--enable-go-instrumentation=true' - - '--enable-multi-instrumentation=true' - '--openshift-create-dashboard=true' - '--feature-gates=+operator.observability.prometheus' + - '--enable-cr-metrics=true' \ No newline at end of file diff --git a/config/overlays/openshift/manager_auth_proxy_tls_patch.yaml b/config/overlays/openshift/manager_auth_proxy_tls_patch.yaml new file mode 100644 index 0000000000..077fa74ea6 --- /dev/null +++ b/config/overlays/openshift/manager_auth_proxy_tls_patch.yaml @@ -0,0 +1,29 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager # without this line, kustomize reorders the containers, making kube-rbac-proxy the default container + - name: kube-rbac-proxy + args: + - "--secure-listen-address=0.0.0.0:8443" + - "--upstream=http://127.0.0.1:8080/" + - "--logtostderr=true" + - "--v=0" + - "--tls-cert-file=/var/run/tls/server/tls.crt" + - "--tls-private-key-file=/var/run/tls/server/tls.key" + - 
"--tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256" + - "--tls-min-version=VersionTLS12" + volumeMounts: + - mountPath: /var/run/tls/server + name: opentelemetry-operator-metrics-cert + volumes: + - name: opentelemetry-operator-metrics-cert + secret: + defaultMode: 420 + # secret generated by the 'service.beta.openshift.io/serving-cert-secret-name' annotation on the metrics-service + secretName: opentelemetry-operator-metrics diff --git a/config/overlays/openshift/metrics_service_tls_patch.yaml b/config/overlays/openshift/metrics_service_tls_patch.yaml new file mode 100644 index 0000000000..7505c7894a --- /dev/null +++ b/config/overlays/openshift/metrics_service_tls_patch.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + service.beta.openshift.io/serving-cert-secret-name: opentelemetry-operator-metrics + name: controller-manager-metrics-service + namespace: system diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml deleted file mode 100644 index ed137168a1..0000000000 --- a/config/prometheus/kustomization.yaml +++ /dev/null @@ -1,2 +0,0 @@ -resources: -- monitor.yaml diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml deleted file mode 100644 index 6e5f438a21..0000000000 --- a/config/prometheus/monitor.yaml +++ /dev/null @@ -1,26 +0,0 @@ - -# Prometheus Monitor Service (Metrics) -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - app.kubernetes.io/name: opentelemetry-operator - control-plane: controller-manager - name: controller-manager-metrics-monitor - 
namespace: system -spec: - endpoints: - - path: /metrics - port: https - scheme: https - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - tlsConfig: - insecureSkipVerify: false - ca: - secret: - key: ca.crt - name: opentelemetry-operator-controller-manager-service-cert - selector: - matchLabels: - app.kubernetes.io/name: opentelemetry-operator - control-plane: controller-manager diff --git a/internal/operator-metrics/metrics.go b/internal/operator-metrics/metrics.go new file mode 100644 index 0000000000..d68c4845dd --- /dev/null +++ b/internal/operator-metrics/metrics.go @@ -0,0 +1,142 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package operatormetrics + +import ( + "context" + "fmt" + "os" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var ( + // namespaceFile is the path to the namespace file for the service account. + namespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" + + // caBundleConfigMap declares the name of the config map for the CA bundle. 
+ caBundleConfigMap = "serving-certs-ca-bundle" + + // prometheusCAFile declares the path for prometheus CA file for service monitors in OpenShift. + prometheusCAFile = fmt.Sprintf("/etc/prometheus/configmaps/%s/service-ca.crt", caBundleConfigMap) + + // nolint #nosec + // bearerTokenFile declares the path for bearer token file for service monitors. + bearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + // openshiftInClusterMonitoringNamespace declares the namespace for the OpenShift in-cluster monitoring. + openshiftInClusterMonitoringNamespace = "openshift-monitoring" +) + +type OperatorMetrics struct { + kubeClient client.Client +} + +func NewOperatorMetrics(config *rest.Config, scheme *runtime.Scheme) (OperatorMetrics, error) { + kubeClient, err := client.New(config, client.Options{Scheme: scheme}) + if err != nil { + return OperatorMetrics{}, err + } + + return OperatorMetrics{ + kubeClient: kubeClient, + }, nil +} + +func (om OperatorMetrics) Start(ctx context.Context) error { + rawNamespace, err := os.ReadFile(namespaceFile) + if err != nil { + return fmt.Errorf("error reading namespace file: %w", err) + } + namespace := string(rawNamespace) + + var tlsConfig *monitoringv1.TLSConfig + + if om.caConfigMapExists() { + serviceName := fmt.Sprintf("opentelemetry-operator-controller-manager-metrics-service.%s.svc", namespace) + + tlsConfig = &monitoringv1.TLSConfig{ + CAFile: prometheusCAFile, + SafeTLSConfig: monitoringv1.SafeTLSConfig{ + ServerName: &serviceName, + }, + } + } else { + t := true + tlsConfig = &monitoringv1.TLSConfig{ + SafeTLSConfig: monitoringv1.SafeTLSConfig{ + // kube-rbac-proxy uses a self-signed cert by default + InsecureSkipVerify: &t, + }, + } + } + + sm := monitoringv1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: "opentelemetry-operator-metrics-monitor", + Namespace: namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "opentelemetry-operator", + "app.kubernetes.io/part-of": 
"opentelemetry-operator", + "control-plane": "controller-manager", + }, + }, + Spec: monitoringv1.ServiceMonitorSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app.kubernetes.io/name": "opentelemetry-operator", + }, + }, + Endpoints: []monitoringv1.Endpoint{ + { + BearerTokenFile: bearerTokenFile, + Interval: "30s", + Path: "/metrics", + Scheme: "https", + ScrapeTimeout: "10s", + TargetPort: &intstr.IntOrString{IntVal: 8443}, + TLSConfig: tlsConfig, + }, + }, + }, + } + + err = om.kubeClient.Create(ctx, &sm) + if err != nil { + return fmt.Errorf("error creating service monitor: %w", err) + } + + <-ctx.Done() + // ctx is done at this point; issue the cleanup Delete with a fresh context so the request is not cancelled before it is sent. + return om.kubeClient.Delete(context.Background(), &sm) +} + +func (om OperatorMetrics) NeedLeaderElection() bool { + return true +} + +func (om OperatorMetrics) caConfigMapExists() bool { + return om.kubeClient.Get(context.Background(), client.ObjectKey{ + Name: caBundleConfigMap, + Namespace: openshiftInClusterMonitoringNamespace, + }, &corev1.ConfigMap{}, + ) == nil +} diff --git a/internal/operator-metrics/metrics_test.go b/internal/operator-metrics/metrics_test.go new file mode 100644 index 0000000000..b6d115772e --- /dev/null +++ b/internal/operator-metrics/metrics_test.go @@ -0,0 +1,103 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package operatormetrics + +import ( + "context" + "os" + "testing" + "time" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func TestNewOperatorMetrics(t *testing.T) { + config := &rest.Config{} + scheme := runtime.NewScheme() + metrics, err := NewOperatorMetrics(config, scheme) + assert.NoError(t, err) + assert.NotNil(t, metrics.kubeClient) +} + +func TestOperatorMetrics_Start(t *testing.T) { + tmpFile, err := os.CreateTemp("", "namespace") + require.NoError(t, err) + defer os.Remove(tmpFile.Name()) + + _, err = tmpFile.WriteString("test-namespace") + require.NoError(t, err) + tmpFile.Close() + + namespaceFile = tmpFile.Name() + + scheme := runtime.NewScheme() + err = corev1.AddToScheme(scheme) + require.NoError(t, err) + err = monitoringv1.AddToScheme(scheme) + require.NoError(t, err) + + client := fake.NewClientBuilder().WithScheme(scheme).Build() + + metrics := OperatorMetrics{kubeClient: client} + + ctx, cancel := context.WithCancel(context.Background()) + errChan := make(chan error) + go func() { + errChan <- metrics.Start(ctx) + }() + + // Wait a bit to allow the Start method to run + time.Sleep(100 * time.Millisecond) + + cancel() + err = <-errChan + assert.NoError(t, err) +} + +func TestOperatorMetrics_NeedLeaderElection(t *testing.T) { + metrics := OperatorMetrics{} + assert.True(t, metrics.NeedLeaderElection()) +} + +func TestOperatorMetrics_caConfigMapExists(t *testing.T) { + scheme := runtime.NewScheme() + err := corev1.AddToScheme(scheme) + require.NoError(t, err) + + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects( + &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: caBundleConfigMap, + Namespace: 
openshiftInClusterMonitoringNamespace, + }, + }, + ).Build() + + metrics := OperatorMetrics{kubeClient: client} + + assert.True(t, metrics.caConfigMapExists()) + + // Test when the ConfigMap doesn't exist + clientWithoutConfigMap := fake.NewClientBuilder().WithScheme(scheme).Build() + metricsWithoutConfigMap := OperatorMetrics{kubeClient: clientWithoutConfigMap} + assert.False(t, metricsWithoutConfigMap.caConfigMapExists()) +} diff --git a/main.go b/main.go index 8d37edce7d..d52abb382b 100644 --- a/main.go +++ b/main.go @@ -58,6 +58,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/fips" collectorManifests "github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector" openshiftDashboards "github.com/open-telemetry/opentelemetry-operator/internal/openshift/dashboards" + operatormetrics "github.com/open-telemetry/opentelemetry-operator/internal/operator-metrics" "github.com/open-telemetry/opentelemetry-operator/internal/rbac" "github.com/open-telemetry/opentelemetry-operator/internal/version" "github.com/open-telemetry/opentelemetry-operator/internal/webhook/podmutation" @@ -435,6 +436,16 @@ func main() { setupLog.Error(err, "Error init CRD metrics") } + if cfg.PrometheusCRAvailability() == prometheus.Available { + operatorMetrics, opError := operatormetrics.NewOperatorMetrics(mgr.GetConfig(), scheme) + if opError != nil { + setupLog.Error(opError, "Failed to create the operator metrics SM") + } + err = mgr.Add(operatorMetrics) + if err != nil { + setupLog.Error(err, "Failed to add the operator metrics SM") + } + } } bv := func(collector otelv1beta1.OpenTelemetryCollector) admission.Warnings { From 4e0dad086e32692e4d1705fab47f010cb0b9e70c Mon Sep 17 00:00:00 2001 From: Israel Blancas Date: Tue, 29 Oct 2024 17:36:11 +0100 Subject: [PATCH 2/3] Apply changes requested in CR Signed-off-by: Israel Blancas --- internal/operator-metrics/metrics.go | 3 +++ internal/operator-metrics/metrics_test.go | 28 +++++++++++++++++++++-- main.go | 
22 +++++++++--------- 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/internal/operator-metrics/metrics.go b/internal/operator-metrics/metrics.go index d68c4845dd..43b3a607e3 100644 --- a/internal/operator-metrics/metrics.go +++ b/internal/operator-metrics/metrics.go @@ -26,6 +26,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" ) var ( @@ -46,6 +47,8 @@ var ( openshiftInClusterMonitoringNamespace = "openshift-monitoring" ) +var _ manager.Runnable = &OperatorMetrics{} + type OperatorMetrics struct { kubeClient client.Client } diff --git a/internal/operator-metrics/metrics_test.go b/internal/operator-metrics/metrics_test.go index b6d115772e..8745886bb6 100644 --- a/internal/operator-metrics/metrics_test.go +++ b/internal/operator-metrics/metrics_test.go @@ -24,8 +24,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -65,8 +68,29 @@ func TestOperatorMetrics_Start(t *testing.T) { errChan <- metrics.Start(ctx) }() - // Wait a bit to allow the Start method to run - time.Sleep(100 * time.Millisecond) + ctxTimeout, cancelTimeout := context.WithTimeout(ctx, time.Second*10) + defer cancelTimeout() + + // Wait until one service monitor is being created + var serviceMonitor *monitoringv1.ServiceMonitor = &monitoringv1.ServiceMonitor{} + err = wait.PollUntilContextTimeout( + ctxTimeout, + time.Millisecond*100, + time.Second*100, + true, + func(ctx context.Context) (bool, error) { + errGet := client.Get(ctx, types.NamespacedName{Name: "opentelemetry-operator-metrics-monitor", Namespace: "test-namespace"}, 
serviceMonitor) + + if errGet != nil { + if apierrors.IsNotFound(errGet) { + return false, nil + } + return false, errGet + } + return true, nil + }, + ) + require.NoError(t, err) cancel() err = <-errChan diff --git a/main.go b/main.go index d52abb382b..4a2ce615a8 100644 --- a/main.go +++ b/main.go @@ -422,6 +422,17 @@ func main() { os.Exit(1) } + if cfg.PrometheusCRAvailability() == prometheus.Available { + // A failed constructor returns a zero-value OperatorMetrics whose kubeClient is nil; + // registering it would panic in Start, so only add it to the manager on success. + operatorMetrics, opError := operatormetrics.NewOperatorMetrics(mgr.GetConfig(), scheme) + if opError != nil { + setupLog.Error(opError, "Failed to create the operator metrics SM") + } else if err = mgr.Add(operatorMetrics); err != nil { + setupLog.Error(err, "Failed to add the operator metrics SM") + } + } + if os.Getenv("ENABLE_WEBHOOKS") != "false" { var crdMetrics *otelv1beta1.Metrics @@ -435,17 +446,6 @@ func main() { if err != nil { setupLog.Error(err, "Error init CRD metrics") } - - if cfg.PrometheusCRAvailability() == prometheus.Available { - operatorMetrics, opError := operatormetrics.NewOperatorMetrics(mgr.GetConfig(), scheme) - if opError != nil { - setupLog.Error(opError, "Failed to create the operator metrics SM") - } - err = mgr.Add(operatorMetrics) - if err != nil { - setupLog.Error(err, "Failed to add the operator metrics SM") - } - } } bv := func(collector otelv1beta1.OpenTelemetryCollector) admission.Warnings { From a9bf098a947dc370c17fd72bd04c264fef28932e Mon Sep 17 00:00:00 2001 From: Israel Blancas Date: Tue, 29 Oct 2024 17:36:26 +0100 Subject: [PATCH 3/3] Apply changes requested in CR Signed-off-by: Israel Blancas --- internal/operator-metrics/metrics_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/operator-metrics/metrics_test.go b/internal/operator-metrics/metrics_test.go index 8745886bb6..ae625bfff4 100644 --- a/internal/operator-metrics/metrics_test.go +++ b/internal/operator-metrics/metrics_test.go @@ -76,7 +76,7 @@ func TestOperatorMetrics_Start(t *testing.T) { err = wait.PollUntilContextTimeout( 
ctxTimeout, time.Millisecond*100, - time.Second*100, + time.Second*10, true, func(ctx context.Context) (bool, error) { errGet := client.Get(ctx, types.NamespacedName{Name: "opentelemetry-operator-metrics-monitor", Namespace: "test-namespace"}, serviceMonitor)