From 87acbd09533254a84e5a7b81e497b0e26a01701c Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Wed, 25 Sep 2024 11:48:23 +0100 Subject: [PATCH] feat: manage Perses dashboards via the operator --- .../v1alpha1/operator_configuration_types.go | 16 + api/dash0monitoring/v1alpha1/types_common.go | 9 + cmd/main.go | 40 +- .../operator.dash0.com_dash0monitorings.yaml | 8 + ...dash0.com_dash0operatorconfigurations.yaml | 8 + config/rbac/role.yaml | 16 + go.mod | 17 +- go.sum | 58 ++ helm-chart/dash0-operator/README.md | 6 +- .../templates/operator/cluster-roles.yaml | 20 + ...rce-definition-operator-configuration.yaml | 17 +- .../operator/deployment-and-webhooks.yaml | 3 + .../__snapshot__/cluster-roles_test.yaml.snap | 16 + ...tion-operator-configuration_test.yaml.snap | 17 +- .../deployment-and-webhooks_test.yaml | 4 + helm-chart/dash0-operator/values.yaml | 20 +- images/instrumentation/Dockerfile | 2 +- .../backendconnection_manager_test.go | 4 +- .../otelcolresources/desired_state.go | 2 +- .../otelcolresources/otelcol_resources.go | 14 - .../otelcol_resources_test.go | 78 +-- .../dash0/controller/dash0_controller_test.go | 8 +- .../operator_configuration_controller.go | 175 +++++- .../operator_configuration_controller_test.go | 95 ++- .../perses_dashboards_controller.go | 403 +++++++++++++ .../instrumentation/instrumenter_test.go | 2 +- .../operator_pre_delete_handler_test.go | 2 +- .../dash0/predelete/pre_delete_suite_test.go | 2 +- .../dash0/selfmonitoring/self_monitoring.go | 166 ++--- .../auto_operator_configuration_handler.go | 22 +- ...uto_operator_configuration_handler_test.go | 2 +- internal/dash0/startup/startup_suite_test.go | 2 +- internal/dash0/util/constants.go | 2 + internal/dash0/util/controller.go | 6 +- .../webhooks/attach_dangling_events_test.go | 4 +- test-resources/.env.template | 1 + test-resources/bin/test-cleanup.sh | 7 + .../bin/test-scenario-01-aum-operator-cr.sh | 68 ++- .../bin/test-scenario-02-operator-cr-aum.sh | 67 ++- test-resources/bin/util | 68 ++- .../dash0operatorconfiguration.secret.yaml | 1 + ...0operatorconfiguration.token.yaml.template | 1 + .../persesdashboard/persesdashboard.yaml | 565 ++++++++++++++++++ test/util/constants.go | 4 +- test/util/matchers.go | 50 ++ test/util/operator_resource.go | 4 +- test/util/resources.go | 6 +- 47 files changed, 1824 insertions(+), 284 deletions(-) create mode 100644 internal/dash0/controller/perses_dashboards_controller.go create mode 100644 test-resources/customresources/persesdashboard/persesdashboard.yaml diff --git a/api/dash0monitoring/v1alpha1/operator_configuration_types.go b/api/dash0monitoring/v1alpha1/operator_configuration_types.go index 663fad1e..774ef2b7 100644 --- a/api/dash0monitoring/v1alpha1/operator_configuration_types.go +++ b/api/dash0monitoring/v1alpha1/operator_configuration_types.go @@ -146,6 +146,22 @@ func (d *Dash0OperatorConfiguration) EnsureResourceIsMarkedAsDegraded( }) } +func (d *Dash0OperatorConfiguration) GetDash0AuthorizationIfConfigured() *Authorization { + if d.Spec.Export == nil { + return nil + } + if d.Spec.Export.Dash0 == nil { + return nil + } + + authorization := d.Spec.Export.Dash0.Authorization + if (authorization.Token != nil && *authorization.Token != "") || + (authorization.SecretRef != nil && authorization.SecretRef.Name != "" && authorization.SecretRef.Key != "") { + return &authorization + } + return nil +} + func (d *Dash0OperatorConfiguration) GetResourceTypeName() string { return "Dash0OperatorConfiguration" } diff --git 
a/api/dash0monitoring/v1alpha1/types_common.go b/api/dash0monitoring/v1alpha1/types_common.go index ee108cfb..2466a4b7 100644 --- a/api/dash0monitoring/v1alpha1/types_common.go +++ b/api/dash0monitoring/v1alpha1/types_common.go @@ -52,6 +52,15 @@ type Dash0Configuration struct { // // +kubebuilder:validation:Required Authorization Authorization `json:"authorization"` + + // The base URL of the Dash0 API to talk to. This is not where telemetry will be sent, but it is used for managing + // dashboards and check rules via the operator. This property is optional. The value needs to be the API endpoint + // of your Dash0 organization. The correct API endpoint can be copied from https://app.dash0.com -> organization + // settings -> "Endpoints" -> "API". The correct endpoint value will always start with "https://api." and end in + // ".dash0.com". + // + // +kubebuilder:validation:Optional + ApiEndpoint string `json:"apiEndpoint,omitempty"` } // Authorization contains the authorization settings for Dash0. diff --git a/cmd/main.go b/cmd/main.go index 721b7b8a..46e2a74a 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -16,10 +16,12 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" "github.com/go-logr/logr" + persesv1alpha1 "github.com/perses/perses-operator/api/v1alpha1" semconv "go.opentelemetry.io/collector/semconv/v1.27.0" otelmetric "go.opentelemetry.io/otel/metric" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes" @@ -60,6 +62,7 @@ type environmentVariables struct { configurationReloaderImagePullPolicy corev1.PullPolicy filelogOffsetSynchImage string filelogOffsetSynchImagePullPolicy corev1.PullPolicy + selfMonitoringAndApiAuthToken string } const ( @@ -100,9 +103,11 @@ var ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) - utilruntime.Must(dash0v1alpha1.AddToScheme(scheme)) - //+kubebuilder:scaffold:scheme + + // for the Perses dashboard controller, the Prometheus scrape config controller, etc.
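+ // Registering the apiextensions.k8s.io and Perses v1alpha1 types with the runtime scheme is what allows the + // manager's typed clients and caches (set up below in startDash0Controllers) to watch and deserialize + // CustomResourceDefinition and PersesDashboard objects.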
+ utilruntime.Must(apiextensionsv1.AddToScheme(scheme)) + utilruntime.Must(persesv1alpha1.AddToScheme(scheme)) } func main() { @@ -111,6 +116,7 @@ func main() { var operatorConfigurationToken string var operatorConfigurationSecretRefName string var operatorConfigurationSecretRefKey string + var operatorConfigurationApiEndpoint string var isUninstrumentAll bool var metricsAddr string var enableLeaderElection bool @@ -132,6 +138,8 @@ func main() { flag.StringVar(&operatorConfigurationSecretRefKey, "operator-configuration-secret-ref-key", "", "The key in an existing Kubernetes secret containing the Dash0 auth token, used to creating an operator "+ "configuration resource.") + flag.StringVar(&operatorConfigurationApiEndpoint, "operator-configuration-api-endpoint", "", + "The Dash0 API endpoint for managing dashboards and check rules via the operator.") flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, @@ -223,6 +231,9 @@ func main() { Key: operatorConfigurationSecretRefKey, }, } + if len(operatorConfigurationApiEndpoint) > 0 { + operatorConfiguration.ApiEndpoint = operatorConfigurationApiEndpoint + } } if err = startOperatorManager( @@ -389,6 +400,8 @@ func readEnvironmentVariables() error { filelogOffsetSynchImagePullPolicy := readOptionalPullPolicyFromEnvironmentVariable(filelogOffsetSynchImagePullPolicyEnvVarName) + selfMonitoringAndApiAuthToken := os.Getenv(util.SelfMonitoringAndApiAuthTokenEnvVarName) + envVars = environmentVariables{ operatorNamespace: operatorNamespace, deploymentName: deploymentName, @@ -402,6 +415,7 @@ func readEnvironmentVariables() error { configurationReloaderImagePullPolicy: configurationReloaderImagePullPolicy, filelogOffsetSynchImage: filelogOffsetSynchImage, filelogOffsetSynchImagePullPolicy: filelogOffsetSynchImagePullPolicy, + selfMonitoringAndApiAuthToken: selfMonitoringAndApiAuthToken, } return nil @@ -504,14 +518,22 @@ func startDash0Controllers( return fmt.Errorf("unable to set up the backend connection reconciler: %w", err) } + persesDashboardCrdReconciler := &controller.PersesDashboardCrdReconciler{ + AuthToken: envVars.selfMonitoringAndApiAuthToken, + } + if err := persesDashboardCrdReconciler.SetupWithManager(ctx, mgr, startupTasksK8sClient, &setupLog); err != nil { + return fmt.Errorf("unable to set up the Perses dashboard reconciler: %w", err) + } + operatorConfigurationReconciler := &controller.OperatorConfigurationReconciler{ - Client: k8sClient, - Clientset: clientset, - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("dash0-operator-configuration-controller"), - DeploymentSelfReference: deploymentSelfReference, - Images: images, - DevelopmentMode: developmentMode, + Client: k8sClient, + Clientset: clientset, + PersesDashboardCrdReconciler: persesDashboardCrdReconciler, + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("dash0-operator-configuration-controller"), + DeploymentSelfReference: deploymentSelfReference, + Images: images, + DevelopmentMode: developmentMode, } if err := operatorConfigurationReconciler.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to set up the operator configuration reconciler: %w", err) diff --git a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml index ae4b5604..7160daa6 100644 --- 
a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml +++ b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml @@ -59,6 +59,14 @@ spec: description: The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. properties: + apiEndpoint: + description: |- + The base URL of the Dash0 API to talk to. This is not where telemetry will be sent, but it is used for managing + dashboards and check rules via the operator. This property is optional. The value needs to be the API endpoint + of your Dash0 organization. The correct API endpoint can be copied from https://app.dash0.com -> organization + settings -> "Endpoints" -> "API". The correct endpoint value will always start with "https://api." and end in + ".dash0.com". + type: string authorization: description: Mandatory authorization settings for sending data to Dash0. diff --git a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml index 56fcfa3c..8a7ba89c 100644 --- a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml +++ b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml @@ -59,6 +59,14 @@ spec: description: The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. properties: + apiEndpoint: + description: |- + The base URL of the Dash0 API to talk to. This is not where telemetry will be sent, but it is used for managing + dashboards and check rules via the operator. This property is optional. The value needs to be the API endpoint + of your Dash0 organization. The correct API endpoint can be copied from https://app.dash0.com -> organization + settings -> "Endpoints" -> "API". The correct endpoint value will always start with "https://api." and end in + ".dash0.com". + type: string authorization: description: Mandatory authorization settings for sending data to Dash0.
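For reference, a minimal Dash0OperatorConfiguration manifest exercising the new apiEndpoint property could look as follows; this is an illustrative sketch (the endpoint hosts, the resource name, and the secret name are placeholders, not values shipped with the operator):

apiVersion: operator.dash0.com/v1alpha1
kind: Dash0OperatorConfiguration
metadata:
  name: dash0-operator-configuration
spec:
  export:
    dash0:
      # OTLP/gRPC ingress endpoint; this is where telemetry is sent
      endpoint: ingress.eu-west-1.aws.dash0.com:4317
      # Dash0 API base URL; used only for managing dashboards and check rules
      apiEndpoint: https://api.eu-west-1.aws.dash0.com
      authorization:
        # illustrative secret reference; a literal token can be used instead
        secretRef:
          name: dash0-authorization-secret
          key: token

When apiEndpoint is set, the operator configuration controller hands the endpoint and dataset to the Perses dashboard CRD reconciler via SetApiEndpointAndDataset (see operator_configuration_controller.go below); when it is missing, the operator logs that dashboards will not be updated in Dash0.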
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 9d798ebc..180edd7a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -4,6 +4,14 @@ kind: ClusterRole metadata: name: manager-role rules: +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - watch - apiGroups: - apps resources: @@ -111,3 +119,11 @@ rules: - get - patch - update +- apiGroups: + - perses.dev + resources: + - persesdashboards + verbs: + - get + - list + - watch diff --git a/go.mod b/go.mod index 58a1c473..e5dcdeb7 100644 --- a/go.mod +++ b/go.mod @@ -11,19 +11,23 @@ require ( github.com/google/uuid v1.6.0 github.com/onsi/ginkgo/v2 v2.20.2 github.com/onsi/gomega v1.34.2 + github.com/perses/perses-operator v0.0.0-20240402153734-4ccf03f6c8e6 go.opentelemetry.io/collector/pdata v1.16.0 go.opentelemetry.io/collector/semconv v0.110.0 go.opentelemetry.io/otel/metric v1.30.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.31.1 + k8s.io/apiextensions-apiserver v0.31.0 k8s.io/apimachinery v0.31.1 k8s.io/client-go v0.31.1 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/controller-runtime v0.19.0 + sigs.k8s.io/yaml v1.4.0 ) require ( emperror.dev/errors v0.8.1 // indirect + github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect @@ -33,6 +37,7 @@ require ( github.com/evanphx/json-patch/v5 v5.9.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-jose/go-jose/v3 v3.0.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect @@ -49,18 +54,27 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect + github.com/jpillora/backoff v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/muhlemmer/gu v0.3.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect + github.com/nexucis/lamenv v0.5.2 // indirect + github.com/perses/common v0.23.1 // indirect + github.com/perses/perses v0.44.0 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus/client_golang v1.19.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/x448/float16 v0.8.4 // indirect + github.com/zitadel/oidc/v3 v3.18.0 // indirect + github.com/zitadel/schema v1.3.0 // indirect go.opentelemetry.io/otel v1.30.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.30.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.30.0 // indirect @@ -70,6 +84,7 @@ require ( go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect + golang.org/x/crypto v0.27.0 // indirect golang.org/x/exp 
v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/net v0.29.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect @@ -86,12 +101,10 @@ require ( gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/apiextensions-apiserver v0.31.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect ) replace github.com/dash0hq/dash0-operator/images/pkg/common => ./images/pkg/common diff --git a/go.sum b/go.sum index 602f5d5b..288d9cff 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0= emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE= +github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0= +github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= @@ -22,6 +24,8 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k= +github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -61,6 +65,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -76,12 +82,24 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/muhlemmer/gu v0.3.1 h1:7EAqmFrW7n3hETvuAdmFmn4hS8W+z3LgKtrnow+YzNM= +github.com/muhlemmer/gu v0.3.1/go.mod 
h1:YHtHR+gxM+bKEIIs7Hmi9sPT3ZDUvTN/i88wQpZkrdM= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nexucis/lamenv v0.5.2 h1:tK/u3XGhCq9qIoVNcXsK9LZb8fKopm0A5weqSRvHd7M= +github.com/nexucis/lamenv v0.5.2/go.mod h1:HusJm6ltmmT7FMG8A750mOLuME6SHCsr2iFYxp5fFi0= github.com/onsi/ginkgo/v2 v2.20.2 h1:7NVCeyIWROIAheY21RLS+3j2bb52W0W82tkberYytp4= github.com/onsi/ginkgo/v2 v2.20.2/go.mod h1:K9gyxPIlb+aIvnZ8bd9Ak+YP18w3APlR+5coaZoE2ag= github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= +github.com/perses/common v0.23.1 h1:agRsLOOFMoecwATZ0sb414ulOYC70z5vUvsi2RCdWQM= +github.com/perses/common v0.23.1/go.mod h1:CZ4xpwLMZ61vBM32dfSflIBpqxbnHOlNknIziFAjU0c= +github.com/perses/perses v0.44.0 h1:5OZRiT4+mfFyQ3fK2p89WIJzSseB5oQLr4ZWXG3kO4g= +github.com/perses/perses v0.44.0/go.mod h1:vSJpzFS1gjolahl+Of7buj38xohOC4euuLTjUWPnlOY= +github.com/perses/perses-operator v0.0.0-20240402153734-4ccf03f6c8e6 h1:wHOV+H/fjZiTVxdYhLa/DU6PIdaOxMgjOYFD33Bs5V8= +github.com/perses/perses-operator v0.0.0-20240402153734-4ccf03f6c8e6/go.mod h1:FTIGrH1+0cqFVeCY+XksI65CGmmAsgdqt2OiEyPrPOM= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -97,16 +115,24 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zitadel/oidc/v3 v3.18.0 h1:NGdxLIYbuvaIqc/Na1fu61wBXIbqufp7LsFNV1bXOQw= 
+github.com/zitadel/oidc/v3 v3.18.0/go.mod h1:tY75hMcm07McpPXzvgvFTNPefPYDnHRYZQZVn9gtAps= +github.com/zitadel/schema v1.3.0 h1:kQ9W9tvIwZICCKWcMvCEweXET1OcOyGEuFbHs4o5kg0= +github.com/zitadel/schema v1.3.0/go.mod h1:NptN6mkBDFvERUCvZHlvWmmME+gmZ44xzwRXwhzsbtc= go.opentelemetry.io/collector/pdata v1.16.0 h1:g02K8jlRnmQ7TQDuXpdgVL6vIxIVqr5Gbb1qIR27rto= go.opentelemetry.io/collector/pdata v1.16.0/go.mod h1:YZZJIt2ehxosYf/Y1pbvexjNWsIGNNrzzlCTO9jC1F4= go.opentelemetry.io/collector/semconv v0.110.0 h1:KHQnOHe3gUz0zsxe8ph9kN5OTypCFD4V+06AiBTfeNk= @@ -138,14 +164,24 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= @@ -153,15 +189,34 @@ golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= @@ -170,6 +225,8 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -196,6 +253,7 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= diff --git a/helm-chart/dash0-operator/README.md b/helm-chart/dash0-operator/README.md index ff4a7327..85fea568 100644 --- a/helm-chart/dash0-operator/README.md +++ b/helm-chart/dash0-operator/README.md @@ -31,7 +31,8 @@ To use the operator, you will need provide two configuration values: * `endpoint`: The URL of the Dash0 ingress endpoint backend to which telemetry data will be sent. This property is mandatory when installing the operator. This is the OTLP/gRPC endpoint of your Dash0 organization. - The correct OTLP/gRPC endpoint can be copied fom https://app.dash0.com -> organization settings -> "Endpoints". + The correct OTLP/gRPC endpoint can be copied from https://app.dash0.com -> organization settings -> "Endpoints" + -> "OTLP/gRPC". Note that the correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. Including a protocol prefix (e.g. `https://`) is optional. * Either `token` or `secretRef`: Exactly one of these two properties needs to be provided when installing the operator. @@ -139,7 +140,8 @@ You need to provide two configuration settings: * `spec.export.dash0.endpoint`: The URL of the Dash0 ingress endpoint backend to which telemetry data will be sent. This property is mandatory. Replace the value in the example above with the OTLP/gRPC endpoint of your Dash0 organization. - The correct OTLP/gRPC endpoint can be copied fom https://app.dash0.com -> organization settings -> "Endpoints". + The correct OTLP/gRPC endpoint can be copied from https://app.dash0.com -> organization settings -> "Endpoints" + -> "OTLP/gRPC". Note that the correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. Including a protocol prefix (e.g. `https://`) is optional. * `spec.export.dash0.authorization.token` or `spec.export.dash0.authorization.secretRef`: Exactly one of these two diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index 4d51afcf..792eae3f 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -10,6 +10,16 @@ metadata: rules: +# Permissions required to watch for the foreign CRDs (Perses dashboards, Prometheus scrape configs). +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - watch + # Permissions required to instrument workloads in the apps API group. - apiGroups: - apps @@ -75,6 +85,16 @@ rules: - get - list +# Permissions required to watch for the Perses dashboard resources. +- apiGroups: + - perses.dev + resources: + - persesdashboards + verbs: + - get + - list + - watch + # Permissions required to manage the Dash0 monitoring resource, its finalizers and status.
- apiGroups: - operator.dash0.com diff --git a/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml b/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml index ea02b997..1446572f 100644 --- a/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml +++ b/helm-chart/dash0-operator/templates/operator/custom-resource-definition-operator-configuration.yaml @@ -59,6 +59,14 @@ spec: description: The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. properties: + apiEndpoint: + description: |- + The base URL of the Dash0 API to talk to. This is not where telemetry will be sent, but it is used for managing + dashboards and check rules via the operator. This property is optional. The value needs to be the API endpoint + of your Dash0 organization. The correct API endpoint can be copied from https://app.dash0.com -> organization + settings -> "Endpoints" -> "API". The correct endpoint value will always start with "https://api." and end in + ".dash0.com". + type: string authorization: description: Mandatory authorization settings for sending data to Dash0. @@ -69,7 +77,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +97,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +111,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. type: string required: - authorization diff --git a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml index ef25b1ba..6673c4d1 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml @@ -100,6 +100,9 @@ spec: {{- else }} {{- fail "Error: operator.dash0Export.enabled is set to true, but neither operator.dash0Export.token nor operator.dash0Export.secretRef.name & operator.dash0Export.secretRef.key have been provided. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator."
-}} {{- end }} +{{- if .Values.operator.dash0Export.apiEndpoint }} + - --operator-configuration-api-endpoint={{ .Values.operator.dash0Export.apiEndpoint }} +{{- end }} {{- end }} env: - name: DASH0_OPERATOR_NAMESPACE diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap index c80d9dda..c8f1048b 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap @@ -13,6 +13,14 @@ cluster roles should match snapshot: helm.sh/chart: dash0-operator-0.0.0 name: dash0-operator-manager-role rules: + - apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - list + - watch - apiGroups: - apps resources: @@ -66,6 +74,14 @@ cluster roles should match snapshot: - delete - get - list + - apiGroups: + - perses.dev + resources: + - persesdashboards + verbs: + - get + - list + - watch - apiGroups: - operator.dash0.com resources: diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap index eacd0707..ffe1cf59 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/custom-resource-definition-operator-configuration_test.yaml.snap @@ -57,6 +57,14 @@ custom resource definition should match snapshot: dash0: description: The configuration of the Dash0 ingress endpoint to which telemetry data will be sent. properties: + apiEndpoint: + description: |- + The base URL of the Dash0 API to talk to. This is not where telemetry will be sent, but it is used for managing + dashboards and check rules via the operator. This property is optional. The value needs to be the API endpoint + of your Dash0 organization. The correct API endpoint can be copied from https://app.dash0.com -> organization + settings -> "Endpoints" -> "API". The correct endpoint value will always start with "https://api." and end in + ".dash0.com". + type: string authorization: description: Mandatory authorization settings for sending data to Dash0. maxProperties: 1 @@ -66,7 +74,7 @@ custom resource definition should match snapshot: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -84,7 +92,8 @@ custom resource definition should match snapshot: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens".
type: string type: object dataset: @@ -97,8 +106,8 @@ custom resource definition should match snapshot: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. type: string required: - authorization diff --git a/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml b/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml index 2ade23c6..ba2625cb 100644 --- a/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml +++ b/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml @@ -245,6 +245,7 @@ tests: enabled: true endpoint: https://ingress.dash0.com token: "very-secret-dash0-auth-token" + apiEndpoint: https://api.dash0.com asserts: - equal: path: spec.template.spec.containers[0].args[3] @@ -252,6 +253,9 @@ tests: - equal: path: spec.template.spec.containers[0].args[4] value: --operator-configuration-token=very-secret-dash0-auth-token + - equal: + path: spec.template.spec.containers[0].args[5] + value: --operator-configuration-api-endpoint=https://api.dash0.com # Note: We deliberately do not have a test for the operator.dash0Export.secretRef variant, since this would trigger # a check whether the secret actually exists in the cluster, which of course would fail when runnig helm chart unit # tests. diff --git a/helm-chart/dash0-operator/values.yaml b/helm-chart/dash0-operator/values.yaml index cca4c402..869ccd48 100644 --- a/helm-chart/dash0-operator/values.yaml +++ b/helm-chart/dash0-operator/values.yaml @@ -5,23 +5,31 @@ operator: # Use the operator.dash0Export settings to configure the connection to the Dash0 backend; telemetry will be sent to - # the configured Dash0 backend by default. Under the hood, this will create a Dash0OperatorConfiguration resource - # right away, when starting the operator. If left empty, you can always create a Dash0OperatorConfiguration resource - # manually later. + # the configured Dash0 backend by default. Under the hood, this will make sure a Dash0OperatorConfiguration resource + # will be created right away, when starting the operator. If left empty, you can always create a + # Dash0OperatorConfiguration resource manually later. dash0Export: # Set this to true to enable the creation of a Dash0OperatorConfiguration resource at startup. If a # Dash0OperatorConfiguration already exists in the cluster, no action will be taken. Note that if this is set to - # true, you will also need to provide a valid endpoint (operator.dash0Export.endpoint), and either or an auth - # token (operator.dash0Export.token) or a reference to a Kubernetes secret containing that token + # true, you will also need to provide a valid endpoint (operator.dash0Export.endpoint), and either an auth token + # (operator.dash0Export.token) or a reference to a Kubernetes secret containing that token # (operator.dash0Export.secretRef). enabled: false # The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory if # operator.dash0Export.enabled is true, otherwise it will be ignored. 
The value needs to be the OTLP/gRPC endpoint # of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom https://app.dash0.com -> organization - # settings -> "Endpoints". The correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. + # settings -> "Endpoints" -> "OTLP/gRPC". The correct endpoint value will always start with `ingress.` and end in + # `dash0.com:4317`. endpoint: + # The base URL of the Dash0 API to talk to. This is not where telemetry will be sent, but it is used for managing + # dashboards and check rules via the operator. This property is optional. The value needs to be the API endpoint + # of your Dash0 organization. The correct API endpoint can be copied from https://app.dash0.com -> organization + # settings -> "Endpoints" -> "API". The correct endpoint value will always start with "https://api." and end in + # ".dash0.com". + apiEndpoint: + # The Dash0 authorization token. This property is optional, but either this property or the secretRef configuration # has to be provided if operator.dash0Export.enabled is true. If operator.dash0Export.enabled is false, this # property will be ignored. diff --git a/images/instrumentation/Dockerfile b/images/instrumentation/Dockerfile index 3ec7a5a1..1515a8d0 100644 --- a/images/instrumentation/Dockerfile +++ b/images/instrumentation/Dockerfile @@ -2,7 +2,7 @@ FROM node:20.13.1-alpine3.19 AS build-node.js RUN mkdir -p /dash0-init-container/instrumentation/node.js WORKDIR /dash0-init-container/instrumentation/node.js -COPY node.js/package* . +COPY node.js/package* ./ COPY node.js/dash0hq-opentelemetry-*.tgz . RUN NPM_CONFIG_UPDATE_NOTIFIER=false \ npm ci \ diff --git a/internal/backendconnection/backendconnection_manager_test.go b/internal/backendconnection/backendconnection_manager_test.go index 02f0f650..1306b02c 100644 --- a/internal/backendconnection/backendconnection_manager_test.go +++ b/internal/backendconnection/backendconnection_manager_test.go @@ -21,7 +21,7 @@ import ( ) var ( - operatorNamespace = Dash0OperatorNamespace + operatorNamespace = OperatorNamespace dash0MonitoringResource = &dash0v1alpha1.Dash0Monitoring{ Spec: dash0v1alpha1.Dash0MonitoringSpec{ @@ -46,7 +46,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { var manager *BackendConnectionManager BeforeAll(func() { - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) EnsureTestNamespaceExists(ctx, k8sClient) }) diff --git a/internal/backendconnection/otelcolresources/desired_state.go b/internal/backendconnection/otelcolresources/desired_state.go index 2c794167..373abfc3 100644 --- a/internal/backendconnection/otelcolresources/desired_state.go +++ b/internal/backendconnection/otelcolresources/desired_state.go @@ -581,7 +581,7 @@ func assembleCollectorEnvVars(config *oTelColConfig, goMemLimit string) ([]corev if config.Export.Dash0 != nil { authTokenEnvVar, err := util.CreateEnvVarForAuthorization( - *config.Export.Dash0, + config.Export.Dash0.Authorization, authTokenEnvVarName, ) if err != nil { diff --git a/internal/backendconnection/otelcolresources/otelcol_resources.go b/internal/backendconnection/otelcolresources/otelcol_resources.go index 68aff6f3..1a16acc6 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources.go @@ -218,14 +218,6 @@ func (m *OTelColResourceManager) updateResource( desiredResource client.Object, logger *logr.Logger, ) (bool,
error) { - if m.DevelopmentMode { - logger.Info(fmt.Sprintf( - "checking whether resource %s/%s requires update", - desiredResource.GetNamespace(), - desiredResource.GetName(), - )) - } - if err := m.setOwnerReference(desiredResource, logger); err != nil { return false, err } @@ -246,12 +238,6 @@ func (m *OTelColResourceManager) updateResource( } hasChanged := !patchResult.IsEmpty() && !isKnownIrrelevantPatch(patchResult) if !hasChanged { - if m.DevelopmentMode { - logger.Info(fmt.Sprintf("resource %s/%s is already up to date", - desiredResource.GetNamespace(), - desiredResource.GetName(), - )) - } return false, nil } diff --git a/internal/backendconnection/otelcolresources/otelcol_resources_test.go b/internal/backendconnection/otelcolresources/otelcol_resources_test.go index d06ebbb3..b672b582 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources_test.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources_test.go @@ -29,7 +29,7 @@ var ( }, ObjectMeta: metav1.ObjectMeta{ Name: "test-config-map", - Namespace: Dash0OperatorNamespace, + Namespace: OperatorNamespace, Labels: map[string]string{ "label": "value", }, @@ -48,7 +48,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() var dash0MonitoringResource *dash0v1alpha1.Dash0Monitoring BeforeAll(func() { - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) EnsureTestNamespaceExists(ctx, k8sClient) dash0MonitoringResource = EnsureMonitoringResourceExists(ctx, k8sClient) }) @@ -67,13 +67,13 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() AfterEach(func() { Expect(oTelColResourceManager.DeleteResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, &logger, )).To(Succeed()) Eventually(func(g Gomega) { - VerifyCollectorResourcesDoNotExist(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResourcesDoNotExist(ctx, k8sClient, OperatorNamespace) }, 500*time.Millisecond, 20*time.Millisecond).Should(Succeed()) - Expect(k8sClient.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace(Dash0OperatorNamespace))).To(Succeed()) + Expect(k8sClient.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace(OperatorNamespace))).To(Succeed()) }) Describe("when dealing with individual resources", func() { @@ -125,7 +125,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -134,7 +134,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Expect(resourcesHaveBeenCreated).To(BeTrue()) Expect(resourcesHaveBeenUpdated).To(BeFalse()) - VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResources(ctx, k8sClient, OperatorNamespace) }) It("should fall back to the operator configuration export settings if the monitoring resource has no export", func() { @@ -144,7 +144,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() ) resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, &dash0v1alpha1.Dash0Monitoring{ Spec: dash0v1alpha1.Dash0MonitoringSpec{}, @@ -154,13 +154,13 @@ var _ = Describe("The OpenTelemetry Collector 
resource manager", Ordered, func() Expect(err).ToNot(HaveOccurred()) Expect(resourcesHaveBeenCreated).To(BeTrue()) Expect(resourcesHaveBeenUpdated).To(BeFalse()) - VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResources(ctx, k8sClient, OperatorNamespace) }) It("should fail if the monitoring resource has no export and there is no operator configuration resource", func() { _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, &dash0v1alpha1.Dash0Monitoring{ Spec: dash0v1alpha1.Dash0MonitoringSpec{}, @@ -171,7 +171,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() MatchError( "the provided Dash0Monitoring resource does not have an export configuration and no " + "Dash0OperatorConfiguration resource has been found")) - VerifyCollectorResourcesDoNotExist(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResourcesDoNotExist(ctx, k8sClient, OperatorNamespace) }) It("should fail if the monitoring resource has no export and the existing operator configuration "+ @@ -183,7 +183,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() ) _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, &dash0v1alpha1.Dash0Monitoring{ Spec: dash0v1alpha1.Dash0MonitoringSpec{}, @@ -192,7 +192,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() ) Expect(err).To(MatchError("the provided Dash0Monitoring resource does not have an export configuration " + "and the Dash0OperatorConfiguration resource does not have one either")) - VerifyCollectorResourcesDoNotExist(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResourcesDoNotExist(ctx, k8sClient, OperatorNamespace) }) It("should delete outdated resources from older operator versions", func() { @@ -200,13 +200,13 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: nameOfOutdatedResources, - Namespace: Dash0OperatorNamespace, + Namespace: OperatorNamespace, }, })).To(Succeed()) Expect(k8sClient.Create(ctx, &appsv1.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ Name: nameOfOutdatedResources, - Namespace: Dash0OperatorNamespace, + Namespace: OperatorNamespace, }, Spec: appsv1.DaemonSetSpec{ Selector: &metav1.LabelSelector{ @@ -231,7 +231,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -241,14 +241,14 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() VerifyResourceDoesNotExist( ctx, k8sClient, - Dash0OperatorNamespace, + OperatorNamespace, nameOfOutdatedResources, &corev1.ConfigMap{}, ) VerifyResourceDoesNotExist( ctx, k8sClient, - Dash0OperatorNamespace, + OperatorNamespace, nameOfOutdatedResources, &appsv1.DaemonSet{}, ) @@ -260,7 +260,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -270,12 +270,12 @@ var _ = Describe("The OpenTelemetry Collector resource manager", 
Ordered, func() // Change some arbitrary fields in some resources, then simulate a reconcile cycle and verify that all // resources are back in their desired state. - daemonSetConifgMap := GetOTelColDaemonSetConfigMap(ctx, k8sClient, Dash0OperatorNamespace) + daemonSetConifgMap := GetOTelColDaemonSetConfigMap(ctx, k8sClient, OperatorNamespace) daemonSetConifgMap.Data["config.yaml"] = "{}" daemonSetConifgMap.Data["bogus-key"] = "" Expect(k8sClient.Update(ctx, daemonSetConifgMap)).To(Succeed()) - daemonSet := GetOTelColDaemonSet(ctx, k8sClient, Dash0OperatorNamespace) + daemonSet := GetOTelColDaemonSet(ctx, k8sClient, OperatorNamespace) daemonSet.Spec.Template.Spec.InitContainers = []corev1.Container{} daemonSet.Spec.Template.Spec.Containers[0].Image = "wrong-collector-image:latest" daemonSet.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{ @@ -284,12 +284,12 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() } Expect(k8sClient.Update(ctx, daemonSet)).To(Succeed()) - deploymentConfigMap := GetOTelColDeploymentConfigMap(ctx, k8sClient, Dash0OperatorNamespace) + deploymentConfigMap := GetOTelColDeploymentConfigMap(ctx, k8sClient, OperatorNamespace) deploymentConfigMap.Data["config.yaml"] = "{}" deploymentConfigMap.Data["bogus-key"] = "" Expect(k8sClient.Update(ctx, deploymentConfigMap)).To(Succeed()) - deployment := GetOTelColDeployment(ctx, k8sClient, Dash0OperatorNamespace) + deployment := GetOTelColDeployment(ctx, k8sClient, OperatorNamespace) var changedReplicas int32 = 5 deployment.Spec.Replicas = &changedReplicas deployment.Spec.Template.Spec.Containers[0].Ports = []corev1.ContainerPort{ @@ -300,7 +300,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -309,7 +309,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Expect(resourcesHaveBeenCreated).To(BeFalse()) Expect(resourcesHaveBeenUpdated).To(BeTrue()) - VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResources(ctx, k8sClient, OperatorNamespace) }) }) @@ -318,7 +318,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -328,19 +328,19 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() // Delete some arbitrary resources, then simulate a reconcile cycle and verify that all resources have been // recreated. 
- daemonSetConifgMap := GetOTelColDaemonSetConfigMap(ctx, k8sClient, Dash0OperatorNamespace) + daemonSetConifgMap := GetOTelColDaemonSetConfigMap(ctx, k8sClient, OperatorNamespace) Expect(k8sClient.Delete(ctx, daemonSetConifgMap)).To(Succeed()) - deploymentConfigMap := GetOTelColDeploymentConfigMap(ctx, k8sClient, Dash0OperatorNamespace) + deploymentConfigMap := GetOTelColDeploymentConfigMap(ctx, k8sClient, OperatorNamespace) Expect(k8sClient.Delete(ctx, deploymentConfigMap)).To(Succeed()) - deployment := GetOTelColDeployment(ctx, k8sClient, Dash0OperatorNamespace) + deployment := GetOTelColDeployment(ctx, k8sClient, OperatorNamespace) Expect(k8sClient.Delete(ctx, deployment)).To(Succeed()) resourcesHaveBeenCreated, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -348,7 +348,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Expect(err).ToNot(HaveOccurred()) Expect(resourcesHaveBeenCreated).To(BeTrue()) - VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResources(ctx, k8sClient, OperatorNamespace) }) }) @@ -357,7 +357,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() // create resources _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -369,7 +369,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() // resources). resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -383,7 +383,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() resourcesHaveBeenCreated, resourcesHaveBeenUpdated, err = oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, @@ -392,7 +392,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Expect(resourcesHaveBeenCreated).To(BeFalse()) Expect(resourcesHaveBeenUpdated).To(BeFalse()) - VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResources(ctx, k8sClient, OperatorNamespace) }) }) @@ -401,23 +401,23 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() // create resources (so there is something to delete) _, _, err := oTelColResourceManager.CreateOrUpdateOpenTelemetryCollectorResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, TestImages, dash0MonitoringResource, &logger, ) Expect(err).ToNot(HaveOccurred()) - VerifyCollectorResources(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResources(ctx, k8sClient, OperatorNamespace) // delete everything again err = oTelColResourceManager.DeleteResources( ctx, - Dash0OperatorNamespace, + OperatorNamespace, &logger, ) Expect(err).ToNot(HaveOccurred()) - VerifyCollectorResourcesDoNotExist(ctx, k8sClient, Dash0OperatorNamespace) + VerifyCollectorResourcesDoNotExist(ctx, k8sClient, OperatorNamespace) }) }) }) diff --git a/internal/dash0/controller/dash0_controller_test.go b/internal/dash0/controller/dash0_controller_test.go index 87c55b53..96cd48bd 100644 --- 
a/internal/dash0/controller/dash0_controller_test.go +++ b/internal/dash0/controller/dash0_controller_test.go @@ -36,10 +36,10 @@ import ( var ( namespace = TestNamespaceName extraDash0MonitoringResourceNames []types.NamespacedName - operatorNamespace = Dash0OperatorNamespace + operatorNamespace = OperatorNamespace ) -var _ = Describe("The Dash0 controller", Ordered, func() { +var _ = Describe("The monitoring resource controller", Ordered, func() { ctx := context.Background() var createdObjects []client.Object @@ -47,7 +47,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { BeforeAll(func() { EnsureTestNamespaceExists(ctx, k8sClient) - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) }) BeforeEach(func() { @@ -77,7 +77,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { Clientset: clientset, Instrumenter: instrumenter, Images: TestImages, - OperatorNamespace: Dash0OperatorNamespace, + OperatorNamespace: OperatorNamespace, BackendConnectionManager: backendConnectionManager, DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, } diff --git a/internal/dash0/controller/operator_configuration_controller.go b/internal/dash0/controller/operator_configuration_controller.go index 27c3153f..03de7148 100644 --- a/internal/dash0/controller/operator_configuration_controller.go +++ b/internal/dash0/controller/operator_configuration_controller.go @@ -25,13 +25,14 @@ import ( type OperatorConfigurationReconciler struct { client.Client - Clientset *kubernetes.Clientset - Scheme *runtime.Scheme - Recorder record.EventRecorder - DeploymentSelfReference *appsv1.Deployment - DanglingEventsTimeouts *util.DanglingEventsTimeouts - Images util.Images - DevelopmentMode bool + Clientset *kubernetes.Clientset + PersesDashboardCrdReconciler *PersesDashboardCrdReconciler + Scheme *runtime.Scheme + Recorder record.EventRecorder + DeploymentSelfReference *appsv1.Deployment + DanglingEventsTimeouts *util.DanglingEventsTimeouts + Images util.Images + DevelopmentMode bool } const ( @@ -137,6 +138,11 @@ func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctr logger.Info("Reconciling the deletion of the operator configuration resource", "name", req.Name) } + // Irrespective of whether self-monitoring settings need to be updated, we always need to update the API token env + // var with the value from the Dash0 export of the operator configuration resource, if it exists (and remove it if + // it does not exist). This is because the API token might be used for self monitoring, but also for API access, + // e.g. managing Perses dashboards via the operator.
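As context for the comment above: resolving the Dash0 export's authorization settings into the token env var means rendering either a literal token or a secret reference as a `corev1.EnvVar`. The following standalone sketch shows the shape of that conversion with hypothetical type names; it is not the operator's `util.CreateEnvVarForAuthorization` verbatim (whose signature change appears further down in this patch):

```go
package sketch

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// secretRef and authorization mirror the two mutually exclusive ways the
// Dash0 export can provide an auth token: a literal token or a secret key.
type secretRef struct {
	Name string
	Key  string
}

type authorization struct {
	Token     *string
	SecretRef *secretRef
}

// envVarForAuthorization renders the token as a plain env var value, or as a
// valueFrom/secretKeyRef when a secret reference is configured instead.
func envVarForAuthorization(auth authorization, envVarName string) (corev1.EnvVar, error) {
	if auth.Token != nil && *auth.Token != "" {
		return corev1.EnvVar{Name: envVarName, Value: *auth.Token}, nil
	}
	if auth.SecretRef != nil && auth.SecretRef.Name != "" && auth.SecretRef.Key != "" {
		return corev1.EnvVar{
			Name: envVarName,
			ValueFrom: &corev1.EnvVarSource{
				SecretKeyRef: &corev1.SecretKeySelector{
					LocalObjectReference: corev1.LocalObjectReference{Name: auth.SecretRef.Name},
					Key:                  auth.SecretRef.Key,
				},
			},
		}, nil
	}
	return corev1.EnvVar{}, fmt.Errorf("neither token nor secretRef provided for env var %s", envVarName)
}
```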
+ currentSelfMonitoringConfiguration, err := selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( r.DeploymentSelfReference, @@ -151,9 +157,7 @@ func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctr if resourceDeleted { if currentSelfMonitoringConfiguration.Enabled { - if err = r.applySelfMonitoring(ctx, selfmonitoring.SelfMonitoringConfiguration{ - Enabled: false, - }); err != nil { + if err = r.removeSelfMonitoringAndUpdate(ctx); err != nil { logger.Error(err, "cannot disable self-monitoring of the controller deployment, requeuing reconcile request.") return ctrl.Result{ Requeue: true, @@ -167,6 +171,22 @@ func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, nil } + if resource.Spec.Export != nil && + resource.Spec.Export.Dash0 != nil && + resource.Spec.Export.Dash0.ApiEndpoint != "" { + dataset := resource.Spec.Export.Dash0.Dataset + if dataset == "" { + dataset = "default" + } + r.PersesDashboardCrdReconciler.SetApiEndpointAndDataset(&ApiConfig{ + Endpoint: resource.Spec.Export.Dash0.ApiEndpoint, + Dataset: dataset, + }) + } else { + logger.Info("Settings required for managing dashboards via the operator are missing, the operator will not " + "update dashboards in Dash0.") + } + if _, err = util.InitStatusConditions( ctx, r.Client, @@ -187,45 +207,88 @@ func (r *OperatorConfigurationReconciler) Reconcile(ctx context.Context, req ctr }, err } - if reflect.DeepEqual(currentSelfMonitoringConfiguration, newSelfMonitoringConfiguration) { - logger.Info("Self-monitoring configuration of the controller deployment is up-to-date") - } else { - if err = r.applySelfMonitoring(ctx, newSelfMonitoringConfiguration); err != nil { - logger.Error(err, "Cannot apply self-monitoring configurations to the controller deployment") - resource.EnsureResourceIsMarkedAsDegraded("CannotApplySelfMonitoring", "Could not update the controller deployment to reflect the self-monitoring settings") - if statusUpdateErr := r.Status().Update(ctx, resource); statusUpdateErr != nil { - logger.Error(statusUpdateErr, "Failed to update Dash0 operator status conditions, requeuing reconcile request.") + deploymentNeedsUpdate := false + managerDeployment := &appsv1.Deployment{} + dash0Authorization := resource.GetDash0AuthorizationIfConfigured() + if err = r.Client.Get(ctx, client.ObjectKeyFromObject(r.DeploymentSelfReference), managerDeployment); err != nil { + return ctrl.Result{}, fmt.Errorf("cannot fetch the current controller deployment: %w", err) + } + if !reflect.DeepEqual(currentSelfMonitoringConfiguration, newSelfMonitoringConfiguration) { + if err = r.applySelfMonitoring( + managerDeployment, + newSelfMonitoringConfiguration, + dash0Authorization, + ); err != nil { + logger.Error(err, "cannot apply self-monitoring configuration to the controller deployment") + if statusUpdateErr := r.markAsDegraded( + ctx, + resource, + "CannotUpdateControllerDeployment", + "Could not update the controller deployment to reflect the self-monitoring settings.", + &logger, + ); statusUpdateErr != nil { + return ctrl.Result{}, statusUpdateErr + } + return ctrl.Result{}, err + } + deploymentNeedsUpdate = true + } else if dash0Authorization != nil { + if err = selfmonitoring.UpdateApiTokenWithoutSelfMonitoringInManagerDeployment( + managerDeployment, + ManagerContainerName, + *dash0Authorization, + ); err != nil { + logger.Error(err, "cannot update the API token in the controller deployment") + if statusUpdateErr := r.markAsDegraded( + ctx, +
resource, + "CannotUpdateControllerDeployment", + "Could not update the controller deployment to add the Dash0 API token.", + &logger, + ); statusUpdateErr != nil { return ctrl.Result{}, statusUpdateErr } - return ctrl.Result{ - Requeue: true, - }, nil + return ctrl.Result{}, err } + deploymentNeedsUpdate = true + } - logger.Info("Self-monitoring configurations applied to the controller deployment", "self-monitoring", newSelfMonitoringConfiguration) + if deploymentNeedsUpdate { + if err = r.Client.Update(ctx, managerDeployment); err != nil { + logger.Error(err, "cannot update the controller deployment") + if statusUpdateErr := r.markAsDegraded( + ctx, + resource, + "CannotUpdateControllerDeployment", + "Could not update the controller deployment.", + &logger, + ); statusUpdateErr != nil { + return ctrl.Result{}, statusUpdateErr } + return ctrl.Result{}, err + } + logger.Info("The controller deployment has been updated.") + } else { + logger.Info("The controller deployment is up to date.") } resource.EnsureResourceIsMarkedAsAvailable() if err = r.Status().Update(ctx, resource); err != nil { logger.Error(err, updateStatusFailedMessageOperatorConfiguration) - return ctrl.Result{}, fmt.Errorf("cannot mark Dash0 operator configuration resource as available: %w", err) + return ctrl.Result{}, fmt.Errorf("cannot mark the Dash0 operator configuration resource as available: %w", err) } return ctrl.Result{}, nil } func (r *OperatorConfigurationReconciler) applySelfMonitoring( - ctx context.Context, + managerDeployment *appsv1.Deployment, selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + dash0Authorization *dash0v1alpha1.Authorization, ) error { - updatedDeployment := &appsv1.Deployment{} - if err := r.Client.Get(ctx, client.ObjectKeyFromObject(r.DeploymentSelfReference), updatedDeployment); err != nil { - return fmt.Errorf("cannot fetch the current controller deployment: %w", err) - } - if selfMonitoringConfiguration.Enabled { - if err := selfmonitoring.EnableSelfMonitoringInControllerDeployment( - updatedDeployment, + if err := selfmonitoring.EnableSelfMonitoringInManagerDeployment( + managerDeployment, ManagerContainerName, selfMonitoringConfiguration, r.Images.GetOperatorVersion(), @@ -234,13 +297,59 @@ func (r *OperatorConfigurationReconciler) applySelfMonitoring( return fmt.Errorf("cannot apply settings to enable self-monitoring to the controller deployment: %w", err) } } else { - if err := selfmonitoring.DisableSelfMonitoringInControllerDeployment( - updatedDeployment, + if dash0Authorization != nil { + if err := selfmonitoring.UpdateApiTokenWithoutSelfMonitoringInManagerDeployment( + managerDeployment, + ManagerContainerName, + *dash0Authorization, + ); err != nil { + return fmt.Errorf("cannot add the Dash0 API token to the controller deployment: %w", err) + } + } + + if err := selfmonitoring.DisableSelfMonitoringInManagerDeployment( + managerDeployment, ManagerContainerName, + dash0Authorization != nil, ); err != nil { return fmt.Errorf("cannot apply settings to disable self-monitoring to the controller deployment: %w", err) } } + return nil +} + +func (r *OperatorConfigurationReconciler) removeSelfMonitoringAndUpdate(ctx context.Context) error { + updatedDeployment := &appsv1.Deployment{} + if err := r.Client.Get(ctx, client.ObjectKeyFromObject(r.DeploymentSelfReference), updatedDeployment); err != nil { + return fmt.Errorf("cannot fetch the current controller deployment: %w", err) + } + + if err := selfmonitoring.DisableSelfMonitoringInManagerDeployment(
updatedDeployment, + ManagerContainerName, + true, + ); err != nil { + return fmt.Errorf("cannot apply settings to disable self-monitoring to the controller deployment: %w", err) + } + return r.Client.Update(ctx, updatedDeployment) } + +func (r *OperatorConfigurationReconciler) markAsDegraded( + ctx context.Context, + resource *dash0v1alpha1.Dash0OperatorConfiguration, + reason string, + message string, + logger *logr.Logger, +) error { + resource.EnsureResourceIsMarkedAsDegraded( + reason, + message, + ) + if err := r.Status().Update(ctx, resource); err != nil { + logger.Error(err, "Failed to update Dash0 operator status conditions, requeuing reconcile request.") + return err + } + return nil +} diff --git a/internal/dash0/controller/operator_configuration_controller_test.go b/internal/dash0/controller/operator_configuration_controller_test.go index 47487607..c6fc1a5b 100644 --- a/internal/dash0/controller/operator_configuration_controller_test.go +++ b/internal/dash0/controller/operator_configuration_controller_test.go @@ -25,23 +25,23 @@ import ( type SelfMonitoringTestConfig struct { createExport func() dash0v1alpha1.Export - verify func(Gomega, selfmonitoring.SelfMonitoringConfiguration) + verify func(Gomega, selfmonitoring.SelfMonitoringConfiguration, *appsv1.Deployment) } var ( reconciler *OperatorConfigurationReconciler ) -var _ = Describe("The Dash0 controller", Ordered, func() { +var _ = Describe("The operator configuration resource controller", Ordered, func() { ctx := context.Background() var controllerDeployment *appsv1.Deployment BeforeAll(func() { EnsureTestNamespaceExists(ctx, k8sClient) - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) }) - Describe("when creating the Dash0Operator resource", func() { + Describe("when creating the operator configuration resource", func() { BeforeEach(func() { // When creating the resource, we assume the operator has no @@ -82,7 +82,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { ) Expect(err).NotTo(HaveOccurred()) Expect(selfMonitoringConfiguration.Enabled).To(BeTrue()) - config.verify(g, selfMonitoringConfiguration) + config.verify(g, selfMonitoringConfiguration, updatedDeployment) }, timeout, pollingInterval).Should(Succeed()) }, Entry("with a Dash0 export with a token", SelfMonitoringTestConfig{ @@ -104,6 +104,43 @@ var _ = Describe("The Dash0 controller", Ordered, func() { ) }) + DescribeTable("it adds the auth token to the controller deployment even if self-monitoring is not enabled", + func(config SelfMonitoringTestConfig) { + CreateOperatorConfigurationResourceWithSpec( + ctx, + k8sClient, + dash0v1alpha1.Dash0OperatorConfigurationSpec{ + Export: ExportToPrt(config.createExport()), + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: false, + }, + }, + ) + + triggerOperatorConfigurationReconcileRequest(ctx, reconciler) + verifyOperatorConfigurationResourceIsAvailable(ctx) + Eventually(func(g Gomega) { + updatedDeployment := LoadOperatorDeploymentOrFail(ctx, k8sClient, g) + selfMonitoringConfiguration, err := + selfmonitoring.GetSelfMonitoringConfigurationFromControllerDeployment( + updatedDeployment, + ManagerContainerName, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + config.verify(g, selfMonitoringConfiguration, updatedDeployment) + }, timeout, pollingInterval).Should(Succeed()) + }, + Entry("with a Dash0 export with a token", SelfMonitoringTestConfig{ + createExport: Dash0ExportWithEndpointAndToken,
verify: verifyAuthTokenEnvVarFromToken, + }), + Entry("with a Dash0 export with a secret ref", SelfMonitoringTestConfig{ + createExport: Dash0ExportWithEndpointAndSecretRef, + verify: verifyAuthTokenEnvVarFromSecretRef, + }), + ) + Describe("disabling self-monitoring", func() { It("it does not change the controller deployment", func() { @@ -134,7 +171,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) }) - Describe("when updating the Dash0Operator resource", func() { + Describe("when updating the operator configuration resource", func() { Describe("enabling self-monitoring", func() { @@ -327,7 +364,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) }) - Describe("when deleting the Dash0Operator resource", func() { + Describe("when deleting the operator configuration resource", func() { Describe("when self-monitoring is enabled", func() { @@ -445,8 +482,8 @@ func controllerDeploymentWithoutSelfMonitoring() *appsv1.Deployment { return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: Dash0OperatorDeploymentName, - Namespace: Dash0OperatorNamespace, + Name: OperatorDeploymentName, + Namespace: OperatorNamespace, Labels: map[string]string{ "app.kubernetes.io/name": "dash0monitoring-operator", "app.kubernetes.io/component": "controller", @@ -495,7 +532,7 @@ func controllerDeploymentWithoutSelfMonitoring() *appsv1.Deployment { }, { Name: "DASH0_DEPLOYMENT_NAME", - Value: Dash0OperatorDeploymentName, + Value: OperatorDeploymentName, }, { Name: "OTEL_COLLECTOR_NAME_PREFIX", @@ -612,8 +649,8 @@ func controllerDeploymentWithSelfMonitoring() *appsv1.Deployment { return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Name: Dash0OperatorDeploymentName, - Namespace: Dash0OperatorNamespace, + Name: OperatorDeploymentName, + Namespace: OperatorNamespace, Labels: map[string]string{ "app.kubernetes.io/name": "dash0monitoring-operator", "app.kubernetes.io/component": "controller", @@ -662,7 +699,7 @@ func controllerDeploymentWithSelfMonitoring() *appsv1.Deployment { }, { Name: "DASH0_DEPLOYMENT_NAME", - Value: Dash0OperatorNamespace, + Value: OperatorNamespace, }, { Name: "OTEL_COLLECTOR_NAME_PREFIX", Value: "dash0monitoring-system", @@ -818,6 +855,7 @@ func verifyOperatorConfigurationResourceIsAvailable(ctx context.Context) { func verifySelfMonitoringConfigurationDash0Token( g Gomega, selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + _ *appsv1.Deployment, ) { dash0ExportConfiguration := selfMonitoringConfiguration.Export.Dash0 g.Expect(dash0ExportConfiguration).NotTo(BeNil()) @@ -834,6 +872,7 @@ func verifySelfMonitoringConfigurationDash0Token( func verifySelfMonitoringConfigurationDash0SecretRef( g Gomega, selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + _ *appsv1.Deployment, ) { dash0ExportConfiguration := selfMonitoringConfiguration.Export.Dash0 g.Expect(dash0ExportConfiguration).NotTo(BeNil()) @@ -851,6 +890,7 @@ func verifySelfMonitoringConfigurationDash0SecretRef( func verifySelfMonitoringConfigurationGrpc( g Gomega, selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + _ *appsv1.Deployment, ) { grpcExportConfiguration := selfMonitoringConfiguration.Export.Grpc g.Expect(grpcExportConfiguration).NotTo(BeNil()) @@ -868,6 +908,7 @@ func verifySelfMonitoringConfigurationGrpc( func verifySelfMonitoringConfigurationHttp( g Gomega, selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + _ *appsv1.Deployment, ) { httpExportConfiguration := selfMonitoringConfiguration.Export.Http 
g.Expect(httpExportConfiguration).NotTo(BeNil()) @@ -882,3 +923,31 @@ func verifySelfMonitoringConfigurationHttp( g.Expect(selfMonitoringConfiguration.Export.Dash0).To(BeNil()) g.Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) } + +func verifyAuthTokenEnvVarFromToken( + g Gomega, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + controllerDeployment *appsv1.Deployment, +) { + g.Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + g.Expect(selfMonitoringConfiguration.Export.Dash0).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) + container := controllerDeployment.Spec.Template.Spec.Containers[0] + g.Expect(container.Env).To( + ContainElement(MatchEnvVar("SELF_MONITORING_AND_API_AUTH_TOKEN", AuthorizationTokenTest))) +} + +func verifyAuthTokenEnvVarFromSecretRef( + g Gomega, + selfMonitoringConfiguration selfmonitoring.SelfMonitoringConfiguration, + controllerDeployment *appsv1.Deployment, +) { + g.Expect(selfMonitoringConfiguration.Enabled).To(BeFalse()) + g.Expect(selfMonitoringConfiguration.Export.Dash0).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Grpc).To(BeNil()) + g.Expect(selfMonitoringConfiguration.Export.Http).To(BeNil()) + container := controllerDeployment.Spec.Template.Spec.Containers[0] + g.Expect(container.Env).To( + ContainElement(MatchEnvVarValueFrom("SELF_MONITORING_AND_API_AUTH_TOKEN", "secret-ref", "key"))) +} diff --git a/internal/dash0/controller/perses_dashboards_controller.go b/internal/dash0/controller/perses_dashboards_controller.go new file mode 100644 index 00000000..078c99c4 --- /dev/null +++ b/internal/dash0/controller/perses_dashboards_controller.go @@ -0,0 +1,403 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "sync/atomic" + "time" + + "github.com/go-logr/logr" + persesv1alpha1 "github.com/perses/perses-operator/api/v1alpha1" + persesv1common "github.com/perses/perses/pkg/model/api/v1/common" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/workqueue" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/dash0hq/dash0-operator/internal/dash0/util" +) + +type PersesDashboardCrdReconciler struct { + AuthToken string + mgr ctrl.Manager + persesDashboardReconciler *PersesDashboardReconciler +} + +//+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch + +var ( + retrySettings = wait.Backoff{ + Duration: 5 * time.Second, + Factor: 1.5, + Steps: 3, + } +) + +func (r *PersesDashboardCrdReconciler) SetupWithManager( + ctx context.Context, + mgr ctrl.Manager, + startupK8sClient client.Client, + logger *logr.Logger, +) error { + kubeSystemNamespace := &corev1.Namespace{} + if err := startupK8sClient.Get(ctx, client.ObjectKey{Name: "kube-system"}, kubeSystemNamespace); err != nil { + msg := "unable to get the kube-system namespace uid" + logger.Error(err, msg) + return fmt.Errorf("%s: %w", msg, err) + } + + r.mgr = mgr + r.persesDashboardReconciler = &PersesDashboardReconciler{ + pseudoClusterUid: kubeSystemNamespace.UID, + httpClient: &http.Client{}, + authToken: r.AuthToken, + } + + if err := startupK8sClient.Get(ctx, client.ObjectKey{ + Name: "persesdashboards.perses.dev", + }, &apiextensionsv1.CustomResourceDefinition{}); err != nil { + if apierrors.IsNotFound(err) { + logger.Info("The persesdashboards.perses.dev custom resource definition does not exist in this " + "cluster, the operator will not watch for Perses dashboard resources.") + } else { + logger.Error(err, "unable to get the persesdashboards.perses.dev custom resource definition") + return err + } + } else { + logger.Info("The persesdashboards.perses.dev custom resource definition is present in this " + "cluster, the operator will watch for Perses dashboard resources.") + if err = r.startWatchingPersesDashboardResources(ctx, logger); err != nil { + return err + } + } + + // For now, we only check once at startup whether the PersesDashboard CRD is present; we do not watch for the CRD + // itself being created or deleted while the operator is running. Watching for a foreign CRD and reacting + // appropriately to its creation/deletion is work in progress in the prometheus scraping branch. Once that is + // finished, we can employ the same approach here. + return nil +} + +//+kubebuilder:rbac:groups=perses.dev,resources=persesdashboards,verbs=get;list;watch + +func (r *PersesDashboardCrdReconciler) startWatchingPersesDashboardResources( + _ context.Context, + logger *logr.Logger, +) error { + logger.Info("Setting up a watch for Perses dashboard custom resources.") + + if err := ctrl.NewControllerManagedBy(r.mgr). + Named("dash0_perses_dashboard_controller").
+ Watches( + &persesv1alpha1.PersesDashboard{}, + // Deliberately not using a convenience mechanism like &handler.EnqueueRequestForObject{} (which would + // feed all events into the Reconcile method) here, since using the lower-level TypedEventHandler interface + // directly allows us to distinguish between create and delete events more easily. + r.persesDashboardReconciler, + ). + Complete(r.persesDashboardReconciler); err != nil { + logger.Error(err, "unable to create a new controller for watching Perses Dashboards") + return err + } + + return nil +} + +func (r *PersesDashboardCrdReconciler) SetApiEndpointAndDataset(apiConfig *ApiConfig) { + r.persesDashboardReconciler.apiConfig.Store(apiConfig) +} + +type ApiConfig struct { + Endpoint string + Dataset string +} + +type PersesDashboardReconciler struct { + pseudoClusterUid types.UID + httpClient *http.Client + apiConfig atomic.Pointer[ApiConfig] + authToken string +} + +func (r *PersesDashboardReconciler) Create( + ctx context.Context, + e event.TypedCreateEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + logger := log.FromContext(ctx) + logger.Info( + "Detected a new Perses dashboard resource", + "namespace", + e.Object.GetNamespace(), + "name", + e.Object.GetName(), + ) + if err := r.UpsertDashboard(e.Object.(*persesv1alpha1.PersesDashboard), &logger); err != nil { + logger.Error(err, "unable to upsert the dashboard") + } +} + +func (r *PersesDashboardReconciler) Update( + ctx context.Context, + e event.TypedUpdateEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + logger := log.FromContext(ctx) + logger.Info( + "Detected a change for a Perses dashboard resource", + "namespace", + e.ObjectNew.GetNamespace(), + "name", + e.ObjectNew.GetName(), + ) + + _ = util.RetryWithCustomBackoff( + "upsert dashboard", + func() error { + return r.UpsertDashboard(e.ObjectNew.(*persesv1alpha1.PersesDashboard), &logger) + }, + retrySettings, + true, + &logger, + ) +} + +func (r *PersesDashboardReconciler) Delete( + ctx context.Context, + e event.TypedDeleteEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + logger := log.FromContext(ctx) + logger.Info( + "Detected the deletion of a Perses dashboard resource", + "namespace", + e.Object.GetNamespace(), + "name", + e.Object.GetName(), + ) + + _ = util.RetryWithCustomBackoff( + "delete dashboard", + func() error { + return r.DeleteDashboard(e.Object.(*persesv1alpha1.PersesDashboard), &logger) + }, + retrySettings, + true, + &logger, + ) +} + +func (r *PersesDashboardReconciler) Generic( + _ context.Context, + _ event.TypedGenericEvent[client.Object], + _ workqueue.TypedRateLimitingInterface[reconcile.Request], +) { + // ignoring generic events +} + +func (r *PersesDashboardReconciler) Reconcile( + context.Context, + reconcile.Request, +) (reconcile.Result, error) { + // Reconcile should not be called on the PersesDashboardReconciler, as we are using the TypedEventHandler interface + // directly when setting up the watch. We still need to implement the method, as the controller builder's Complete + // method requires implementing the Reconciler interface. 
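The `Update` and `Delete` handlers above wrap the Dash0 API calls in the repo's own `util.RetryWithCustomBackoff` helper, configured with the `retrySettings` backoff declared earlier (5s initial delay, factor 1.5, 3 attempts). For comparison, a sketch of the same retry shape using the stock client-go helper instead; the blanket "every error is retriable" predicate is an assumption for brevity:

```go
package sketch

import (
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/util/retry"
)

// upsertWithRetry retries a failed API call with the same backoff shape the
// controller configures, using client-go's retry.OnError rather than the
// operator's util.RetryWithCustomBackoff.
func upsertWithRetry(upsert func() error) error {
	backoff := wait.Backoff{
		Duration: 5 * time.Second,
		Factor:   1.5,
		Steps:    3,
	}
	// Treat every error as retriable; a real implementation might bail out
	// early on non-retriable HTTP status codes such as 401 or 404.
	return retry.OnError(backoff, func(error) bool { return true }, upsert)
}
```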
+ return reconcile.Result{}, nil +} + +func (r *PersesDashboardReconciler) UpsertDashboard( + persesDashboard *persesv1alpha1.PersesDashboard, + logger *logr.Logger, +) error { + apiConfig := r.apiConfig.Load() + dashboardUrl, dashboardOrigin, authToken, executeRequest := r.validateConfigAndRenderUrl( + persesDashboard, + apiConfig, + logger, + ) + if !executeRequest { + return nil + } + + if persesDashboard.Spec.Display == nil { + persesDashboard.Spec.Display = &persesv1common.Display{} + } + if persesDashboard.Spec.Display.Name == "" { + // Let the dashboard name default to the perses dashboard resource's namespace + name, if unset. + persesDashboard.Spec.Display.Name = fmt.Sprintf("%s/%s", persesDashboard.Namespace, persesDashboard.Name) + } + + serializedDashboard, _ := json.Marshal( + map[string]interface{}{ + "kind": "Dashboard", + "spec": persesDashboard.Spec, + "metadata": map[string]interface{}{ + "dash0Extensions": map[string]interface{}{ + "origin": dashboardOrigin, + }, + }, + }) + requestPayload := bytes.NewBuffer(serializedDashboard) + + req, err := http.NewRequest( + http.MethodPut, + dashboardUrl, + requestPayload, + ) + if err != nil { + logger.Error(err, "unable to create a new HTTP request to upsert the dashboard") + return err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authToken)) + logger.Info(fmt.Sprintf("Updating/creating dashboard %s in Dash0", dashboardOrigin)) + res, err := r.httpClient.Do(req) + if err != nil { + logger.Error(err, fmt.Sprintf("unable to execute the HTTP request to update the dashboard %s", dashboardOrigin)) + return err + } + + if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices { + return r.handleNon2xxStatusCode(res, dashboardOrigin, logger) + } + + // http status code was 2xx, discard the response body and close it + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + return nil +} + +func (r *PersesDashboardReconciler) DeleteDashboard( + persesDashboard *persesv1alpha1.PersesDashboard, + logger *logr.Logger, +) error { + apiConfig := r.apiConfig.Load() + dashboardUrl, dashboardOrigin, authToken, executeRequest := r.validateConfigAndRenderUrl( + persesDashboard, + apiConfig, + logger, + ) + if !executeRequest { + return nil + } + + req, err := http.NewRequest( + http.MethodDelete, + dashboardUrl, + nil, + ) + if err != nil { + logger.Error(err, "unable to create a new HTTP request to delete the dashboard") + return err + } + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", authToken)) + logger.Info(fmt.Sprintf("Deleting dashboard %s in Dash0", dashboardOrigin)) + res, err := r.httpClient.Do(req) + if err != nil { + logger.Error(err, fmt.Sprintf("unable to execute the HTTP request to delete the dashboard %s", dashboardOrigin)) + return err + } + + if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices { + return r.handleNon2xxStatusCode(res, dashboardOrigin, logger) + } + + // http status code was 2xx, discard the response body and close it + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + return nil +} + +func (r *PersesDashboardReconciler) validateConfigAndRenderUrl( + persesDashboard *persesv1alpha1.PersesDashboard, + apiConfig *ApiConfig, + logger *logr.Logger, +) (string, string, string, bool) { + if apiConfig == nil || apiConfig.Endpoint == "" { + logger.Info("No Dash0 API endpoint has been provided via the operator configuration 
resource, the dashboard " + + "will not be updated in Dash0.") + return "", "", "", false + } + if r.authToken == "" { + logger.Info("No auth token is set on the controller deployment, the dashboard will not be updated " + + "in Dash0.") + return "", "", "", false + } + + dataset := apiConfig.Dataset + if dataset == "" { + dataset = "default" + } + dashboardUrl, dashboardOrigin := r.renderDashboardUrl(apiConfig.Endpoint, persesDashboard, dataset) + return dashboardUrl, dashboardOrigin, r.authToken, true +} + +func (r *PersesDashboardReconciler) renderDashboardUrl( + dash0ApiEndpoint string, + persesDashboard *persesv1alpha1.PersesDashboard, + dataset string, +) (string, string) { + dashboardOrigin := fmt.Sprintf( + // we deliberately use _ as the separator, since that is an illegal character in Kubernetes names. This avoids + // any potential naming collisions (e.g. namespace="abc" & name="def-ghi" vs. namespace="abc-def" & name="ghi"). + "dash0-operator_%s_%s_%s", + r.pseudoClusterUid, + persesDashboard.Namespace, + persesDashboard.Name, + ) + if !strings.HasSuffix(dash0ApiEndpoint, "/") { + dash0ApiEndpoint += "/" + } + return fmt.Sprintf( + "%sapi/dashboards/%s?dataset=%s", + dash0ApiEndpoint, + dashboardOrigin, + dataset, + ), dashboardOrigin +} + +func (r *PersesDashboardReconciler) handleNon2xxStatusCode( + res *http.Response, + dashboardOrigin string, + logger *logr.Logger, +) error { + defer func() { + _ = res.Body.Close() + }() + responseBody, readErr := io.ReadAll(res.Body) + if readErr != nil { + readBodyErr := fmt.Errorf("unable to read the API response payload after receiving status code %d when "+ + "trying to update/create/delete the dashboard %s", res.StatusCode, dashboardOrigin) + logger.Error(readBodyErr, "unable to read the API response payload") + return readBodyErr + } + + statusCodeErr := fmt.Errorf( + "unexpected status code %d when updating/creating/deleting the dashboard %s, response body is %s", + res.StatusCode, + dashboardOrigin, + string(responseBody), + ) + logger.Error(statusCodeErr, "unexpected status code") + return statusCodeErr +} diff --git a/internal/dash0/instrumentation/instrumenter_test.go b/internal/dash0/instrumentation/instrumenter_test.go index d62df002..ff300762 100644 --- a/internal/dash0/instrumentation/instrumenter_test.go +++ b/internal/dash0/instrumentation/instrumenter_test.go @@ -40,7 +40,7 @@ var _ = Describe("The instrumenter", Ordered, func() { BeforeAll(func() { EnsureTestNamespaceExists(ctx, k8sClient) - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) }) BeforeEach(func() { diff --git a/internal/dash0/predelete/operator_pre_delete_handler_test.go b/internal/dash0/predelete/operator_pre_delete_handler_test.go index bf738365..a854aeec 100644 --- a/internal/dash0/predelete/operator_pre_delete_handler_test.go +++ b/internal/dash0/predelete/operator_pre_delete_handler_test.go @@ -49,7 +49,7 @@ var _ = Describe("Uninstalling the Dash0 Kubernetes operator", Ordered, func() { ) BeforeAll(func() { - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) }) BeforeEach(func() { diff --git a/internal/dash0/predelete/pre_delete_suite_test.go b/internal/dash0/predelete/pre_delete_suite_test.go index ffbc2097..a69b78d5 100644 --- a/internal/dash0/predelete/pre_delete_suite_test.go +++ b/internal/dash0/predelete/pre_delete_suite_test.go @@ -113,7 +113,7 @@ var _ = BeforeSuite(func() { Clientset: clientset, Images: TestImages, Instrumenter: instrumenter, -
OperatorNamespace: Dash0OperatorNamespace, + OperatorNamespace: OperatorNamespace, BackendConnectionManager: backendConnectionManager, DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, } diff --git a/internal/dash0/selfmonitoring/self_monitoring.go b/internal/dash0/selfmonitoring/self_monitoring.go index a2fa87bf..7767d7c8 100644 --- a/internal/dash0/selfmonitoring/self_monitoring.go +++ b/internal/dash0/selfmonitoring/self_monitoring.go @@ -37,14 +37,12 @@ const ( otelExporterOtlpProtocolEnvVarName = "OTEL_EXPORTER_OTLP_PROTOCOL" otelResourceAttribtuesEnvVarName = "OTEL_RESOURCE_ATTRIBUTES" otelLogLevelEnvVarName = "OTEL_LOG_LEVEL" - - selfMonitoringauthTokenEnvVarName = "SELF_MONITORING_AUTH_TOKEN" ) var ( dash0IngressEndpointRegex = regexp.MustCompile(`dash0(?:-dev)?\.com`) // See https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/ - authHeaderValue = fmt.Sprintf("Bearer $(%s)", selfMonitoringauthTokenEnvVarName) + authHeaderValue = fmt.Sprintf("Bearer $(%s)", util.SelfMonitoringAndApiAuthTokenEnvVarName) ) func ConvertOperatorConfigurationResourceToSelfMonitoringConfiguration( @@ -221,8 +219,8 @@ func enableSelfMonitoringInCollector( var authTokenEnvVar *corev1.EnvVar if selfMonitoringExport.Dash0 != nil { envVar, err := util.CreateEnvVarForAuthorization( - *selfMonitoringExport.Dash0, - selfMonitoringauthTokenEnvVarName, + selfMonitoringExport.Dash0.Authorization, + util.SelfMonitoringAndApiAuthTokenEnvVarName, ) if err != nil { return err @@ -260,86 +258,66 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() } func GetSelfMonitoringConfigurationFromControllerDeployment( - controllerDeployment *appsv1.Deployment, + managerDeployment *appsv1.Deployment, managerContainerName string, ) (SelfMonitoringConfiguration, error) { - managerContainerIdx := slices.IndexFunc(controllerDeployment.Spec.Template.Spec.Containers, func(c corev1.Container) bool { - return c.Name == managerContainerName - }) - - if managerContainerIdx < 0 { + managerContainerIdx, err := findManagerContainer(managerDeployment, managerContainerName) + if err != nil { return SelfMonitoringConfiguration{ Enabled: false, }, &cannotFindContainerByNameError{ ContainerName: managerContainerName, - WorkloadGKV: controllerDeployment.GroupVersionKind(), - WorkloadNamespace: controllerDeployment.Namespace, - WorkloadName: controllerDeployment.Name, + WorkloadGKV: managerDeployment.GroupVersionKind(), + WorkloadNamespace: managerDeployment.Namespace, + WorkloadName: managerDeployment.Name, } } - return ParseSelfMonitoringConfigurationFromContainer(&controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx]) + return ParseSelfMonitoringConfigurationFromContainer(&managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx]) } -func DisableSelfMonitoringInControllerDeployment( - controllerDeployment *appsv1.Deployment, +func DisableSelfMonitoringInManagerDeployment( + managerDeployment *appsv1.Deployment, managerContainerName string, + removeAuthToken bool, ) error { - managerContainerIdx := slices.IndexFunc(controllerDeployment.Spec.Template.Spec.Containers, func(c corev1.Container) bool { - return c.Name == managerContainerName - }) - - if managerContainerIdx < 0 { - return &cannotFindContainerByNameError{ - ContainerName: managerContainerName, - WorkloadGKV: controllerDeployment.GroupVersionKind(), - WorkloadNamespace: controllerDeployment.Namespace, - WorkloadName: controllerDeployment.Name, - } + managerContainerIdx, err := findManagerContainer(managerDeployment,
managerContainerName) + if err != nil { + return err } - managerContainer := controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] - disableSelfMonitoringInContainer(&managerContainer) - controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer + managerContainer := managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] + disableSelfMonitoringInContainer(&managerContainer, removeAuthToken) + managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer return nil } -func EnableSelfMonitoringInControllerDeployment( - controllerDeployment *appsv1.Deployment, +func EnableSelfMonitoringInManagerDeployment( + managerDeployment *appsv1.Deployment, managerContainerName string, selfMonitoringConfiguration SelfMonitoringConfiguration, operatorVersion string, developmentMode bool, ) error { - managerContainerIdx := slices.IndexFunc( - controllerDeployment.Spec.Template.Spec.Containers, - func(c corev1.Container) bool { - return c.Name == managerContainerName - }) - - if managerContainerIdx < 0 { - return &cannotFindContainerByNameError{ - ContainerName: managerContainerName, - WorkloadGKV: controllerDeployment.GroupVersionKind(), - WorkloadNamespace: controllerDeployment.Namespace, - WorkloadName: controllerDeployment.Name, - } + managerContainerIdx, err := findManagerContainer(managerDeployment, managerContainerName) + if err != nil { + return err } selfMonitoringExport := selfMonitoringConfiguration.Export var authTokenEnvVar *corev1.EnvVar if selfMonitoringExport.Dash0 != nil { envVar, err := util.CreateEnvVarForAuthorization( - *selfMonitoringExport.Dash0, - selfMonitoringauthTokenEnvVarName, + selfMonitoringExport.Dash0.Authorization, + util.SelfMonitoringAndApiAuthTokenEnvVarName, ) if err != nil { return err } authTokenEnvVar = &envVar } - managerContainer := controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] + managerContainer := managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] enableSelfMonitoringInContainer( &managerContainer, selfMonitoringExport, @@ -347,11 +325,57 @@ func EnableSelfMonitoringInControllerDeployment( operatorVersion, developmentMode, ) - controllerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer + managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer return nil } +func UpdateApiTokenWithoutSelfMonitoringInManagerDeployment( + managerDeployment *appsv1.Deployment, + managerContainerName string, + authorization dash0v1alpha1.Authorization, +) error { + managerContainerIdx, err := findManagerContainer(managerDeployment, managerContainerName) + if err != nil { + return err + } + + envVar, err := util.CreateEnvVarForAuthorization( + authorization, + util.SelfMonitoringAndApiAuthTokenEnvVarName, + ) + if err != nil { + return err + } + + managerContainer := managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] + addAuthTokenToContainer( + &managerContainer, + &envVar, + ) + managerDeployment.Spec.Template.Spec.Containers[managerContainerIdx] = managerContainer + + return nil +} + +func findManagerContainer(managerDeployment *appsv1.Deployment, managerContainerName string) (int, error) { + managerContainerIdx := slices.IndexFunc(managerDeployment.Spec.Template.Spec.Containers, func(c corev1.Container) bool { + return c.Name == managerContainerName + }) + if managerContainerIdx >= 0 { + return
managerContainerIdx, nil + } + + return 0, &cannotFindContainerByNameError{ + ContainerName: managerContainerName, + WorkloadGKV: managerDeployment.GroupVersionKind(), + WorkloadNamespace: managerDeployment.Namespace, + WorkloadName: managerDeployment.Name, + } +} + func ParseSelfMonitoringConfigurationFromContainer(container *corev1.Container) (SelfMonitoringConfiguration, error) { endpoint, err := parseEndpoint(container) if err != nil { @@ -530,21 +554,7 @@ func enableSelfMonitoringInContainer( developmentMode bool, ) { if authTokenEnvVar != nil { - authTokenEnvVarIdx := slices.IndexFunc(container.Env, matchSelfMonitoringAuthTokenEnvVar) - if authTokenEnvVarIdx == 0 { - // update the existing value - container.Env[authTokenEnvVarIdx] = *authTokenEnvVar - } else if authTokenEnvVarIdx > 0 { - // Since we reference this env var in the OTEL_EXPORTER_OTLP_HEADERS env var, we want to have this as the - // very first env var, to make sure it is defined before OTEL_EXPORTER_OTLP_HEADERS. (This is a requirement - // for using - // https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/.) - container.Env = slices.Delete(container.Env, authTokenEnvVarIdx, authTokenEnvVarIdx+1) - container.Env = slices.Insert(container.Env, 0, *authTokenEnvVar) - } else { - // the env var is not present yet, add it to the start of the list - container.Env = slices.Insert(container.Env, 0, *authTokenEnvVar) - } + addAuthTokenToContainer(container, authTokenEnvVar) } exportSettings := ConvertExportConfigurationToEnvVarSettings(selfMonitoringExport) @@ -584,6 +594,24 @@ func enableSelfMonitoringInContainer( } } +func addAuthTokenToContainer(container *corev1.Container, authTokenEnvVar *corev1.EnvVar) { + authTokenEnvVarIdx := slices.IndexFunc(container.Env, matchSelfMonitoringAuthTokenEnvVar) + if authTokenEnvVarIdx == 0 { + // update the existing value + container.Env[authTokenEnvVarIdx] = *authTokenEnvVar + } else if authTokenEnvVarIdx > 0 { + // Since we reference this env var in the OTEL_EXPORTER_OTLP_HEADERS env var, we want to have this as the + // very first env var, to make sure it is defined before OTEL_EXPORTER_OTLP_HEADERS. (This is a requirement + // for using + // https://kubernetes.io/docs/tasks/inject-data-application/define-interdependent-environment-variables/.) 
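To make the ordering requirement in the comment above concrete: Kubernetes expands `$(VAR)` references only against env vars defined *earlier* in the same list, so the token variable must precede `OTEL_EXPORTER_OTLP_HEADERS`. A minimal sketch with illustrative values:

```go
package sketch

import corev1 "k8s.io/api/core/v1"

// envVarsWithDependentReference shows why the insertion order matters: the
// $(...) reference in the second entry only resolves because the token env
// var is defined before it in the list.
func envVarsWithDependentReference(token string) []corev1.EnvVar {
	return []corev1.EnvVar{
		// Must come first so the $(...) reference below can be expanded.
		{Name: "SELF_MONITORING_AND_API_AUTH_TOKEN", Value: token},
		{Name: "OTEL_EXPORTER_OTLP_HEADERS", Value: "Authorization=Bearer $(SELF_MONITORING_AND_API_AUTH_TOKEN)"},
	}
}
```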
+ container.Env = slices.Delete(container.Env, authTokenEnvVarIdx, authTokenEnvVarIdx+1) + container.Env = slices.Insert(container.Env, 0, *authTokenEnvVar) + } else { + // the env var is not present yet, add it to the start of the list + container.Env = slices.Insert(container.Env, 0, *authTokenEnvVar) + } +} + func ConvertExportConfigurationToEnvVarSettings(selfMonitoringExport dash0v1alpha1.Export) EndpointAndHeaders { if selfMonitoringExport.Dash0 != nil { dash0Export := selfMonitoringExport.Dash0 @@ -645,11 +673,13 @@ func convertHeadersToEnvVarValue(headers []dash0v1alpha1.Header) string { return strings.Join(keyValuePairs, ",") } -func disableSelfMonitoringInContainer(container *corev1.Container) { +func disableSelfMonitoringInContainer(container *corev1.Container, removeAuthToken bool) { removeEnvVar(container, otelExporterOtlpEndpointEnvVarName) removeEnvVar(container, otelExporterOtlpProtocolEnvVarName) removeEnvVar(container, otelExporterOtlpHeadersEnvVarName) - removeEnvVar(container, selfMonitoringauthTokenEnvVarName) + if removeAuthToken { + removeEnvVar(container, util.SelfMonitoringAndApiAuthTokenEnvVarName) + } } func updateOrAppendEnvVar(container *corev1.Container, name string, value string) { @@ -690,5 +720,5 @@ func matchOtelExporterOtlpProtocolEnvVar(e corev1.EnvVar) bool { } func matchSelfMonitoringAuthTokenEnvVar(e corev1.EnvVar) bool { - return e.Name == selfMonitoringauthTokenEnvVarName + return e.Name == util.SelfMonitoringAndApiAuthTokenEnvVarName } diff --git a/internal/dash0/startup/auto_operator_configuration_handler.go b/internal/dash0/startup/auto_operator_configuration_handler.go index 48f26c86..cc929c08 100644 --- a/internal/dash0/startup/auto_operator_configuration_handler.go +++ b/internal/dash0/startup/auto_operator_configuration_handler.go @@ -29,6 +29,7 @@ type OperatorConfigurationValues struct { Endpoint string Token string SecretRef + ApiEndpoint string } type AutoOperatorConfigurationResourceHandler struct { @@ -198,7 +199,16 @@ func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationRe } } - if err := r.Create(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{ + dash0Export := dash0v1alpha1.Export{ + Dash0: &dash0v1alpha1.Dash0Configuration{ + Endpoint: operatorConfiguration.Endpoint, + Authorization: authorization, + }, + } + if operatorConfiguration.ApiEndpoint != "" { + dash0Export.Dash0.ApiEndpoint = operatorConfiguration.ApiEndpoint + } + operatorConfigurationResource := dash0v1alpha1.Dash0OperatorConfiguration{ ObjectMeta: metav1.ObjectMeta{ Name: operatorConfigurationAutoResourceName, }, @@ -206,14 +216,10 @@ func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationRe SelfMonitoring: dash0v1alpha1.SelfMonitoring{ Enabled: true, }, - Export: &dash0v1alpha1.Export{ - Dash0: &dash0v1alpha1.Dash0Configuration{ - Endpoint: operatorConfiguration.Endpoint, - Authorization: authorization, - }, - }, + Export: &dash0Export, }, - }); err != nil { + } + if err := r.Create(ctx, &operatorConfigurationResource); err != nil { return fmt.Errorf("failed to create the Dash0 operator configuration resource: %w", err) } diff --git a/internal/dash0/startup/auto_operator_configuration_handler_test.go b/internal/dash0/startup/auto_operator_configuration_handler_test.go index 9150280c..e85d1b50 100644 --- a/internal/dash0/startup/auto_operator_configuration_handler_test.go +++ b/internal/dash0/startup/auto_operator_configuration_handler_test.go @@ -39,7 +39,7 @@ var _ = Describe("Create an operator configuration resource at 
startup", Ordered logger := log.FromContext(ctx) BeforeAll(func() { - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) }) AfterEach(func() { diff --git a/internal/dash0/startup/startup_suite_test.go b/internal/dash0/startup/startup_suite_test.go index 57c24b9d..05487620 100644 --- a/internal/dash0/startup/startup_suite_test.go +++ b/internal/dash0/startup/startup_suite_test.go @@ -63,7 +63,7 @@ var _ = BeforeSuite(func() { handler = &AutoOperatorConfigurationResourceHandler{ Client: k8sClient, - OperatorNamespace: Dash0OperatorNamespace, + OperatorNamespace: OperatorNamespace, NamePrefix: OTelCollectorNamePrefixTest, bypassWebhookCheck: true, } diff --git a/internal/dash0/util/constants.go b/internal/dash0/util/constants.go index a7105d2b..d6ecbf8e 100644 --- a/internal/dash0/util/constants.go +++ b/internal/dash0/util/constants.go @@ -7,4 +7,6 @@ const ( AuthorizationHeaderName = "Authorization" Dash0DatasetHeaderName = "Dash0-Dataset" DatasetInsights = "dash0-internal" + + SelfMonitoringAndApiAuthTokenEnvVarName = "SELF_MONITORING_AND_API_AUTH_TOKEN" ) diff --git a/internal/dash0/util/controller.go b/internal/dash0/util/controller.go index 174e0e07..8b11b885 100644 --- a/internal/dash0/util/controller.go +++ b/internal/dash0/util/controller.go @@ -404,11 +404,11 @@ func addFinalizerIfNecessary( } func CreateEnvVarForAuthorization( - dash0ExportConfiguration dash0v1alpha1.Dash0Configuration, + dash0Authorization dash0v1alpha1.Authorization, envVarName string, ) (corev1.EnvVar, error) { - token := dash0ExportConfiguration.Authorization.Token - secretRef := dash0ExportConfiguration.Authorization.SecretRef + token := dash0Authorization.Token + secretRef := dash0Authorization.SecretRef if token != nil && *token != "" { return corev1.EnvVar{ Name: envVarName, diff --git a/internal/dash0/webhooks/attach_dangling_events_test.go b/internal/dash0/webhooks/attach_dangling_events_test.go index 0418c903..3bf58813 100644 --- a/internal/dash0/webhooks/attach_dangling_events_test.go +++ b/internal/dash0/webhooks/attach_dangling_events_test.go @@ -33,7 +33,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { var createdObjects []client.Object BeforeAll(func() { - EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + EnsureOperatorNamespaceExists(ctx, k8sClient) recorder := manager.GetEventRecorderFor("dash0-monitoring-controller") instrumenter := &instrumentation.Instrumenter{ @@ -61,7 +61,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { Clientset: clientset, Instrumenter: instrumenter, Images: TestImages, - OperatorNamespace: Dash0OperatorNamespace, + OperatorNamespace: OperatorNamespace, BackendConnectionManager: backendConnectionManager, DanglingEventsTimeouts: &DanglingEventsTimeoutsTest, } diff --git a/test-resources/.env.template b/test-resources/.env.template index a2b3a87b..d0fa295f 100644 --- a/test-resources/.env.template +++ b/test-resources/.env.template @@ -1,2 +1,3 @@ LOCAL_KUBECTX=docker-desktop +PATH_TO_PERSES_OPERATOR_REPO= DASH0_AUTHORIZATION_TOKEN= diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh index aec4ceca..769c5901 100755 --- a/test-resources/bin/test-cleanup.sh +++ b/test-resources/bin/test-cleanup.sh @@ -38,8 +38,15 @@ kubectl delete secret \ kubectl delete ns dash0-system --ignore-not-found +# deliberately deleting the dashboard after undeploying the operator to avoid deleting the dashboard in Dash0 everytime +kubectl delete 
-n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml || true + kubectl delete --ignore-not-found=true customresourcedefinition dash0monitorings.operator.dash0.com kubectl delete --ignore-not-found=true customresourcedefinition dash0operatorconfigurations.operator.dash0.com +kubectl delete --ignore-not-found=true customresourcedefinition perses.perses.dev +kubectl delete --ignore-not-found=true customresourcedefinition persesdashboards.perses.dev +kubectl delete --ignore-not-found=true customresourcedefinition persesdatasources.perses.dev # The following resources are deleted automatically with helm uninstall, unless for example when the operator manager # crashes and the helm pre-delete helm hook cannot run, then they might be left behind. diff --git a/test-resources/bin/test-scenario-01-aum-operator-cr.sh b/test-resources/bin/test-scenario-01-aum-operator-cr.sh index 65e26bd2..454a709c 100755 --- a/test-resources/bin/test-scenario-01-aum-operator-cr.sh +++ b/test-resources/bin/test-scenario-01-aum-operator-cr.sh @@ -15,42 +15,66 @@ load_env_file verify_kubectx setup_test_environment -echo "STEP 1: remove old test resources" +step_counter=1 + +echo "STEP $step_counter: remove old test resources" test-resources/bin/test-cleanup.sh ${target_namespace} false -test-resources/bin/ensure-namespace-exists.sh ${target_namespace} -echo -echo +finish_step -echo "STEP 2: creating target namespace (if necessary)" +echo "STEP $step_counter: creating target namespace (if necessary)" test-resources/bin/ensure-namespace-exists.sh ${target_namespace} -echo -echo +finish_step -echo "STEP 3: creating operator namespace and authorization token secret" +echo "STEP $step_counter: creating operator namespace and authorization token secret" test-resources/bin/ensure-namespace-exists.sh dash0-system kubectl create secret \ generic \ dash0-authorization-secret \ --namespace dash0-system \ --from-literal=token="${DASH0_AUTHORIZATION_TOKEN}" -echo -echo +finish_step -echo "STEP 4: rebuild images" +echo "STEP $step_counter: install foreign custom resource definitions" +install_foreign_crds +finish_step + +echo "STEP $step_counter: rebuild images" build_all_images -echo -echo +finish_step -echo "STEP 5: deploy application under monitoring" -test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} -echo -echo +if [[ "${DEPLOY_APPLICATION_UNDER_MONITORING:-}" != false ]]; then + echo "STEP $step_counter: deploy application under monitoring" + test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} + finish_step +fi -echo "STEP 6: deploy the Dash0 operator using helm" +echo "STEP $step_counter: deploy the Dash0 operator using helm" deploy_via_helm -echo -echo +finish_step + +if [[ "${DEPLOY_OPERATOR_CONFIGURATION_VIA_HELM:-}" == false ]]; then + # if no operator configuration resource has been deployed via the helm chart, deploy one now + echo "STEP $step_counter: deploy the Dash0 operator configuration resource" + install_operator_configuration_resource + finish_step +else + echo "not deploying a Dash0 operator configuration resource (has been deployed with the helm chart already)" + echo +fi -echo "STEP 7: deploy the Dash0 monitoring resource to namespace ${target_namespace}" -install_monitoring_resource +if [[ "${DEPLOY_MONITORING_RESOURCE:-}" != false ]]; then + echo "STEP $step_counter: deploy the Dash0 monitoring resource to namespace
${target_namespace}" + install_monitoring_resource + finish_step +else + echo "not deploying a Dash0 monitoring resource" + echo +fi +if [[ "${DEPLOY_PERSES_DASHBOARD:-}" == true ]]; then + echo "Waiting 30 seconds before deploying a Perses dashboard resource." + sleep 30 + echo "STEP $step_counter: deploy a Perses dashboard resource to namespace ${target_namespace}" + kubectl apply -n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml + finish_step +fi diff --git a/test-resources/bin/test-scenario-02-operator-cr-aum.sh b/test-resources/bin/test-scenario-02-operator-cr-aum.sh index 92237e0b..71f6f8cb 100755 --- a/test-resources/bin/test-scenario-02-operator-cr-aum.sh +++ b/test-resources/bin/test-scenario-02-operator-cr-aum.sh @@ -15,41 +15,66 @@ load_env_file verify_kubectx setup_test_environment -echo "STEP 1: remove old test resources" +step_counter=1 + +echo "STEP $step_counter: remove old test resources" test-resources/bin/test-cleanup.sh ${target_namespace} false -echo -echo +finish_step -echo "STEP 2: creating target namespace (if necessary)" +echo "STEP $step_counter: creating target namespace (if necessary)" test-resources/bin/ensure-namespace-exists.sh ${target_namespace} -echo -echo +finish_step -echo "STEP 3: creating operator namespace and authorization token secret" +echo "STEP $step_counter: creating operator namespace and authorization token secret" test-resources/bin/ensure-namespace-exists.sh dash0-system kubectl create secret \ generic \ dash0-authorization-secret \ --namespace dash0-system \ --from-literal=token="${DASH0_AUTHORIZATION_TOKEN}" -echo -echo +finish_step + +echo "STEP $step_counter: install foreign custom resource definitions" +install_foreign_crds +finish_step -echo "STEP 4: rebuild images" +echo "STEP $step_counter: rebuild images" build_all_images -echo -echo +finish_step -echo "STEP 5: deploy the Dash0 operator using helm" +echo "STEP $step_counter: deploy the Dash0 operator using helm" deploy_via_helm -echo -echo +finish_step + +if [[ "${DEPLOY_OPERATOR_CONFIGURATION_VIA_HELM:-}" == false ]]; then + # if no operator configuration resource has been deployed via the helm chart, deploy one now + echo "STEP $step_counter: deploy the Dash0 operator configuration resource" + install_operator_configuration_resource + finish_step +else + echo "not deploying a Dash0 operator configuration resource (has been deployed with the helm chart already)" + echo +fi -echo "STEP 6: deploy the Dash0 monitoring resource to namespace ${target_namespace}" -install_monitoring_resource -echo -echo +if [[ "${DEPLOY_MONITORING_RESOURCE:-}" != false ]]; then + echo "STEP $step_counter: deploy the Dash0 monitoring resource to namespace ${target_namespace}" + install_monitoring_resource + finish_step +else + echo "not deploying a Dash0 monitoring resource" + echo +fi -echo "STEP 7: deploy application under monitoring" -test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} +if [[ "${DEPLOY_APPLICATION_UNDER_MONITORING:-}" != false ]]; then + echo "STEP $step_counter: deploy application under monitoring" + test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} + finish_step +fi +if [[ "${DEPLOY_PERSES_DASHBOARD:-}" == true ]]; then + echo "Waiting 30 seconds before deploying a Perses dashboard resource." 
+ sleep 30 + echo "STEP $step_counter: deploy a Perses dashboard resource to namespace ${target_namespace}" + kubectl apply -n ${target_namespace} -f test-resources/customresources/persesdashboard/persesdashboard.yaml + finish_step +fi diff --git a/test-resources/bin/util b/test-resources/bin/util index 04a249b3..dfe2134d 100644 --- a/test-resources/bin/util +++ b/test-resources/bin/util @@ -1,3 +1,5 @@ +#!/usr/bin/env bash + # SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. # SPDX-License-Identifier: Apache-2.0 @@ -25,6 +27,12 @@ setup_test_environment () { test-resources/bin/render-templates.sh } +finish_step() { + ((step_counter++)) + echo + echo +} + build_all_images() { make docker-build } @@ -101,10 +109,17 @@ deploy_via_helm() { fi # Deploy an operator configuration right away. - helm_install_command+=" --set operator.dash0Export.enabled=true" - helm_install_command+=" --set operator.dash0Export.endpoint=ingress.eu-west-1.aws.dash0-dev.com:4317" - helm_install_command+=" --set operator.dash0Export.secretRef.name=dash0-authorization-secret" - helm_install_command+=" --set operator.dash0Export.secretRef.key=token" + if [[ "${DEPLOY_OPERATOR_CONFIGURATION_VIA_HELM:-}" != false ]]; then + helm_install_command+=" --set operator.dash0Export.enabled=true" + helm_install_command+=" --set operator.dash0Export.endpoint=ingress.eu-west-1.aws.dash0-dev.com:4317" + if [[ "${OPERATOR_CONFIGURATION_VIA_HELM_USE_TOKEN:-}" == true ]]; then + helm_install_command+=" --set operator.dash0Export.token=${DASH0_AUTHORIZATION_TOKEN}" + else + helm_install_command+=" --set operator.dash0Export.secretRef.name=dash0-authorization-secret" + helm_install_command+=" --set operator.dash0Export.secretRef.key=token" + fi + helm_install_command+=" --set operator.dash0Export.apiEndpoint=https://api.eu-west-1.aws.dash0-dev.com" + fi helm_install_command+=" dash0-operator" helm_install_command+=" ${OPERATOR_HELM_CHART:-helm-chart/dash0-operator}" @@ -132,16 +147,18 @@ wait_for_operator_manager_and_webhook() { # We deploy an operator configuration at startup via operator.dash0Export.enabled=true, wait for that resource to # become available as well. 
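The comment above describes waiting for the auto-created operator configuration resource to be reconciled and marked as Available, which the shell code just below implements with a kubectl polling loop plus `kubectl wait`. For reference, the same wait logic could be expressed in Go roughly as follows; this is a hypothetical sketch that assumes the resource exposes standard `metav1.Condition` entries and uses a generic accessor rather than the operator's actual types:

```go
package sketch

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// waitUntilAvailable polls until the given object reports an "Available"
// condition with status True, mirroring the kubectl wait call in the script.
// getConditions is a hypothetical accessor for the object's status conditions.
func waitUntilAvailable(
	ctx context.Context,
	c client.Client,
	key client.ObjectKey,
	obj client.Object,
	getConditions func() []metav1.Condition,
) error {
	return wait.PollUntilContextTimeout(ctx, time.Second, 30*time.Second, true,
		func(ctx context.Context) (bool, error) {
			if err := c.Get(ctx, key, obj); err != nil {
				// Not created yet: keep polling instead of failing immediately.
				return false, client.IgnoreNotFound(err)
			}
			return meta.IsStatusConditionTrue(getConditions(), "Available"), nil
		})
}
```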
- echo "waiting for the automatically created operator configuration resource to become available" - for ((i=0; i<=20; i++)); do - # wait until the resource has been created - if kubectl get dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource; then - break; - fi - sleep 1 - done - # wait until the resource has been reconciled and is marked as available - kubectl wait dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource --for condition=Available --timeout 30s + if [[ "${DEPLOY_OPERATOR_CONFIGURATION_VIA_HELM:-}" != false ]]; then + echo "waiting for the automatically created operator configuration resource to become available" + for ((i=0; i<=20; i++)); do + # wait until the resource has been created + if kubectl get dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource; then + break; + fi + sleep 1 + done + # wait until the resource has been reconciled and is marked as available + kubectl wait dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource --for condition=Available --timeout 30s + fi } has_been_set_to_empty_string() { @@ -175,3 +192,26 @@ install_monitoring_resource() { echo "waiting for the monitoring resource to become available" kubectl wait --namespace ${target_namespace} dash0monitorings.operator.dash0.com/dash0-monitoring-resource --for condition=Available } + +install_foreign_crds() { + if [[ -z "${PATH_TO_PERSES_OPERATOR_REPO:-}" ]]; then + echo "PATH_TO_PERSES_OPERATOR_REPO is not set, Perses CRDs will not be installed." + return + fi + if [[ ! -d "${PATH_TO_PERSES_OPERATOR_REPO}" ]]; then + echo "The path specified by PATH_TO_PERSES_OPERATOR_REPO (${PATH_TO_PERSES_OPERATOR_REPO}) does not exist or is not a directory. Perses CRDs will not be installed." + return + fi + + operator_dir=$(pwd) + kustomize_bin="$operator_dir/bin/kustomize" + if [[ ! -e "$kustomize_bin" ]]; then + echo "The kustomize executable is not installed at $kustomize_bin. Downloading it now." 
+ make kustomize + fi + + pushd "${PATH_TO_PERSES_OPERATOR_REPO}" > /dev/null + "$kustomize_bin" build config/crd | kubectl apply -f - + popd > /dev/null +} + diff --git a/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.secret.yaml b/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.secret.yaml index be2eea58..a94c8c64 100644 --- a/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.secret.yaml +++ b/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.secret.yaml @@ -8,3 +8,4 @@ spec: endpoint: ingress.eu-west-1.aws.dash0-dev.com:4317 authorization: secretRef: {} + apiEndpoint: https://api.eu-west-1.aws.dash0-dev.com diff --git a/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template b/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template index 7d24037f..209d8a97 100644 --- a/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template +++ b/test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml.template @@ -8,3 +8,4 @@ spec: endpoint: ingress.eu-west-1.aws.dash0-dev.com:4317 authorization: token: "$DASH0_AUTHORIZATION_TOKEN" + apiEndpoint: https://api.eu-west-1.aws.dash0-dev.com \ No newline at end of file diff --git a/test-resources/customresources/persesdashboard/persesdashboard.yaml b/test-resources/customresources/persesdashboard/persesdashboard.yaml new file mode 100644 index 00000000..bbeb3fae --- /dev/null +++ b/test-resources/customresources/persesdashboard/persesdashboard.yaml @@ -0,0 +1,565 @@ +apiVersion: perses.dev/v1alpha1 +kind: PersesDashboard +metadata: + name: perses-dashboard-test + labels: + app.kubernetes.io/name: perses-dashboard + app.kubernetes.io/instance: perses-dashboard-sample + app.kubernetes.io/part-of: perses-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: perses-operator +spec: + duration: 5m + datasources: + PrometheusLocal: + default: false + plugin: + kind: PrometheusDatasource + spec: + proxy: + kind: HTTPProxy + spec: + url: http://localhost:9090 + variables: + - kind: ListVariable + spec: + name: job + allowMultiple: false + allowAllValue: false + plugin: + kind: PrometheusLabelValuesVariable + spec: + labelName: job + - kind: ListVariable + spec: + name: instance + allowMultiple: false + allowAllValue: false + plugin: + kind: PrometheusLabelValuesVariable + spec: + labelName: instance + matchers: + - up{job=~"$job"} + - kind: ListVariable + spec: + name: interval + plugin: + kind: StaticListVariable + spec: + values: + - 1m + - 5m + - kind: TextVariable + spec: + name: text + value: test + constant: true + panels: + defaultTimeSeriesChart: + kind: Panel + spec: + display: + name: Default Time Series Panel + plugin: + kind: TimeSeriesChart + spec: {} + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: up + seriesTest: + kind: Panel + spec: + display: + name: "~130 Series" + description: This is a line chart + plugin: + kind: TimeSeriesChart + spec: + yAxis: + format: + unit: bytes + shortValues: true + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: rate(caddy_http_response_duration_seconds_sum[$interval]) + basicEx: + kind: Panel + spec: + display: + name: Single Query + plugin: + kind: 
TimeSeriesChart + spec: + yAxis: + format: + unit: decimal + legend: + position: right + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: Node memory - {{device}} {{instance}} + query: + 1 - node_filesystem_free_bytes{job='$job',instance=~'$instance',fstype!="rootfs",mountpoint!~"/(run|var).*",mountpoint!=""} + / node_filesystem_size_bytes{job='$job',instance=~'$instance'} + legendEx: + kind: Panel + spec: + display: + name: Legend Example + plugin: + kind: TimeSeriesChart + spec: + legend: + position: bottom + yAxis: + show: true + format: + unit: bytes + shortValues: true + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: Node memory total + query: + node_memory_MemTotal_bytes{job='$job',instance=~'$instance'} + - node_memory_MemFree_bytes{job='$job',instance=~'$instance'} - + node_memory_Buffers_bytes{job='$job',instance=~'$instance'} - node_memory_Cached_bytes{job='$job',instance=~'$instance'} + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: Memory (buffers) - {{instance}} + query: node_memory_Buffers_bytes{job='$job',instance=~'$instance'} + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: Cached Bytes + query: node_memory_Cached_bytes{job='$job',instance=~'$instance'} + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: MemFree Bytes + query: node_memory_MemFree_bytes{job='$job',instance=~'$instance'} + testNodeQuery: + kind: Panel + spec: + display: + name: Test Query + description: Description text + plugin: + kind: TimeSeriesChart + spec: + yAxis: + format: + unit: decimal + decimalPlaces: 2 + legend: + position: right + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: node_load15{instance=~"(demo.do.prometheus.io:9100)",job='$job'} + seriesNameFormat: Test {{job}} {{instance}} + testQueryAlt: + kind: Panel + spec: + display: + name: Test Query Alt + description: Description text + plugin: + kind: TimeSeriesChart + spec: + legend: + position: right + yAxis: + format: + unit: percent-decimal + decimalPlaces: 1 + thresholds: + steps: + - value: 0.4 + name: "Alert: Warning condition example" + - value: 0.75 + name: "Alert: Critical condition example" + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: node_load1{instance=~"(demo.do.prometheus.io:9100)",job='$job'} + cpuLine: + kind: Panel + spec: + display: + name: CPU - Line (Multi Series) + description: This is a line chart test + plugin: + kind: TimeSeriesChart + spec: + yAxis: + show: false + label: CPU Label + format: + unit: percent-decimal + decimalPlaces: 0 + legend: + position: bottom + thresholds: + steps: + - value: 0.2 + - value: 0.35 + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: "{{mode}} mode - {{job}} {{instance}}" + query: avg without (cpu)(rate(node_cpu_seconds_total{job='$job',instance=~'$instance',mode!="nice",mode!="steal",mode!="irq"}[$interval])) + cpuGauge: + kind: Panel + spec: + display: + name: CPU - Gauge (Multi Series) + description: This is a gauge chart test + plugin: + kind: GaugeChart + spec: + calculation: last-number + format: + unit: percent-decimal + thresholds: + steps: + - value: 0.2 + - value: 0.35 + queries: + - kind: 
TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + seriesNameFormat: "{{mode}} mode - {{job}} {{instance}}" + query: avg without (cpu)(rate(node_cpu_seconds_total{job='$job',instance=~'$instance',mode!="nice",mode!="steal",mode!="irq"}[$interval])) + statSm: + kind: Panel + spec: + display: + name: Stat Sm + plugin: + kind: StatChart + spec: + calculation: mean + format: + unit: decimal + decimalPlaces: 1 + shortValues: true + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: node_time_seconds{job='$job',instance=~'$instance'} - node_boot_time_seconds{job='$job',instance=~'$instance'} + gaugeRAM: + kind: Panel + spec: + display: + name: RAM Used + description: This is a stat chart + plugin: + kind: GaugeChart + spec: + calculation: last-number + format: + unit: percent + thresholds: + steps: + - value: 85 + - value: 95 + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: + 100 - ((node_memory_MemAvailable_bytes{job='$job',instance=~'$instance'} + * 100) / node_memory_MemTotal_bytes{job='$job',instance=~'$instance'}) + statRAM: + kind: Panel + spec: + display: + name: RAM Used + description: This is a stat chart + plugin: + kind: StatChart + spec: + calculation: last-number + format: + unit: percent + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: + 100 - ((node_memory_MemAvailable_bytes{job='$job',instance=~'$instance'} + * 100) / node_memory_MemTotal_bytes{job='$job',instance=~'$instance'}) + statTotalRAM: + kind: Panel + spec: + display: + name: RAM Total + description: This is a stat chart + plugin: + kind: StatChart + spec: + calculation: last-number + format: + unit: bytes + decimalPlaces: 1 + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: node_memory_MemTotal_bytes{job='$job',instance=~'$instance'} + statMd: + kind: Panel + spec: + display: + name: Stat Md + plugin: + kind: StatChart + spec: + calculation: sum + format: + unit: decimal + decimalPlaces: 2 + shortValues: true + sparkline: + color: "#e65013" + width: 1.5 + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: + avg(node_load15{job='node',instance=~'$instance'}) / count(count(node_cpu_seconds_total{job='node',instance=~'$instance'}) + by (cpu)) * 100 + statLg: + kind: Panel + spec: + display: + name: Stat Lg + description: This is a stat chart + plugin: + kind: StatChart + spec: + calculation: mean + format: + unit: percent + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: + (((count(count(node_cpu_seconds_total{job='$job',instance=~'$instance'}) + by (cpu))) - avg(sum by (mode)(rate(node_cpu_seconds_total{mode="idle",job='$job',instance=~'$instance'}[$interval])))) + * 100) / count(count(node_cpu_seconds_total{job='$job',instance=~'$instance'}) + by (cpu)) + gaugeEx: + kind: Panel + spec: + display: + name: Gauge Ex + description: This is a gauge chart + plugin: + kind: GaugeChart + spec: + calculation: last-number + format: + unit: percent + thresholds: + steps: + - value: 85 + - value: 95 + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: + (((count(count(node_cpu_seconds_total{job='$job',instance=~'$instance'}) + by (cpu))) - avg(sum by 
(mode)(rate(node_cpu_seconds_total{mode="idle",job='$job',instance=~'$instance'}[$interval])))) + * 100) / count(count(node_cpu_seconds_total{job='$job',instance=~'$instance'}) + by (cpu)) + gaugeAltEx: + kind: Panel + spec: + display: + name: Gauge Alt Ex + description: GaugeChart description text + plugin: + kind: GaugeChart + spec: + calculation: last-number + format: + unit: percent-decimal + decimalPlaces: 1 + thresholds: + steps: + - value: 0.5 + name: "Alert: Warning condition example" + - value: 0.75 + name: "Alert: Critical condition example" + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: node_load15{instance=~'$instance',job='$job'} + gaugeFormatTest: + kind: Panel + spec: + display: + name: Gauge Format Test + plugin: + kind: GaugeChart + spec: + calculation: last-number + format: + unit: bytes + max: 95000000 + thresholds: + steps: + - value: 71000000 + - value: 82000000 + queries: + - kind: TimeSeriesQuery + spec: + plugin: + kind: PrometheusTimeSeriesQuery + spec: + query: node_time_seconds{job='$job',instance=~'$instance'} - node_boot_time_seconds{job='$job',instance=~'$instance'} + layouts: + - kind: Grid + spec: + display: + title: Row 1 + collapse: + open: true + items: + - x: 0 + "y": 0 + width: 2 + height: 3 + content: + "$ref": "#/spec/panels/statRAM" + - x: 0 + "y": 4 + width: 2 + height: 3 + content: + "$ref": "#/spec/panels/statTotalRAM" + - x: 2 + "y": 0 + width: 4 + height: 6 + content: + "$ref": "#/spec/panels/statMd" + - x: 6 + "y": 0 + width: 10 + height: 6 + content: + "$ref": "#/spec/panels/statLg" + - x: 16 + "y": 0 + width: 4 + height: 6 + content: + "$ref": "#/spec/panels/gaugeFormatTest" + - x: 20 + "y": 0 + width: 4 + height: 6 + content: + "$ref": "#/spec/panels/gaugeRAM" + - kind: Grid + spec: + display: + title: Row 2 + collapse: + open: true + items: + - x: 0 + "y": 0 + width: 12 + height: 6 + content: + "$ref": "#/spec/panels/legendEx" + - x: 12 + "y": 0 + width: 12 + height: 6 + content: + "$ref": "#/spec/panels/basicEx" + - kind: Grid + spec: + display: + title: Row 3 + collapse: + open: false + items: + - x: 0 + "y": 0 + width: 24 + height: 6 + content: + "$ref": "#/spec/panels/cpuGauge" + - x: 0 + "y": 6 + width: 12 + height: 8 + content: + "$ref": "#/spec/panels/cpuLine" + - x: 12 + "y": 0 + width: 12 + height: 8 + content: + "$ref": "#/spec/panels/defaultTimeSeriesChart" diff --git a/test/util/constants.go b/test/util/constants.go index 9b1e483b..c3743932 100644 --- a/test/util/constants.go +++ b/test/util/constants.go @@ -17,7 +17,7 @@ import ( const ( TestNamespaceName = "test-namespace" - Dash0OperatorNamespace = "dash0-system" + OperatorNamespace = "dash0-system" OTelCollectorNamePrefixTest = "unit-test" CronJobNamePrefix = "cronjob" @@ -65,7 +65,7 @@ var ( DeploymentSelfReference = &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ - Namespace: Dash0OperatorNamespace, + Namespace: OperatorNamespace, Name: "unit-test-dash0-operator-controller", UID: "2f009c75-d69f-4b02-9d9d-fa17e76f5c1d", }, diff --git a/test/util/matchers.go b/test/util/matchers.go index 103c003f..cb8247d2 100644 --- a/test/util/matchers.go +++ b/test/util/matchers.go @@ -49,6 +49,56 @@ func (matcher *MatchEnvVarMatcher) NegatedFailureMessage(actual interface{}) (me return format.Message(actual, fmt.Sprintf("not %s", matcher.message())) } +func MatchEnvVarValueFrom(name string, secretName string, secretKey string, args ...interface{}) gomega.OmegaMatcher { + return &MatchEnvVarValueFromSecretMatcher{ + Name: name, 
+ SecretName: secretName, + SecretKey: secretKey, + Args: args, + } +} + +type MatchEnvVarValueFromSecretMatcher struct { + Name string + SecretName string + SecretKey string + Args []interface{} +} + +func (matcher *MatchEnvVarValueFromSecretMatcher) Match(actual interface{}) (success bool, err error) { + envVar, ok := actual.(corev1.EnvVar) + if !ok { + return false, + fmt.Errorf( + "MatchEnvVarValueFromSecretMatcher matcher requires a corev1.EnvVar. Got:\n%s", + format.Object(actual, 1), + ) + } + return matcher.Name == envVar.Name && + envVar.ValueFrom != nil && + envVar.ValueFrom.SecretKeyRef != nil && + matcher.SecretName == envVar.ValueFrom.SecretKeyRef.Name && + matcher.SecretKey == envVar.ValueFrom.SecretKeyRef.Key, + nil +} + +func (matcher *MatchEnvVarValueFromSecretMatcher) FailureMessage(actual interface{}) (message string) { + return format.Message(actual, matcher.message()) +} + +func (matcher *MatchEnvVarValueFromSecretMatcher) message() string { + return fmt.Sprintf( + "to contain env var with name %s and value from secret %s/%s", + matcher.Name, + matcher.SecretName, + matcher.SecretKey, + ) +} + +func (matcher *MatchEnvVarValueFromSecretMatcher) NegatedFailureMessage(actual interface{}) (message string) { + return format.Message(actual, fmt.Sprintf("not %s", matcher.message())) +} + func MatchVolumeMount(name string, mountPath string, args ...interface{}) gomega.OmegaMatcher { return &MatchVolumeMountMatcher{ Name: name, diff --git a/test/util/operator_resource.go b/test/util/operator_resource.go index 9b457f2c..b34e2b33 100644 --- a/test/util/operator_resource.go +++ b/test/util/operator_resource.go @@ -20,7 +20,7 @@ import ( ) const ( - Dash0OperatorDeploymentName = "controller-deployment" + OperatorDeploymentName = "controller-deployment" OperatorConfigurationResourceName = "dash0-operator-configuration-test" ) @@ -133,7 +133,7 @@ func LoadOperatorDeploymentOrFail( deployment := &appsv1.Deployment{} if err := k8sClient.Get( ctx, - types.NamespacedName{Namespace: Dash0OperatorNamespace, Name: Dash0OperatorDeploymentName}, + types.NamespacedName{Namespace: OperatorNamespace, Name: OperatorDeploymentName}, deployment, ); err != nil { g.Expect(err).NotTo(HaveOccurred()) diff --git a/test/util/resources.go b/test/util/resources.go index fd00424f..1d33e1a9 100644 --- a/test/util/resources.go +++ b/test/util/resources.go @@ -63,11 +63,11 @@ func EnsureTestNamespaceExists( return EnsureNamespaceExists(ctx, k8sClient, TestNamespaceName) } -func EnsureDash0OperatorNamespaceExists( +func EnsureOperatorNamespaceExists( ctx context.Context, k8sClient client.Client, ) *corev1.Namespace { - return EnsureNamespaceExists(ctx, k8sClient, Dash0OperatorNamespace) + return EnsureNamespaceExists(ctx, k8sClient, OperatorNamespace) } func EnsureNamespaceExists( @@ -701,7 +701,7 @@ func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1 Value: "value", }, { - // Dash0 does not support injecting into containers that already have NODE_OPTIONS set via a + // The operator does not support injecting into containers that already have NODE_OPTIONS set via a // ValueFrom clause, thus this env var will not be modified. Name: "NODE_OPTIONS", ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: "metadata.namespace"}},