From 9a27da5a353e7c55251d602b845efc2440d5fa4b Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Tue, 17 Sep 2024 18:14:43 +0200 Subject: [PATCH 1/3] feat(chart): create operator configuration resource via helm chart --- Makefile | 2 +- api/dash0monitoring/v1alpha1/types_common.go | 9 +- cmd/main.go | 85 +++++-- .../operator.dash0.com_dash0monitorings.yaml | 9 +- ...dash0.com_dash0operatorconfigurations.yaml | 9 +- config/rbac/role.yaml | 6 + helm-chart/dash0-operator/README.md | 3 +- .../templates/operator/cluster-roles.yaml | 8 + .../operator/deployment-and-webhooks.yaml | 22 ++ .../__snapshot__/cluster-roles_test.yaml.snap | 6 + .../deployment-and-webhooks_test.yaml | 21 ++ helm-chart/dash0-operator/values.yaml | 53 ++++- .../backendconnection_manager_suite_test.go | 7 - .../backendconnection_manager_test.go | 12 +- .../otelcolresources/desired_state_test.go | 2 +- .../otelcol_resources_test.go | 2 +- .../dash0/controller/controller_suite_test.go | 7 - internal/dash0/controller/dash0_controller.go | 1 + .../dash0/controller/dash0_controller_test.go | 20 +- .../instrumentation_suite_test.go | 7 - .../instrumentation/instrumenter_test.go | 2 +- .../operator_pre_delete_handler.go | 2 +- .../operator_pre_delete_handler_test.go | 6 +- .../pre_delete_suite_test.go} | 12 +- .../auto_operator_configuration_handler.go | 221 ++++++++++++++++++ ...uto_operator_configuration_handler_test.go | 147 ++++++++++++ internal/dash0/startup/startup_suite_test.go | 77 ++++++ .../webhooks/attach_dangling_events_test.go | 4 +- .../webhooks/instrumentation_webhook_test.go | 10 +- ...r_configuration_validation_webhook_test.go | 2 +- internal/dash0/webhooks/webhook_suite_test.go | 8 - test-resources/bin/test-cleanup.sh | 1 + .../bin/test-scenario-01-aum-operator-cr.sh | 7 +- .../bin/test-scenario-02-operator-cr-aum.sh | 9 +- test-resources/bin/util | 19 ++ test/e2e/operator.go | 4 - test/util/constants.go | 20 +- test/util/monitoring_resource.go | 6 +- test/util/operator_resource.go | 9 +- 39 files changed, 723 insertions(+), 134 deletions(-) rename internal/dash0/{removal => predelete}/operator_pre_delete_handler.go (99%) rename internal/dash0/{removal => predelete}/operator_pre_delete_handler_test.go (97%) rename internal/dash0/{removal/removal_suite_test.go => predelete/pre_delete_suite_test.go} (87%) create mode 100644 internal/dash0/startup/auto_operator_configuration_handler.go create mode 100644 internal/dash0/startup/auto_operator_configuration_handler_test.go create mode 100644 internal/dash0/startup/startup_suite_test.go diff --git a/Makefile b/Makefile index 21ce344c..427b8782 100644 --- a/Makefile +++ b/Makefile @@ -165,7 +165,7 @@ golangci-lint: lint: golangci-lint ## Run golangci-lint linter & yamllint @echo -------------------------------- $(GOLANGCI_LINT) run - helm lint helm-chart/dash0-operator --set operator.disableSecretCheck=true --set operator.disableOtlpEndpointCheck=true + helm lint helm-chart/dash0-operator .PHONY: lint-fix lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes diff --git a/api/dash0monitoring/v1alpha1/types_common.go b/api/dash0monitoring/v1alpha1/types_common.go index 1cb8d535..ee108cfb 100644 --- a/api/dash0monitoring/v1alpha1/types_common.go +++ b/api/dash0monitoring/v1alpha1/types_common.go @@ -36,8 +36,8 @@ type Export struct { type Dash0Configuration struct { // The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value // needs to be the OTLP/gRPC endpoint of your Dash0 organization. 
The correct OTLP/gRPC endpoint can be copied fom
-	// https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in
-	// `dash0.com:4317`.
+	// https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with
+	// `ingress.` and end in `dash0.com:4317`.
 	//
 	// +kubebuilder:validation:Required
 	Endpoint string `json:"endpoint"`
@@ -61,14 +61,15 @@ type Dash0Configuration struct {
 type Authorization struct {
 	// The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has
 	// to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization
-	// token for your Dash0 organization can be copied from https://app.dash0.com/settings.
+	// token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings ->
+	// "Auth Tokens".
 	//
 	// +kubebuilder:validation:Optional
 	Token *string `json:"token"` // either token or secret ref, with token taking precedence
 
 	// A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is
 	// ignored if the token property is set. The authorization token for your Dash0 organization can be copied from
-	// https://app.dash0.com/settings.
+	// https://app.dash0.com -> organization settings -> "Auth Tokens".
 	//
 	// +kubebuilder:validation:Optional
 	SecretRef *SecretRef `json:"secretRef"`
diff --git a/cmd/main.go b/cmd/main.go
index f8e79e94..a539e488 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -39,8 +39,9 @@ import (
 	"github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources"
 	"github.com/dash0hq/dash0-operator/internal/dash0/controller"
 	"github.com/dash0hq/dash0-operator/internal/dash0/instrumentation"
-	"github.com/dash0hq/dash0-operator/internal/dash0/removal"
+	"github.com/dash0hq/dash0-operator/internal/dash0/predelete"
 	"github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring"
+	"github.com/dash0hq/dash0-operator/internal/dash0/startup"
 	"github.com/dash0hq/dash0-operator/internal/dash0/util"
 	"github.com/dash0hq/dash0-operator/internal/dash0/webhooks"
 	//+kubebuilder:scaffold:imports
@@ -105,25 +106,40 @@ func init() {
 func main() {
 	ctx := context.Background()
 
-	var uninstrumentAll bool
+	var operatorConfigurationEndpoint string
+	var operatorConfigurationToken string
+	var operatorConfigurationSecretRefName string
+	var operatorConfigurationSecretRefKey string
+	var isUninstrumentAll bool
 	var metricsAddr string
 	var enableLeaderElection bool
 	var probeAddr string
 	var secureMetrics bool
 	var enableHTTP2 bool
-	flag.BoolVar(&uninstrumentAll, "uninstrument-all", false,
-		"If set, the process will remove all Dash0 monitoring resources from all namespaces in the cluster. This "+
-			"will trigger the Dash0 monitoring resources' finalizers in each namespace, which in turn will revert the "+
-			"instrumentation of all workloads in all namespaces.")
+
+	flag.BoolVar(&isUninstrumentAll, "uninstrument-all", false,
+		"If set, the process will remove all Dash0 monitoring resources from all namespaces in the cluster, then "+
+			"exit. 
This will trigger the Dash0 monitoring resources' finalizers in each namespace, which in turn will "+
			"revert the instrumentation of all workloads in all namespaces.")
+	flag.StringVar(&operatorConfigurationEndpoint, "operator-configuration-endpoint", "",
+		"The Dash0 endpoint gRPC URL for creating an operator configuration resource.")
+	flag.StringVar(&operatorConfigurationToken, "operator-configuration-token", "",
+		"The Dash0 auth token for creating an operator configuration resource.")
+	flag.StringVar(&operatorConfigurationSecretRefName, "operator-configuration-secret-ref-name", "",
+		"The name of an existing Kubernetes secret containing the Dash0 auth token, used to create an operator "+
+			"configuration resource.")
+	flag.StringVar(&operatorConfigurationSecretRefKey, "operator-configuration-secret-ref-key", "",
+		"The key in an existing Kubernetes secret containing the Dash0 auth token, used to create an operator "+
+			"configuration resource.")
 	flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 	flag.BoolVar(&enableLeaderElection, "leader-elect", false,
 		"Enable leader election for controller manager. "+
 			"Enabling this will ensure there is only one active controller manager.")
 	flag.BoolVar(&secureMetrics, "metrics-secure", false,
-		"If set, the metrics endpoint is served securely")
+		"If set, the metrics endpoint is served securely.")
 	flag.BoolVar(&enableHTTP2, "enable-http2", false,
-		"If set, HTTP/2 will be enabled for the metrics and webhook servers")
+		"If set, HTTP/2 will be enabled for the metrics and webhook servers.")
 
 	var developmentMode bool
 	developmentModeRaw, isSet := os.LookupEnv(developmentModeEnvVarName)
@@ -143,8 +159,8 @@
 
 	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
 
-	if uninstrumentAll {
-		if err := deleteDash0MonitoringResourcesInAllNamespaces(&setupLog); err != nil {
+	if isUninstrumentAll {
+		if err := deleteMonitoringResourcesInAllNamespaces(&setupLog); err != nil {
 			setupLog.Error(err, "deleting the Dash0 monitoring resources in all namespaces failed")
 			os.Exit(1)
 		}
@@ -196,6 +212,18 @@
 		map[string]string{semconv.AttributeK8SDeploymentUID: string(deploymentSelfReference.UID)},
 	)
 
+	var operatorConfiguration *startup.OperatorConfigurationValues
+	if len(operatorConfigurationEndpoint) > 0 {
+		operatorConfiguration = &startup.OperatorConfigurationValues{
+			Endpoint: operatorConfigurationEndpoint,
+			Token:    operatorConfigurationToken,
+			SecretRef: startup.SecretRef{
+				Name: operatorConfigurationSecretRefName,
+				Key:  operatorConfigurationSecretRefKey,
+			},
+		}
+	}
+
 	if err = startOperatorManager(
 		ctx,
 		metricsAddr,
@@ -204,6 +232,7 @@
 		webhookServer,
 		probeAddr,
 		enableLeaderElection,
+		operatorConfiguration,
 		developmentMode,
 	); err != nil {
 		setupLog.Error(err, "The Dash0 operator manager process failed to start.")
@@ -219,6 +248,7 @@ func startOperatorManager(
 	webhookServer k8swebhook.Server,
 	probeAddr string,
 	enableLeaderElection bool,
+	operatorConfiguration *startup.OperatorConfigurationValues,
 	developmentMode bool,
 ) error {
 	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
@@ -285,7 +315,7 @@ func startOperatorManager(
 		developmentMode,
 	)
 
-	err = startDash0Controllers(ctx, mgr, clientset, developmentMode)
+	err = startDash0Controllers(ctx, mgr, clientset, operatorConfiguration, developmentMode)
 	if err != nil {
 		return err
 	}
@@ -396,6 +426,7 @@ 
func startDash0Controllers( ctx context.Context, mgr manager.Manager, clientset *kubernetes.Clientset, + operatorConfiguration *startup.OperatorConfigurationValues, developmentMode bool, ) error { oTelCollectorBaseUrl := @@ -419,8 +450,10 @@ func startDash0Controllers( ctx, clientset, mgr.GetEventRecorderFor("dash0-startup-tasks"), + operatorConfiguration, images, oTelCollectorBaseUrl, + &setupLog, ) logCurrentSelfMonitoringSettings(deploymentSelfReference) @@ -555,9 +588,17 @@ func executeStartupTasks( ctx context.Context, clientset *kubernetes.Clientset, eventRecorder record.EventRecorder, + operatorConfiguration *startup.OperatorConfigurationValues, images util.Images, oTelCollectorBaseUrl string, + logger *logr.Logger, ) { + createOperatorConfiguration( + ctx, + startupTasksK8sClient, + operatorConfiguration, + logger, + ) instrumentAtStartup( ctx, startupTasksK8sClient, @@ -618,8 +659,26 @@ func logCurrentSelfMonitoringSettings(deploymentSelfReference *appsv1.Deployment } } -func deleteDash0MonitoringResourcesInAllNamespaces(logger *logr.Logger) error { - handler, err := removal.NewOperatorPreDeleteHandler() +func createOperatorConfiguration( + ctx context.Context, + k8sClient client.Client, + operatorConfiguration *startup.OperatorConfigurationValues, + logger *logr.Logger, +) { + if operatorConfiguration != nil { + handler := startup.AutoOperatorConfigurationResourceHandler{ + Client: k8sClient, + OperatorNamespace: envVars.operatorNamespace, + NamePrefix: envVars.oTelCollectorNamePrefix, + } + if err := handler.CreateOperatorConfigurationResource(ctx, operatorConfiguration, logger); err != nil { + logger.Error(err, "Failed to create the requested Dash0 operator configuration resource.") + } + } +} + +func deleteMonitoringResourcesInAllNamespaces(logger *logr.Logger) error { + handler, err := predelete.NewOperatorPreDeleteHandler() if err != nil { logger.Error(err, "Failed to create the OperatorPreDeleteHandler.") return err diff --git a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml index 8b927444..ae4b5604 100644 --- a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml +++ b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml @@ -69,7 +69,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +89,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +103,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. 
+ https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. type: string required: - authorization diff --git a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml index fbfddb4f..56fcfa3c 100644 --- a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml +++ b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml @@ -69,7 +69,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +89,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +103,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. type: string required: - authorization diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c822fe58..9d798ebc 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -28,6 +28,12 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get - apiGroups: - "" resources: diff --git a/helm-chart/dash0-operator/README.md b/helm-chart/dash0-operator/README.md index 33163c13..0b290b13 100644 --- a/helm-chart/dash0-operator/README.md +++ b/helm-chart/dash0-operator/README.md @@ -202,7 +202,8 @@ spec: If you want to provide the Dash0 authorization token via a Kubernetes secret instead of providing the token as a string, create the secret in the namespace where the Dash0 operator is installed. If you followed the guide above, the name of that namespace is `dash0-system`. -The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> "Auth Tokens". +The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings +-> "Auth Tokens". You can freely choose the name of the secret and the key of the token within the secret. 
Create the secret by using the following command:
diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml
index bc36910b..4d51afcf 100644
--- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml
+++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml
@@ -38,6 +38,14 @@ rules:
   - update
   - watch
 
+# Permissions required to create a Dash0 operator configuration resource
+- apiGroups:
+  - ""
+  resources:
+  - endpoints
+  verbs:
+  - get
+
 # Permissions required to queue events to report about the operator's actions, and to attach dangling events to their
 # respective involved objects.
 - apiGroups:
diff --git a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml
index bf7d328c..ecf21e58 100644
--- a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml
+++ b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml
@@ -78,6 +78,28 @@ spec:
             - --health-probe-bind-address=:8081
             - --metrics-bind-address=127.0.0.1:8080
             - --leader-elect
+{{- if .Values.operator.dash0Backend.enabled }}
+{{- if not .Values.operator.dash0Backend.endpoint }}
+{{- fail "Error: operator.dash0Backend.enabled is set to true, but you did not provide a value for operator.dash0Backend.endpoint. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." -}}
+{{- end }}
+            - --operator-configuration-endpoint={{ .Values.operator.dash0Backend.endpoint }}
+{{- if .Values.operator.dash0Backend.token }}
+            - --operator-configuration-token={{ .Values.operator.dash0Backend.token }}
+{{- else if (and .Values.operator.dash0Backend.secretRef.name .Values.operator.dash0Backend.secretRef.key) }}
+{{- $secret := lookup "v1" "Secret" .Release.Namespace .Values.operator.dash0Backend.secretRef.name -}}
+{{- if $secret -}}
+{{- if not (index $secret.data .Values.operator.dash0Backend.secretRef.key) -}}
+{{- fail (printf "Error: There is a secret named \"%s\" in the target namespace \"%s\", but it does not have the required key \"%s\". Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." .Values.operator.dash0Backend.secretRef.name .Release.Namespace .Values.operator.dash0Backend.secretRef.key) -}}
+{{- end -}}
+{{- else -}}
+{{- fail (printf "Error: There is no secret named \"%s\" in the target namespace \"%s\". Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." .Values.operator.dash0Backend.secretRef.name .Release.Namespace) -}}
+{{- end }}
+            - --operator-configuration-secret-ref-name={{ .Values.operator.dash0Backend.secretRef.name }}
+            - --operator-configuration-secret-ref-key={{ .Values.operator.dash0Backend.secretRef.key }}
+{{- else }}
+{{- fail "Error: operator.dash0Backend.enabled is set to true, but neither operator.dash0Backend.token nor operator.dash0Backend.secretRef.name & operator.dash0Backend.secretRef.key have been provided. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." 
-}}
+{{- end }}
+{{- end }}
           env:
             - name: DASH0_OPERATOR_NAMESPACE
               valueFrom:
diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap
index 4cbb6215..c80d9dda 100644
--- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap
+++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap
@@ -37,6 +37,12 @@ cluster roles should match snapshot:
       - patch
       - update
       - watch
+  - apiGroups:
+      - ""
+    resources:
+      - endpoints
+    verbs:
+      - get
   - apiGroups:
       - ""
     resources:
diff --git a/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml b/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml
index 74da1f7c..cf8f4b98 100644
--- a/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml
+++ b/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml
@@ -235,6 +235,27 @@ tests:
           path: spec.template.spec.containers[0].resources.requests.memory
           value: 32Mi
 
+  - it: should add args for creating an operator configuration resource with a token to deployment
+    documentSelector:
+      path: metadata.name
+      value: dash0-operator-controller
+    set:
+      operator:
+        dash0Backend:
+          enabled: true
+          endpoint: https://ingress.dash0.com
+          token: "very-secret-dash0-auth-token"
+    asserts:
+      - equal:
+          path: spec.template.spec.containers[0].args[3]
+          value: --operator-configuration-endpoint=https://ingress.dash0.com
+      - equal:
+          path: spec.template.spec.containers[0].args[4]
+          value: --operator-configuration-token=very-secret-dash0-auth-token
+  # Note: We deliberately do not have a test for the operator.dash0Backend.secretRef variant, since this would trigger
+  # a check whether the secret actually exists in the cluster, which of course would fail when running helm chart unit
+  # tests.
+
   - it: should render the "dash0.com/cert-digest" label
     documentSelector:
       path: metadata.name
diff --git a/helm-chart/dash0-operator/values.yaml b/helm-chart/dash0-operator/values.yaml
index 35cc3acd..dd78fdf3 100644
--- a/helm-chart/dash0-operator/values.yaml
+++ b/helm-chart/dash0-operator/values.yaml
@@ -3,6 +3,51 @@
 
 # settings for the operator/controller
 operator:
+
+  # Use the operator.dash0Backend settings to configure the connection to the Dash0 backend; telemetry will be sent to
+  # the configured Dash0 backend by default. Under the hood, this will create a Dash0OperatorConfiguration resource
+  # right away when starting the operator. If left empty, you can always create a Dash0OperatorConfiguration resource
+  # manually later.
+  dash0Backend:
+    # Set this to true to enable the creation of a Dash0OperatorConfiguration resource at startup. If a
+    # Dash0OperatorConfiguration already exists in the cluster, no action will be taken. Note that if this is set to
+    # true, you will also need to provide a valid endpoint (operator.dash0Backend.endpoint), and either an auth
+    # token (operator.dash0Backend.token) or a reference to a Kubernetes secret containing that token
+    # (operator.dash0Backend.secretRef).
+    enabled: false
+
+    # The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory if
+    # operator.dash0Backend.enabled is true, otherwise it will be ignored. The value needs to be the OTLP/gRPC endpoint
+    # of your Dash0 organization. 
The correct OTLP/gRPC endpoint can be copied from https://app.dash0.com -> organization
+    # settings -> "Endpoints". The correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`.
+    endpoint:
+
+    # The Dash0 authorization token. This property is optional, but either this property or the secretRef configuration
+    # has to be provided if operator.dash0Backend.enabled is true. If operator.dash0Backend.enabled is false, this
+    # property will be ignored.
+    # If both token and secretRef are provided, the token will be used and secretRef will be ignored. The authorization
+    # token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings ->
+    # "Auth Tokens".
+    token:
+
+    # A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is
+    # ignored if either operator.dash0Backend.enabled is false or operator.dash0Backend.token is set. The authorization
+    # token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings ->
+    # "Auth Tokens".
+    secretRef:
+      # The name of the secret containing the Dash0 authorization token. Example: Assume you have created the Kubernetes
+      # secret with the following command:
+      #   kubectl create secret generic \
+      #     dash0-authorization-secret \
+      #     --namespace dash0-system \
+      #     --from-literal=token=auth_...your-token-here...
+      #
+      # Then you would set the property to "dash0-authorization-secret".
+      name:
+      # The key of the value which contains the Dash0 authorization token. Assuming you have created the Kubernetes
+      # secret with the command above (see property "name"), then you would set the property to "token".
+      key:
+
   # number of replica for the controller manager deployment
   replicaCount: 1
 
@@ -135,11 +180,3 @@ operator:
   # If set to true, instructs the logger (Zap) to use a Zap development config (stacktraces on warnings, no sampling),
   # otherwise a Zap production config will be used (stacktraces on errors, sampling).
   developmentMode: false
-
-  # If set to true, the operator Helm chart will skip the check for the Dash0 authorization secret. This should only
-  # be done for testing purposes.
-  disableSecretCheck: false
-
-  # If set to true, the operator Helm chart will skip the check for the OTLP endpoing setting. This should only be done
-  # for testing purposes.
-  disableOtlpEndpointCheck: false
diff --git a/internal/backendconnection/backendconnection_manager_suite_test.go b/internal/backendconnection/backendconnection_manager_suite_test.go
index 8921445b..13604a0a 100644
--- a/internal/backendconnection/backendconnection_manager_suite_test.go
+++ b/internal/backendconnection/backendconnection_manager_suite_test.go
@@ -42,18 +42,11 @@ var _ = BeforeSuite(func() {
 	testEnv = &envtest.Environment{
 		CRDDirectoryPaths:     []string{filepath.Join("..", "..", "config", "crd", "bases")},
 		ErrorIfCRDPathMissing: true,
-
-		// The BinaryAssetsDirectory is only required if you want to run the tests directly
-		// without call the makefile target test. If not informed it will look for the
-		// default path defined in controller-runtime which is /usr/local/kubebuilder/.
-		// Note that you must have the required binaries setup under the bin directory to perform
-		// the tests directly. When we run make test it will be setup and used automatically.
 		BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s",
 			fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)),
 	}
 
 	var err error
-	// cfg is defined in this file globally.
 	
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/internal/backendconnection/backendconnection_manager_test.go b/internal/backendconnection/backendconnection_manager_test.go index 4c53877b..e9a008a3 100644 --- a/internal/backendconnection/backendconnection_manager_test.go +++ b/internal/backendconnection/backendconnection_manager_test.go @@ -56,7 +56,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } manager = &BackendConnectionManager{ Client: k8sClient, @@ -240,15 +240,15 @@ var _ = Describe("The backend connection manager", Ordered, func() { Describe("when cleaning up OpenTelemetry collector resources when the resource is deleted", func() { It("should not delete the collector if there are still Dash0 monitoring resources", func() { // create multiple Dash0 monitoring resources - firstName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-1"} + firstName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-1"} firstDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, firstName) createdObjects = append(createdObjects, firstDash0MonitoringResource) - secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-2"} + secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-2"} secondDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, secondName) createdObjects = append(createdObjects, secondDash0MonitoringResource) - thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-3"} + thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-3"} thirdDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, thirdName) createdObjects = append(createdObjects, thirdDash0MonitoringResource) @@ -274,7 +274,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { }) It("should not delete the collector if there is only one Dash0 monitoring resource left but it is not the one being deleted", func() { - resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-1"} + resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-1"} existingDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, resourceName) createdObjects = append(createdObjects, existingDash0MonitoringResource) @@ -319,7 +319,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { It("should delete the collector if the Dash0 monitoring resource that is being deleted is the only one left", func() { // create multiple Dash0 monitoring resources - resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-1"} + resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-1"} dash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, resourceName) createdObjects = append(createdObjects, dash0MonitoringResource) diff --git a/internal/backendconnection/otelcolresources/desired_state_test.go 
b/internal/backendconnection/otelcolresources/desired_state_test.go index 835ccdab..397e942f 100644 --- a/internal/backendconnection/otelcolresources/desired_state_test.go +++ b/internal/backendconnection/otelcolresources/desired_state_test.go @@ -23,7 +23,7 @@ import ( const ( namespace = "some-namespace" - namePrefix = "unit-test" + namePrefix = OTelCollectorNamePrefixTest ) var _ = Describe("The desired state of the OpenTelemetry Collector resources", func() { diff --git a/internal/backendconnection/otelcolresources/otelcol_resources_test.go b/internal/backendconnection/otelcolresources/otelcol_resources_test.go index dcdb55d5..f45d1359 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources_test.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources_test.go @@ -58,7 +58,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, DevelopmentMode: true, } }) diff --git a/internal/dash0/controller/controller_suite_test.go b/internal/dash0/controller/controller_suite_test.go index b7be7b25..88388df8 100644 --- a/internal/dash0/controller/controller_suite_test.go +++ b/internal/dash0/controller/controller_suite_test.go @@ -55,18 +55,11 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error - // cfg is defined in this file globally. 
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/internal/dash0/controller/dash0_controller.go b/internal/dash0/controller/dash0_controller.go index 821f4902..db4a15ad 100644 --- a/internal/dash0/controller/dash0_controller.go +++ b/internal/dash0/controller/dash0_controller.go @@ -86,6 +86,7 @@ func (r *Dash0Reconciler) InitializeSelfMonitoringMetrics( //+kubebuilder:rbac:groups=core,resources=events,verbs=create;list;patch;update //+kubebuilder:rbac:groups=core,resources=namespaces,verbs=get //+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;delete +//+kubebuilder:rbac:groups=core,resources=endpoints,verbs=get //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings,verbs=get;list;watch;create;update;patch;delete;deletecollection //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings/finalizers,verbs=update //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings/status,verbs=get;update;patch diff --git a/internal/dash0/controller/dash0_controller_test.go b/internal/dash0/controller/dash0_controller_test.go index 4eba59be..9cd4d8fa 100644 --- a/internal/dash0/controller/dash0_controller_test.go +++ b/internal/dash0/controller/dash0_controller_test.go @@ -64,7 +64,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } backendConnectionManager := &backendconnection.BackendConnectionManager{ Client: k8sClient, @@ -93,9 +93,9 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterEach(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) for _, name := range extraDash0MonitoringResourceNames { - RemoveMonitoringResourceByName(ctx, k8sClient, name, true) + DeleteMonitoringResourceByName(ctx, k8sClient, name, true) } }) @@ -128,11 +128,11 @@ var _ = Describe("The Dash0 controller", Ordered, func() { firstDash0MonitoringResource := &dash0v1alpha1.Dash0Monitoring{} Expect(k8sClient.Get(ctx, MonitoringResourceQualifiedName, firstDash0MonitoringResource)).To(Succeed()) time.Sleep(10 * time.Millisecond) - secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-2"} + secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-2"} extraDash0MonitoringResourceNames = append(extraDash0MonitoringResourceNames, secondName) CreateDefaultMonitoringResource(ctx, k8sClient, secondName) time.Sleep(10 * time.Millisecond) - thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-3"} + thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-3"} extraDash0MonitoringResourceNames = append(extraDash0MonitoringResourceNames, thirdName) CreateDefaultMonitoringResource(ctx, k8sClient, thirdName) @@ -298,7 +298,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { Describe("when the instrumentWorkloads setting changes on an existing Dash0 monitoring resource", Ordered, func() { AfterEach(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) DescribeTable("when switching from instrumentWorkloads=none to instrumentWorkloads=created-and-updated", func(config WorkloadTestConfig) { @@ -717,7 +717,7 @@ var _ = Describe("The Dash0 
controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should instrument workloads", func() { @@ -762,7 +762,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -778,7 +778,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -792,7 +792,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterEach(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should remove the collector resources", func() { diff --git a/internal/dash0/instrumentation/instrumentation_suite_test.go b/internal/dash0/instrumentation/instrumentation_suite_test.go index 033df949..f8627474 100644 --- a/internal/dash0/instrumentation/instrumentation_suite_test.go +++ b/internal/dash0/instrumentation/instrumentation_suite_test.go @@ -48,18 +48,11 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error - // cfg is defined in this file globally. cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/internal/dash0/instrumentation/instrumenter_test.go b/internal/dash0/instrumentation/instrumenter_test.go index 3dc46c1e..d62df002 100644 --- a/internal/dash0/instrumentation/instrumenter_test.go +++ b/internal/dash0/instrumentation/instrumenter_test.go @@ -61,7 +61,7 @@ var _ = Describe("The instrumenter", Ordered, func() { createdObjects = DeleteAllCreatedObjects(ctx, k8sClient, createdObjects) DeleteAllEvents(ctx, clientset, namespace) - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) dash0MonitoringResource = nil }) diff --git a/internal/dash0/removal/operator_pre_delete_handler.go b/internal/dash0/predelete/operator_pre_delete_handler.go similarity index 99% rename from internal/dash0/removal/operator_pre_delete_handler.go rename to internal/dash0/predelete/operator_pre_delete_handler.go index 9648ab83..5b3f7b8e 100644 --- a/internal/dash0/removal/operator_pre_delete_handler.go +++ b/internal/dash0/predelete/operator_pre_delete_handler.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
// SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "context" diff --git a/internal/dash0/removal/operator_pre_delete_handler_test.go b/internal/dash0/predelete/operator_pre_delete_handler_test.go similarity index 97% rename from internal/dash0/removal/operator_pre_delete_handler_test.go rename to internal/dash0/predelete/operator_pre_delete_handler_test.go index 820de8f1..bf738365 100644 --- a/internal/dash0/removal/operator_pre_delete_handler_test.go +++ b/internal/dash0/predelete/operator_pre_delete_handler_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "context" @@ -69,8 +69,8 @@ var _ = Describe("Uninstalling the Dash0 Kubernetes operator", Ordered, func() { AfterEach(func() { createdObjects = DeleteAllCreatedObjects(ctx, k8sClient, createdObjects) - RemoveMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName1, false) - RemoveMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName2, false) + DeleteMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName1, false) + DeleteMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName2, false) }) It("should time out if the deletion of all Dash0 monitoring resources does not happen in a timely manner", func() { diff --git a/internal/dash0/removal/removal_suite_test.go b/internal/dash0/predelete/pre_delete_suite_test.go similarity index 87% rename from internal/dash0/removal/removal_suite_test.go rename to internal/dash0/predelete/pre_delete_suite_test.go index af85be12..5e55ba45 100644 --- a/internal/dash0/removal/removal_suite_test.go +++ b/internal/dash0/predelete/pre_delete_suite_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "fmt" @@ -48,7 +48,7 @@ var ( func TestRemoval(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Removal Suite") + RunSpecs(t, "Pre-Delete Suite") } var _ = BeforeSuite(func() { @@ -60,12 +60,6 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } @@ -106,7 +100,7 @@ var _ = BeforeSuite(func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } backendConnectionManager := &backendconnection.BackendConnectionManager{ Client: k8sClient, diff --git a/internal/dash0/startup/auto_operator_configuration_handler.go b/internal/dash0/startup/auto_operator_configuration_handler.go new file mode 100644 index 00000000..0a9f0848 --- /dev/null +++ b/internal/dash0/startup/auto_operator_configuration_handler.go @@ -0,0 +1,221 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0
+
+package startup
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/go-logr/logr"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1"
+	"github.com/dash0hq/dash0-operator/internal/dash0/util"
+)
+
+type SecretRef struct {
+	Name string
+	Key  string
+}
+
+type OperatorConfigurationValues struct {
+	Endpoint string
+	Token    string
+	SecretRef
+}
+
+type AutoOperatorConfigurationResourceHandler struct {
+	client.Client
+	OperatorNamespace  string
+	NamePrefix         string
+	bypassWebhookCheck bool
+}
+
+const (
+	operatorConfigurationAutoResourceName = "dash0-operator-configuration-auto-resource"
+
+	alreadyExistsMessage = "The operator is configured to deploy an operator configuration resource at startup, but there is already " +
+		"an operator configuration resource in the cluster. Hence no action is necessary. (This is not an error.)"
+)
+
+func (r *AutoOperatorConfigurationResourceHandler) CreateOperatorConfigurationResource(
+	ctx context.Context,
+	operatorConfiguration *OperatorConfigurationValues,
+	logger *logr.Logger,
+) error {
+
+	// Fast path: check early on if there is already an operator configuration resource, skip all other steps if so.
+	// We will repeat this check immediately before creating the operator configuration resource, so if the check fails
+	// with an error we will ignore that error for now.
+	allOperatorConfigurationResources := &dash0v1alpha1.Dash0OperatorConfigurationList{}
+	if err := r.List(ctx, allOperatorConfigurationResources); err == nil {
+		if len(allOperatorConfigurationResources.Items) >= 1 {
+			logger.Info(alreadyExistsMessage)
+			return nil
+		}
+	}
+
+	if err := r.validateOperatorConfiguration(operatorConfiguration); err != nil {
+		return err
+	}
+
+	go func() {
+		// There is a validation webhook for operator configuration resources. Thus, before we can create an operator
+		// configuration resource, we need to wait for the webhook endpoint to become available. 
+ if err := r.waitForWebserviceEndpoint(ctx, logger); err != nil { + logger.Error(err, "failed to create the Dash0 operator configuration resource") + } + if err := r.createOperatorConfigurationResourceWithRetry(ctx, operatorConfiguration, logger); err != nil { + logger.Error(err, "failed to create the Dash0 operator configuration resource") + } + }() + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) validateOperatorConfiguration( + operatorConfiguration *OperatorConfigurationValues, +) error { + if operatorConfiguration.Endpoint == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has not been provided") + } + if operatorConfiguration.Token == "" { + if operatorConfiguration.SecretRef.Name == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has been provided, " + + "indicating that an operator configuration resource should be created, but neither " + + "--operator-configuration-token nor --operator-configuration-secret-ref-name have been provided") + } + if operatorConfiguration.SecretRef.Key == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has been provided, " + + "indicating that an operator configuration resource should be created, but neither " + + "--operator-configuration-token nor --operator-configuration-secret-ref-key have been provided") + } + } + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) waitForWebserviceEndpoint( + ctx context.Context, + logger *logr.Logger, +) error { + if r.bypassWebhookCheck { + return nil + } + if err := util.RetryWithCustomBackoff( + "waiting for webservice endpoint to become available", + func() error { + return r.checkWebServiceEndpoint(ctx) + }, + wait.Backoff{ + Duration: 1 * time.Second, + Factor: 1.0, + Steps: 30, + Cap: 30 * time.Second, + }, + false, + logger, + ); err != nil { + return fmt.Errorf("failed to wait for the webservice endpoint to become available: %w", err) + } + + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) checkWebServiceEndpoint( + ctx context.Context, +) error { + endpoints := corev1.Endpoints{} + if err := r.Get(ctx, types.NamespacedName{ + Namespace: r.OperatorNamespace, + Name: fmt.Sprintf("%s-webhook-service", r.NamePrefix), + }, &endpoints); err != nil { + return err + } + + for _, subset := range endpoints.Subsets { + if len(subset.Addresses) == 0 { + // wait for the address to be listed in subset.Addresses instead of subset.NotReadyAddresses + continue + } + for _, port := range subset.Ports { + if port.Port == 9443 { + return nil + } + } + } + + return fmt.Errorf("the webservice endpoint is not available yet") +} + +func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationResourceWithRetry( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + return util.RetryWithCustomBackoff( + "create operator configuration resource at startup", + func() error { + return r.createOperatorConfigurationResourceOnce(ctx, operatorConfiguration, logger) + }, + wait.Backoff{ + Duration: 3 * time.Second, + Factor: 1.5, + Steps: 6, + Cap: 60 * time.Second, + }, + true, + logger, + ) +} + +func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationResourceOnce( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + allOperatorConfigurationResources := 
&dash0v1alpha1.Dash0OperatorConfigurationList{} + if err := r.List(ctx, allOperatorConfigurationResources); err != nil { + return fmt.Errorf("failed to list all Dash0 operator configuration resources: %w", err) + } + if len(allOperatorConfigurationResources.Items) >= 1 { + logger.Info(alreadyExistsMessage) + return nil + } + + authorization := dash0v1alpha1.Authorization{} + if operatorConfiguration.Token != "" { + authorization.Token = &operatorConfiguration.Token + } else { + authorization.SecretRef = &dash0v1alpha1.SecretRef{ + Name: operatorConfiguration.SecretRef.Name, + Key: operatorConfiguration.SecretRef.Key, + } + } + + if err := r.Create(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: operatorConfigurationAutoResourceName, + }, + Spec: dash0v1alpha1.Dash0OperatorConfigurationSpec{ + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + Export: &dash0v1alpha1.Export{ + Dash0: &dash0v1alpha1.Dash0Configuration{ + Endpoint: operatorConfiguration.Endpoint, + Authorization: authorization, + }, + }, + }, + }); err != nil { + return fmt.Errorf("failed to create the Dash0 operator configuration resource: %w", err) + } + + logger.Info("a Dash0 operator configuration resource has been created") + return nil +} diff --git a/internal/dash0/startup/auto_operator_configuration_handler_test.go b/internal/dash0/startup/auto_operator_configuration_handler_test.go new file mode 100644 index 00000000..9150280c --- /dev/null +++ b/internal/dash0/startup/auto_operator_configuration_handler_test.go @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package startup + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/dash0hq/dash0-operator/test/util" +) + +var ( + secretRef = SecretRef{ + Name: "test-secret", + Key: "test-key", + } + operatorConfigurationValuesWithToken = OperatorConfigurationValues{ + Endpoint: EndpointDash0Test, + Token: AuthorizationTokenTest, + } + operatorConfigurationValuesWithSecretRef = OperatorConfigurationValues{ + Endpoint: EndpointDash0Test, + SecretRef: secretRef, + } +) + +var _ = Describe("Create an operator configuration resource at startup", Ordered, func() { + + ctx := context.Background() + logger := log.FromContext(ctx) + + BeforeAll(func() { + EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + }) + + AfterEach(func() { + DeleteAllOperatorConfigurationResources(ctx, k8sClient) + }) + + It("should not do anything if there already is an operator configuration resource in the cluster", func() { + CreateDefaultOperatorConfigurationResource(ctx, k8sClient) + // verify that there is only one resource + list := v1alpha1.Dash0OperatorConfigurationList{} + Expect(k8sClient.List(ctx, &list)).To(Succeed()) + Expect(list.Items).To(HaveLen(1)) + Expect(list.Items[0].Name).To(Equal(OperatorConfigurationResourceName)) + + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{}, &logger)).To(Succeed()) + // verify that there is _still_ only one resource, and that its name is not the one that would be automatically + // created by AutoOperatorConfigurationResourceHandler. 
+ Expect(k8sClient.List(ctx, &list)).To(Succeed()) + Expect(list.Items).To(HaveLen(1)) + Expect(list.Items[0].Name).To(Equal(OperatorConfigurationResourceName)) + }) + + It("should fail validation if no endpoint has been provided", func() { + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{ + Token: AuthorizationTokenTest, + }, &logger)).To( + MatchError( + ContainSubstring( + "invalid operator configuration: --operator-configuration-endpoint has not been provided"))) + }) + + It("should fail validation if no token and no secret reference have been provided", func() { + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{ + Endpoint: AuthorizationTokenTest, + }, &logger)).To( + MatchError( + ContainSubstring( + "neither --operator-configuration-token nor --operator-configuration-secret-ref-name have " + + "been provided"))) + }) + + It("should fail validation if no token and no secret reference key have been provided", func() { + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{ + Endpoint: AuthorizationTokenTest, + SecretRef: SecretRef{ + Name: "test-secret", + }, + }, &logger)).To( + MatchError( + ContainSubstring( + "neither --operator-configuration-token nor --operator-configuration-secret-ref-key have " + + "been provided"))) + }) + + It("should create an operator configuration resource with a token", func() { + Expect( + handler.CreateOperatorConfigurationResource(ctx, &operatorConfigurationValuesWithToken, &logger), + ).To(Succeed()) + + Eventually(func(g Gomega) { + operatorConfiguration := v1alpha1.Dash0OperatorConfiguration{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: operatorConfigurationAutoResourceName, + }, &operatorConfiguration) + + g.Expect(err).ToNot(HaveOccurred()) + export := operatorConfiguration.Spec.Export + g.Expect(export).ToNot(BeNil()) + dash0Export := export.Dash0 + g.Expect(dash0Export).ToNot(BeNil()) + g.Expect(export.Grpc).To(BeNil()) + g.Expect(export.Http).To(BeNil()) + g.Expect(dash0Export.Endpoint).To(Equal(EndpointDash0Test)) + g.Expect(dash0Export.Authorization.Token).ToNot(BeNil()) + g.Expect(*dash0Export.Authorization.Token).To(Equal(AuthorizationTokenTest)) + g.Expect(dash0Export.Authorization.SecretRef).To(BeNil()) + }, 5*time.Second, 100*time.Millisecond).Should(Succeed()) + }) + + It("should create an operator configuration resource with a secret reference", func() { + Expect( + handler.CreateOperatorConfigurationResource(ctx, &operatorConfigurationValuesWithSecretRef, &logger), + ).To(Succeed()) + + Eventually(func(g Gomega) { + operatorConfiguration := v1alpha1.Dash0OperatorConfiguration{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: operatorConfigurationAutoResourceName, + }, &operatorConfiguration) + + g.Expect(err).ToNot(HaveOccurred()) + export := operatorConfiguration.Spec.Export + g.Expect(export).ToNot(BeNil()) + dash0Export := export.Dash0 + g.Expect(dash0Export).ToNot(BeNil()) + g.Expect(export.Grpc).To(BeNil()) + g.Expect(export.Http).To(BeNil()) + g.Expect(dash0Export.Endpoint).To(Equal(EndpointDash0Test)) + g.Expect(dash0Export.Authorization.Token).To(BeNil()) + g.Expect(dash0Export.Authorization.SecretRef).ToNot(BeNil()) + g.Expect(dash0Export.Authorization.SecretRef.Name).To(Equal("test-secret")) + g.Expect(dash0Export.Authorization.SecretRef.Key).To(Equal("test-key")) + }, 5*time.Second, 100*time.Millisecond).Should(Succeed()) + }) +}) diff --git a/internal/dash0/startup/startup_suite_test.go 
b/internal/dash0/startup/startup_suite_test.go new file mode 100644 index 00000000..57c24b9d --- /dev/null +++ b/internal/dash0/startup/startup_suite_test.go @@ -0,0 +1,77 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package startup + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/onsi/gomega/format" + + . "github.com/dash0hq/dash0-operator/test/util" +) + +var ( + cfg *rest.Config + k8sClient client.Client + handler *AutoOperatorConfigurationResourceHandler + testEnv *envtest.Environment +) + +func TestStartup(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Startup Suite") +} + +var _ = BeforeSuite(func() { + format.MaxLength = 0 + + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", + fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + Expect(dash0v1alpha1.AddToScheme(scheme.Scheme)).To(Succeed()) + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + handler = &AutoOperatorConfigurationResourceHandler{ + Client: k8sClient, + OperatorNamespace: Dash0OperatorNamespace, + NamePrefix: OTelCollectorNamePrefixTest, + bypassWebhookCheck: true, + } + Expect(err).NotTo(HaveOccurred()) +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/dash0/webhooks/attach_dangling_events_test.go b/internal/dash0/webhooks/attach_dangling_events_test.go index d81e01c5..769ac0cc 100644 --- a/internal/dash0/webhooks/attach_dangling_events_test.go +++ b/internal/dash0/webhooks/attach_dangling_events_test.go @@ -47,7 +47,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } backendConnectionManager := &backendconnection.BackendConnectionManager{ Client: k8sClient, @@ -69,7 +69,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) BeforeEach(func() { diff --git a/internal/dash0/webhooks/instrumentation_webhook_test.go b/internal/dash0/webhooks/instrumentation_webhook_test.go index e86a46a1..0bb90694 100644 --- a/internal/dash0/webhooks/instrumentation_webhook_test.go +++ b/internal/dash0/webhooks/instrumentation_webhook_test.go @@ -40,7 +40,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, 
k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) DescribeTable("when mutating new workloads", func(config WorkloadTestConfig) { @@ -491,7 +491,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -507,7 +507,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should instrument workloads", func() { @@ -523,7 +523,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -539,7 +539,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should instrument workloads", func() { diff --git a/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go b/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go index 45613c29..2718646a 100644 --- a/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go +++ b/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go @@ -14,7 +14,7 @@ import ( var _ = Describe("The validation webhook for the operator configuration resource", func() { AfterEach(func() { - Expect(k8sClient.DeleteAllOf(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{})).To(Succeed()) + DeleteAllOperatorConfigurationResources(ctx, k8sClient) }) Describe("when validating", Ordered, func() { diff --git a/internal/dash0/webhooks/webhook_suite_test.go b/internal/dash0/webhooks/webhook_suite_test.go index 409a5aac..12b235a8 100644 --- a/internal/dash0/webhooks/webhook_suite_test.go +++ b/internal/dash0/webhooks/webhook_suite_test.go @@ -61,15 +61,8 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: false, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), - WebhookInstallOptions: envtest.WebhookInstallOptions{ Paths: []string{filepath.Join("..", "..", "..", "config", "webhook")}, }, @@ -77,7 +70,6 @@ var _ = BeforeSuite(func() { Expect(testEnv).NotTo(BeNil()) var err error - // cfg is defined in this file globally. 
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh index 1b05c27c..aec4ceca 100755 --- a/test-resources/bin/test-cleanup.sh +++ b/test-resources/bin/test-cleanup.sh @@ -23,6 +23,7 @@ kubectl delete -n ${target_namespace} -f test-resources/customresources/dash0mon sleep 1 kubectl patch -f test-resources/customresources/dash0monitoring/dash0monitoring.yaml -p '{"metadata":{"finalizers":null}}' --type=merge || true kubectl delete -f test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml || true +kubectl delete dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource || true if [[ "${target_namespace}" != "default" ]] && [[ "${delete_namespace}" == "true" ]]; then kubectl delete ns ${target_namespace} --ignore-not-found diff --git a/test-resources/bin/test-scenario-01-aum-operator-cr.sh b/test-resources/bin/test-scenario-01-aum-operator-cr.sh index 0a77ce93..65e26bd2 100755 --- a/test-resources/bin/test-scenario-01-aum-operator-cr.sh +++ b/test-resources/bin/test-scenario-01-aum-operator-cr.sh @@ -51,11 +51,6 @@ deploy_via_helm echo echo -echo "STEP 7: deploy the Dash0 operator configuration resource to cluster" -install_operator_configuration_resource -echo -echo - -echo "STEP 8: deploy the Dash0 monitoring resource to namespace ${target_namespace}" +echo "STEP 7: deploy the Dash0 monitoring resource to namespace ${target_namespace}" install_monitoring_resource diff --git a/test-resources/bin/test-scenario-02-operator-cr-aum.sh b/test-resources/bin/test-scenario-02-operator-cr-aum.sh index c13c9867..92237e0b 100755 --- a/test-resources/bin/test-scenario-02-operator-cr-aum.sh +++ b/test-resources/bin/test-scenario-02-operator-cr-aum.sh @@ -45,16 +45,11 @@ deploy_via_helm echo echo -echo "STEP 6: deploy the Dash0 operator configuration resource to cluster" -install_operator_configuration_resource -echo -echo - -echo "STEP 7: deploy the Dash0 monitoring resource to namespace ${target_namespace}" +echo "STEP 6: deploy the Dash0 monitoring resource to namespace ${target_namespace}" install_monitoring_resource echo echo -echo "STEP 8: deploy application under monitoring" +echo "STEP 7: deploy application under monitoring" test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} diff --git a/test-resources/bin/util b/test-resources/bin/util index 0b60c2e3..a40849b9 100644 --- a/test-resources/bin/util +++ b/test-resources/bin/util @@ -100,6 +100,12 @@ deploy_via_helm() { helm_install_command+=" --set operator.filelogOffsetSynchImage.pullPolicy=${FILELOG_OFFSET_SYNCH_IMG_PULL_POLICY:-Never}" fi + # Deploy an operator configuration right away. + helm_install_command+=" --set operator.dash0Backend.enabled=true" + helm_install_command+=" --set operator.dash0Backend.endpoint=ingress.eu-west-1.aws.dash0-dev.com:4317" + helm_install_command+=" --set operator.dash0Backend.secretRef.name=dash0-authorization-secret" + helm_install_command+=" --set operator.dash0Backend.secretRef.key=token" + helm_install_command+=" dash0-operator" helm_install_command+=" ${OPERATOR_HELM_CHART:-helm-chart/dash0-operator}" echo Helm install command: @@ -123,6 +129,19 @@ wait_for_operator_manager_and_webhook() { fi sleep 0.3 done + + # We deploy an operator configuration at startup via operator.dash0Backend.enabled=true, so we wait for that + # resource to become available as well.
+ echo "waiting for the automatically created operator configuration resource to become available" + for ((i=0; i<=20; i++)); do + # wait until the resource has been created + if kubectl get dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource; then + break; + fi + sleep 1 + done + # wait until the resource has been reconciled and is marked as available + kubectl wait dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource --for condition=Available --timeout 30s } has_been_set_to_empty_string() { diff --git a/test/e2e/operator.go b/test/e2e/operator.go index 7e0ba24e..26ba6a47 100644 --- a/test/e2e/operator.go +++ b/test/e2e/operator.go @@ -44,8 +44,6 @@ func deployOperator( operatorNamespace, "--create-namespace", "--set", "operator.developmentMode=true", - "--set", "operator.disableSecretCheck=true", - "--set", "operator.disableOtlpEndpointCheck=true", } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) @@ -263,8 +261,6 @@ func upgradeOperator( "--namespace", operatorNamespace, "--set", "operator.developmentMode=true", - "--set", "operator.disableSecretCheck=true", - "--set", "operator.disableOtlpEndpointCheck=true", } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) diff --git a/test/util/constants.go b/test/util/constants.go index 81d61781..9b1e483b 100644 --- a/test/util/constants.go +++ b/test/util/constants.go @@ -16,15 +16,17 @@ import ( ) const ( - TestNamespaceName = "test-namespace" - Dash0OperatorNamespace = "dash0-system" - CronJobNamePrefix = "cronjob" - DaemonSetNamePrefix = "daemonset" - DeploymentNamePrefix = "deployment" - JobNamePrefix = "job" - PodNamePrefix = "pod" - ReplicaSetNamePrefix = "replicaset" - StatefulSetNamePrefix = "statefulset" + TestNamespaceName = "test-namespace" + Dash0OperatorNamespace = "dash0-system" + OTelCollectorNamePrefixTest = "unit-test" + + CronJobNamePrefix = "cronjob" + DaemonSetNamePrefix = "daemonset" + DeploymentNamePrefix = "deployment" + JobNamePrefix = "job" + PodNamePrefix = "pod" + ReplicaSetNamePrefix = "replicaset" + StatefulSetNamePrefix = "statefulset" OperatorImageTest = "some-registry.com:1234/dash0hq/operator-controller:1.2.3" InitContainerImageTest = "some-registry.com:1234/dash0hq/instrumentation:4.5.6" diff --git a/test/util/monitoring_resource.go b/test/util/monitoring_resource.go index 6cb402fb..8ae83d7a 100644 --- a/test/util/monitoring_resource.go +++ b/test/util/monitoring_resource.go @@ -232,11 +232,11 @@ func VerifyMonitoringResourceByNameDoesNotExist( )).To(BeNil()) } -func RemoveMonitoringResource(ctx context.Context, k8sClient client.Client) { - RemoveMonitoringResourceByName(ctx, k8sClient, MonitoringResourceQualifiedName, true) +func DeleteMonitoringResource(ctx context.Context, k8sClient client.Client) { + DeleteMonitoringResourceByName(ctx, k8sClient, MonitoringResourceQualifiedName, true) } -func RemoveMonitoringResourceByName( +func DeleteMonitoringResourceByName( ctx context.Context, k8sClient client.Client, monitoringResourceName types.NamespacedName, diff --git a/test/util/operator_resource.go b/test/util/operator_resource.go index 43beea54..9b457f2c 100644 --- a/test/util/operator_resource.go +++ b/test/util/operator_resource.go @@ -21,7 +21,7 @@ import ( const ( Dash0OperatorDeploymentName = "controller-deployment" - OperatorConfigurationResourceName = "dash0-operator-test-resource" + OperatorConfigurationResourceName = "dash0-operator-configuration-test" ) var ( @@ -109,6 
+109,13 @@ func DeleteOperatorConfigurationResource( Expect(k8sClient.DeleteAllOf(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{})).To(Succeed()) } +func DeleteAllOperatorConfigurationResources( + ctx context.Context, + k8sClient client.Client, +) { + Expect(k8sClient.DeleteAllOf(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{})).To(Succeed()) +} + func LoadOperatorConfigurationResourceByNameIfItExists( ctx context.Context, k8sClient client.Client, From cf4bc397236d9dc4c5bba8c54d36401478e9fe56 Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Wed, 18 Sep 2024 14:34:11 +0200 Subject: [PATCH 2/3] test(e2e): test creation of operator config resource via Helm values Also: Use a dash0 exporter in e2e tests instead of an http exporter. --- .../otelcolresources/collector_config_maps.go | 14 +- .../daemonset.config.yaml.template | 4 + .../deployment.config.yaml.template | 4 + .../auto_operator_configuration_handler.go | 1 + test/e2e/collector.go | 2 +- test/e2e/dash0_monitoring_resource.go | 9 +- .../dash0_operator_configuration_resource.go | 39 +++- test/e2e/dash0monitoring.e2e.yaml.template | 10 +- ...sh0operatorconfiguration.e2e.yaml.template | 4 +- test/e2e/e2e_test.go | 174 +++++++++++++----- test/e2e/operator.go | 81 +++++++- 11 files changed, 271 insertions(+), 71 deletions(-) diff --git a/internal/backendconnection/otelcolresources/collector_config_maps.go b/internal/backendconnection/otelcolresources/collector_config_maps.go index a609d7c0..74e7dc17 100644 --- a/internal/backendconnection/otelcolresources/collector_config_maps.go +++ b/internal/backendconnection/otelcolresources/collector_config_maps.go @@ -21,6 +21,7 @@ type OtlpExporter struct { Endpoint string Headers []dash0v1alpha1.Header Encoding string + Insecure bool } var ( @@ -126,11 +127,13 @@ func ConvertExportSettingsToExporterList(export dash0v1alpha1.Export) ([]OtlpExp Value: d0.Dataset, }) } - exporters = append(exporters, OtlpExporter{ + dash0Exporter := OtlpExporter{ Name: "otlp/dash0", Endpoint: export.Dash0.Endpoint, Headers: headers, - }) + } + setGrpcTls(export.Dash0.Endpoint, &dash0Exporter) + exporters = append(exporters, dash0Exporter) } if export.Grpc != nil { @@ -143,6 +146,7 @@ func ConvertExportSettingsToExporterList(export dash0v1alpha1.Export) ([]OtlpExp Endpoint: grpc.Endpoint, Headers: grpc.Headers, } + setGrpcTls(grpc.Endpoint, &grpcExporter) if grpc.Headers != nil && len(grpc.Headers) > 0 { grpcExporter.Headers = grpc.Headers } @@ -182,3 +186,9 @@ func renderCollectorConfiguration( } return collectorConfiguration.String(), nil } + +func setGrpcTls(endpoint string, exporter *OtlpExporter) { + if endpoint == "http://otlp-sink.otlp-sink.svc.cluster.local:4317" { + exporter.Insecure = true + } +} diff --git a/internal/backendconnection/otelcolresources/daemonset.config.yaml.template b/internal/backendconnection/otelcolresources/daemonset.config.yaml.template index d224655c..89eeddf2 100644 --- a/internal/backendconnection/otelcolresources/daemonset.config.yaml.template +++ b/internal/backendconnection/otelcolresources/daemonset.config.yaml.template @@ -9,6 +9,10 @@ exporters: {{- range $i, $exporter := .Exporters }} {{ $exporter.Name }}: endpoint: "{{ $exporter.Endpoint }}" +{{ if $exporter.Insecure }} + tls: + insecure: true +{{ end }} {{- if $exporter.Headers }} headers: {{- range $i, $header := $exporter.Headers }} diff --git a/internal/backendconnection/otelcolresources/deployment.config.yaml.template b/internal/backendconnection/otelcolresources/deployment.config.yaml.template index df39852c..d918306e 
100644 --- a/internal/backendconnection/otelcolresources/deployment.config.yaml.template +++ b/internal/backendconnection/otelcolresources/deployment.config.yaml.template @@ -6,6 +6,10 @@ exporters: {{- range $i, $exporter := .Exporters }} {{ $exporter.Name }}: endpoint: "{{ $exporter.Endpoint }}" +{{ if $exporter.Insecure }} + tls: + insecure: true +{{ end }} {{- if $exporter.Headers }} headers: {{- range $i, $header := $exporter.Headers }} diff --git a/internal/dash0/startup/auto_operator_configuration_handler.go b/internal/dash0/startup/auto_operator_configuration_handler.go index 0a9f0848..48f26c86 100644 --- a/internal/dash0/startup/auto_operator_configuration_handler.go +++ b/internal/dash0/startup/auto_operator_configuration_handler.go @@ -16,6 +16,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/util" ) diff --git a/test/e2e/collector.go b/test/e2e/collector.go index 7b393fcb..604025fc 100644 --- a/test/e2e/collector.go +++ b/test/e2e/collector.go @@ -22,7 +22,7 @@ var ( collectorConfigMapNameQualified = fmt.Sprintf("configmap/%s", collectorConfigMapName) ) -func verifyThatCollectorIsRunning(operatorNamespace string, operatorHelmChart string) { +func waitForCollectorToStart(operatorNamespace string, operatorHelmChart string) { By("validating that the OpenTelemetry collector has been created and is running as expected") verifyCollectorIsUp := func(g Gomega) { // Even though this command comes with its own timeout, we still have to wrap it in an Eventually block, since diff --git a/test/e2e/dash0_monitoring_resource.go b/test/e2e/dash0_monitoring_resource.go index 4c681c6b..16e665cd 100644 --- a/test/e2e/dash0_monitoring_resource.go +++ b/test/e2e/dash0_monitoring_resource.go @@ -20,11 +20,11 @@ import ( type dash0MonitoringValues struct { InstrumentWorkloads dash0v1alpha1.InstrumentWorkloadsMode Endpoint string + Token string } const ( dash0MonitoringResourceName = "dash0-monitoring-resource-e2e" - defaultEndpoint = "http://otlp-sink.otlp-sink.svc.cluster.local:4318" ) var ( @@ -34,6 +34,7 @@ var ( defaultDash0MonitoringValues = dash0MonitoringValues{ Endpoint: defaultEndpoint, + Token: defaultToken, InstrumentWorkloads: dash0v1alpha1.All, } ) @@ -69,7 +70,7 @@ func deployDash0MonitoringResource( }() By(fmt.Sprintf( - "Deploying the Dash0 monitoring resource to namespace %s with values %v, operator namespace is %s", + "deploying the Dash0 monitoring resource to namespace %s with values %v, operator namespace is %s", namespace, dash0MonitoringValues, operatorNamespace)) Expect( runAndIgnoreOutput(exec.Command( "kubectl", "apply", "-n", namespace, "-f", fileName, ))).To(Succeed()) // Deploying the Dash0 monitoring resource will trigger creating the default OpenTelemetry collector instance.
- verifyThatCollectorIsRunning(operatorNamespace, operatorHelmChart) + waitForCollectorToStart(operatorNamespace, operatorHelmChart) } func updateEndpointOfDash0MonitoringResource( @@ -132,7 +133,7 @@ func truncateExportedTelemetry() { } func undeployDash0MonitoringResource(namespace string) { - By(fmt.Sprintf("Removing the Dash0 monitoring resource from namespace %s", namespace)) + By(fmt.Sprintf("removing the Dash0 monitoring resource from namespace %s", namespace)) Expect( runAndIgnoreOutput(exec.Command( "kubectl", diff --git a/test/e2e/dash0_operator_configuration_resource.go b/test/e2e/dash0_operator_configuration_resource.go index 864b5d63..93a65ca5 100644 --- a/test/e2e/dash0_operator_configuration_resource.go +++ b/test/e2e/dash0_operator_configuration_resource.go @@ -10,6 +10,7 @@ import ( "os" "os/exec" "text/template" + "time" . "github.com/onsi/ginkgo/v2" //nolint:golint,revive . "github.com/onsi/gomega" ) type dash0OperatorConfigurationValues struct { SelfMonitoringEnabled bool Endpoint string + Token string } const ( dash0OperatorConfigurationResourceName = "dash0-operator-configuration-resource-e2e" + + // We are using the Dash0 exporter which uses a gRPC exporter under the hood, so actually omitting the http:// + // scheme would be fine, but for self-monitoring we would prepend https:// to URLs without scheme, see comment in + // self_monitoring.go#prependProtocol. Since the OTLP sink does not serve https, we use a URL with http:// to avoid + // this behavior. + defaultEndpoint = "http://otlp-sink.otlp-sink.svc.cluster.local:4317" + + // We only need a non-empty token to pass the validation in startup.auto_operator_configuration_handler.go; + // we do not actually send data to a Dash0 backend, so no real token is required.
+ defaultToken = "dummy-token" ) var ( @@ -32,6 +44,7 @@ var ( defaultDash0OperatorConfigurationValues = dash0OperatorConfigurationValues{ SelfMonitoringEnabled: true, Endpoint: defaultEndpoint, + Token: defaultToken, } ) @@ -67,7 +80,7 @@ func deployDash0OperatorConfigurationResource( }() By(fmt.Sprintf( - "Deploying the Dash0 operator configuration resource with values %v", dash0OperatorConfigurationValues)) + "deploying the Dash0 operator configuration resource with values %v", dash0OperatorConfigurationValues)) Expect( runAndIgnoreOutput(exec.Command( "kubectl", "apply", "-f", fileName, ))).To(Succeed()) } +func waitForAutoOperatorConfigurationResourceToBecomeAvailable() { + By("waiting for the automatically created Dash0 operator configuration resource to become available") + Eventually(func(g Gomega) { + g.Expect( + runAndIgnoreOutput(exec.Command( + "kubectl", + "get", + "dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource", + ))).To(Succeed()) + }, 30*time.Second, 1*time.Second).Should(Succeed()) + Expect( + runAndIgnoreOutput(exec.Command( + "kubectl", + "wait", + "dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource", + "--for", + "condition=Available", + "--timeout", + "30s", + ))).To(Succeed()) +} + func undeployDash0OperatorConfigurationResource() { - By("Removing the Dash0 operator configuration resource") + By("removing the Dash0 operator configuration resource") Expect( runAndIgnoreOutput(exec.Command( "kubectl", diff --git a/test/e2e/dash0monitoring.e2e.yaml.template b/test/e2e/dash0monitoring.e2e.yaml.template index fab6d5c4..7c491233 100644 --- a/test/e2e/dash0monitoring.e2e.yaml.template +++ b/test/e2e/dash0monitoring.e2e.yaml.template @@ -4,10 +4,10 @@ metadata: name: dash0-monitoring-resource-e2e spec: instrumentWorkloads: {{ .InstrumentWorkloads }} - {{- if .Endpoint }} +{{- if .Endpoint }} export: - # In the e2e tests we only want to export to the local otlp-sink collector. Since the Dash0 exporter config uses - # gRPC, and gRPC requires a TLS connection, we are configuring the local otlp-sink collector via HTTP instead. - http: + dash0: endpoint: {{ .Endpoint }} - {{- end }} \ No newline at end of file + authorization: + token: {{ .Token }} +{{- end }} \ No newline at end of file diff --git a/test/e2e/dash0operatorconfiguration.e2e.yaml.template b/test/e2e/dash0operatorconfiguration.e2e.yaml.template index 19091180..9e1fee44 100644 --- a/test/e2e/dash0operatorconfiguration.e2e.yaml.template +++ b/test/e2e/dash0operatorconfiguration.e2e.yaml.template @@ -6,5 +6,7 @@ spec: selfMonitoring: enabled: {{ .SelfMonitoringEnabled }} export: - http: + dash0: endpoint: {{ .Endpoint }} + authorization: + token: {{ .Token }} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 13f67c76..09ba4965 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -14,10 +14,13 @@ import ( "github.com/google/uuid" dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/startup" . "github.com/onsi/ginkgo/v2" .
"github.com/onsi/gomega" "github.com/onsi/gomega/format" + + "github.com/dash0hq/dash0-operator/test/util" ) const ( @@ -275,10 +278,12 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { Describe("webhook", func() { BeforeAll(func() { By("deploy the Dash0 operator") - deployOperator(operatorNamespace, operatorHelmChart, operatorHelmChartUrl, images) - - fmt.Fprint(GinkgoWriter, "waiting 5 seconds to give the webhook some time to get ready\n") - time.Sleep(5 * time.Second) + deployOperator( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + images, + ) }) AfterAll(func() { @@ -368,7 +373,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { "webhook", ) - By("Adding the opt-out label to the deployment") + By("adding the opt-out label to the deployment") Expect(addOptOutLabel( applicationUnderTestNamespace, "deployment", @@ -394,7 +399,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { verifyNoDash0LabelsOrOnlyOptOut(g, applicationUnderTestNamespace, "daemonset", true) }, 10*time.Second, pollingInterval).Should(Succeed()) - By("Removing the opt-out label from the daemonset") + By("removing the opt-out label from the daemonset") Expect(removeOptOutLabel( applicationUnderTestNamespace, "daemonset", @@ -435,6 +440,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { applicationUnderTestNamespace, dash0MonitoringValues{ Endpoint: defaultEndpoint, + Token: defaultToken, InstrumentWorkloads: dash0v1alpha1.None, }, operatorNamespace, @@ -537,55 +543,104 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }) Describe("using the operator configuration resource's connection settings", func() { - BeforeAll(func() { - By("deploy the Dash0 operator") - deployOperator(operatorNamespace, operatorHelmChart, operatorHelmChartUrl, images) + Describe("with a manually created operator configuration resource", func() { + BeforeAll(func() { + By("deploy the Dash0 operator") + deployOperator(operatorNamespace, operatorHelmChart, operatorHelmChartUrl, images) + }) - fmt.Fprint(GinkgoWriter, "waiting 5 seconds to give the webhook some time to get ready\n") - time.Sleep(5 * time.Second) - }) + AfterAll(func() { + undeployOperator(operatorNamespace) + }) - AfterAll(func() { - undeployOperator(operatorNamespace) - }) + BeforeEach(func() { + deployDash0OperatorConfigurationResource(dash0OperatorConfigurationValues{ + SelfMonitoringEnabled: false, + Endpoint: defaultEndpoint, + Token: defaultToken, + }) + deployDash0MonitoringResource( + applicationUnderTestNamespace, + dash0MonitoringValues{ + Endpoint: "", + Token: "", + InstrumentWorkloads: dash0v1alpha1.All, + }, + operatorNamespace, + operatorHelmChart, + ) + }) - BeforeEach(func() { - deployDash0OperatorConfigurationResource(dash0OperatorConfigurationValues{ - SelfMonitoringEnabled: false, - Endpoint: defaultEndpoint, + AfterEach(func() { + undeployDash0MonitoringResource(applicationUnderTestNamespace) + undeployDash0OperatorConfigurationResource() }) - deployDash0MonitoringResource( - applicationUnderTestNamespace, - dash0MonitoringValues{ - Endpoint: "", - InstrumentWorkloads: dash0v1alpha1.All, - }, - operatorNamespace, - operatorHelmChart, - ) - }) - AfterEach(func() { - undeployDash0MonitoringResource(applicationUnderTestNamespace) - undeployDash0OperatorConfigurationResource() + It("should instrumenting workloads", func() { + testId := generateTestId("deployment") + By("installing the Node.js deployment") + Expect(installNodeJsWorkload(workloadTypeDeployment, 
applicationUnderTestNamespace, testId)).To(Succeed()) + By("verifying that the Node.js deployment has been instrumented by the webhook") + verifyThatWorkloadHasBeenInstrumented( + applicationUnderTestNamespace, + "deployment", + workloadTypeDeployment.port, + false, + testId, + images, + "webhook", + ) + }) }) - It("when instrumenting workloads", func() { - workloadType := workloadTypeDeployment - testId := generateTestId(workloadType.workloadTypeString) - By(fmt.Sprintf("installing the Node.js %s", workloadType.workloadTypeString)) - Expect(installNodeJsWorkload(workloadType, applicationUnderTestNamespace, testId)).To(Succeed()) - By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the webhook", - workloadType.workloadTypeString)) - verifyThatWorkloadHasBeenInstrumented( - applicationUnderTestNamespace, - workloadType.workloadTypeString, - workloadType.port, - workloadType.isBatch, - testId, - images, - "webhook", - ) + Describe("using the automatically created operator configuration resource", func() { + BeforeAll(func() { + By("deploy the Dash0 operator and let it create an operator configuration resource") + deployOperatorWithDefaultAutoOperationConfiguration( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + images, + ) + }) + + AfterAll(func() { + undeployOperator(operatorNamespace) + }) + + BeforeEach(func() { + deployDash0MonitoringResource( + applicationUnderTestNamespace, + dash0MonitoringValues{ + Endpoint: "", + Token: "", + InstrumentWorkloads: dash0v1alpha1.All, + }, + operatorNamespace, + operatorHelmChart, + ) + }) + + AfterEach(func() { + undeployDash0MonitoringResource(applicationUnderTestNamespace) + undeployDash0OperatorConfigurationResource() + }) + + It("should instrument workloads", func() { + testId := generateTestId("deployment") + By("installing the Node.js deployment") + Expect(installNodeJsWorkload(workloadTypeDeployment, applicationUnderTestNamespace, testId)).To(Succeed()) + By("verifying that the Node.js deployment has been instrumented by the webhook") + verifyThatWorkloadHasBeenInstrumented( + applicationUnderTestNamespace, + "deployment", + workloadTypeDeployment.port, + false, + testId, + images, + "webhook", + ) + }) }) }) @@ -641,7 +696,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { By("churning collector pods") _ = runAndIgnoreOutput(exec.Command("kubectl", "delete", "pods", "-n", operatorNamespace)) - verifyThatCollectorIsRunning(operatorNamespace, operatorHelmChart) + waitForCollectorToStart(operatorNamespace, operatorHelmChart) By("verifying that the previous log message is not reported again (checking for 30 seconds)") Consistently(func(g Gomega) error { @@ -703,6 +758,27 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }) }) + Describe("operator installation", func() { + + It("should fail if asked to create an operator configuration resource with invalid settings", func() { + err := deployOperatorWithAutoOperationConfiguration( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + images, + &startup.OperatorConfigurationValues{ + Endpoint: util.EndpointDash0Test, + // no token, no secret ref + }, + ) + Expect(err).To( + MatchError( + ContainSubstring("operator.dash0Backend.enabled is set to true, but neither " + + "operator.dash0Backend.token nor operator.dash0Backend.secretRef.name & " + + "operator.dash0Backend.secretRef.key have been provided."))) + }) + }) + Describe("operator removal", func() { const ( diff --git a/test/e2e/operator.go
b/test/e2e/operator.go index 26ba6a47..c2bc4915 100644 --- a/test/e2e/operator.go +++ b/test/e2e/operator.go @@ -10,6 +10,8 @@ import ( "strings" "time" + "github.com/dash0hq/dash0-operator/internal/dash0/startup" + . "github.com/onsi/ginkgo/v2" //nolint:golint,revive . "github.com/onsi/gomega" ) @@ -31,6 +33,42 @@ func deployOperator( operatorHelmChartUrl string, images Images, ) { + err := deployOperatorWithAutoOperationConfiguration( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + images, + nil, + ) + Expect(err).ToNot(HaveOccurred()) +} + +func deployOperatorWithDefaultAutoOperationConfiguration( + operatorNamespace string, + operatorHelmChart string, + operatorHelmChartUrl string, + images Images, +) { + err := deployOperatorWithAutoOperationConfiguration( + operatorNamespace, + operatorHelmChart, + operatorHelmChartUrl, + images, + &startup.OperatorConfigurationValues{ + Endpoint: defaultEndpoint, + Token: defaultToken, + }, + ) + Expect(err).ToNot(HaveOccurred()) +} + +func deployOperatorWithAutoOperationConfiguration( + operatorNamespace string, + operatorHelmChart string, + operatorHelmChartUrl string, + images Images, + operatorConfigurationValues *startup.OperatorConfigurationValues, +) error { ensureDash0OperatorHelmRepoIsInstalled(operatorHelmChart, operatorHelmChartUrl) By( @@ -47,11 +85,35 @@ func deployOperator( } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) + if operatorConfigurationValues != nil { + arguments = setHelmParameter(arguments, "operator.dash0Backend.enabled", "true") + arguments = setIfNotEmpty(arguments, "operator.dash0Backend.endpoint", operatorConfigurationValues.Endpoint) + arguments = setIfNotEmpty(arguments, "operator.dash0Backend.token", operatorConfigurationValues.Token) + arguments = setIfNotEmpty( + arguments, + "operator.dash0Backend.secretRef.name", + operatorConfigurationValues.SecretRef.Name, + ) + arguments = setIfNotEmpty( + arguments, + "operator.dash0Backend.secretRef.key", + operatorConfigurationValues.SecretRef.Key, + ) + } + output, err := run(exec.Command("helm", arguments...)) - Expect(err).NotTo(HaveOccurred()) + if err != nil { + return err + } + fmt.Fprintf(GinkgoWriter, "output of helm install:\n%s", output) + waitForManagerPodAndWebhookToStart(operatorNamespace) + + if operatorConfigurationValues != nil { + waitForAutoOperatorConfigurationResourceToBecomeAvailable() + } - verifyThatManagerPodIsRunning(operatorNamespace) + return nil } func addOptionalHelmParameters(arguments []string, operatorHelmChart string, images Images) []string { @@ -93,12 +155,17 @@ func addOptionalHelmParameters(arguments []string, operatorHelmChart string, ima func setIfNotEmpty(arguments []string, key string, value string) []string { if value != "" { - arguments = append(arguments, "--set") - arguments = append(arguments, fmt.Sprintf("%s=%s", key, value)) + return setHelmParameter(arguments, key, value) } return arguments } +func setHelmParameter(arguments []string, key string, value string) []string { + arguments = append(arguments, "--set") + arguments = append(arguments, fmt.Sprintf("%s=%s", key, value)) + return arguments +} + func ensureDash0OperatorHelmRepoIsInstalled( operatorHelmChart string, operatorHelmChartUrl string, @@ -155,7 +222,7 @@ func ensureDash0OperatorHelmRepoIsInstalled( } } -func verifyThatManagerPodIsRunning(operatorNamespace string) { +func waitForManagerPodAndWebhookToStart(operatorNamespace string) { var managerPodName string By("validating that the manager pod is running as 
expected") verifyControllerUp := func(g Gomega) error { @@ -271,7 +338,7 @@ func upgradeOperator( By("waiting shortly, to give the operator time to restart after helm upgrade") time.Sleep(5 * time.Second) - verifyThatManagerPodIsRunning(operatorNamespace) + waitForManagerPodAndWebhookToStart(operatorNamespace) - verifyThatCollectorIsRunning(operatorNamespace, operatorHelmChart) + waitForCollectorToStart(operatorNamespace, operatorHelmChart) } From 28648975b4e7bd5d9ab0813a7c6fb6017614c24e Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Wed, 18 Sep 2024 16:37:12 +0200 Subject: [PATCH 3/3] docs(chart): document helm values to create an operator config resource --- helm-chart/dash0-operator/README.md | 116 ++++++++++++++++++++++++---- 1 file changed, 101 insertions(+), 15 deletions(-) diff --git a/helm-chart/dash0-operator/README.md b/helm-chart/dash0-operator/README.md index 0b290b13..34d6dd6f 100644 --- a/helm-chart/dash0-operator/README.md +++ b/helm-chart/dash0-operator/README.md @@ -27,6 +27,25 @@ Supported runtimes for automatic instrumentation: - [Helm](https://helm.sh) >= 3.x, please refer to Helm's [documentation](https://helm.sh/docs/) for more information on installing Helm. +To use the operator, you will need provide two configuration values: +* `endpoint`: The URL of the Dash0 ingress endpoint backend to which telemetry data will be sent. + This property is mandatory when installing the operator. + This is the OTLP/gRPC endpoint of your Dash0 organization. + The correct OTLP/gRPC endpoint can be copied fom https://app.dash0.com -> organization settings -> "Endpoints". + Note that the correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. + Including a protocol prefix (e.g. `https://`) is optional. +* Either `token` or `secretRef`: Exactly one of these two properties needs to be provided when installing the operator. + * `token`: This is the Dash0 authorization token of your organization. + The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization + settings -> "Auth Tokens". + The prefix `Bearer ` must *not* be included in the value. + Note that when you provide a token, it will be rendered verbatim into a Kubernetes ConfigMap object. + Anyone with API access to the Kubernetes cluster will be able to read the value. + Use a secret reference and a Kubernetes secret if you want to avoid that. + * `secretRef`: A reference to an existing Kubernetes secret in the Dash0 operator's namespace. + The secret needs to contain the Dash0 authorization token. + See below for details on how exactly the secret should be created and configured. 
+ ## Installation Before installing the operator, add the Dash0 operator's Helm repository as follows: @@ -38,6 +57,36 @@ helm repo update dash0-operator Now you can install the operator into your cluster via Helm with the following command: +```console +helm install \ + --namespace dash0-system \ + --create-namespace \ + --set operator.dash0Backend.enabled=true \ + --set operator.dash0Backend.endpoint=REPLACE THIS WITH YOUR DASH0 INGRESS ENDPOINT \ + --set operator.dash0Backend.token=REPLACE THIS WITH YOUR DASH0 AUTH TOKEN \ + dash0-operator \ + dash0-operator/dash0-operator +``` + +Instead of providing the auth token directly, you can also use a secret reference: + +```console +helm install \ + --namespace dash0-system \ + --create-namespace \ + --set operator.dash0Backend.enabled=true \ + --set operator.dash0Backend.endpoint=REPLACE THIS WITH YOUR DASH0 INGRESS ENDPOINT \ + --set operator.dash0Backend.secretRef.name=REPLACE THIS WITH THE NAME OF AN EXISTING KUBERNETES SECRET \ + --set operator.dash0Backend.secretRef.key=REPLACE THIS WITH THE PROPERTY KEY IN THAT SECRET \ + dash0-operator \ + dash0-operator/dash0-operator +``` + +See https://kubernetes.io/docs/concepts/configuration/secret/ for more information on using Kubernetes secrets +with the Dash0 operator. + +Last but not least, you can also install the operator without providing a Dash0 backend configuration: ```console helm install \ --namespace dash0-system \ --create-namespace \ dash0-operator \ dash0-operator/dash0-operator ``` +However, you will need to create a Dash0 operator configuration resource later that provides the backend connection +settings. +That is, providing `--set operator.dash0Backend.enabled=true` and the other backend-related settings when running +`helm install` is simply a shortcut to deploy the Dash0 operator configuration resource automatically at startup. + On its own, the operator will not do much. To actually have the operator monitor your cluster, two more things need to be set up: 1. a [Dash0 backend connection](#configuring-the-dash0-backend-connection) has to be configured and @@ -57,7 +111,12 @@ Both steps are described in the following sections. ### Configuring the Dash0 Backend Connection -Create a file `dash0-operator-configuration.yaml` with the following content: +You can skip this step if you provided `--set operator.dash0Backend.enabled=true` together with the endpoint and either +a token or a secret reference when running `helm install`. In that case, proceed to the next section, +[Enable Dash0 Monitoring For a Namespace](#enable-dash0-monitoring-for-a-namespace). + +Otherwise, configure the backend connection now by creating a file `dash0-operator-configuration.yaml` with the +following content: ```yaml apiVersion: operator.dash0.com/v1alpha1 kind: Dash0OperatorConfiguration @@ -81,7 +140,7 @@ You need to provide two configuration settings: Replace the value in the example above with the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied from https://app.dash0.com -> organization settings -> "Endpoints". Note that the correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. - A protocol prefix (e.g. `https://`) should not be included in the value. + Including a protocol prefix (e.g. `https://`) is optional. * `spec.export.dash0.authorization.token` or `spec.export.dash0.authorization.secretRef`: Exactly one of these two properties needs to be provided. Providing both will cause a validation error when installing the Dash0Monitoring resource.
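To make the `helm install` shortcut described above concrete: with `operator.dash0Backend.enabled=true`, the operator deploys an operator configuration resource at startup, named `dash0-operator-configuration-auto-resource` (the name that the cleanup and wait logic in the test scripts refer to). The following is a rough sketch of the resulting resource, assuming a secret reference was provided; fields beyond the export settings may differ:

```yaml
apiVersion: operator.dash0.com/v1alpha1
kind: Dash0OperatorConfiguration
metadata:
  name: dash0-operator-configuration-auto-resource
spec:
  export:
    dash0:
      endpoint: ingress... # taken from --set operator.dash0Backend.endpoint (placeholder here)
      authorization:
        # from --set operator.dash0Backend.secretRef.name / .key
        # (or `token: ...` if operator.dash0Backend.token was provided instead)
        secretRef:
          name: dash0-authorization-secret
          key: token
```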
@@ -119,6 +178,7 @@ For _each namespace_ that you want to monitor with Dash0, enable workload monito resource_ into that namespace: Create a file `dash0-monitoring.yaml` with the following content: + ```yaml apiVersion: operator.dash0.com/v1alpha1 kind: Dash0Monitoring @@ -134,6 +194,7 @@ kubectl apply --namespace my-nodejs-applications -f dash0-monitoring.yaml ``` If you want to monitor the `default` namespace with Dash0, use the following command: + ```console kubectl apply -f dash0-monitoring.yaml ``` @@ -217,13 +278,31 @@ kubectl create secret generic \ With this example command, you would create a secret with the name `dash0-authorization-secret` in the namespace `dash0-system`. -If you installed the operator into a different namespace, replace the `--namespace` parameter accordingly. +If you installed (or plan to install) the operator into a different namespace, replace the `--namespace` parameter +accordingly. + +The name of the secret as well as the key of the token value within the secret must be provided when referencing the +secret during `helm install`, or in the YAML file for the Dash0 operator configuration resource (in the `secretRef` +property). + +For creating the operator configuration resource with `helm install`, the command would look like this, assuming the +secret has been created as shown above: + +```console +helm install \ + --namespace dash0-system \ + --set operator.dash0Backend.enabled=true \ + --set operator.dash0Backend.endpoint=REPLACE THIS WITH YOUR DASH0 INGRESS ENDPOINT \ + --set operator.dash0Backend.secretRef.name=dash0-authorization-secret \ + --set operator.dash0Backend.secretRef.key=token \ + dash0-operator \ + dash0-operator/dash0-operator +``` + +If you do not want to install the operator configuration resource via `helm install` but instead deploy it manually, +and use a secret reference for the auth token, the following example YAML file would work with the secret created +above: ```yaml apiVersion: operator.dash0.com/v1alpha1 kind: Dash0OperatorConfiguration @@ -232,17 +311,19 @@ metadata: spec: export: dash0: - endpoint: ingress... # TODO needs to be replaced with the actual value, see above + endpoint: ingress... # TODO REPLACE THIS WITH YOUR DASH0 INGRESS ENDPOINT authorization: - # Provide the name and key of a secret existing in the Dash0 operator's namespace as secretRef: secretRef: name: dash0-authorization-secret key: token ``` -Since the name `dash0-authorization-secret` and the key `token` are the defaults, this `secretRef` could have also been -written as follows: +When deploying the operator configuration resource via `kubectl`, the following defaults apply: +* If the `name` property is omitted, the name `dash0-authorization-secret` will be assumed. +* If the `key` property is omitted, the key `token` will be assumed.
+ +With these defaults in mind, the `secretRef` could have also been written as follows: ```yaml apiVersion: operator.dash0.com/v1alpha1 kind: Dash0OperatorConfiguration @@ -258,9 +339,13 @@ spec: secretRef: {} ``` +Note: There are no defaults when using `--set operator.dash0Backend.secretRef.name` and +`--set operator.dash0Backend.secretRef.key` with `helm install`, so for that approach the values must always be +provided explicitly. + Note that by default, Kubernetes secrets are stored _unencrypted_, and anyone with API access to the Kubernetes cluster will be able to read the value. -Additional steps are required to make sure secret values are encrypted. +Additional steps are required to make sure secret values are encrypted, if that is desired. See https://kubernetes.io/docs/concepts/configuration/secret/ for more information on Kubernetes secrets. ### Dash0 Dataset Configuration @@ -287,8 +372,9 @@ spec: ### Exporting Data to Other Observability Backends -Instead of `spec.export.dash0`, you can also provide `spec.export.http` or `spec.export.grpc` to export telemetry data -to arbitrary OTLP-compatible backends, or to another local OpenTelemetry collector. +Instead of `spec.export.dash0` in the Dash0 operator configuration resource, you can also provide `spec.export.http` or +`spec.export.grpc` to export telemetry data to arbitrary OTLP-compatible backends, or to another local OpenTelemetry +collector. Here is an example for HTTP: