diff --git a/Makefile b/Makefile index 21ce344c..427b8782 100644 --- a/Makefile +++ b/Makefile @@ -165,7 +165,7 @@ golangci-lint: lint: golangci-lint ## Run golangci-lint linter & yamllint @echo -------------------------------- $(GOLANGCI_LINT) run - helm lint helm-chart/dash0-operator --set operator.disableSecretCheck=true --set operator.disableOtlpEndpointCheck=true + helm lint helm-chart/dash0-operator .PHONY: lint-fix lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes diff --git a/api/dash0monitoring/v1alpha1/types_common.go b/api/dash0monitoring/v1alpha1/types_common.go index 1cb8d535..ee108cfb 100644 --- a/api/dash0monitoring/v1alpha1/types_common.go +++ b/api/dash0monitoring/v1alpha1/types_common.go @@ -36,8 +36,8 @@ type Export struct { type Dash0Configuration struct { // The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value // needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - // https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - // `dash0.com:4317`. + // https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + // `ingress.` and end in `dash0.com:4317`. // // +kubebuilder:validation:Required Endpoint string `json:"endpoint"` @@ -61,14 +61,15 @@ type Dash0Configuration struct { type Authorization struct { // The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has // to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - // token for your Dash0 organization can be copied from https://app.dash0.com/settings. + // token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + // "Auth Tokens". 
// // +kubebuilder:validation:Optional Token *string `json:"token"` // either token or secret ref, with token taking precedence // A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is // ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - // https://app.dash0.com/settings. + // https://app.dash0.com -> organization settings -> "Auth Tokens". // // +kubebuilder:validation:Optional SecretRef *SecretRef `json:"secretRef"` diff --git a/cmd/main.go b/cmd/main.go index f8e79e94..a539e488 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -39,8 +39,9 @@ import ( "github.com/dash0hq/dash0-operator/internal/backendconnection/otelcolresources" "github.com/dash0hq/dash0-operator/internal/dash0/controller" "github.com/dash0hq/dash0-operator/internal/dash0/instrumentation" - "github.com/dash0hq/dash0-operator/internal/dash0/removal" + "github.com/dash0hq/dash0-operator/internal/dash0/predelete" "github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring" + "github.com/dash0hq/dash0-operator/internal/dash0/startup" "github.com/dash0hq/dash0-operator/internal/dash0/util" "github.com/dash0hq/dash0-operator/internal/dash0/webhooks" //+kubebuilder:scaffold:imports @@ -105,25 +106,40 @@ func init() { func main() { ctx := context.Background() - var uninstrumentAll bool + var operatorConfigurationEndpoint string + var operatorConfigurationToken string + var operatorConfigurationSecretRefName string + var operatorConfigurationSecretRefKey string + var isUninstrumentAll bool var metricsAddr string var enableLeaderElection bool var probeAddr string var secureMetrics bool var enableHTTP2 bool - flag.BoolVar(&uninstrumentAll, "uninstrument-all", false, - "If set, the process will remove all Dash0 monitoring resources from all namespaces in the cluster. 
This "+ - "will trigger the Dash0 monitoring resources' finalizers in each namespace, which in turn will revert the "+ - "instrumentation of all workloads in all namespaces.") + + flag.BoolVar(&isUninstrumentAll, "uninstrument-all", false, + "If set, the process will remove all Dash0 monitoring resources from all namespaces in the cluste, then "+ + "exit. This will trigger the Dash0 monitoring resources' finalizers in each namespace, which in turn will "+ + "revert the instrumentation of all workloads in all namespaces.") + flag.StringVar(&operatorConfigurationEndpoint, "operator-configuration-endpoint", "", + "The Dash0 endpoint gRPC URL for creating an operator configuration resource.") + flag.StringVar(&operatorConfigurationToken, "operator-configuration-token", "", + "The Dash0 auth token for creating an operator configuration resource.") + flag.StringVar(&operatorConfigurationSecretRefName, "operator-configuration-secret-ref-name", "", + "The name of an existing Kubernetes secret containing the Dash0 auth token, used to creating an operator "+ + "configuration resource.") + flag.StringVar(&operatorConfigurationSecretRefKey, "operator-configuration-secret-ref-key", "", + "The key in an existing Kubernetes secret containing the Dash0 auth token, used to creating an operator "+ + "configuration resource.") flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. 
"+ "Enabling this will ensure there is only one active controller manager.") flag.BoolVar(&secureMetrics, "metrics-secure", false, - "If set, the metrics endpoint is served securely") + "If set, the metrics endpoint is served securely.") flag.BoolVar(&enableHTTP2, "enable-http2", false, - "If set, HTTP/2 will be enabled for the metrics and webhook servers") + "If set, HTTP/2 will be enabled for the metrics and webhook servers.") var developmentMode bool developmentModeRaw, isSet := os.LookupEnv(developmentModeEnvVarName) @@ -143,8 +159,8 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) - if uninstrumentAll { - if err := deleteDash0MonitoringResourcesInAllNamespaces(&setupLog); err != nil { + if isUninstrumentAll { + if err := deleteMonitoringResourcesInAllNamespaces(&setupLog); err != nil { setupLog.Error(err, "deleting the Dash0 monitoring resources in all namespaces failed") os.Exit(1) } @@ -196,6 +212,18 @@ func main() { map[string]string{semconv.AttributeK8SDeploymentUID: string(deploymentSelfReference.UID)}, ) + var operatorConfiguration *startup.OperatorConfigurationValues + if len(operatorConfigurationEndpoint) > 0 { + operatorConfiguration = &startup.OperatorConfigurationValues{ + Endpoint: operatorConfigurationEndpoint, + Token: operatorConfigurationToken, + SecretRef: startup.SecretRef{ + Name: operatorConfigurationSecretRefName, + Key: operatorConfigurationSecretRefKey, + }, + } + } + if err = startOperatorManager( ctx, metricsAddr, @@ -204,6 +232,7 @@ func main() { webhookServer, probeAddr, enableLeaderElection, + operatorConfiguration, developmentMode, ); err != nil { setupLog.Error(err, "The Dash0 operator manager process failed to start.") @@ -219,6 +248,7 @@ func startOperatorManager( webhookServer k8swebhook.Server, probeAddr string, enableLeaderElection bool, + operatorConfiguration *startup.OperatorConfigurationValues, developmentMode bool, ) error { mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ @@ -285,7 
+315,7 @@ func startOperatorManager( developmentMode, ) - err = startDash0Controllers(ctx, mgr, clientset, developmentMode) + err = startDash0Controllers(ctx, mgr, clientset, operatorConfiguration, developmentMode) if err != nil { return err } @@ -396,6 +426,7 @@ func startDash0Controllers( ctx context.Context, mgr manager.Manager, clientset *kubernetes.Clientset, + operatorConfiguration *startup.OperatorConfigurationValues, developmentMode bool, ) error { oTelCollectorBaseUrl := @@ -419,8 +450,10 @@ func startDash0Controllers( ctx, clientset, mgr.GetEventRecorderFor("dash0-startup-tasks"), + operatorConfiguration, images, oTelCollectorBaseUrl, + &setupLog, ) logCurrentSelfMonitoringSettings(deploymentSelfReference) @@ -555,9 +588,17 @@ func executeStartupTasks( ctx context.Context, clientset *kubernetes.Clientset, eventRecorder record.EventRecorder, + operatorConfiguration *startup.OperatorConfigurationValues, images util.Images, oTelCollectorBaseUrl string, + logger *logr.Logger, ) { + createOperatorConfiguration( + ctx, + startupTasksK8sClient, + operatorConfiguration, + logger, + ) instrumentAtStartup( ctx, startupTasksK8sClient, @@ -618,8 +659,26 @@ func logCurrentSelfMonitoringSettings(deploymentSelfReference *appsv1.Deployment } } -func deleteDash0MonitoringResourcesInAllNamespaces(logger *logr.Logger) error { - handler, err := removal.NewOperatorPreDeleteHandler() +func createOperatorConfiguration( + ctx context.Context, + k8sClient client.Client, + operatorConfiguration *startup.OperatorConfigurationValues, + logger *logr.Logger, +) { + if operatorConfiguration != nil { + handler := startup.AutoOperatorConfigurationResourceHandler{ + Client: k8sClient, + OperatorNamespace: envVars.operatorNamespace, + NamePrefix: envVars.oTelCollectorNamePrefix, + } + if err := handler.CreateOperatorConfigurationResource(ctx, operatorConfiguration, logger); err != nil { + logger.Error(err, "Failed to create the requested Dash0 operator configuration resource.") + } + } +} 
+ +func deleteMonitoringResourcesInAllNamespaces(logger *logr.Logger) error { + handler, err := predelete.NewOperatorPreDeleteHandler() if err != nil { logger.Error(err, "Failed to create the OperatorPreDeleteHandler.") return err diff --git a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml index 8b927444..ae4b5604 100644 --- a/config/crd/bases/operator.dash0.com_dash0monitorings.yaml +++ b/config/crd/bases/operator.dash0.com_dash0monitorings.yaml @@ -69,7 +69,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +89,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +103,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. 
type: string required: - authorization diff --git a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml index fbfddb4f..56fcfa3c 100644 --- a/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml +++ b/config/crd/bases/operator.dash0.com_dash0operatorconfigurations.yaml @@ -69,7 +69,7 @@ spec: description: |- A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is ignored if the token property is set. The authorization token for your Dash0 organization can be copied from - https://app.dash0.com/settings. + https://app.dash0.com -> organization settings -> "Auth Tokens". properties: key: default: token @@ -89,7 +89,8 @@ spec: description: |- The Dash0 authorization token. This property is optional, but either this property or the SecretRef property has to be provided. If both are provided, the token will be used and SecretRef will be ignored. The authorization - token for your Dash0 organization can be copied from https://app.dash0.com/settings. + token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + "Auth Tokens". type: string type: object dataset: @@ -102,8 +103,8 @@ spec: description: |- The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory. The value needs to be the OTLP/gRPC endpoint of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied fom - https://app.dash0.com/settings. The correct endpoint value will always start with `ingress.` and end in - `dash0.com:4317`. + https://app.dash0.com -> organization settings -> "Endpoints". The correct endpoint value will always start with + `ingress.` and end in `dash0.com:4317`. 
type: string required: - authorization diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c822fe58..9d798ebc 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -28,6 +28,12 @@ rules: - patch - update - watch +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get - apiGroups: - "" resources: diff --git a/helm-chart/dash0-operator/README.md b/helm-chart/dash0-operator/README.md index 33163c13..0b290b13 100644 --- a/helm-chart/dash0-operator/README.md +++ b/helm-chart/dash0-operator/README.md @@ -202,7 +202,8 @@ spec: If you want to provide the Dash0 authorization token via a Kubernetes secret instead of providing the token as a string, create the secret in the namespace where the Dash0 operator is installed. If you followed the guide above, the name of that namespace is `dash0-system`. -The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> "Auth Tokens". +The authorization token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings +-> "Auth Tokens". You can freely choose the name of the secret and the key of the token within the secret. Create the secret by using the following command: diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index bc36910b..4d51afcf 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -38,6 +38,14 @@ rules: - update - watch +# Permissions required to create a Dash0 operator configuration resource +- apiGroups: + - "" + resources: + - endpoints + verbs: + - get + # Permissions required to queue events to report about the operator's actions, and to attach dangling events to their # respective involved objects. 
- apiGroups: diff --git a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml index bf7d328c..82c5b376 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment-and-webhooks.yaml @@ -78,6 +78,28 @@ spec: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect +{{- if .Values.operator.dash0Backend.enabled }} +{{- if not .Values.operator.dash0Backend.endpoint }} +{{- fail "Error: operator.dash0Backend.enabled is set to true, but you did not provide a value for operator.dash0Backend.endpoint. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." -}} +{{- end}} + - --operator-configuration-endpoint={{ .Values.operator.dash0Backend.endpoint }} +{{- if .Values.operator.dash0Backend.token }} + - --operator-configuration-token={{ .Values.operator.dash0Backend.token }} +{{- else if (and .Values.operator.dash0Backend.secretRef.name .Values.operator.dash0Backend.secretRef.key) }} +{{- $secret := lookup "v1" "Secret" .Release.Namespace .Values.operator.dash0Backend.secretRef.name -}} +{{- if $secret -}} +{{- if not (index $secret.data .Values.operator.dash0Backend.secretRef.key) -}} +{{- fail (printf "Error: There is a secret named \"%s\" in the target namespace \"%s\", but it does not have the required key \"%s\". Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." .Values.operator.dash0Backend.secretRef.name .Release.Namespace .Values.operator.dash0Backend.secretRef.key) -}} +{{- end -}} +{{- else -}} +{{- fail (printf "Error: There is no secret named \"%s\" in the target namespace \"%s\". 
Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." .Values.operator.dash0Backend.secretRef.name .Release.Namespace) -}} +{{- end }} + - --operator-configuration-secret-ref-name={{ .Values.operator.dash0Backend.secretRef.name }} + - --operator-configuration-secret-ref-key={{ .Values.operator.dash0Backend.secretRef.key }} +{{- else }} +{{- fail "Error: operator.dash0Backend.enabled is set to true, but neither operator.dash0Backend.token nor operator.dash0Backend.secretRef.name & operator.dash0Backend.secretRef.key have been provided. Please refer to the installation instructions at https://github.com/dash0hq/dash0-operator/tree/main/helm-chart/dash0-operator." -}} +{{- end }} +{{- end }} env: - name: DASH0_OPERATOR_NAMESPACE valueFrom: diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap index 4cbb6215..c80d9dda 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap @@ -37,6 +37,12 @@ cluster roles should match snapshot: - patch - update - watch + - apiGroups: + - "" + resources: + - endpoints + verbs: + - get - apiGroups: - "" resources: diff --git a/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml b/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml index 74da1f7c..cf8f4b98 100644 --- a/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml +++ b/helm-chart/dash0-operator/tests/operator/deployment-and-webhooks_test.yaml @@ -235,6 +235,27 @@ tests: path: spec.template.spec.containers[0].resources.requests.memory value: 32Mi + - it: should add args for creating an operator configuration resource with a token to deployment + documentSelector: + path: metadata.name + 
value: dash0-operator-controller + set: + operator: + dash0Backend: + enabled: true + endpoint: https://ingress.dash0.com + token: "very-secret-dash0-auth-token" + asserts: + - equal: + path: spec.template.spec.containers[0].args[3] + value: --operator-configuration-endpoint=https://ingress.dash0.com + - equal: + path: spec.template.spec.containers[0].args[4] + value: --operator-configuration-token=very-secret-dash0-auth-token + # Note: We deliberately do not have a test for the operator.dash0Backend.secretRef variant, since this would trigger + # a check whether the secret actually exists in the cluster, which of course would fail when running helm chart unit + # tests. + - it: should render the "dash0.com/cert-digest" label documentSelector: path: metadata.name diff --git a/helm-chart/dash0-operator/values.yaml b/helm-chart/dash0-operator/values.yaml index 35cc3acd..dd78fdf3 100644 --- a/helm-chart/dash0-operator/values.yaml +++ b/helm-chart/dash0-operator/values.yaml @@ -3,6 +3,51 @@ # settings for the operator/controller operator: + + # Use the operator.dash0Backend settings to configure the connection to the Dash0 backend; telemetry will be sent to + # the configured Dash0 backend by default. Under the hood, this will create a Dash0OperatorConfiguration resource + # right away, when starting the operator. If left empty, you can always create a Dash0OperatorConfiguration resource + # manually later. + dash0Backend: + # Set this to true to enable the creation of a Dash0OperatorConfiguration resource at startup. If a + # Dash0OperatorConfiguration already exists in the cluster, no action will be taken. Note that if this is set to + # true, you will also need to provide a valid endpoint (operator.dash0Backend.endpoint), and either an auth + # token (operator.dash0Backend.token) or a reference to a Kubernetes secret containing that token + # (operator.dash0Backend.secretRef). 
+ enabled: false + + # The URL of the Dash0 ingress endpoint to which telemetry data will be sent. This property is mandatory if + # operator.dash0Backend.enabled is true, otherwise it will be ignored. The value needs to be the OTLP/gRPC endpoint + # of your Dash0 organization. The correct OTLP/gRPC endpoint can be copied from https://app.dash0.com -> organization + # settings -> "Endpoints". The correct endpoint value will always start with `ingress.` and end in `dash0.com:4317`. + endpoint: + + # The Dash0 authorization token. This property is optional, but either this property or the secretRef configuration + # has to be provided if operator.dash0Backend.enabled is true. If operator.dash0Backend.enabled is false, this + # property will be ignored. + # If both token and secretRef are provided, the token will be used and secretRef will be ignored. The authorization + # token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + # "Auth Tokens". + token: + + # A reference to a Kubernetes secret containing the Dash0 authorization token. This property is optional, and is + # ignored if either operator.dash0Backend.enabled is false or operator.dash0Backend.token is set. The authorization + # token for your Dash0 organization can be copied from https://app.dash0.com -> organization settings -> + # "Auth Tokens". + secretRef: + # The name of the secret containing the Dash0 authorization token. Example: Assume you have created the Kubernetes + # secret with the following command: + # kubectl create secret generic \ + # dash0-authorization-secret \ + # --namespace dash0-system \ + # --from-literal=token=auth_...your-token-here... + # + # Then you would set the property to "dash0-authorization-secret". + name: + # The key of the value which contains the Dash0 authorization token. Assuming you have created the Kubernetes + # secret with the command above (see property "name"), then you would set the property to "token". 
+ key: + # number of replica for the controller manager deployment replicaCount: 1 @@ -135,11 +180,3 @@ operator: # If set to true, instructs the logger (Zap) to use a Zap development config (stacktraces on warnings, no sampling), # otherwise a Zap production config will be used (stacktraces on errors, sampling). developmentMode: false - - # If set to true, the operator Helm chart will skip the check for the Dash0 authorization secret. This should only - # be done for testing purposes. - disableSecretCheck: false - - # If set to true, the operator Helm chart will skip the check for the OTLP endpoing setting. This should only be done - # for testing purposes. - disableOtlpEndpointCheck: false diff --git a/internal/backendconnection/backendconnection_manager_suite_test.go b/internal/backendconnection/backendconnection_manager_suite_test.go index 8921445b..13604a0a 100644 --- a/internal/backendconnection/backendconnection_manager_suite_test.go +++ b/internal/backendconnection/backendconnection_manager_suite_test.go @@ -42,18 +42,11 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error - // cfg is defined in this file globally. 
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/internal/backendconnection/backendconnection_manager_test.go b/internal/backendconnection/backendconnection_manager_test.go index 4c53877b..e9a008a3 100644 --- a/internal/backendconnection/backendconnection_manager_test.go +++ b/internal/backendconnection/backendconnection_manager_test.go @@ -56,7 +56,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } manager = &BackendConnectionManager{ Client: k8sClient, @@ -240,15 +240,15 @@ var _ = Describe("The backend connection manager", Ordered, func() { Describe("when cleaning up OpenTelemetry collector resources when the resource is deleted", func() { It("should not delete the collector if there are still Dash0 monitoring resources", func() { // create multiple Dash0 monitoring resources - firstName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-1"} + firstName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-1"} firstDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, firstName) createdObjects = append(createdObjects, firstDash0MonitoringResource) - secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-2"} + secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-2"} secondDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, secondName) createdObjects = append(createdObjects, secondDash0MonitoringResource) - thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-3"} + thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: 
"dash0-monitoring-test-resource-3"} thirdDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, thirdName) createdObjects = append(createdObjects, thirdDash0MonitoringResource) @@ -274,7 +274,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { }) It("should not delete the collector if there is only one Dash0 monitoring resource left but it is not the one being deleted", func() { - resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-1"} + resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-1"} existingDash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, resourceName) createdObjects = append(createdObjects, existingDash0MonitoringResource) @@ -319,7 +319,7 @@ var _ = Describe("The backend connection manager", Ordered, func() { It("should delete the collector if the Dash0 monitoring resource that is being deleted is the only one left", func() { // create multiple Dash0 monitoring resources - resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-1"} + resourceName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-1"} dash0MonitoringResource := CreateDefaultMonitoringResource(ctx, k8sClient, resourceName) createdObjects = append(createdObjects, dash0MonitoringResource) diff --git a/internal/backendconnection/otelcolresources/desired_state_test.go b/internal/backendconnection/otelcolresources/desired_state_test.go index 835ccdab..397e942f 100644 --- a/internal/backendconnection/otelcolresources/desired_state_test.go +++ b/internal/backendconnection/otelcolresources/desired_state_test.go @@ -23,7 +23,7 @@ import ( const ( namespace = "some-namespace" - namePrefix = "unit-test" + namePrefix = OTelCollectorNamePrefixTest ) var _ = Describe("The desired state of the OpenTelemetry Collector resources", func() { diff 
--git a/internal/backendconnection/otelcolresources/otelcol_resources_test.go b/internal/backendconnection/otelcolresources/otelcol_resources_test.go index dcdb55d5..f45d1359 100644 --- a/internal/backendconnection/otelcolresources/otelcol_resources_test.go +++ b/internal/backendconnection/otelcolresources/otelcol_resources_test.go @@ -58,7 +58,7 @@ var _ = Describe("The OpenTelemetry Collector resource manager", Ordered, func() Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, DevelopmentMode: true, } }) diff --git a/internal/dash0/controller/controller_suite_test.go b/internal/dash0/controller/controller_suite_test.go index b7be7b25..88388df8 100644 --- a/internal/dash0/controller/controller_suite_test.go +++ b/internal/dash0/controller/controller_suite_test.go @@ -55,18 +55,11 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error - // cfg is defined in this file globally. 
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/internal/dash0/controller/dash0_controller.go b/internal/dash0/controller/dash0_controller.go index 821f4902..db4a15ad 100644 --- a/internal/dash0/controller/dash0_controller.go +++ b/internal/dash0/controller/dash0_controller.go @@ -86,6 +86,7 @@ func (r *Dash0Reconciler) InitializeSelfMonitoringMetrics( //+kubebuilder:rbac:groups=core,resources=events,verbs=create;list;patch;update //+kubebuilder:rbac:groups=core,resources=namespaces,verbs=get //+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;delete +//+kubebuilder:rbac:groups=core,resources=endpoints,verbs=get //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings,verbs=get;list;watch;create;update;patch;delete;deletecollection //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings/finalizers,verbs=update //+kubebuilder:rbac:groups=operator.dash0.com,resources=dash0monitorings/status,verbs=get;update;patch diff --git a/internal/dash0/controller/dash0_controller_test.go b/internal/dash0/controller/dash0_controller_test.go index 4eba59be..9cd4d8fa 100644 --- a/internal/dash0/controller/dash0_controller_test.go +++ b/internal/dash0/controller/dash0_controller_test.go @@ -64,7 +64,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } backendConnectionManager := &backendconnection.BackendConnectionManager{ Client: k8sClient, @@ -93,9 +93,9 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterEach(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) for _, name := range extraDash0MonitoringResourceNames { - RemoveMonitoringResourceByName(ctx, k8sClient, name, true) + DeleteMonitoringResourceByName(ctx, k8sClient, 
name, true) } }) @@ -128,11 +128,11 @@ var _ = Describe("The Dash0 controller", Ordered, func() { firstDash0MonitoringResource := &dash0v1alpha1.Dash0Monitoring{} Expect(k8sClient.Get(ctx, MonitoringResourceQualifiedName, firstDash0MonitoringResource)).To(Succeed()) time.Sleep(10 * time.Millisecond) - secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-2"} + secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-2"} extraDash0MonitoringResourceNames = append(extraDash0MonitoringResourceNames, secondName) CreateDefaultMonitoringResource(ctx, k8sClient, secondName) time.Sleep(10 * time.Millisecond) - thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "das0-monitoring-test-resource-3"} + thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-monitoring-test-resource-3"} extraDash0MonitoringResourceNames = append(extraDash0MonitoringResourceNames, thirdName) CreateDefaultMonitoringResource(ctx, k8sClient, thirdName) @@ -298,7 +298,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { Describe("when the instrumentWorkloads setting changes on an existing Dash0 monitoring resource", Ordered, func() { AfterEach(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) DescribeTable("when switching from instrumentWorkloads=none to instrumentWorkloads=created-and-updated", func(config WorkloadTestConfig) { @@ -717,7 +717,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should instrument workloads", func() { @@ -762,7 +762,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -778,7 +778,7 @@ var 
_ = Describe("The Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -792,7 +792,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { }) AfterEach(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should remove the collector resources", func() { diff --git a/internal/dash0/instrumentation/instrumentation_suite_test.go b/internal/dash0/instrumentation/instrumentation_suite_test.go index 033df949..f8627474 100644 --- a/internal/dash0/instrumentation/instrumentation_suite_test.go +++ b/internal/dash0/instrumentation/instrumentation_suite_test.go @@ -48,18 +48,11 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } var err error - // cfg is defined in this file globally. 
cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/internal/dash0/instrumentation/instrumenter_test.go b/internal/dash0/instrumentation/instrumenter_test.go index 3dc46c1e..d62df002 100644 --- a/internal/dash0/instrumentation/instrumenter_test.go +++ b/internal/dash0/instrumentation/instrumenter_test.go @@ -61,7 +61,7 @@ var _ = Describe("The instrumenter", Ordered, func() { createdObjects = DeleteAllCreatedObjects(ctx, k8sClient, createdObjects) DeleteAllEvents(ctx, clientset, namespace) - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) dash0MonitoringResource = nil }) diff --git a/internal/dash0/removal/operator_pre_delete_handler.go b/internal/dash0/predelete/operator_pre_delete_handler.go similarity index 99% rename from internal/dash0/removal/operator_pre_delete_handler.go rename to internal/dash0/predelete/operator_pre_delete_handler.go index 9648ab83..5b3f7b8e 100644 --- a/internal/dash0/removal/operator_pre_delete_handler.go +++ b/internal/dash0/predelete/operator_pre_delete_handler.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "context" diff --git a/internal/dash0/removal/operator_pre_delete_handler_test.go b/internal/dash0/predelete/operator_pre_delete_handler_test.go similarity index 97% rename from internal/dash0/removal/operator_pre_delete_handler_test.go rename to internal/dash0/predelete/operator_pre_delete_handler_test.go index 820de8f1..bf738365 100644 --- a/internal/dash0/removal/operator_pre_delete_handler_test.go +++ b/internal/dash0/predelete/operator_pre_delete_handler_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
// SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "context" @@ -69,8 +69,8 @@ var _ = Describe("Uninstalling the Dash0 Kubernetes operator", Ordered, func() { AfterEach(func() { createdObjects = DeleteAllCreatedObjects(ctx, k8sClient, createdObjects) - RemoveMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName1, false) - RemoveMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName2, false) + DeleteMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName1, false) + DeleteMonitoringResourceByName(ctx, k8sClient, dash0MonitoringResourceName2, false) }) It("should time out if the deletion of all Dash0 monitoring resources does not happen in a timely manner", func() { diff --git a/internal/dash0/removal/removal_suite_test.go b/internal/dash0/predelete/pre_delete_suite_test.go similarity index 87% rename from internal/dash0/removal/removal_suite_test.go rename to internal/dash0/predelete/pre_delete_suite_test.go index af85be12..5e55ba45 100644 --- a/internal/dash0/removal/removal_suite_test.go +++ b/internal/dash0/predelete/pre_delete_suite_test.go @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -package removal +package predelete import ( "fmt" @@ -48,7 +48,7 @@ var ( func TestRemoval(t *testing.T) { RegisterFailHandler(Fail) - RunSpecs(t, "Removal Suite") + RunSpecs(t, "Pre-Delete Suite") } var _ = BeforeSuite(func() { @@ -60,12 +60,6 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: true, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. 
- // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), } @@ -106,7 +100,7 @@ var _ = BeforeSuite(func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } backendConnectionManager := &backendconnection.BackendConnectionManager{ Client: k8sClient, diff --git a/internal/dash0/startup/auto_operator_configuration_handler.go b/internal/dash0/startup/auto_operator_configuration_handler.go new file mode 100644 index 00000000..0a9f0848 --- /dev/null +++ b/internal/dash0/startup/auto_operator_configuration_handler.go @@ -0,0 +1,221 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package startup + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/dash0/util" +) + +type SecretRef struct { + Name string + Key string +} + +type OperatorConfigurationValues struct { + Endpoint string + Token string + SecretRef +} + +type AutoOperatorConfigurationResourceHandler struct { + client.Client + OperatorNamespace string + NamePrefix string + bypassWebhookCheck bool +} + +const ( + operatorConfigurationAutoResourceName = "dash0-operator-configuration-auto-resource" + + alreadyExistsMessage = "The operator is configured to deploy an operator configuration resource at startup, but there is already" + + "an 
operator configuration resource in the cluster. Hence no action is necessary. (This is not an error.)" +) + +func (r *AutoOperatorConfigurationResourceHandler) CreateOperatorConfigurationResource( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + + // Fast path: check early on if there is already an operator configuration resource, skip all other steps if so. + // We will repeat this check immediately before creating the operator configuration resource, so if the check fails + // with an error we will ignore that error for now. + allOperatorConfigurationResources := &dash0v1alpha1.Dash0OperatorConfigurationList{} + if err := r.List(ctx, allOperatorConfigurationResources); err == nil { + if len(allOperatorConfigurationResources.Items) >= 1 { + logger.Info(alreadyExistsMessage) + return nil + } + } + + if err := r.validateOperatorConfiguration(operatorConfiguration); err != nil { + return err + } + + go func() { + // There is a validation webhook for operator configuration resources. Thus, before we can create an operator + // configuration resource, we need to wait for the webhook endpoint to become available. 
+ if err := r.waitForWebserviceEndpoint(ctx, logger); err != nil { + logger.Error(err, "failed to create the Dash0 operator configuration resource") + } + if err := r.createOperatorConfigurationResourceWithRetry(ctx, operatorConfiguration, logger); err != nil { + logger.Error(err, "failed to create the Dash0 operator configuration resource") + } + }() + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) validateOperatorConfiguration( + operatorConfiguration *OperatorConfigurationValues, +) error { + if operatorConfiguration.Endpoint == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has not been provided") + } + if operatorConfiguration.Token == "" { + if operatorConfiguration.SecretRef.Name == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has been provided, " + + "indicating that an operator configuration resource should be created, but neither " + + "--operator-configuration-token nor --operator-configuration-secret-ref-name have been provided") + } + if operatorConfiguration.SecretRef.Key == "" { + return fmt.Errorf("invalid operator configuration: --operator-configuration-endpoint has been provided, " + + "indicating that an operator configuration resource should be created, but neither " + + "--operator-configuration-token nor --operator-configuration-secret-ref-key have been provided") + } + } + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) waitForWebserviceEndpoint( + ctx context.Context, + logger *logr.Logger, +) error { + if r.bypassWebhookCheck { + return nil + } + if err := util.RetryWithCustomBackoff( + "waiting for webservice endpoint to become available", + func() error { + return r.checkWebServiceEndpoint(ctx) + }, + wait.Backoff{ + Duration: 1 * time.Second, + Factor: 1.0, + Steps: 30, + Cap: 30 * time.Second, + }, + false, + logger, + ); err != nil { + return fmt.Errorf("failed to wait for the webservice endpoint to 
become available: %w", err) + } + + return nil +} + +func (r *AutoOperatorConfigurationResourceHandler) checkWebServiceEndpoint( + ctx context.Context, +) error { + endpoints := corev1.Endpoints{} + if err := r.Get(ctx, types.NamespacedName{ + Namespace: r.OperatorNamespace, + Name: fmt.Sprintf("%s-webhook-service", r.NamePrefix), + }, &endpoints); err != nil { + return err + } + + for _, subset := range endpoints.Subsets { + if len(subset.Addresses) == 0 { + // wait for the address to be listed in subset.Addresses instead of subset.NotReadyAddresses + continue + } + for _, port := range subset.Ports { + if port.Port == 9443 { + return nil + } + } + } + + return fmt.Errorf("the webservice endpoint is not available yet") +} + +func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationResourceWithRetry( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + return util.RetryWithCustomBackoff( + "create operator configuration resource at startup", + func() error { + return r.createOperatorConfigurationResourceOnce(ctx, operatorConfiguration, logger) + }, + wait.Backoff{ + Duration: 3 * time.Second, + Factor: 1.5, + Steps: 6, + Cap: 60 * time.Second, + }, + true, + logger, + ) +} + +func (r *AutoOperatorConfigurationResourceHandler) createOperatorConfigurationResourceOnce( + ctx context.Context, + operatorConfiguration *OperatorConfigurationValues, + logger *logr.Logger, +) error { + allOperatorConfigurationResources := &dash0v1alpha1.Dash0OperatorConfigurationList{} + if err := r.List(ctx, allOperatorConfigurationResources); err != nil { + return fmt.Errorf("failed to list all Dash0 operator configuration resources: %w", err) + } + if len(allOperatorConfigurationResources.Items) >= 1 { + logger.Info(alreadyExistsMessage) + return nil + } + + authorization := dash0v1alpha1.Authorization{} + if operatorConfiguration.Token != "" { + authorization.Token = &operatorConfiguration.Token + } else { 
+ authorization.SecretRef = &dash0v1alpha1.SecretRef{ + Name: operatorConfiguration.SecretRef.Name, + Key: operatorConfiguration.SecretRef.Key, + } + } + + if err := r.Create(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: operatorConfigurationAutoResourceName, + }, + Spec: dash0v1alpha1.Dash0OperatorConfigurationSpec{ + SelfMonitoring: dash0v1alpha1.SelfMonitoring{ + Enabled: true, + }, + Export: &dash0v1alpha1.Export{ + Dash0: &dash0v1alpha1.Dash0Configuration{ + Endpoint: operatorConfiguration.Endpoint, + Authorization: authorization, + }, + }, + }, + }); err != nil { + return fmt.Errorf("failed to create the Dash0 operator configuration resource: %w", err) + } + + logger.Info("a Dash0 operator configuration resource has been created") + return nil +} diff --git a/internal/dash0/startup/auto_operator_configuration_handler_test.go b/internal/dash0/startup/auto_operator_configuration_handler_test.go new file mode 100644 index 00000000..9150280c --- /dev/null +++ b/internal/dash0/startup/auto_operator_configuration_handler_test.go @@ -0,0 +1,147 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +// SPDX-License-Identifier: Apache-2.0 + +package startup + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . 
"github.com/dash0hq/dash0-operator/test/util" +) + +var ( + secretRef = SecretRef{ + Name: "test-secret", + Key: "test-key", + } + operatorConfigurationValuesWithToken = OperatorConfigurationValues{ + Endpoint: EndpointDash0Test, + Token: AuthorizationTokenTest, + } + operatorConfigurationValuesWithSecretRef = OperatorConfigurationValues{ + Endpoint: EndpointDash0Test, + SecretRef: secretRef, + } +) + +var _ = Describe("Create an operator configuration resource at startup", Ordered, func() { + + ctx := context.Background() + logger := log.FromContext(ctx) + + BeforeAll(func() { + EnsureDash0OperatorNamespaceExists(ctx, k8sClient) + }) + + AfterEach(func() { + DeleteAllOperatorConfigurationResources(ctx, k8sClient) + }) + + It("should not do anything if there already is an operator configuration resource in the cluster", func() { + CreateDefaultOperatorConfigurationResource(ctx, k8sClient) + // verify that there is only one resource + list := v1alpha1.Dash0OperatorConfigurationList{} + Expect(k8sClient.List(ctx, &list)).To(Succeed()) + Expect(list.Items).To(HaveLen(1)) + Expect(list.Items[0].Name).To(Equal(OperatorConfigurationResourceName)) + + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{}, &logger)).To(Succeed()) + // verify that there is _still_ only one resource, and that its name is not the one that would be automatically + // created by AutoOperatorConfigurationResourceHandler. 
+ Expect(k8sClient.List(ctx, &list)).To(Succeed()) + Expect(list.Items).To(HaveLen(1)) + Expect(list.Items[0].Name).To(Equal(OperatorConfigurationResourceName)) + }) + + It("should fail validation if no endpoint has been provided", func() { + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{ + Token: AuthorizationTokenTest, + }, &logger)).To( + MatchError( + ContainSubstring( + "invalid operator configuration: --operator-configuration-endpoint has not been provided"))) + }) + + It("should fail validation if no token and no secret reference have been provided", func() { + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{ + Endpoint: AuthorizationTokenTest, + }, &logger)).To( + MatchError( + ContainSubstring( + "neither --operator-configuration-token nor --operator-configuration-secret-ref-name have " + + "been provided"))) + }) + + It("should fail validation if no token and no secret reference key have been provided", func() { + Expect(handler.CreateOperatorConfigurationResource(ctx, &OperatorConfigurationValues{ + Endpoint: AuthorizationTokenTest, + SecretRef: SecretRef{ + Name: "test-secret", + }, + }, &logger)).To( + MatchError( + ContainSubstring( + "neither --operator-configuration-token nor --operator-configuration-secret-ref-key have " + + "been provided"))) + }) + + It("should create an operator configuration resource with a token", func() { + Expect( + handler.CreateOperatorConfigurationResource(ctx, &operatorConfigurationValuesWithToken, &logger), + ).To(Succeed()) + + Eventually(func(g Gomega) { + operatorConfiguration := v1alpha1.Dash0OperatorConfiguration{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: operatorConfigurationAutoResourceName, + }, &operatorConfiguration) + + g.Expect(err).ToNot(HaveOccurred()) + export := operatorConfiguration.Spec.Export + g.Expect(export).ToNot(BeNil()) + dash0Export := export.Dash0 + g.Expect(dash0Export).ToNot(BeNil()) + 
g.Expect(export.Grpc).To(BeNil()) + g.Expect(export.Http).To(BeNil()) + g.Expect(dash0Export.Endpoint).To(Equal(EndpointDash0Test)) + g.Expect(dash0Export.Authorization.Token).ToNot(BeNil()) + g.Expect(*dash0Export.Authorization.Token).To(Equal(AuthorizationTokenTest)) + g.Expect(dash0Export.Authorization.SecretRef).To(BeNil()) + }, 5*time.Second, 100*time.Millisecond).Should(Succeed()) + }) + + It("should create an operator configuration resource with a secret reference", func() { + Expect( + handler.CreateOperatorConfigurationResource(ctx, &operatorConfigurationValuesWithSecretRef, &logger), + ).To(Succeed()) + + Eventually(func(g Gomega) { + operatorConfiguration := v1alpha1.Dash0OperatorConfiguration{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: operatorConfigurationAutoResourceName, + }, &operatorConfiguration) + + g.Expect(err).ToNot(HaveOccurred()) + export := operatorConfiguration.Spec.Export + g.Expect(export).ToNot(BeNil()) + dash0Export := export.Dash0 + g.Expect(dash0Export).ToNot(BeNil()) + g.Expect(export.Grpc).To(BeNil()) + g.Expect(export.Http).To(BeNil()) + g.Expect(dash0Export.Endpoint).To(Equal(EndpointDash0Test)) + g.Expect(dash0Export.Authorization.Token).To(BeNil()) + g.Expect(dash0Export.Authorization.SecretRef).ToNot(BeNil()) + g.Expect(dash0Export.Authorization.SecretRef.Name).To(Equal("test-secret")) + g.Expect(dash0Export.Authorization.SecretRef.Key).To(Equal("test-key")) + }, 5*time.Second, 100*time.Millisecond).Should(Succeed()) + }) +}) diff --git a/internal/dash0/startup/startup_suite_test.go b/internal/dash0/startup/startup_suite_test.go new file mode 100644 index 00000000..57c24b9d --- /dev/null +++ b/internal/dash0/startup/startup_suite_test.go @@ -0,0 +1,77 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package startup + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/onsi/gomega/format" + + . "github.com/dash0hq/dash0-operator/test/util" +) + +var ( + cfg *rest.Config + k8sClient client.Client + handler *AutoOperatorConfigurationResourceHandler + testEnv *envtest.Environment +) + +func TestStartup(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Startup Suite") +} + +var _ = BeforeSuite(func() { + format.MaxLength = 0 + + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", + fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + Expect(dash0v1alpha1.AddToScheme(scheme.Scheme)).To(Succeed()) + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + handler = &AutoOperatorConfigurationResourceHandler{ + Client: k8sClient, + OperatorNamespace: Dash0OperatorNamespace, + NamePrefix: OTelCollectorNamePrefixTest, + bypassWebhookCheck: true, + } + Expect(err).NotTo(HaveOccurred()) +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + 
Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/dash0/webhooks/attach_dangling_events_test.go b/internal/dash0/webhooks/attach_dangling_events_test.go index d81e01c5..769ac0cc 100644 --- a/internal/dash0/webhooks/attach_dangling_events_test.go +++ b/internal/dash0/webhooks/attach_dangling_events_test.go @@ -47,7 +47,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { Client: k8sClient, Scheme: k8sClient.Scheme(), DeploymentSelfReference: DeploymentSelfReference, - OTelCollectorNamePrefix: "unit-test", + OTelCollectorNamePrefix: OTelCollectorNamePrefixTest, } backendConnectionManager := &backendconnection.BackendConnectionManager{ Client: k8sClient, @@ -69,7 +69,7 @@ var _ = Describe("The Dash0 webhook and the Dash0 controller", Ordered, func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) BeforeEach(func() { diff --git a/internal/dash0/webhooks/instrumentation_webhook_test.go b/internal/dash0/webhooks/instrumentation_webhook_test.go index e86a46a1..0bb90694 100644 --- a/internal/dash0/webhooks/instrumentation_webhook_test.go +++ b/internal/dash0/webhooks/instrumentation_webhook_test.go @@ -40,7 +40,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) DescribeTable("when mutating new workloads", func(config WorkloadTestConfig) { @@ -491,7 +491,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -507,7 +507,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should instrument workloads", func() { @@ -523,7 +523,7 @@ var _ = 
Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should not instrument workloads", func() { @@ -539,7 +539,7 @@ var _ = Describe("The Dash0 instrumentation webhook", func() { }) AfterAll(func() { - RemoveMonitoringResource(ctx, k8sClient) + DeleteMonitoringResource(ctx, k8sClient) }) It("should instrument workloads", func() { diff --git a/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go b/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go index 45613c29..2718646a 100644 --- a/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go +++ b/internal/dash0/webhooks/operator_configuration_validation_webhook_test.go @@ -14,7 +14,7 @@ import ( var _ = Describe("The validation webhook for the operator configuration resource", func() { AfterEach(func() { - Expect(k8sClient.DeleteAllOf(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{})).To(Succeed()) + DeleteAllOperatorConfigurationResources(ctx, k8sClient) }) Describe("when validating", Ordered, func() { diff --git a/internal/dash0/webhooks/webhook_suite_test.go b/internal/dash0/webhooks/webhook_suite_test.go index 409a5aac..12b235a8 100644 --- a/internal/dash0/webhooks/webhook_suite_test.go +++ b/internal/dash0/webhooks/webhook_suite_test.go @@ -61,15 +61,8 @@ var _ = BeforeSuite(func() { testEnv = &envtest.Environment{ CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, ErrorIfCRDPathMissing: false, - - // The BinaryAssetsDirectory is only required if you want to run the tests directly - // without call the makefile target test. If not informed it will look for the - // default path defined in controller-runtime which is /usr/local/kubebuilder/. - // Note that you must have the required binaries setup under the bin directory to perform - // the tests directly. 
When we run make test it will be setup and used automatically. BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), - WebhookInstallOptions: envtest.WebhookInstallOptions{ Paths: []string{filepath.Join("..", "..", "..", "config", "webhook")}, }, @@ -77,7 +70,6 @@ var _ = BeforeSuite(func() { Expect(testEnv).NotTo(BeNil()) var err error - // cfg is defined in this file globally. cfg, err = testEnv.Start() Expect(err).NotTo(HaveOccurred()) Expect(cfg).NotTo(BeNil()) diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh index 1b05c27c..aec4ceca 100755 --- a/test-resources/bin/test-cleanup.sh +++ b/test-resources/bin/test-cleanup.sh @@ -23,6 +23,7 @@ kubectl delete -n ${target_namespace} -f test-resources/customresources/dash0mon sleep 1 kubectl patch -f test-resources/customresources/dash0monitoring/dash0monitoring.yaml -p '{"metadata":{"finalizers":null}}' --type=merge || true kubectl delete -f test-resources/customresources/dash0operatorconfiguration/dash0operatorconfiguration.token.yaml || true +kubectl delete dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource || true if [[ "${target_namespace}" != "default" ]] && [[ "${delete_namespace}" == "true" ]]; then kubectl delete ns ${target_namespace} --ignore-not-found diff --git a/test-resources/bin/test-scenario-01-aum-operator-cr.sh b/test-resources/bin/test-scenario-01-aum-operator-cr.sh index 0a77ce93..65e26bd2 100755 --- a/test-resources/bin/test-scenario-01-aum-operator-cr.sh +++ b/test-resources/bin/test-scenario-01-aum-operator-cr.sh @@ -51,11 +51,6 @@ deploy_via_helm echo echo -echo "STEP 7: deploy the Dash0 operator configuration resource to cluster" -install_operator_configuration_resource -echo -echo - -echo "STEP 8: deploy the Dash0 monitoring resource to namespace ${target_namespace}" +echo "STEP 7: deploy the Dash0 monitoring resource to namespace 
${target_namespace}" install_monitoring_resource diff --git a/test-resources/bin/test-scenario-02-operator-cr-aum.sh b/test-resources/bin/test-scenario-02-operator-cr-aum.sh index c13c9867..92237e0b 100755 --- a/test-resources/bin/test-scenario-02-operator-cr-aum.sh +++ b/test-resources/bin/test-scenario-02-operator-cr-aum.sh @@ -45,16 +45,11 @@ deploy_via_helm echo echo -echo "STEP 6: deploy the Dash0 operator configuration resource to cluster" -install_operator_configuration_resource -echo -echo - -echo "STEP 7: deploy the Dash0 monitoring resource to namespace ${target_namespace}" +echo "STEP 6: deploy the Dash0 monitoring resource to namespace ${target_namespace}" install_monitoring_resource echo echo -echo "STEP 8: deploy application under monitoring" +echo "STEP 7: deploy application under monitoring" test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} diff --git a/test-resources/bin/util b/test-resources/bin/util index 0b60c2e3..a40849b9 100644 --- a/test-resources/bin/util +++ b/test-resources/bin/util @@ -100,6 +100,12 @@ deploy_via_helm() { helm_install_command+=" --set operator.filelogOffsetSynchImage.pullPolicy=${FILELOG_OFFSET_SYNCH_IMG_PULL_POLICY:-Never}" fi + # Deploy an operator configuration right away. + helm_install_command+=" --set operator.dash0Backend.enabled=true" + helm_install_command+=" --set operator.dash0Backend.endpoint=ingress.eu-west-1.aws.dash0-dev.com:4317" + helm_install_command+=" --set operator.dash0Backend.secretRef.name=dash0-authorization-secret" + helm_install_command+=" --set operator.dash0Backend.secretRef.key=token" + helm_install_command+=" dash0-operator" helm_install_command+=" ${OPERATOR_HELM_CHART:-helm-chart/dash0-operator}" echo Helm install command: @@ -123,6 +129,19 @@ wait_for_operator_manager_and_webhook() { fi sleep 0.3 done + + # We deploy an operator configuration at startup via operator.dash0Backend.enabled=true, wait for that resource to + # become available as well. 
+ echo "waiting for the automatically created operator configuration resource to become available" + for ((i=0; i<=20; i++)); do + # wait until the resource has been created + if kubectl get dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource; then + break; + fi + sleep 1 + done + # wait until the resource has been reconciled and is marked as available + kubectl wait dash0operatorconfigurations.operator.dash0.com/dash0-operator-configuration-auto-resource --for condition=Available --timeout 30s } has_been_set_to_empty_string() { diff --git a/test/e2e/operator.go b/test/e2e/operator.go index 7e0ba24e..26ba6a47 100644 --- a/test/e2e/operator.go +++ b/test/e2e/operator.go @@ -44,8 +44,6 @@ func deployOperator( operatorNamespace, "--create-namespace", "--set", "operator.developmentMode=true", - "--set", "operator.disableSecretCheck=true", - "--set", "operator.disableOtlpEndpointCheck=true", } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) @@ -263,8 +261,6 @@ func upgradeOperator( "--namespace", operatorNamespace, "--set", "operator.developmentMode=true", - "--set", "operator.disableSecretCheck=true", - "--set", "operator.disableOtlpEndpointCheck=true", } arguments = addOptionalHelmParameters(arguments, operatorHelmChart, images) diff --git a/test/util/constants.go b/test/util/constants.go index 81d61781..9b1e483b 100644 --- a/test/util/constants.go +++ b/test/util/constants.go @@ -16,15 +16,17 @@ import ( ) const ( - TestNamespaceName = "test-namespace" - Dash0OperatorNamespace = "dash0-system" - CronJobNamePrefix = "cronjob" - DaemonSetNamePrefix = "daemonset" - DeploymentNamePrefix = "deployment" - JobNamePrefix = "job" - PodNamePrefix = "pod" - ReplicaSetNamePrefix = "replicaset" - StatefulSetNamePrefix = "statefulset" + TestNamespaceName = "test-namespace" + Dash0OperatorNamespace = "dash0-system" + OTelCollectorNamePrefixTest = "unit-test" + + CronJobNamePrefix = "cronjob" + DaemonSetNamePrefix = 
"daemonset" + DeploymentNamePrefix = "deployment" + JobNamePrefix = "job" + PodNamePrefix = "pod" + ReplicaSetNamePrefix = "replicaset" + StatefulSetNamePrefix = "statefulset" OperatorImageTest = "some-registry.com:1234/dash0hq/operator-controller:1.2.3" InitContainerImageTest = "some-registry.com:1234/dash0hq/instrumentation:4.5.6" diff --git a/test/util/monitoring_resource.go b/test/util/monitoring_resource.go index 6cb402fb..8ae83d7a 100644 --- a/test/util/monitoring_resource.go +++ b/test/util/monitoring_resource.go @@ -232,11 +232,11 @@ func VerifyMonitoringResourceByNameDoesNotExist( )).To(BeNil()) } -func RemoveMonitoringResource(ctx context.Context, k8sClient client.Client) { - RemoveMonitoringResourceByName(ctx, k8sClient, MonitoringResourceQualifiedName, true) +func DeleteMonitoringResource(ctx context.Context, k8sClient client.Client) { + DeleteMonitoringResourceByName(ctx, k8sClient, MonitoringResourceQualifiedName, true) } -func RemoveMonitoringResourceByName( +func DeleteMonitoringResourceByName( ctx context.Context, k8sClient client.Client, monitoringResourceName types.NamespacedName, diff --git a/test/util/operator_resource.go b/test/util/operator_resource.go index 43beea54..9b457f2c 100644 --- a/test/util/operator_resource.go +++ b/test/util/operator_resource.go @@ -21,7 +21,7 @@ import ( const ( Dash0OperatorDeploymentName = "controller-deployment" - OperatorConfigurationResourceName = "dash0-operator-test-resource" + OperatorConfigurationResourceName = "dash0-operator-configuration-test" ) var ( @@ -109,6 +109,13 @@ func DeleteOperatorConfigurationResource( Expect(k8sClient.DeleteAllOf(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{})).To(Succeed()) } +func DeleteAllOperatorConfigurationResources( + ctx context.Context, + k8sClient client.Client, +) { + Expect(k8sClient.DeleteAllOf(ctx, &dash0v1alpha1.Dash0OperatorConfiguration{})).To(Succeed()) +} + func LoadOperatorConfigurationResourceByNameIfItExists( ctx context.Context, k8sClient 
client.Client,