diff --git a/cmd/main.go b/cmd/main.go index 56e2b317..467dcaeb 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -14,6 +14,7 @@ import ( // to ensure that exec-entrypoint and run can make use of them. _ "k8s.io/client-go/plugin/pkg/client/auth" + "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" @@ -27,6 +28,7 @@ import ( operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" "github.com/dash0hq/dash0-operator/internal/controller" + "github.com/dash0hq/dash0-operator/internal/removal" "github.com/dash0hq/dash0-operator/internal/util" dash0webhook "github.com/dash0hq/dash0-operator/internal/webhook" //+kubebuilder:scaffold:imports @@ -55,18 +57,23 @@ func init() { } func main() { + var uninstrumentAll bool var metricsAddr string var enableLeaderElection bool var probeAddr string var secureMetrics bool var enableHTTP2 bool + flag.BoolVar(&uninstrumentAll, "uninstrument-all", false, + "If set, the process will remove all Dash0 custom resources from all namespaces in the cluster. This will "+ + "trigger the Dash0 custom resources' finalizers in each namespace, which in turn will revert the "+ + "instrumentation of all workloads in all namespaces.") flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. 
"+ "Enabling this will ensure there is only one active controller manager.") flag.BoolVar(&secureMetrics, "metrics-secure", false, - "If set the metrics endpoint is served securely") + "If set, the metrics endpoint is served securely") flag.BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") @@ -88,6 +95,14 @@ func main() { ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + if uninstrumentAll { + if err := deleteCustomResourcesInAllNamespaces(&setupLog); err != nil { + setupLog.Error(err, "deleting the Dash0 custom resources in all namespaces failed") + os.Exit(1) + } + os.Exit(0) + } + // if the enable-http2 flag is false (the default), http/2 should be disabled // due to its vulnerabilities. More specifically, disabling http/2 will // prevent from being vulnerable to the HTTP/2 Stream Cancelation and @@ -261,3 +276,17 @@ func readEnvironmentVariables() (string, string, string, corev1.PullPolicy, erro return otelCollectorBaseUrl, operatorImage, initContainerImage, initContainerImagePullPolicy, nil } + +func deleteCustomResourcesInAllNamespaces(logger *logr.Logger) error { + handler, err := removal.NewOperatorPreDeleteHandler() + if err != nil { + logger.Error(err, "Failed to create the OperatorPreDeleteHandler.") + return err + } + err = handler.DeleteAllDash0CustomResources() + if err != nil { + logger.Error(err, "Failed to delete all Dash0 custom resources.") + return err + } + return nil +} diff --git a/helm-chart/dash0-operator/templates/_helpers.tpl b/helm-chart/dash0-operator/templates/_helpers.tpl index ec0d1479..f73c0876 100644 --- a/helm-chart/dash0-operator/templates/_helpers.tpl +++ b/helm-chart/dash0-operator/templates/_helpers.tpl @@ -14,7 +14,7 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/part-of: {{ include "dash0-operator.chartName" . 
}} -app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/managed-by: {{ .Release.Service | quote }} helm.sh/chart: {{ include "dash0-operator.chartNameWithVersion" . }} {{- include "dash0-operator.additionalLabels" . }} {{- end }} @@ -54,4 +54,14 @@ helm.sh/chart: {{ include "dash0-operator.chartNameWithVersion" . }} {{/* the init container image */}} {{- define "dash0-operator.initContainerImage" -}} {{- printf "%s:%s" .Values.operator.initContainerImage.repository .Values.operator.initContainerImage.tag }} +{{- end }} + +{{- define "dash0-operator.restrictiveContainerSecurityContext" -}} +securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsNonRoot: true + capabilities: + drop: + - ALL {{- end }} \ No newline at end of file diff --git a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml index 8acc9dfc..063b4b4a 100644 --- a/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml +++ b/helm-chart/dash0-operator/templates/operator/cluster-roles.yaml @@ -46,6 +46,7 @@ rules: verbs: - create - delete + - deletecollection - get - list - patch diff --git a/helm-chart/dash0-operator/templates/operator/deployment.yaml b/helm-chart/dash0-operator/templates/operator/deployment.yaml index a3199ff5..bad52424 100644 --- a/helm-chart/dash0-operator/templates/operator/deployment.yaml +++ b/helm-chart/dash0-operator/templates/operator/deployment.yaml @@ -9,6 +9,7 @@ metadata: app.kubernetes.io/component: manager {{- include "dash0-operator.labels" . | nindent 4 }} control-plane: controller-manager + dash0.com/enable: "false" {{- with .Values.operator.deploymentAnnotations }} annotations: {{- toYaml . 
| nindent 4 }} @@ -76,11 +77,7 @@ spec: protocol: TCP resources: {{- toYaml .Values.operator.managerPodResources | nindent 10 }} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL + {{ include "dash0-operator.restrictiveContainerSecurityContext" . | nindent 8 }} volumeMounts: - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert @@ -124,6 +121,7 @@ spec: seccompProfile: type: RuntimeDefault serviceAccountName: {{ template "dash0-operator.serviceAccountName" . }} + automountServiceAccountToken: true terminationGracePeriodSeconds: 10 volumes: - name: cert diff --git a/helm-chart/dash0-operator/templates/operator/pre-delete-hook.yaml b/helm-chart/dash0-operator/templates/operator/pre-delete-hook.yaml new file mode 100644 index 00000000..8576168d --- /dev/null +++ b/helm-chart/dash0-operator/templates/operator/pre-delete-hook.yaml @@ -0,0 +1,39 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "dash0-operator.chartName" . }}-pre-delete + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: job + app.kubernetes.io/instance: pre-delete-hook + app.kubernetes.io/component: pre-delete + {{- include "dash0-operator.labels" . | nindent 4 }} + dash0.com/enable: "false" + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": hook-succeeded +spec: + template: + metadata: + name: {{ .Release.Name }}-pre-delete-job + labels: + app.kubernetes.io/instance: pre-delete-hook + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + helm.sh/chart: {{ include "dash0-operator.chartNameWithVersion" . }} + spec: + restartPolicy: OnFailure + containers: + - name: pre-delete-job + image: {{ include "dash0-operator.image" . | quote }} + imagePullPolicy: {{ .Values.operator.image.pullPolicy }} + command: + - /manager + - "--uninstrument-all" + {{ include "dash0-operator.restrictiveContainerSecurityContext" . 
| nindent 10 }} + resources: + {{- toYaml .Values.operator.managerPodResources | nindent 12 }} + securityContext: + runAsNonRoot: true + serviceAccountName: {{ template "dash0-operator.serviceAccountName" . }} + automountServiceAccountToken: true + backoffLimit: 2 \ No newline at end of file diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap index 142d052a..9ead47c8 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/cluster-roles_test.yaml.snap @@ -48,6 +48,7 @@ cluster roles should match snapshot: verbs: - create - delete + - deletecollection - get - list - patch diff --git a/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment_test.yaml.snap index 9e2c2226..ecff4361 100644 --- a/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment_test.yaml.snap +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/deployment_test.yaml.snap @@ -11,6 +11,7 @@ deployment should match snapshot (default values): app.kubernetes.io/part-of: dash0-operator app.kubernetes.io/version: 1.0.0 control-plane: controller-manager + dash0.com/enable: "false" helm.sh/chart: dash0-operator-1.0.0 name: dash0-operator-controller-manager namespace: NAMESPACE @@ -26,6 +27,7 @@ deployment should match snapshot (default values): labels: control-plane: controller-manager spec: + automountServiceAccountToken: true containers: - args: - --health-probe-bind-address=:8081 @@ -72,6 +74,8 @@ deployment should match snapshot (default values): capabilities: drop: - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true volumeMounts: - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert diff --git 
a/helm-chart/dash0-operator/tests/operator/__snapshot__/pre-delete-hook_test.yaml.snap b/helm-chart/dash0-operator/tests/operator/__snapshot__/pre-delete-hook_test.yaml.snap new file mode 100644 index 00000000..5d432b62 --- /dev/null +++ b/helm-chart/dash0-operator/tests/operator/__snapshot__/pre-delete-hook_test.yaml.snap @@ -0,0 +1,57 @@ +pre-delete hook job should match snapshot: + 1: | + apiVersion: batch/v1 + kind: Job + metadata: + annotations: + helm.sh/hook: pre-delete + helm.sh/hook-delete-policy: hook-succeeded + labels: + app.kubernetes.io/component: pre-delete + app.kubernetes.io/instance: pre-delete-hook + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: job + app.kubernetes.io/part-of: dash0-operator + app.kubernetes.io/version: 1.0.0 + dash0.com/enable: "false" + helm.sh/chart: dash0-operator-1.0.0 + name: dash0-operator-pre-delete + namespace: NAMESPACE + spec: + backoffLimit: 2 + template: + metadata: + labels: + app.kubernetes.io/instance: pre-delete-hook + app.kubernetes.io/managed-by: Helm + helm.sh/chart: dash0-operator-1.0.0 + name: RELEASE-NAME-pre-delete-job + spec: + automountServiceAccountToken: true + containers: + - command: + - /manager + - --uninstrument-all + image: dash0-operator-controller:1.0.0 + imagePullPolicy: null + name: pre-delete-job + resources: + limits: + cpu: 500m + ephemeral-storage: 500Mi + memory: 128Mi + requests: + cpu: 10m + ephemeral-storage: 500Mi + memory: 64Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + restartPolicy: OnFailure + securityContext: + runAsNonRoot: true + serviceAccountName: dash0-operator-controller-manager diff --git a/helm-chart/dash0-operator/tests/operator/pre-delete-hook_test.yaml b/helm-chart/dash0-operator/tests/operator/pre-delete-hook_test.yaml new file mode 100644 index 00000000..00624478 --- /dev/null +++ b/helm-chart/dash0-operator/tests/operator/pre-delete-hook_test.yaml @@ -0,0 
+1,7 @@ +suite: test pre-delete hook job +templates: + - operator/pre-delete-hook.yaml +tests: + - it: pre-delete hook job should match snapshot + asserts: + - matchSnapshot: {} \ No newline at end of file diff --git a/internal/controller/controller_suite_test.go b/internal/controller/controller_suite_test.go index 4c07ce5f..bdea1378 100644 --- a/internal/controller/controller_suite_test.go +++ b/internal/controller/controller_suite_test.go @@ -9,9 +9,6 @@ import ( "runtime" "testing" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" @@ -22,6 +19,9 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" //+kubebuilder:scaffold:imports ) diff --git a/internal/controller/dash0_controller.go b/internal/controller/dash0_controller.go index 9638a56c..43e40993 100644 --- a/internal/controller/dash0_controller.go +++ b/internal/controller/dash0_controller.go @@ -222,10 +222,10 @@ func (r *Dash0Reconciler) verifyThatDash0CustomResourceIsUniqe( dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger, ) (bool, error) { - allDash0CustomResourcesNamespace := &operatorv1alpha1.Dash0List{} + allDash0CustomResourcesInNamespace := &operatorv1alpha1.Dash0List{} if err := r.Client.List( ctx, - allDash0CustomResourcesNamespace, + allDash0CustomResourcesInNamespace, &client.ListOptions{ Namespace: req.Namespace, }, @@ -234,14 +234,14 @@ func (r *Dash0Reconciler) verifyThatDash0CustomResourceIsUniqe( return true, err } - if len(allDash0CustomResourcesNamespace.Items) > 1 { + if len(allDash0CustomResourcesInNamespace.Items) > 1 { // There are multiple instances of the Dash0 custom resource in this namespace. 
If the resource that is // currently being reconciled is the one that has been most recently created, we assume that this is the source // of truth in terms of configuration settings etc., and we ignore the other instances in this reconcile request // (they will be handled when they are being reconciled). If the currently reconciled resource is not the most // recent one, we set its status to degraded. - sort.Sort(SortByCreationTimestamp(allDash0CustomResourcesNamespace.Items)) - mostRecentResource := allDash0CustomResourcesNamespace.Items[len(allDash0CustomResourcesNamespace.Items)-1] + sort.Sort(SortByCreationTimestamp(allDash0CustomResourcesInNamespace.Items)) + mostRecentResource := allDash0CustomResourcesInNamespace.Items[len(allDash0CustomResourcesInNamespace.Items)-1] if mostRecentResource.UID == dash0CustomResource.UID { logger.Info( "At least one other Dash0 custom resource exists in this namespace. This Dash0 custom " + diff --git a/internal/controller/dash0_controller_test.go b/internal/controller/dash0_controller_test.go index de8baf21..94e2f0cb 100644 --- a/internal/controller/dash0_controller_test.go +++ b/internal/controller/dash0_controller_test.go @@ -75,7 +75,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() { AfterEach(func() { RemoveDash0CustomResource(ctx, k8sClient) for _, name := range extraDash0CustomResourceNames { - RemoveDash0CustomResourceByName(ctx, k8sClient, name) + RemoveDash0CustomResourceByName(ctx, k8sClient, name, true) } }) diff --git a/internal/removal/operator_pre_delete_handler.go b/internal/removal/operator_pre_delete_handler.go new file mode 100644 index 00000000..90b345ca --- /dev/null +++ b/internal/removal/operator_pre_delete_handler.go @@ -0,0 +1,172 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package removal + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" +) + +const ( + defaultTimeout = 2 * time.Minute +) + +type OperatorPreDeleteHandler struct { + client client.WithWatch + logger *logr.Logger + timeout time.Duration +} + +func NewOperatorPreDeleteHandler() (*OperatorPreDeleteHandler, error) { + config := ctrl.GetConfigOrDie() + return NewOperatorPreDeleteHandlerFromConfig(config) +} + +func NewOperatorPreDeleteHandlerFromConfig(config *rest.Config) (*OperatorPreDeleteHandler, error) { + logger := ctrl.Log.WithName("dash0-uninstrument-all") + s := runtime.NewScheme() + err := operatorv1alpha1.AddToScheme(s) + if err != nil { + return nil, err + } + client, err := client.NewWithWatch(config, client.Options{ + Scheme: s, + }) + if err != nil { + return nil, fmt.Errorf("failed to create the dynamic client: %w", err) + } + + return &OperatorPreDeleteHandler{ + client: client, + logger: &logger, + timeout: defaultTimeout, + }, nil +} + +func (r *OperatorPreDeleteHandler) SetTimeout(timeout time.Duration) { + r.timeout = timeout +} + +func (r *OperatorPreDeleteHandler) DeleteAllDash0CustomResources() error { + ctx := context.Background() + + totalNumberOfDash0CustomResources, err := r.findAllAndRequestDeletion(ctx) + if err != nil { + return err + } + + err = r.waitForAllDash0CustomResourcesToBeFinalizedAndDeleted(ctx, totalNumberOfDash0CustomResources) + if err != nil { + return err + } + + return nil +} + +func (r *OperatorPreDeleteHandler) findAllAndRequestDeletion(ctx context.Context) (int, error) { + allDash0CustomResources := &operatorv1alpha1.Dash0List{} + err := r.client.List(ctx, allDash0CustomResources) + if err != nil { + 
r.logger.Error(err, "failed to list all Dash0 custom resources across all namespaces") + return 0, fmt.Errorf("failed to list all Dash0 custom resources across all namespaces: %w", err) + } + + if len(allDash0CustomResources.Items) == 0 { + r.logger.Info("No Dash0 custom resources have been found. Nothing to delete.") + return 0, nil + } + + for _, dash0CustomResource := range allDash0CustomResources.Items { + namespace := dash0CustomResource.Namespace + // You would think that the following call without the "client.InNamespace(namespace)" would delete all + // resources across all namespaces in one go, but instead it fails with "the server could not find the requested + // resource". Same for the dynamic client. + err = r.client.DeleteAllOf(ctx, &operatorv1alpha1.Dash0{}, client.InNamespace(namespace)) + if err != nil { + r.logger.Error(err, fmt.Sprintf("Failed to delete Dash0 custom resource in namespace %s.", namespace)) + } else { + r.logger.Info( + fmt.Sprintf("Successfully requested the deletion of the Dash0 custom resource in namespace %s.", + namespace)) + } + } + + return len(allDash0CustomResources.Items), nil +} + +func (r *OperatorPreDeleteHandler) waitForAllDash0CustomResourcesToBeFinalizedAndDeleted( + ctx context.Context, + totalNumberOfDash0CustomResources int, +) error { + watcher, err := r.client.Watch(ctx, &operatorv1alpha1.Dash0List{}) + if err != nil { + r.logger.Error(err, "failed to watch Dash0 custom resources across all namespaces to wait for deletion") + return fmt.Errorf("failed to watch Dash0 custom resources across all namespaces to wait for deletion: %w", err) + } + + // by stopping the watcher we make sure the goroutine running r.watchAndProcessEvents will terminate + defer watcher.Stop() + + channelToSignalDeletions := make(chan string) + go func() { + r.watchAndProcessEvents(watcher, &channelToSignalDeletions) + }() + + r.logger.Info( + fmt.Sprintf("Waiting for the deletion of %d Dash0 custom resource(s) across all namespaces.", + 
totalNumberOfDash0CustomResources)) + successfullyDeletedDash0CustomResources := 0 + timeoutHasOccured := false + for !timeoutHasOccured && successfullyDeletedDash0CustomResources < totalNumberOfDash0CustomResources { + select { + case <-time.After(r.timeout): + timeoutHasOccured = true + + case namespaceOfDeletedResource := <-channelToSignalDeletions: + successfullyDeletedDash0CustomResources++ + r.logger.Info( + fmt.Sprintf("The deletion of the Dash0 custom resource in namespace %s has completed successfully (%d/%d).", + namespaceOfDeletedResource, successfullyDeletedDash0CustomResources, totalNumberOfDash0CustomResources)) + } + } + + if timeoutHasOccured { + r.logger.Info( + fmt.Sprintf("The deletion of all Dash0 custom resource(s) across all namespaces has not completed "+ + "successfully within the timeout of %d seconds. %d of %d resources have been deleted.", + int(r.timeout/time.Second), + successfullyDeletedDash0CustomResources, + totalNumberOfDash0CustomResources, + )) + } else { + r.logger.Info( + fmt.Sprintf("The deletion of all %d Dash0 custom resource(s) across all namespaces has completed successfully.", + totalNumberOfDash0CustomResources)) + } + + return nil +} + +func (r *OperatorPreDeleteHandler) watchAndProcessEvents( + watcher watch.Interface, + channelToSignalDeletions *chan string, +) { + for event := range watcher.ResultChan() { + switch event.Type { + case watch.Deleted: + namespace := event.Object.(*operatorv1alpha1.Dash0).Namespace + *channelToSignalDeletions <- namespace + } + } +} diff --git a/internal/removal/operator_pre_delete_handler_test.go b/internal/removal/operator_pre_delete_handler_test.go new file mode 100644 index 00000000..6f65112b --- /dev/null +++ b/internal/removal/operator_pre_delete_handler_test.go @@ -0,0 +1,157 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package removal + +import ( + "context" + "fmt" + "time" + + appv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/dash0hq/dash0-operator/internal/controller" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + . "github.com/dash0hq/dash0-operator/test/util" +) + +const ( + namespace1 = "test-namespace-1" + namespace2 = "test-namespace-2" + testTimeout = 10 * time.Second + pollingInterval = 100 * time.Millisecond +) + +var ( + dash0CustomResourceName1 = types.NamespacedName{ + Namespace: namespace1, + Name: Dash0CustomResourceName, + } + dash0CustomResourceName2 = types.NamespacedName{ + Namespace: namespace2, + Name: Dash0CustomResourceName, + } +) + +var _ = Describe("Uninstalling the Dash0 Kubernetes operator", func() { + + ctx := context.Background() + var ( + createdObjects []client.Object + deployment1 *appv1.Deployment + deployment2 *appv1.Deployment + ) + + BeforeEach(func() { + createdObjects, deployment1 = setupNamespaceWithDash0CustomResourceAndWorkload( + ctx, + k8sClient, + dash0CustomResourceName1, + createdObjects, + ) + createdObjects, deployment2 = setupNamespaceWithDash0CustomResourceAndWorkload( + ctx, + k8sClient, + dash0CustomResourceName2, + createdObjects, + ) + }) + + AfterEach(func() { + createdObjects = DeleteAllCreatedObjects(ctx, k8sClient, createdObjects) + RemoveDash0CustomResourceByName(ctx, k8sClient, dash0CustomResourceName1, false) + RemoveDash0CustomResourceByName(ctx, k8sClient, dash0CustomResourceName2, false) + }) + + It("should time out if the deletion of all Dash0 custom resources does not happen in a timely manner", func() { + startTime := time.Now() + preDeleteHandlerTerminatedAt := time.Time{} + + go func() { + defer GinkgoRecover() + Expect(preDeleteHandler.DeleteAllDash0CustomResources()).To(Succeed()) + preDeleteHandlerTerminatedAt = time.Now() + 
}() + + // Deliberately not triggering a reconcile loop -> the finalizer action of the Dash0 custom resources will + // not trigger, and the Dash0 custom resources won't be deleted. Ultimately, the timeout will kick in. + + Eventually(func(g Gomega) { + g.Expect(preDeleteHandlerTerminatedAt).ToNot(BeZero()) + elapsedTime := preDeleteHandlerTerminatedAt.Sub(startTime).Nanoseconds() + g.Expect(elapsedTime).To(BeNumerically("~", preDeleteHandlerTimeoutForTests, time.Second)) + }, testTimeout, pollingInterval).Should(Succeed()) + }) + + It("should delete all Dash0 custom resources and uninstrument workloads", func() { + go func() { + defer GinkgoRecover() + Expect(preDeleteHandler.DeleteAllDash0CustomResources()).To(Succeed()) + }() + + // Triggering reconcile requests for both Dash0 custom resources to run cleanup actions and remove the + // finalizer, so that the resources actually get deleted. + go func() { + defer GinkgoRecover() + time.Sleep(500 * time.Millisecond) + triggerReconcileRequestForName( + ctx, + reconciler, + dash0CustomResourceName1, + ) + triggerReconcileRequestForName( + ctx, + reconciler, + dash0CustomResourceName2, + ) + }() + + Eventually(func(g Gomega) { + VerifyDash0CustomResourceByNameDoesNotExist(ctx, k8sClient, g, dash0CustomResourceName1) + VerifyDash0CustomResourceByNameDoesNotExist(ctx, k8sClient, g, dash0CustomResourceName2) + + VerifySuccessfulUninstrumentationEventEventually(ctx, clientset, g, deployment1.Namespace, deployment1.Name, "controller") + deployment1 := GetDeploymentEventually(ctx, k8sClient, g, deployment1.Namespace, deployment1.Name) + VerifyUnmodifiedDeploymentEventually(g, deployment1) + VerifyWebhookIgnoreOnceLabelIsPresentEventually(g, &deployment1.ObjectMeta) + + VerifySuccessfulUninstrumentationEventEventually(ctx, clientset, g, deployment2.Namespace, deployment2.Name, "controller") + deployment2 := GetDeploymentEventually(ctx, k8sClient, g, deployment2.Namespace, deployment2.Name) + 
VerifyUnmodifiedDeploymentEventually(g, deployment2) + VerifyWebhookIgnoreOnceLabelIsPresentEventually(g, &deployment2.ObjectMeta) + }, testTimeout, pollingInterval).Should(Succeed()) + }) +}) + +func setupNamespaceWithDash0CustomResourceAndWorkload( + ctx context.Context, + k8sClient client.Client, + dash0CustomResourceName types.NamespacedName, + createdObjects []client.Object, +) ([]client.Object, *appv1.Deployment) { + EnsureNamespaceExists(ctx, k8sClient, dash0CustomResourceName.Namespace) + EnsureDash0CustomResourceExistsAndIsAvailableInNamespace(ctx, k8sClient, dash0CustomResourceName) + deploymentName := UniqueName(DeploymentNamePrefix) + deployment := CreateInstrumentedDeployment(ctx, k8sClient, dash0CustomResourceName.Namespace, deploymentName) + // make sure the custom resource has the finalizer + triggerReconcileRequestForName(ctx, reconciler, dash0CustomResourceName) + return append(createdObjects, deployment), deployment +} + +func triggerReconcileRequestForName( + ctx context.Context, + reconciler *controller.Dash0Reconciler, + dash0CustomResourceName types.NamespacedName, +) { + By(fmt.Sprintf("Trigger reconcile request for %s/%s", dash0CustomResourceName.Namespace, dash0CustomResourceName.Name)) + _, err := reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: dash0CustomResourceName, + }) + Expect(err).NotTo(HaveOccurred()) +} diff --git a/internal/removal/removal_suite_test.go b/internal/removal/removal_suite_test.go new file mode 100644 index 00000000..08c9af3d --- /dev/null +++ b/internal/removal/removal_suite_test.go @@ -0,0 +1,110 @@ +package removal + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + 
"sigs.k8s.io/controller-runtime/pkg/log/zap" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" + controller "github.com/dash0hq/dash0-operator/internal/controller" + "github.com/dash0hq/dash0-operator/internal/util" +) + +const ( + preDeleteHandlerTimeoutForTests = 5 * time.Second +) + +var ( + k8sClient client.Client + clientset *kubernetes.Clientset + preDeleteHandler *OperatorPreDeleteHandler + reconciler *controller.Dash0Reconciler + cfg *rest.Config + testEnv *envtest.Environment + + images = util.Images{ + OperatorImage: "some-registry.com:1234/dash0-operator-controller:1.2.3", + InitContainerImage: "some-registry.com:1234/dash0-instrumentation:4.5.6", + InitContainerImagePullPolicy: corev1.PullAlways, + } +) + +func TestRemoval(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Removal Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without call the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries setup under the bin directory to perform + // the tests directly. When we run make test it will be setup and used automatically. 
+ BinaryAssetsDirectory: filepath.Join("..", "..", "bin", "k8s", + fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = operatorv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + preDeleteHandler, err = NewOperatorPreDeleteHandlerFromConfig(cfg) + Expect(err).NotTo(HaveOccurred()) + preDeleteHandler.SetTimeout(preDeleteHandlerTimeoutForTests) + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + clientset, err = kubernetes.NewForConfig(cfg) + Expect(err).NotTo(HaveOccurred()) + Expect(clientset).NotTo(BeNil()) + + mgr, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + }) + Expect(err).NotTo(HaveOccurred()) + Expect(mgr).NotTo(BeNil()) + + reconciler = &controller.Dash0Reconciler{ + Client: k8sClient, + ClientSet: clientset, + Recorder: mgr.GetEventRecorderFor("dash0-controller"), + Scheme: k8sClient.Scheme(), + Images: images, + OtelCollectorBaseUrl: "http://dash0-operator-opentelemetry-collector.dash0-operator-system.svc.cluster.local:4318", + } +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/webhook/webhook_suite_test.go b/internal/webhook/webhook_suite_test.go index 4f9b592b..e601eb2c 100644 --- a/internal/webhook/webhook_suite_test.go +++ b/internal/webhook/webhook_suite_test.go @@ -7,7 +7,6 @@ import ( "context" "crypto/tls" "fmt" - "net" "path/filepath" "runtime" diff --git a/test/e2e/e2e_helpers.go b/test/e2e/e2e_helpers.go index 5451e539..d0483376 100644 --- a/test/e2e/e2e_helpers.go +++ b/test/e2e/e2e_helpers.go @@ -6,7 +6,6 @@ package e2e import ( "bufio" "encoding/json" - "errors" "fmt" "io" "net/http" @@ -36,6 +35,7 @@ const ( tracesJsonMaxLineLength = 1_048_576 
verifyTelemetryTimeout = 90 * time.Second verifyTelemetryPollingInterval = 500 * time.Millisecond + dash0CustomResourceName = "dash0-sample" ) var ( @@ -400,6 +400,7 @@ func UndeployOperatorAndCollector(operatorNamespace string) { "uninstall", "--namespace", operatorNamespace, + "--ignore-not-found", operatorHelmReleaseName, ))).To(Succeed()) @@ -407,8 +408,20 @@ func UndeployOperatorAndCollector(operatorNamespace string) { // case/suite that tries to create the operator will run into issues when trying to recreate the namespace which is // still in the process of being deleted. ExpectWithOffset(1, - RunAndIgnoreOutput(exec.Command("kubectl", "delete", "ns", operatorNamespace))).To(Succeed()) - ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command( + RunAndIgnoreOutput( + exec.Command( + "kubectl", + "delete", + "ns", + "--ignore-not-found", + operatorNamespace, + ))).To(Succeed()) + + VerifyDash0OperatorReleaseIsNotInstalled(Default, operatorNamespace) +} + +func VerifyDash0OperatorReleaseIsNotInstalled(g Gomega, operatorNamespace string) { + g.ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command( "kubectl", "wait", "--for=delete", @@ -417,24 +430,13 @@ func UndeployOperatorAndCollector(operatorNamespace string) { ))).To(Succeed()) } -func DeployDash0Resource(namespace string) { +func DeployDash0CustomResource(namespace string) { ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command( "kubectl", "apply", "-n", namespace, "-k", "config/samples"))).To(Succeed()) } -func UndeployDash0Resource(namespace string) { - // remove the finalizer from the resource to allow immediate deletion - _ = RunAndIgnoreOutput(exec.Command( - "kubectl", - "patch", - "dash0/dash0-sample", - "--namespace", - namespace, - "--type", - "json", - "--patch='[{\"op\":\"remove\",\"path\":\"/metadata/finalizers\"}]'", - )) +func UndeployDash0CustomResource(namespace string) { // remove the resource ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command( @@ -448,6 +450,21 @@ func 
UndeployDash0Resource(namespace string) { ))).To(Succeed()) } +func VerifyDash0CustomResourceDoesNotExist(g Gomega, namespace string) { + output, err := Run(exec.Command( + "kubectl", + "get", + "--namespace", + namespace, + "--ignore-not-found", + "dash0", + dash0CustomResourceName, + )) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(output).NotTo(ContainSubstring(dash0CustomResourceName)) + +} + func RebuildNodeJsApplicationContainerImage() { By("building the dash0-operator-nodejs-20-express-test-app image") Expect( @@ -1114,24 +1131,6 @@ func Run(cmd *exec.Cmd, logCommandArgs ...bool) (string, error) { return string(output), nil } -// RunMultiple executes multiple commands -func RunMultiple(cmds []*exec.Cmd, logCommandArgs ...bool) error { - for _, cmd := range cmds { - if err := RunAndIgnoreOutput(cmd, logCommandArgs...); err != nil { - return err - } - } - return nil -} - -func RunMultipleFromStrings(cmdsAsStrings [][]string, logCommandArgs ...bool) error { - cmds := make([]*exec.Cmd, len(cmdsAsStrings)) - for i, cmdStrs := range cmdsAsStrings { - cmds[i] = exec.Command(cmdStrs[0], cmdStrs[1:]...) - } - return RunMultiple(cmds, logCommandArgs...) 
-} - func warnError(err error) { fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) } @@ -1159,23 +1158,3 @@ func GetProjectDir() (string, error) { wd = strings.Replace(wd, "/test/e2e", "", -1) return wd, nil } - -func CopyFile(source string, destination string) error { - src, err := os.Open(source) - if err != nil { - return err - } - defer func() { - err = errors.Join(err, src.Close()) - }() - - dst, err := os.Create(destination) - if err != nil { - return err - } - defer func() { - err = errors.Join(err, dst.Close()) - }() - _, err = io.Copy(dst, src) - return err -} diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index d13ce2db..a9896e8e 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -32,14 +32,6 @@ var ( setupFinishedSuccessfully bool ) -type controllerTestWorkloadConfig struct { - workloadType string - port int - installWorkload func(string) error - isBatch bool - restartPodsManually bool -} - var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { BeforeAll(func() { @@ -91,10 +83,18 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { Describe("controller", func() { AfterEach(func() { - UndeployDash0Resource(applicationUnderTestNamespace) + UndeployDash0CustomResource(applicationUnderTestNamespace) UndeployOperatorAndCollector(operatorNamespace) }) + type controllerTestWorkloadConfig struct { + workloadType string + port int + installWorkload func(string) error + isBatch bool + restartPodsManually bool + } + workloadConfigs := []controllerTestWorkloadConfig{ { workloadType: "cronjob", @@ -131,7 +131,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { By("deploy the operator and the Dash0 custom resource") DeployOperatorWithCollectorAndClearExportedTelemetry(operatorNamespace, operatorImageRepository, operatorImageTag) - DeployDash0Resource(applicationUnderTestNamespace) + DeployDash0CustomResource(applicationUnderTestNamespace) testIds := make(map[string]string) 
runInParallelForAllWorkloadTypes(workloadConfigs, func(config controllerTestWorkloadConfig) { @@ -146,7 +146,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { ) }) - UndeployDash0Resource(applicationUnderTestNamespace) + UndeployDash0CustomResource(applicationUnderTestNamespace) runInParallelForAllWorkloadTypes(workloadConfigs, func(config controllerTestWorkloadConfig) { VerifyThatInstrumentationHasBeenReverted( @@ -162,13 +162,13 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }) }) - Describe("when it detects existing jobs or pods", func() { + Describe("when it detects existing jobs or ownerless pods", func() { It("should label immutable jobs accordingly", func() { By("installing the Node.js job") Expect(InstallNodeJsJob(applicationUnderTestNamespace)).To(Succeed()) By("deploy the operator and the Dash0 custom resource") DeployOperatorWithCollectorAndClearExportedTelemetry(operatorNamespace, operatorImageRepository, operatorImageTag) - DeployDash0Resource(applicationUnderTestNamespace) + DeployDash0CustomResource(applicationUnderTestNamespace) By("verifying that the Node.js job has been labelled by the controller and that an event has been emitted") Eventually(func(g Gomega) { VerifyLabels(g, applicationUnderTestNamespace, "job", false, "controller") @@ -183,7 +183,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { ) }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) - UndeployDash0Resource(applicationUnderTestNamespace) + UndeployDash0CustomResource(applicationUnderTestNamespace) VerifyThatFailedInstrumentationAttemptLabelsHaveBeenRemovedRemoved(applicationUnderTestNamespace, "job") }) @@ -193,7 +193,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { Expect(InstallNodeJsPod(applicationUnderTestNamespace)).To(Succeed()) By("deploy the operator and the Dash0 custom resource") DeployOperatorWithCollectorAndClearExportedTelemetry(operatorNamespace, 
operatorImageRepository, operatorImageTag) - DeployDash0Resource(applicationUnderTestNamespace) + DeployDash0CustomResource(applicationUnderTestNamespace) By("verifying that the Node.js pod has not been labelled") Eventually(func(g Gomega) { VerifyNoDash0Labels(g, applicationUnderTestNamespace, "pod") @@ -215,11 +215,11 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }) BeforeEach(func() { - DeployDash0Resource(applicationUnderTestNamespace) + DeployDash0CustomResource(applicationUnderTestNamespace) }) AfterEach(func() { - UndeployDash0Resource(applicationUnderTestNamespace) + UndeployDash0CustomResource(applicationUnderTestNamespace) }) type webhookTest struct { @@ -250,7 +250,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { // uninstrumentation procedure there. Thus, for jobs, we test the failing uninstrumentation and // its effects here. By("verifying that removing the Dash0 custom resource attempts to uninstruments the job") - UndeployDash0Resource(applicationUnderTestNamespace) + UndeployDash0CustomResource(applicationUnderTestNamespace) Eventually(func(g Gomega) { // Verify that the instrumentation labels are still in place -- since we cannot undo the @@ -311,16 +311,115 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }), ) }) + + Describe("operator removal", func() { + + const ( + namespace1 = "e2e-application-under-test-namespace-removal-1" + namespace2 = "e2e-application-under-test-namespace-removal-2" + ) + + BeforeAll(func() { + By("creating test namespaces") + RecreateNamespace(namespace1) + RecreateNamespace(namespace2) + }) + + AfterEach(func() { + UndeployDash0CustomResource(namespace1) + UndeployDash0CustomResource(namespace2) + UndeployOperatorAndCollector(operatorNamespace) + }) + + AfterAll(func() { + By("removing test namespaces") + _ = RunAndIgnoreOutput(exec.Command("kubectl", "delete", "ns", namespace1)) + _ = RunAndIgnoreOutput(exec.Command("kubectl", "delete", "ns", namespace2)) + 
UndeployOperatorAndCollector(operatorNamespace) + }) + + type removalTestNamespaceConfig struct { + namespace string + workloadType string + port int + installWorkload func(string) error + } + + configs := []removalTestNamespaceConfig{ + { + namespace: "e2e-application-under-test-namespace-removal-1", + workloadType: "daemonset", + port: 1206, + installWorkload: InstallNodeJsDaemonSet, + }, + { + namespace: "e2e-application-under-test-namespace-removal-2", + workloadType: "deployment", + port: 1207, + installWorkload: InstallNodeJsDeployment, + }, + } + + Describe("when uninstalling the operator via helm", func() { + It("should remove all Dash0 custom resources and uninstrument all workloads", func() { + By("deploying workloads") + runInParallelForAllWorkloadTypes(configs, func(config removalTestNamespaceConfig) { + By(fmt.Sprintf("deploying the Node.js %s to namespace %s", config.workloadType, config.namespace)) + Expect(config.installWorkload(config.namespace)).To(Succeed()) + }) + + By("deploy the operator and the Dash0 custom resource") + DeployOperatorWithCollectorAndClearExportedTelemetry(operatorNamespace, operatorImageRepository, operatorImageTag) + runInParallelForAllWorkloadTypes(configs, func(config removalTestNamespaceConfig) { + DeployDash0CustomResource(config.namespace) + }) + + testIds := make(map[string]string) + runInParallelForAllWorkloadTypes(configs, func(config removalTestNamespaceConfig) { + By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the controller", config.workloadType)) + testIds[config.workloadType] = VerifyThatWorkloadHasBeenInstrumented( + config.namespace, + config.workloadType, + config.port, + false, + false, + "controller", + ) + }) + + UndeployOperatorAndCollector(operatorNamespace) + + runInParallelForAllWorkloadTypes(configs, func(config removalTestNamespaceConfig) { + VerifyThatInstrumentationHasBeenReverted( + config.namespace, + config.workloadType, + config.port, + false, + false, + 
testIds[config.workloadType], + "controller", + ) + }) + + Eventually(func(g Gomega) { + for _, config := range configs { + VerifyDash0CustomResourceDoesNotExist(g, config.namespace) + } + VerifyDash0OperatorReleaseIsNotInstalled(g, operatorNamespace) + }).Should(Succeed()) + }) + }) + }) }) -func runInParallelForAllWorkloadTypes( - workloadConfigs []controllerTestWorkloadConfig, - testStep func(controllerTestWorkloadConfig), +func runInParallelForAllWorkloadTypes[C any]( + workloadConfigs []C, + testStep func(C), ) { var wg sync.WaitGroup for _, config := range workloadConfigs { wg.Add(1) - go func(cfg controllerTestWorkloadConfig) { + go func(cfg C) { defer GinkgoRecover() defer wg.Done() testStep(cfg) diff --git a/test/util/dash0_custom_resource.go b/test/util/dash0_custom_resource.go index 839460ae..07a0d3b1 100644 --- a/test/util/dash0_custom_resource.go +++ b/test/util/dash0_custom_resource.go @@ -32,6 +32,18 @@ var ( func EnsureDash0CustomResourceExists( ctx context.Context, k8sClient client.Client, +) *operatorv1alpha1.Dash0 { + return EnsureDash0CustomResourceExistsWithNamespacedName( + ctx, + k8sClient, + Dash0CustomResourceQualifiedName, + ) +} + +func EnsureDash0CustomResourceExistsWithNamespacedName( + ctx context.Context, + k8sClient client.Client, + namespacesName types.NamespacedName, ) *operatorv1alpha1.Dash0 { By("creating the Dash0 custom resource") object := EnsureKubernetesObjectExists( @@ -41,8 +53,8 @@ func EnsureDash0CustomResourceExists( &operatorv1alpha1.Dash0{}, &operatorv1alpha1.Dash0{ ObjectMeta: metav1.ObjectMeta{ - Name: Dash0CustomResourceQualifiedName.Name, - Namespace: Dash0CustomResourceQualifiedName.Namespace, + Name: namespacesName.Name, + Namespace: namespacesName.Namespace, }, }, ) @@ -68,7 +80,19 @@ func EnsureDash0CustomResourceExistsAndIsAvailable( ctx context.Context, k8sClient client.Client, ) *operatorv1alpha1.Dash0 { - dash0CustomResource := EnsureDash0CustomResourceExists(ctx, k8sClient) + return 
EnsureDash0CustomResourceExistsAndIsAvailableInNamespace(ctx, k8sClient, Dash0CustomResourceQualifiedName) +} + +func EnsureDash0CustomResourceExistsAndIsAvailableInNamespace( + ctx context.Context, + k8sClient client.Client, + namespacedName types.NamespacedName, +) *operatorv1alpha1.Dash0 { + dash0CustomResource := EnsureDash0CustomResourceExistsWithNamespacedName( + ctx, + k8sClient, + namespacedName, + ) dash0CustomResource.EnsureResourceIsMarkedAsAvailable() Expect(k8sClient.Status().Update(ctx, dash0CustomResource)).To(Succeed()) return dash0CustomResource @@ -92,9 +116,10 @@ func EnsureDash0CustomResourceExistsAndIsDegraded( func LoadDash0CustomResourceByNameIfItExists( ctx context.Context, k8sClient client.Client, + g Gomega, dash0CustomResourceName types.NamespacedName, ) *operatorv1alpha1.Dash0 { - return LoadDash0CustomResourceByName(ctx, k8sClient, Default, dash0CustomResourceName, false) + return LoadDash0CustomResourceByName(ctx, k8sClient, g, dash0CustomResourceName, false) } func LoadDash0CustomResourceOrFail(ctx context.Context, k8sClient client.Client, g Gomega) *operatorv1alpha1.Dash0 { @@ -129,31 +154,49 @@ func LoadDash0CustomResourceByName( } else { // an error occurred, but it is not an IsNotFound error, fail test immediately g.Expect(err).NotTo(HaveOccurred()) + return nil } } return dash0CustomResource } +func VerifyDash0CustomResourceByNameDoesNotExist( + ctx context.Context, + k8sClient client.Client, + g Gomega, + dash0CustomResourceName types.NamespacedName, +) { + g.Expect(LoadDash0CustomResourceByNameIfItExists(ctx, k8sClient, g, dash0CustomResourceName)).To(BeNil()) +} + func RemoveDash0CustomResource(ctx context.Context, k8sClient client.Client) { - RemoveDash0CustomResourceByName(ctx, k8sClient, Dash0CustomResourceQualifiedName) + RemoveDash0CustomResourceByName(ctx, k8sClient, Dash0CustomResourceQualifiedName, true) } func RemoveDash0CustomResourceByName( ctx context.Context, k8sClient client.Client, dash0CustomResourceName 
types.NamespacedName, + failOnErr bool, ) { By("Removing the Dash0 custom resource instance") if dash0CustomResource := LoadDash0CustomResourceByNameIfItExists( ctx, k8sClient, + Default, dash0CustomResourceName, ); dash0CustomResource != nil { // We want to delete the custom resource, but we need to remove the finalizer first, otherwise the first // reconcile of the next test case will actually run the finalizers. removeFinalizer(ctx, k8sClient, dash0CustomResource) - Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + err := k8sClient.Delete(ctx, dash0CustomResource) + if failOnErr { + // If the test already triggered the deletion of the custom resource, but it was blocked by the finalizer + // removing the finalizer may immediately delete the custom resource. In these cases it is okay to ignore + // the error from k8sClient.Delete(ctx, dash0CustomResource). + Expect(err).NotTo(HaveOccurred()) + } } } diff --git a/test/util/resources.go b/test/util/resources.go index bf517665..4a1938bd 100644 --- a/test/util/resources.go +++ b/test/util/resources.go @@ -61,7 +61,7 @@ var ( } ) -func TestNamespace(name string) *corev1.Namespace { +func Namespace(name string) *corev1.Namespace { namespace := &corev1.Namespace{} namespace.Name = name return namespace @@ -70,14 +70,22 @@ func TestNamespace(name string) *corev1.Namespace { func EnsureTestNamespaceExists( ctx context.Context, k8sClient client.Client, +) *corev1.Namespace { + return EnsureNamespaceExists(ctx, k8sClient, TestNamespaceName) +} + +func EnsureNamespaceExists( + ctx context.Context, + k8sClient client.Client, + namespace string, ) *corev1.Namespace { By("creating the test namespace") object := EnsureKubernetesObjectExists( ctx, k8sClient, - types.NamespacedName{Name: TestNamespaceName}, + types.NamespacedName{Name: namespace}, &corev1.Namespace{}, - TestNamespace(TestNamespaceName), + Namespace(namespace), ) return object.(*corev1.Namespace) } @@ -919,13 +927,23 @@ func GetDeployment( 
k8sClient client.Client, namespace string, name string, +) *appsv1.Deployment { + return GetDeploymentEventually(ctx, k8sClient, Default, namespace, name) +} + +func GetDeploymentEventually( + ctx context.Context, + k8sClient client.Client, + g Gomega, + namespace string, + name string, ) *appsv1.Deployment { workload := &appsv1.Deployment{} namespacedName := types.NamespacedName{ Namespace: namespace, Name: name, } - ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, workload)).Should(Succeed()) + g.ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, workload)).Should(Succeed()) return workload } diff --git a/test/util/verification.go b/test/util/verification.go index 8d4c6855..9b4b11ea 100644 --- a/test/util/verification.go +++ b/test/util/verification.go @@ -7,14 +7,14 @@ import ( "context" "fmt" - . "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" + . 
"github.com/onsi/gomega" + "github.com/dash0hq/dash0-operator/internal/util" ) @@ -98,9 +98,13 @@ func VerifyModifiedDeployment(resource *appsv1.Deployment, expectations PodSpecE } func VerifyUnmodifiedDeployment(resource *appsv1.Deployment) { - verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) - verifyNoDash0Labels(resource.ObjectMeta) - verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + VerifyUnmodifiedDeploymentEventually(Default, resource) +} + +func VerifyUnmodifiedDeploymentEventually(g Gomega, resource *appsv1.Deployment) { + verifyUnmodifiedPodSpecEventually(g, resource.Spec.Template.Spec) + verifyNoDash0LabelsEventually(g, resource.ObjectMeta) + verifyNoDash0LabelsEventually(g, resource.Spec.Template.ObjectMeta) } func VerifyRevertedDeployment(resource *appsv1.Deployment, expectations PodSpecExpectations) { @@ -257,13 +261,17 @@ func verifyPodSpec(podSpec corev1.PodSpec, expectations PodSpecExpectations) { } func verifyUnmodifiedPodSpec(podSpec corev1.PodSpec) { - Expect(podSpec.Volumes).To(BeEmpty()) - Expect(podSpec.InitContainers).To(BeEmpty()) - Expect(podSpec.Containers).To(HaveLen(1)) + verifyUnmodifiedPodSpecEventually(Default, podSpec) +} + +func verifyUnmodifiedPodSpecEventually(g Gomega, podSpec corev1.PodSpec) { + g.Expect(podSpec.Volumes).To(BeEmpty()) + g.Expect(podSpec.InitContainers).To(BeEmpty()) + g.Expect(podSpec.Containers).To(HaveLen(1)) for i, container := range podSpec.Containers { - Expect(container.Name).To(Equal(fmt.Sprintf("test-container-%d", i))) - Expect(container.VolumeMounts).To(BeEmpty()) - Expect(container.Env).To(BeEmpty()) + g.Expect(container.Name).To(Equal(fmt.Sprintf("test-container-%d", i))) + g.Expect(container.VolumeMounts).To(BeEmpty()) + g.Expect(container.Env).To(BeEmpty()) } } @@ -285,11 +293,15 @@ func verifyLabelsAfterFailureToModify(meta metav1.ObjectMeta) { } func verifyNoDash0Labels(meta metav1.ObjectMeta) { - Expect(meta.Labels["dash0.com/instrumented"]).To(Equal("")) - 
Expect(meta.Labels["dash0.com/operator-image"]).To(Equal("")) - Expect(meta.Labels["dash0.com/init-container-image"]).To(Equal("")) - Expect(meta.Labels["dash0.com/instrumented-by"]).To(Equal("")) - Expect(meta.Labels["dash0.com/enable"]).To(Equal("")) + verifyNoDash0LabelsEventually(Default, meta) +} + +func verifyNoDash0LabelsEventually(g Gomega, meta metav1.ObjectMeta) { + g.Expect(meta.Labels["dash0.com/instrumented"]).To(Equal("")) + g.Expect(meta.Labels["dash0.com/operator-image"]).To(Equal("")) + g.Expect(meta.Labels["dash0.com/init-container-image"]).To(Equal("")) + g.Expect(meta.Labels["dash0.com/instrumented-by"]).To(Equal("")) + g.Expect(meta.Labels["dash0.com/enable"]).To(Equal("")) } func verifyLabelsForOptOutWorkload(meta metav1.ObjectMeta) { @@ -301,12 +313,16 @@ func verifyLabelsForOptOutWorkload(meta metav1.ObjectMeta) { Expect(meta.Labels["dash0.com/enable"]).To(Equal("false")) } -func VerifyWebhookIgnoreOnceLabelIsPresent(objectMeta *metav1.ObjectMeta) bool { - return Expect(objectMeta.Labels["dash0.com/webhook-ignore-once"]).To(Equal("true")) +func VerifyWebhookIgnoreOnceLabelIsPresent(objectMeta *metav1.ObjectMeta) { + VerifyWebhookIgnoreOnceLabelIsPresentEventually(Default, objectMeta) } -func VerifyWebhookIgnoreOnceLabelIsAbesent(objectMeta *metav1.ObjectMeta) bool { - return Expect(objectMeta.Labels["dash0.com/webhook-ignore-once"]).To(Equal("")) +func VerifyWebhookIgnoreOnceLabelIsPresentEventually(g Gomega, objectMeta *metav1.ObjectMeta) { + g.Expect(objectMeta.Labels["dash0.com/webhook-ignore-once"]).To(Equal("true")) +} + +func VerifyWebhookIgnoreOnceLabelIsAbesent(objectMeta *metav1.ObjectMeta) { + Expect(objectMeta.Labels["dash0.com/webhook-ignore-once"]).To(Equal("")) } func VerifyNoEvents( @@ -329,6 +345,7 @@ func VerifySuccessfulInstrumentationEvent( verifyEvent( ctx, clientset, + Default, namespace, resourceName, util.ReasonSuccessfulInstrumentation, @@ -346,6 +363,7 @@ func VerifyNoInstrumentationNecessaryEvent( verifyEvent( ctx, 
clientset, + Default, namespace, resourceName, util.ReasonNoInstrumentationNecessary, @@ -365,6 +383,7 @@ func VerifyFailedInstrumentationEvent( verifyEvent( ctx, clientset, + Default, namespace, resourceName, util.ReasonFailedInstrumentation, @@ -378,10 +397,22 @@ func VerifySuccessfulUninstrumentationEvent( namespace string, resourceName string, eventSource string, +) { + VerifySuccessfulUninstrumentationEventEventually(ctx, clientset, Default, namespace, resourceName, eventSource) +} + +func VerifySuccessfulUninstrumentationEventEventually( + ctx context.Context, + clientset *kubernetes.Clientset, + g Gomega, + namespace string, + resourceName string, + eventSource string, ) { verifyEvent( ctx, clientset, + g, namespace, resourceName, util.ReasonSuccessfulUninstrumentation, @@ -399,6 +430,7 @@ func VerifyFailedUninstrumentationEvent( verifyEvent( ctx, clientset, + Default, namespace, resourceName, util.ReasonFailedUninstrumentation, @@ -416,6 +448,7 @@ func VerifyNoUninstrumentationNecessaryEvent( verifyEvent( ctx, clientset, + Default, namespace, resourceName, util.ReasonNoUninstrumentationNecessary, @@ -426,15 +459,16 @@ func VerifyNoUninstrumentationNecessaryEvent( func verifyEvent( ctx context.Context, clientset *kubernetes.Clientset, + g Gomega, namespace string, resourceName string, reason util.Reason, message string, ) { allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) - Expect(allEvents.Items).To(HaveLen(1)) - Expect(allEvents.Items).To( + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(allEvents.Items).To(HaveLen(1)) + g.Expect(allEvents.Items).To( ContainElement( MatchEvent( namespace,