diff --git a/api/v1alpha1/dash0_types.go b/api/v1alpha1/dash0_types.go index be2f0a00..5317dce7 100644 --- a/api/v1alpha1/dash0_types.go +++ b/api/v1alpha1/dash0_types.go @@ -4,7 +4,10 @@ package v1alpha1 import ( + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/dash0hq/dash0-operator/internal/util" ) // Dash0Spec defines the desired state of the Dash0 custom resource. @@ -28,6 +31,79 @@ type Dash0 struct { Status Dash0Status `json:"status,omitempty"` } +func (d *Dash0) IsAvailable() bool { + if condition := d.getCondition(util.ConditionTypeAvailable); condition != nil { + return condition.Status == metav1.ConditionTrue + } + return false +} + +func (d *Dash0) getCondition(conditionType util.ConditionType) *metav1.Condition { + for _, c := range d.Status.Conditions { + if c.Type == string(conditionType) { + return &c + } + } + return nil +} + +func (d *Dash0) SetAvailableConditionToUnknown() { + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(util.ConditionTypeAvailable), + Status: metav1.ConditionUnknown, + Reason: "ReconcileStarted", + Message: "Dash0 has started resource reconciliation.", + }) + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(util.ConditionTypeDegraded), + Status: metav1.ConditionTrue, + Reason: "ReconcileStarted", + Message: "Dash0 is still starting.", + }) +} + +func (d *Dash0) EnsureResourceIsMarkedAsAvailable() { + // If the available status is already true, the status condition is not updated, except for Reason, Message and + // ObservedGeneration. In particular, LastTransitionTime is not updated. Thus, this operation is + // effectively idempotent. + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(util.ConditionTypeAvailable), + Status: metav1.ConditionTrue, + Reason: "ReconcileFinished", + Message: "Dash0 is active in this namespace now.", + }) + meta.RemoveStatusCondition(&d.Status.Conditions, string(util.ConditionTypeDegraded)) +} + +func (d *Dash0) EnsureResourceIsMarkedAsUnavailable() { + // If the available status is already false, the status condition is not updated, except for Reason, Message and + // ObservedGeneration. In particular, LastTransitionTime is not updated. Thus, this operation is + // effectively idempotent.
+ meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(util.ConditionTypeAvailable), + Status: metav1.ConditionFalse, + Reason: "Dash0CustomResourceHasBeenRemoved", + Message: "Dash0 is inactive in this namespace now.", + }) + meta.SetStatusCondition( + &d.Status.Conditions, + metav1.Condition{ + Type: string(util.ConditionTypeDegraded), + Status: metav1.ConditionTrue, + Reason: "Dash0CustomResourceHasBeenRemoved", + Message: "Dash0 is about to be deleted.", + }) +} + //+kubebuilder:object:root=true // Dash0List contains a list of Dash0 diff --git a/internal/controller/dash0_controller.go b/internal/controller/dash0_controller.go index 5204e83d..26a5c1f9 100644 --- a/internal/controller/dash0_controller.go +++ b/internal/controller/dash0_controller.go @@ -19,6 +19,7 @@ import ( "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" @@ -32,6 +33,15 @@ const ( resourceNameLabel = "resource name" ) +var ( + resourcesWithoutDash0InstrumentedLabelFilter = metav1.ListOptions{ + LabelSelector: fmt.Sprintf("!%s", util.InstrumentedLabelKey), + } + resourcesWithDash0InstrumentedLabelFilter = metav1.ListOptions{ + LabelSelector: util.InstrumentedLabelKey, + } +) + type Dash0Reconciler struct { client.Client ClientSet *kubernetes.Clientset @@ -40,14 +50,33 @@ type Dash0Reconciler struct { Versions util.Versions } +type ModificationMode string + +const ( + Instrumentation ModificationMode = "Instrumentation" + Uninstrumentation ModificationMode = "Uninstrumentation" +) + type ImmutableResourceError struct { - resourceType string - resource string + resourceType string + resource string + modificationMode ModificationMode } func (e ImmutableResourceError) Error() string { + var modificationParticle string + switch e.modificationMode { + case Instrumentation: + modificationParticle = "instrument" + case Uninstrumentation: + modificationParticle = "remove the instrumentation from" + default: + modificationParticle = "modify" + } + return fmt.Sprintf( - "Dash0 cannot instrument the existing %s %s, since the this type of resource is immutable.", + "Dash0 cannot %s the existing %s %s, since this type of resource is immutable.", + modificationParticle, e.resourceType, e.resource, ) @@ -82,118 +111,143 @@ func (r *Dash0Reconciler) SetupWithManager(mgr ctrl.Manager) error { // - About Controllers: https://kubernetes.io/docs/concepts/architecture/controller/ // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := log.FromContext(ctx) - log.Info("Processig reconcile request") + logger := log.FromContext(ctx) + logger.Info("Processing reconcile request") + + namespaceStillExists, err := r.checkIfNamespaceExists(ctx, req.Namespace, &logger) + if err != nil { + // The error has already been logged in checkIfNamespaceExists. + return ctrl.Result{}, err + } else if !namespaceStillExists { + logger.Info("The namespace seems to have been deleted after this reconcile request has been scheduled. Ignoring the reconcile request.") + return ctrl.Result{}, nil + } // Check whether the Dash0 custom resource exists.
dash0CustomResource := &operatorv1alpha1.Dash0{} - err := r.Get(ctx, req.NamespacedName, dash0CustomResource) + err = r.Get(ctx, req.NamespacedName, dash0CustomResource) if err != nil { if apierrors.IsNotFound(err) { - log.Info("The Dash0 custom resource has not been found, either it hasn't been installed or it has been deleted. Ignoring the reconcile request.") + logger.Info( + "The Dash0 custom resource has not been found, either it hasn't been installed or it has been " + + "deleted. Ignoring the reconcile request.") // stop the reconciliation return ctrl.Result{}, nil } - log.Error(err, "Failed to get the Dash0 custom resource, requeuing reconcile request.") - // requeue the request. + logger.Error(err, "Failed to get the Dash0 custom resource, requeuing reconcile request.") return ctrl.Result{}, err } - isFirstReconcile, err := r.initStatusConditions(ctx, dash0CustomResource, &log) + isFirstReconcile, err := r.initStatusConditions(ctx, dash0CustomResource, &logger) if err != nil { + // The error has already been logged in initStatusConditions return ctrl.Result{}, err } - isMarkedForDeletion := r.handleFinalizers(dash0CustomResource) - if isMarkedForDeletion { - // Dash0 is marked for deletion, no further reconciliation is necessary. + isMarkedForDeletion, err := r.checkImminentDeletionAndHandleFinalizers(ctx, dash0CustomResource, &logger) + if err != nil { + // The error has already been logged in checkImminentDeletionAndHandleFinalizers + return ctrl.Result{}, err + } else if isMarkedForDeletion { + // The Dash0 custom resource is slated for deletion, all cleanup actions (like reverting instrumented resources) + // have been processed, no further reconciliation is necessary. return ctrl.Result{}, nil } if isFirstReconcile { - r.handleFirstReconcile(ctx, dash0CustomResource, &log) + if err = r.handleFirstReconcile(ctx, dash0CustomResource, &logger); err != nil { + // The error has already been logged in handleFirstReconcile + return ctrl.Result{}, err + } } - ensureResourceIsMarkedAsAvailable(dash0CustomResource) + dash0CustomResource.EnsureResourceIsMarkedAsAvailable() if err := r.Status().Update(ctx, dash0CustomResource); err != nil { - log.Error(err, "Failed to update Dash0 status conditions, requeuing reconcile request.") + logger.Error(err, "Failed to update Dash0 status conditions, requeuing reconcile request.") return ctrl.Result{}, err } return ctrl.Result{}, nil } -func (r *Dash0Reconciler) initStatusConditions(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, log *logr.Logger) (bool, error) { +func (r *Dash0Reconciler) checkIfNamespaceExists(ctx context.Context, namespace string, logger *logr.Logger) (bool, error) { + _, err := r.ClientSet.CoreV1().Namespaces().Get(ctx, namespace, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } else { + logger.Error(err, "Failed to fetch the current namespace, requeuing reconcile request.") + return true, err + } + } + return true, nil +} + +func (r *Dash0Reconciler) initStatusConditions(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) (bool, error) { firstReconcile := false needsRefresh := false if dash0CustomResource.Status.Conditions == nil || len(dash0CustomResource.Status.Conditions) == 0 { - setAvailableConditionToUnknown(dash0CustomResource) + dash0CustomResource.SetAvailableConditionToUnknown() firstReconcile = true needsRefresh = true } else if availableCondition := meta.FindStatusCondition(dash0CustomResource.Status.Conditions, 
string(util.ConditionTypeAvailable)); availableCondition == nil { - setAvailableConditionToUnknown(dash0CustomResource) + dash0CustomResource.SetAvailableConditionToUnknown() needsRefresh = true } if needsRefresh { - err := r.refreshStatus(ctx, dash0CustomResource, log) + err := r.refreshStatus(ctx, dash0CustomResource, logger) if err != nil { + // The error has already been logged in refreshStatus return firstReconcile, err } } return firstReconcile, nil } -func (r *Dash0Reconciler) handleFinalizers(dash0CustomResource *operatorv1alpha1.Dash0) bool { - isMarkedForDeletion := dash0CustomResource.GetDeletionTimestamp() != nil - // if !isMarkedForDeletion { - // add finalizers here - // } else /* if has finalizers */ { - // execute finalizers here - // } - return isMarkedForDeletion -} - -func (r *Dash0Reconciler) handleFirstReconcile(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, log *logr.Logger) { - log.Info("Initial reconcile in progress.") +func (r *Dash0Reconciler) handleFirstReconcile(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) error { + logger.Info("Initial reconcile in progress.") instrumentationEnabled := true instrumentingExistingResourcesEnabled := true if !instrumentationEnabled { - log.Info( + logger.Info( "Instrumentation is not enabled, neither new nor existing resources will be modified to send telemetry to Dash0.", ) - return + return nil } if !instrumentingExistingResourcesEnabled { - log.Info( + logger.Info( "Instrumenting existing resources is not enabled, only new resources will be modified (at deploy time) to send telemetry to Dash0.", ) - return + return nil } - log.Info("Modifying existing resources to make them send telemetry to Dash0.") - if err := r.modifyExistingResources(ctx, dash0CustomResource); err != nil { - log.Error(err, "Modifying existing resources failed.") + logger.Info("Modifying existing resources to make them send telemetry to Dash0.") + if err := r.instrumentExistingResources(ctx, dash0CustomResource, logger); err != nil { + logger.Error(err, "Instrumenting existing resources failed, requeuing reconcile request.") + return err } + + return nil } -func (r *Dash0Reconciler) refreshStatus(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, log *logr.Logger) error { +func (r *Dash0Reconciler) refreshStatus(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) error { if err := r.Status().Update(ctx, dash0CustomResource); err != nil { - log.Error(err, "Cannot update the status of the Dash0 custom resource, requeuing reconcile request.") + logger.Error(err, "Cannot update the status of the Dash0 custom resource, requeuing reconcile request.") return err } return nil } -func (r *Dash0Reconciler) modifyExistingResources(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0) error { +func (r *Dash0Reconciler) instrumentExistingResources(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) error { namespace := dash0CustomResource.Namespace - errCronJobs := r.findAndModifyCronJobs(ctx, namespace) - errDaemonSets := r.findAndModifyDaemonSets(ctx, namespace) - errDeployments := r.findAndModifyDeployments(ctx, namespace) - errJobs := r.findAndHandleJobs(ctx, namespace) - errReplicaSets := r.findAndModifyReplicaSets(ctx, namespace) - errStatefulSets := r.findAndModifyStatefulSets(ctx, namespace) + errCronJobs := r.findAndInstrumentCronJobs(ctx, namespace, logger) + errDaemonSets := 
r.findAndInstrumentDaemonSets(ctx, namespace, logger) + errDeployments := r.findAndInstrumentDeployments(ctx, namespace, logger) + errJobs := r.findAndAddLabelsToImmutableJobsOnInstrumentation(ctx, namespace, logger) + errReplicaSets := r.findAndInstrumentReplicaSets(ctx, namespace, logger) + errStatefulSets := r.findAndInstrumentStatefulSets(ctx, namespace, logger) combinedErrors := errors.Join( errCronJobs, errDaemonSets, @@ -208,23 +262,23 @@ func (r *Dash0Reconciler) modifyExistingResources(ctx context.Context, dash0Cust return nil } -func (r *Dash0Reconciler) findAndModifyCronJobs(ctx context.Context, namespace string) error { - matchingResourcesInNamespace, err := r.ClientSet.BatchV1().CronJobs(namespace).List(ctx, metav1.ListOptions{}) +func (r *Dash0Reconciler) findAndInstrumentCronJobs(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.BatchV1().CronJobs(namespace).List(ctx, resourcesWithoutDash0InstrumentedLabelFilter) if err != nil { return fmt.Errorf("error when querying cron jobs: %w", err) } for _, resource := range matchingResourcesInNamespace.Items { - r.modifyCronJob(ctx, resource) + r.instrumentCronJob(ctx, resource, logger) } return nil } -func (r *Dash0Reconciler) modifyCronJob(ctx context.Context, cronJob batchv1.CronJob) { +func (r *Dash0Reconciler) instrumentCronJob(ctx context.Context, cronJob batchv1.CronJob, reconcileLogger *logr.Logger) { if cronJob.DeletionTimestamp != nil { // do not modify resources that are being deleted return } - logger := log.FromContext(ctx).WithValues( + logger := reconcileLogger.WithValues( resourceTypeLabel, "CronJob", resourceNamespaceLabel, @@ -233,7 +287,7 @@ func (r *Dash0Reconciler) modifyCronJob(ctx context.Context, cronJob batchv1.Cro cronJob.GetName(), ) hasBeenModified := false - retryErr := util.Retry("modifying cron job", func() error { + retryErr := util.Retry("instrumenting cron job", func() error { if err := r.Client.Get(ctx, client.ObjectKey{ Namespace: cronJob.GetNamespace(), Name: cronJob.GetName(), @@ -248,26 +302,26 @@ func (r *Dash0Reconciler) modifyCronJob(ctx context.Context, cronJob batchv1.Cro } }, &logger) - r.postProcess(&cronJob, hasBeenModified, logger, retryErr) + r.postProcessInstrumentation(&cronJob, hasBeenModified, retryErr, &logger) } -func (r *Dash0Reconciler) findAndModifyDaemonSets(ctx context.Context, namespace string) error { - matchingResourcesInNamespace, err := r.ClientSet.AppsV1().DaemonSets(namespace).List(ctx, metav1.ListOptions{}) +func (r *Dash0Reconciler) findAndInstrumentDaemonSets(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.AppsV1().DaemonSets(namespace).List(ctx, resourcesWithoutDash0InstrumentedLabelFilter) if err != nil { return fmt.Errorf("error when querying daemon sets: %w", err) } for _, resource := range matchingResourcesInNamespace.Items { - r.modifyDaemonSet(ctx, resource) + r.instrumentDaemonSet(ctx, resource, logger) } return nil } -func (r *Dash0Reconciler) modifyDaemonSet(ctx context.Context, daemonSet appsv1.DaemonSet) { +func (r *Dash0Reconciler) instrumentDaemonSet(ctx context.Context, daemonSet appsv1.DaemonSet, reconcileLogger *logr.Logger) { if daemonSet.DeletionTimestamp != nil { // do not modify resources that are being deleted return } - logger := log.FromContext(ctx).WithValues( + logger := reconcileLogger.WithValues( resourceTypeLabel, "DaemonSet", resourceNamespaceLabel, @@ -276,7 +330,7 @@ func (r *Dash0Reconciler) 
modifyDaemonSet(ctx context.Context, daemonSet appsv1. daemonSet.GetName(), ) hasBeenModified := false - retryErr := util.Retry("modifying daemon set", func() error { + retryErr := util.Retry("instrumenting daemon set", func() error { if err := r.Client.Get(ctx, client.ObjectKey{ Namespace: daemonSet.GetNamespace(), Name: daemonSet.GetName(), @@ -291,26 +345,26 @@ func (r *Dash0Reconciler) modifyDaemonSet(ctx context.Context, daemonSet appsv1. } }, &logger) - r.postProcess(&daemonSet, hasBeenModified, logger, retryErr) + r.postProcessInstrumentation(&daemonSet, hasBeenModified, retryErr, &logger) } -func (r *Dash0Reconciler) findAndModifyDeployments(ctx context.Context, namespace string) error { - matchingResourcesInNamespace, err := r.ClientSet.AppsV1().Deployments(namespace).List(ctx, metav1.ListOptions{}) +func (r *Dash0Reconciler) findAndInstrumentDeployments(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.AppsV1().Deployments(namespace).List(ctx, resourcesWithoutDash0InstrumentedLabelFilter) if err != nil { return fmt.Errorf("error when querying deployments: %w", err) } for _, resource := range matchingResourcesInNamespace.Items { - r.modifyDeployment(ctx, resource) + r.instrumentDeployment(ctx, resource, logger) } return nil } -func (r *Dash0Reconciler) modifyDeployment(ctx context.Context, deployment appsv1.Deployment) { +func (r *Dash0Reconciler) instrumentDeployment(ctx context.Context, deployment appsv1.Deployment, reconcileLogger *logr.Logger) { if deployment.DeletionTimestamp != nil { // do not modify resources that are being deleted return } - logger := log.FromContext(ctx).WithValues( + logger := reconcileLogger.WithValues( resourceTypeLabel, "Deployment", resourceNamespaceLabel, @@ -319,7 +373,7 @@ func (r *Dash0Reconciler) modifyDeployment(ctx context.Context, deployment appsv deployment.GetName(), ) hasBeenModified := false - retryErr := util.Retry("modifying deployment", func() error { + retryErr := util.Retry("instrumenting deployment", func() error { if err := r.Client.Get(ctx, client.ObjectKey{ Namespace: deployment.GetNamespace(), Name: deployment.GetName(), @@ -334,27 +388,27 @@ func (r *Dash0Reconciler) modifyDeployment(ctx context.Context, deployment appsv } }, &logger) - r.postProcess(&deployment, hasBeenModified, logger, retryErr) + r.postProcessInstrumentation(&deployment, hasBeenModified, retryErr, &logger) } -func (r *Dash0Reconciler) findAndHandleJobs(ctx context.Context, namespace string) error { - matchingResourcesInNamespace, err := r.ClientSet.BatchV1().Jobs(namespace).List(ctx, metav1.ListOptions{}) +func (r *Dash0Reconciler) findAndAddLabelsToImmutableJobsOnInstrumentation(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.BatchV1().Jobs(namespace).List(ctx, resourcesWithoutDash0InstrumentedLabelFilter) if err != nil { - return fmt.Errorf("error when querying cron jobs: %w", err) + return fmt.Errorf("error when querying jobs: %w", err) } for _, job := range matchingResourcesInNamespace.Items { - r.handleJob(ctx, job) + r.addLabelsToImmutableJobsOnInstrumentation(ctx, job, logger) } return nil } -func (r *Dash0Reconciler) handleJob(ctx context.Context, job batchv1.Job) { +func (r *Dash0Reconciler) addLabelsToImmutableJobsOnInstrumentation(ctx context.Context, job batchv1.Job, reconcileLogger *logr.Logger) { if job.DeletionTimestamp != nil { // do not modify resources that are being deleted return } - logger := 
log.FromContext(ctx).WithValues( + logger := reconcileLogger.WithValues( resourceTypeLabel, "Job", resourceNamespaceLabel, @@ -374,37 +428,33 @@ func (r *Dash0Reconciler) handleJob(ctx context.Context, job batchv1.Job) { }, &logger) if retryErr != nil { - r.postProcess(&job, false, logger, retryErr) + r.postProcessInstrumentation(&job, false, retryErr, &logger) } else { - r.postProcess( - &job, - false, - logger, - ImmutableResourceError{ - resourceType: "job", - resource: fmt.Sprintf("%s/%s", job.GetNamespace(), job.GetName()), - }, - ) + r.postProcessInstrumentation(&job, false, ImmutableResourceError{ + resourceType: "job", + resource: fmt.Sprintf("%s/%s", job.GetNamespace(), job.GetName()), + modificationMode: Instrumentation, + }, &logger) } } -func (r *Dash0Reconciler) findAndModifyReplicaSets(ctx context.Context, namespace string) error { - matchingResourcesInNamespace, err := r.ClientSet.AppsV1().ReplicaSets(namespace).List(ctx, metav1.ListOptions{}) +func (r *Dash0Reconciler) findAndInstrumentReplicaSets(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.AppsV1().ReplicaSets(namespace).List(ctx, resourcesWithoutDash0InstrumentedLabelFilter) if err != nil { return fmt.Errorf("error when querying deployments: %w", err) } for _, resource := range matchingResourcesInNamespace.Items { - r.modifyReplicaSet(ctx, resource) + r.instrumentReplicaSet(ctx, resource, logger) } return nil } -func (r *Dash0Reconciler) modifyReplicaSet(ctx context.Context, replicaSet appsv1.ReplicaSet) { +func (r *Dash0Reconciler) instrumentReplicaSet(ctx context.Context, replicaSet appsv1.ReplicaSet, reconcileLogger *logr.Logger) { if replicaSet.DeletionTimestamp != nil { // do not modify resources that are being deleted return } - logger := log.FromContext(ctx).WithValues( + logger := reconcileLogger.WithValues( resourceTypeLabel, "ReplicaSet", resourceNamespaceLabel, @@ -413,7 +463,7 @@ func (r *Dash0Reconciler) modifyReplicaSet(ctx context.Context, replicaSet appsv replicaSet.GetName(), ) hasBeenModified := false - retryErr := util.Retry("modifying replicaset", func() error { + retryErr := util.Retry("instrumenting replicaset", func() error { if err := r.Client.Get(ctx, client.ObjectKey{ Namespace: replicaSet.GetNamespace(), Name: replicaSet.GetName(), @@ -433,26 +483,26 @@ func (r *Dash0Reconciler) modifyReplicaSet(ctx context.Context, replicaSet appsv // to manually restart the pods of their replica sets after they have been instrumented. We could consider finding // all pods for that are owned by the replica set and restart them automatically. 
- r.postProcess(&replicaSet, hasBeenModified, logger, retryErr) + r.postProcessInstrumentation(&replicaSet, hasBeenModified, retryErr, &logger) } -func (r *Dash0Reconciler) findAndModifyStatefulSets(ctx context.Context, namespace string) error { - matchingResourcesInNamespace, err := r.ClientSet.AppsV1().StatefulSets(namespace).List(ctx, metav1.ListOptions{}) +func (r *Dash0Reconciler) findAndInstrumentStatefulSets(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.AppsV1().StatefulSets(namespace).List(ctx, resourcesWithoutDash0InstrumentedLabelFilter) if err != nil { return fmt.Errorf("error when querying stateful sets: %w", err) } for _, resource := range matchingResourcesInNamespace.Items { - r.modifyStatefulSet(ctx, resource) + r.instrumentStatefulSet(ctx, resource, logger) } return nil } -func (r *Dash0Reconciler) modifyStatefulSet(ctx context.Context, statefulSet appsv1.StatefulSet) { +func (r *Dash0Reconciler) instrumentStatefulSet(ctx context.Context, statefulSet appsv1.StatefulSet, reconcileLogger *logr.Logger) { if statefulSet.DeletionTimestamp != nil { // do not modify resources that are being deleted return } - logger := log.FromContext(ctx).WithValues( + logger := reconcileLogger.WithValues( resourceTypeLabel, "StatefulSet", resourceNamespaceLabel, @@ -461,7 +511,7 @@ func (r *Dash0Reconciler) modifyStatefulSet(ctx context.Context, statefulSet app statefulSet.GetName(), ) hasBeenModified := false - retryErr := util.Retry("modifying stateful set", func() error { + retryErr := util.Retry("instrumenting stateful set", func() error { if err := r.Client.Get(ctx, client.ObjectKey{ Namespace: statefulSet.GetNamespace(), Name: statefulSet.GetName(), @@ -476,24 +526,14 @@ func (r *Dash0Reconciler) modifyStatefulSet(ctx context.Context, statefulSet app } }, &logger) - r.postProcess(&statefulSet, hasBeenModified, logger, retryErr) -} - -func (r *Dash0Reconciler) newResourceModifier(logger *logr.Logger) *k8sresources.ResourceModifier { - return k8sresources.NewResourceModifier( - util.InstrumentationMetadata{ - Versions: r.Versions, - InstrumentedBy: "controller", - }, - logger, - ) + r.postProcessInstrumentation(&statefulSet, hasBeenModified, retryErr, &logger) } -func (r *Dash0Reconciler) postProcess( +func (r *Dash0Reconciler) postProcessInstrumentation( resource runtime.Object, hasBeenModified bool, - logger logr.Logger, retryErr error, + logger *logr.Logger, ) { if retryErr != nil { e := &ImmutableResourceError{} @@ -504,10 +544,427 @@ func (r *Dash0Reconciler) postProcess( } util.QueueFailedInstrumentationEvent(r.Recorder, resource, "controller", retryErr) } else if !hasBeenModified { - logger.Info("Dash0 instrumentation already present, no modification by controller is necessary.") + logger.Info("Dash0 instrumentation was already present on this resource, no modification by the controller is necessary.") util.QueueAlreadyInstrumentedEvent(r.Recorder, resource, "controller") } else { logger.Info("The controller has added Dash0 instrumentation to the resource.") util.QueueSuccessfulInstrumentationEvent(r.Recorder, resource, "controller") } } + +func (r *Dash0Reconciler) checkImminentDeletionAndHandleFinalizers(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) (bool, error) { + deletionTimestamp := dash0CustomResource.GetDeletionTimestamp() + isMarkedForDeletion := deletionTimestamp != nil && !deletionTimestamp.IsZero() + if !isMarkedForDeletion { + err := 
r.addFinalizerIfNecessary(ctx, dash0CustomResource) + if err != nil { + logger.Error(err, "Failed to add finalizer to the Dash0 custom resource, requeuing reconcile request.") + return isMarkedForDeletion, err + } + } else { + if controllerutil.ContainsFinalizer(dash0CustomResource, util.FinalizerId) { + err := r.runCleanupActions(ctx, dash0CustomResource, logger) + if err != nil { + // The error has already been logged in runCleanupActions. + return isMarkedForDeletion, err + } + } + } + return isMarkedForDeletion, nil +} + +func (r *Dash0Reconciler) runCleanupActions(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) error { + err := r.uninstrumentResourcesIfAvailable(ctx, dash0CustomResource, logger) + if err != nil { + logger.Error(err, "Failed to uninstrument resources, requeuing reconcile request.") + return err + } + + // The Dash0 custom resource will be deleted after this reconcile has finished, so updating the status is + // probably unnecessary. But for due process we do it anyway. In particular, if deleting it should fail + // for any reason or take a while, the resource is no longer marked as available. + dash0CustomResource.EnsureResourceIsMarkedAsUnavailable() + if err = r.Status().Update(ctx, dash0CustomResource); err != nil { + logger.Error(err, "Failed to update Dash0 status conditions, requeuing reconcile request.") + return err + } + + controllerutil.RemoveFinalizer(dash0CustomResource, util.FinalizerId) + if err = r.Update(ctx, dash0CustomResource); err != nil { + logger.Error(err, "Failed to remove the finalizer from the Dash0 custom resource, requeuing reconcile request.") + return err + } + return nil +} + +func (r *Dash0Reconciler) addFinalizerIfNecessary(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0) error { + finalizerHasBeenAdded := controllerutil.AddFinalizer(dash0CustomResource, util.FinalizerId) + if finalizerHasBeenAdded { + return r.Update(ctx, dash0CustomResource) + } + // The resource already had the finalizer, no update necessary. + return nil +} + +func (r *Dash0Reconciler) uninstrumentResourcesIfAvailable(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) error { + if dash0CustomResource.IsAvailable() { + if err := r.uninstrumentResources(ctx, dash0CustomResource, logger); err != nil { + logger.Error(err, "Uninstrumenting existing resources failed.") + return err + } + } else { + logger.Info("Removing the Dash0 custom resource and running finalizers, but Dash0 is not marked as available."
+ + " Dash0 Instrumentation will not be removed from workloads..") + } + return nil +} + +func (r *Dash0Reconciler) uninstrumentResources(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0, logger *logr.Logger) error { + namespace := dash0CustomResource.Namespace + + errCronJobs := r.findAndUninstrumentCronJobs(ctx, namespace, logger) + errDaemonSets := r.findAndUninstrumentDaemonSets(ctx, namespace, logger) + errDeployments := r.findAndUninstrumentDeployments(ctx, namespace, logger) + errJobs := r.findAndHandleJobOnUninstrumentation(ctx, namespace, logger) + errReplicaSets := r.findAndUninstrumentReplicaSets(ctx, namespace, logger) + errStatefulSets := r.findAndUninstrumentStatefulSets(ctx, namespace, logger) + combinedErrors := errors.Join( + errCronJobs, + errDaemonSets, + errDeployments, + errJobs, + errReplicaSets, + errStatefulSets, + ) + if combinedErrors != nil { + return combinedErrors + } + return nil +} + +func (r *Dash0Reconciler) findAndUninstrumentCronJobs(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := + r.ClientSet.BatchV1().CronJobs(namespace).List(ctx, resourcesWithDash0InstrumentedLabelFilter) + if err != nil { + return fmt.Errorf("error when querying instrumented cron jobs: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.uninstrumentCronJob(ctx, resource, logger) + } + return nil +} + +func (r *Dash0Reconciler) uninstrumentCronJob(ctx context.Context, cronJob batchv1.CronJob, reconcileLogger *logr.Logger) { + if cronJob.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := reconcileLogger.WithValues( + resourceTypeLabel, + "CronJob", + resourceNamespaceLabel, + cronJob.GetNamespace(), + resourceNameLabel, + cronJob.GetName(), + ) + hasBeenModified := false + + retryErr := util.Retry("uninstrumenting cron job", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: cronJob.GetNamespace(), + Name: cronJob.GetName(), + }, &cronJob); err != nil { + return fmt.Errorf("error when fetching cron job %s/%s: %w", cronJob.GetNamespace(), cronJob.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).RevertCronJob(&cronJob) + if hasBeenModified { + return r.Client.Update(ctx, &cronJob) + } else { + return nil + } + }, &logger) + + r.postProcessUninstrumentation(&cronJob, hasBeenModified, retryErr, &logger) +} + +func (r *Dash0Reconciler) findAndUninstrumentDaemonSets(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := + r.ClientSet.AppsV1().DaemonSets(namespace).List(ctx, resourcesWithDash0InstrumentedLabelFilter) + if err != nil { + return fmt.Errorf("error when querying instrumented daemon sets: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.uninstrumentDaemonSet(ctx, resource, logger) + } + return nil +} + +func (r *Dash0Reconciler) uninstrumentDaemonSet(ctx context.Context, daemonSet appsv1.DaemonSet, reconcileLogger *logr.Logger) { + if daemonSet.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := reconcileLogger.WithValues( + resourceTypeLabel, + "DaemonSet", + resourceNamespaceLabel, + daemonSet.GetNamespace(), + resourceNameLabel, + daemonSet.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("uninstrumenting daemon set", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: daemonSet.GetNamespace(), + 
Name: daemonSet.GetName(), + }, &daemonSet); err != nil { + return fmt.Errorf("error when fetching daemon set %s/%s: %w", daemonSet.GetNamespace(), daemonSet.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).RevertDaemonSet(&daemonSet) + if hasBeenModified { + return r.Client.Update(ctx, &daemonSet) + } else { + return nil + } + }, &logger) + + r.postProcessUninstrumentation(&daemonSet, hasBeenModified, retryErr, &logger) +} + +func (r *Dash0Reconciler) findAndUninstrumentDeployments(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := + r.ClientSet.AppsV1().Deployments(namespace).List(ctx, resourcesWithDash0InstrumentedLabelFilter) + if err != nil { + return fmt.Errorf("error when querying instrumented deployments: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.uninstrumentDeployment(ctx, resource, logger) + } + return nil +} + +func (r *Dash0Reconciler) uninstrumentDeployment(ctx context.Context, deployment appsv1.Deployment, reconcileLogger *logr.Logger) { + if deployment.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := reconcileLogger.WithValues( + resourceTypeLabel, + "Deployment", + resourceNamespaceLabel, + deployment.GetNamespace(), + resourceNameLabel, + deployment.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("uninstrumenting deployment", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: deployment.GetNamespace(), + Name: deployment.GetName(), + }, &deployment); err != nil { + return fmt.Errorf("error when fetching deployment %s/%s: %w", deployment.GetNamespace(), deployment.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).RevertDeployment(&deployment) + if hasBeenModified { + return r.Client.Update(ctx, &deployment) + } else { + return nil + } + }, &logger) + + r.postProcessUninstrumentation(&deployment, hasBeenModified, retryErr, &logger) +} + +func (r *Dash0Reconciler) findAndHandleJobOnUninstrumentation(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := r.ClientSet.BatchV1().Jobs(namespace).List(ctx, resourcesWithDash0InstrumentedLabelFilter) + if err != nil { + return fmt.Errorf("error when querying instrumented jobs: %w", err) + } + + for _, job := range matchingResourcesInNamespace.Items { + r.handleJobOnUninstrumentation(ctx, job, logger) + } + return nil +} + +func (r *Dash0Reconciler) handleJobOnUninstrumentation(ctx context.Context, job batchv1.Job, reconcileLogger *logr.Logger) { + if job.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := reconcileLogger.WithValues( + resourceTypeLabel, + "Job", + resourceNamespaceLabel, + job.GetNamespace(), + resourceNameLabel, + job.GetName(), + ) + + createImmutableResourceError := false + retryErr := util.Retry("removing labels from immutable job", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: job.GetNamespace(), + Name: job.GetName(), + }, &job); err != nil { + return fmt.Errorf("error when fetching job %s/%s: %w", job.GetNamespace(), job.GetName(), err) + } + isInstrumented := job.GetObjectMeta().GetLabels()[util.InstrumentedLabelKey] + if isInstrumented == "true" { + // This job has been instrumented, presumably by the webhook. We cannot undo the instrumentation here, since + // jobs are immutable. 
+ + // Deliberately not calling r.newResourceModifier(&logger).RemoveLabelsFromImmutableJob(&job) here since + // we cannot remove the instrumentation, so we also have to leave the labels in place. + createImmutableResourceError = true + return nil + } else { + // There was an attempt to instrument this job (probably by the controller), which has not been successful. + // We only need to remove the labels from that instrumentation attempt to clean up. + r.newResourceModifier(&logger).RemoveLabelsFromImmutableJob(&job) + return r.Client.Update(ctx, &job) + } + }, &logger) + + if retryErr != nil { + // For the case that the job was instrumented and we could not uninstrument it, we create an + // ImmutableResourceError inside the retry loop. This error is then handled in postProcessUninstrumentation. + // The same is true for any other error types (for example errors in r.Client.Update). + r.postProcessUninstrumentation(&job, false, retryErr, &logger) + } else if createImmutableResourceError { + r.postProcessUninstrumentation(&job, false, ImmutableResourceError{ + resourceType: "job", + resource: fmt.Sprintf("%s/%s", job.GetNamespace(), job.GetName()), + modificationMode: Uninstrumentation, + }, &logger) + } else { + r.postProcessUninstrumentation(&job, false, nil, &logger) + } +} + +func (r *Dash0Reconciler) findAndUninstrumentReplicaSets(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := + r.ClientSet.AppsV1().ReplicaSets(namespace).List(ctx, resourcesWithDash0InstrumentedLabelFilter) + if err != nil { + return fmt.Errorf("error when querying instrumented replica sets: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.uninstrumentReplicaSet(ctx, resource, logger) + } + return nil +} + +func (r *Dash0Reconciler) uninstrumentReplicaSet(ctx context.Context, replicaSet appsv1.ReplicaSet, reconcileLogger *logr.Logger) { + if replicaSet.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := reconcileLogger.WithValues( + resourceTypeLabel, + "ReplicaSet", + resourceNamespaceLabel, + replicaSet.GetNamespace(), + resourceNameLabel, + replicaSet.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("uninstrumenting replica set", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: replicaSet.GetNamespace(), + Name: replicaSet.GetName(), + }, &replicaSet); err != nil { + return fmt.Errorf("error when fetching replica set %s/%s: %w", replicaSet.GetNamespace(), replicaSet.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).RevertReplicaSet(&replicaSet) + if hasBeenModified { + return r.Client.Update(ctx, &replicaSet) + } else { + return nil + } + }, &logger) + + // Note: ReplicaSet pods are not restarted automatically by Kubernetes when their spec is changed (for other resource + // types like deployments or daemonsets this is managed by Kubernetes automatically). For now, we rely on the user + // to manually restart the pods of their replica sets after they have been reverted. We could consider finding + // all pods that are owned by the replica set and restart them automatically. 
+ + r.postProcessUninstrumentation(&replicaSet, hasBeenModified, retryErr, &logger) +} + +func (r *Dash0Reconciler) findAndUninstrumentStatefulSets(ctx context.Context, namespace string, logger *logr.Logger) error { + matchingResourcesInNamespace, err := + r.ClientSet.AppsV1().StatefulSets(namespace).List(ctx, resourcesWithDash0InstrumentedLabelFilter) + if err != nil { + return fmt.Errorf("error when querying instrumented stateful sets: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.uninstrumentStatefulSet(ctx, resource, logger) + } + return nil +} + +func (r *Dash0Reconciler) uninstrumentStatefulSet(ctx context.Context, statefulSet appsv1.StatefulSet, reconcileLogger *logr.Logger) { + if statefulSet.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := reconcileLogger.WithValues( + resourceTypeLabel, + "StatefulSet", + resourceNamespaceLabel, + statefulSet.GetNamespace(), + resourceNameLabel, + statefulSet.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("uninstrumenting stateful set", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: statefulSet.GetNamespace(), + Name: statefulSet.GetName(), + }, &statefulSet); err != nil { + return fmt.Errorf("error when fetching stateful set %s/%s: %w", statefulSet.GetNamespace(), statefulSet.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).RevertStatefulSet(&statefulSet) + if hasBeenModified { + return r.Client.Update(ctx, &statefulSet) + } else { + return nil + } + }, &logger) + + r.postProcessUninstrumentation(&statefulSet, hasBeenModified, retryErr, &logger) +} + +func (r *Dash0Reconciler) postProcessUninstrumentation( + resource runtime.Object, + hasBeenModified bool, + retryErr error, + logger *logr.Logger, +) { + if retryErr != nil { + e := &ImmutableResourceError{} + if errors.As(retryErr, e) { + logger.Info(e.Error()) + } else { + logger.Error(retryErr, "Dash0's removal of instrumentation by the controller has not been successful.") + } + util.QueueFailedUninstrumentationEvent(r.Recorder, resource, "controller", retryErr) + } else if !hasBeenModified { + logger.Info("Dash0 instrumentation was not present on this resource, no modification by the controller has been necessary.") + util.QueueAlreadyNotInstrumentedEvent(r.Recorder, resource, "controller") + } else { + logger.Info("The controller has removed Dash0 instrumentation from the resource.") + util.QueueSuccessfulUninstrumentationEvent(r.Recorder, resource, "controller") + } +} + +func (r *Dash0Reconciler) newResourceModifier(logger *logr.Logger) *k8sresources.ResourceModifier { + return k8sresources.NewResourceModifier( + util.InstrumentationMetadata{ + Versions: r.Versions, + InstrumentedBy: "controller", + }, + logger, + ) +} diff --git a/internal/controller/dash0_controller_test.go b/internal/controller/dash0_controller_test.go index e43631a9..7d1339f6 100644 --- a/internal/controller/dash0_controller_test.go +++ b/internal/controller/dash0_controller_test.go @@ -10,10 +10,13 @@ import ( . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" @@ -43,7 +46,7 @@ var ( ) var _ = Describe("Dash0 Controller", func() { - Context("When reconciling a resource", func() { + Context("When reconciling the Dash0 custom resource", func() { ctx := context.Background() var createdObjects []client.Object @@ -77,196 +80,399 @@ var _ = Describe("Dash0 Controller", func() { }) AfterEach(func() { - By("Cleanup the Dash0 resource instance") - dash0CustomResource := &operatorv1alpha1.Dash0{} - err := k8sClient.Get(ctx, dash0CustomResourceQualifiedName, dash0CustomResource) - Expect(err).NotTo(HaveOccurred()) - Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) - + By("Remove all created objects") for _, object := range createdObjects { Expect(k8sClient.Delete(ctx, object, &client.DeleteOptions{ GracePeriodSeconds: new(int64), })).To(Succeed()) } - DeleteAllEvents(ctx, clientset, namespace) + By("Cleanup the Dash0 resource instance") + if dash0CustomResource := loadDash0CustomResourceIfItExists(ctx); dash0CustomResource != nil { + // We want to delete the custom resource, but we need to remove the finalizer first, otherwise the first + // reconcile of the next test case will actually run the finalizers. + removeFinalizer(ctx, dash0CustomResource) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + } + + deleteAllEvents(ctx, clientset, namespace) allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) Expect(err).NotTo(HaveOccurred()) Expect(allEvents.Items).To(BeEmpty()) }) It("should successfully run the first reconcile (no modifiable resources exist)", func() { - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) - - verifyStatusConditions(ctx, dash0CustomResourceQualifiedName) + By("Trigger reconcile request") + triggerReconcileRequest(ctx, reconciler, "") + verifyDash0ResourceIsAvailable(ctx) }) It("should successfully run multiple reconciles (no modifiable resources exist)", func() { - By("First reconcile request") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "First reconcile request") - firstAvailableStatusCondition := verifyStatusConditions(ctx, dash0CustomResourceQualifiedName) + firstAvailableStatusCondition := verifyDash0ResourceIsAvailable(ctx) originalTransitionTimestamp := firstAvailableStatusCondition.LastTransitionTime.Time time.Sleep(50 * time.Millisecond) - By("Second reconcile request") - _, err = reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "Second reconcile request") // The LastTransitionTime should not change with subsequent reconciliations. 
- secondAvailableCondition := verifyStatusConditions(ctx, dash0CustomResourceQualifiedName) + secondAvailableCondition := verifyDash0ResourceIsAvailable(ctx) Expect(secondAvailableCondition.LastTransitionTime.Time).To(Equal(originalTransitionTimestamp)) }) - It("should modify an existing cron job", func() { + It("should instrument an existing cron job", func() { name := CronJobName By("Inititalize a cron job") cronJob := CreateBasicCronJob(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, cronJob) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") - verifyStatusConditionAndSuccessEvent(ctx, namespace, name) - VerifyModifiedCronJob(GetCronJob(ctx, k8sClient, namespace, name), BasicPodSpecExpectations) + verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx, namespace, name) + VerifyModifiedCronJob(GetCronJob(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations) }) - It("should modify an existing daemon set", func() { + It("should instrument an existing daemon set", func() { name := DaemonSetName By("Inititalize a daemon set") daemonSet := CreateBasicDaemonSet(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, daemonSet) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") - verifyStatusConditionAndSuccessEvent(ctx, namespace, name) - VerifyModifiedDaemonSet(GetDaemonSet(ctx, k8sClient, namespace, name), BasicPodSpecExpectations) + verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx, namespace, name) + VerifyModifiedDaemonSet(GetDaemonSet(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations) }) - It("should modify an existing deployment", func() { + It("should instrument an existing deployment", func() { name := DeploymentName By("Inititalize a deployment") deployment := CreateBasicDeployment(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, deployment) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") - verifyStatusConditionAndSuccessEvent(ctx, namespace, name) - VerifyModifiedDeployment(GetDeployment(ctx, k8sClient, namespace, name), BasicPodSpecExpectations) + verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx, namespace, name) + VerifyModifiedDeployment(GetDeployment(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations) }) - It("should record a failure event for an existing job and add labels", func() { - name := JobName + It("should record a failure event when attempting to instrument an existing job and add labels", func() { + name := JobName1 By("Inititalize a job") job := CreateBasicJob(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, job) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") - verifyStatusConditions(ctx, dash0CustomResourceQualifiedName) - VerifyFailureEvent( + 
verifyDash0ResourceIsAvailable(ctx) + VerifyFailedInstrumentationEvent( ctx, clientset, namespace, name, - "controller", - "Dash0 instrumentation by controller has not been successful. Error message: Dash0 cannot"+ - " instrument the existing job test-namespace/job, since the this type of resource is immutable.", + "Dash0 instrumentation of this resource by the controller has not been successful. Error message: Dash0 cannot"+ + " instrument the existing job test-namespace/job1, since this type of resource is immutable.", ) - VerifyUnmodifiedJob(GetJob(ctx, k8sClient, namespace, name)) + VerifyImmutableJobCouldNotBeModified(GetJob(ctx, k8sClient, namespace, name)) }) - It("should modify an existing orphan replicaset", func() { + It("should instrument an existing orphan replicaset", func() { name := DeploymentName By("Inititalize a replicaset") replicaSet := CreateBasicReplicaSet(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, replicaSet) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") - verifyStatusConditionAndSuccessEvent(ctx, namespace, name) - VerifyModifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name), BasicPodSpecExpectations) + verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx, namespace, name) + VerifyModifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations) }) - It("should not modify an existing replicaset with an owner", func() { + It("should not modify an existing replicaset owned by a deployment", func() { name := DeploymentName By("Inititalize a replicaset") replicaSet := CreateReplicaSetOwnedByDeployment(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, replicaSet) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") - verifyStatusConditions(ctx, dash0CustomResourceQualifiedName) + verifyDash0ResourceIsAvailable(ctx) VerifyUnmodifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name)) }) - It("should modify an stateful set", func() { + It("should instrument a stateful set", func() { name := StatefulSetName By("Inititalize a stateful set") statefulSet := CreateBasicStatefulSet(ctx, k8sClient, namespace, name) createdObjects = append(createdObjects, statefulSet) - By("Reconciling the created resource") - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0CustomResourceQualifiedName, - }) - Expect(err).NotTo(HaveOccurred()) + triggerReconcileRequest(ctx, reconciler, "") + + verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx, namespace, name) + VerifyModifiedStatefulSet(GetStatefulSet(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations) + }) + + It("should revert an instrumented cron job", func() { + // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom + // resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer. + // Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually + // happens in production. 
+ triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request") + + name := CronJobName + By("Create an instrumented cron job") + cronJob := CreateInstrumentedCronJob(ctx, k8sClient, namespace, name) + createdObjects = append(createdObjects, cronJob) + + By("Queue the deletion of the Dash0 custom resource") + dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + + triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload") + + VerifySuccessfulUninstrumentationEvent(ctx, clientset, namespace, name, "controller") + VerifyUnmodifiedCronJob(GetCronJob(ctx, k8sClient, namespace, name)) + }) + + It("should revert an instrumented daemon set", func() { + // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom + // resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer. + // Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually + // happens in production. + triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request") + + name := DaemonSetName + By("Create an instrumented daemon set") + daemonSet := CreateInstrumentedDaemonSet(ctx, k8sClient, namespace, name) + createdObjects = append(createdObjects, daemonSet) + + By("Queue the deletion of the Dash0 custom resource") + dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + + triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload") + + VerifySuccessfulUninstrumentationEvent(ctx, clientset, namespace, name, "controller") + VerifyUnmodifiedDaemonSet(GetDaemonSet(ctx, k8sClient, namespace, name)) + }) + + It("should revert an instrumented deployment", func() { + // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom + // resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer. + // Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually + // happens in production. + triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request") + + name := DeploymentName + By("Create an instrumented deployment") + deployment := CreateInstrumentedDeployment(ctx, k8sClient, namespace, name) + createdObjects = append(createdObjects, deployment) + + By("Queue the deletion of the Dash0 custom resource") + dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + + triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload") + + VerifySuccessfulUninstrumentationEvent(ctx, clientset, namespace, name, "controller") + VerifyUnmodifiedDeployment(GetDeployment(ctx, k8sClient, namespace, name)) + }) + + It("should record a failure event when attempting to revert an existing instrumenting job (which has been instrumented by the webhook)", func() { + // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom + // resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer. + // Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually + // happens in production. 
+ triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request") + + name := JobName2 + By("Create an instrumented job") + job := CreateInstrumentedJob(ctx, k8sClient, namespace, name) + createdObjects = append(createdObjects, job) + + By("Queue the deletion of the Dash0 custom resource") + dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + + triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to attempt to revert the instrumented job") + + VerifyFailedUninstrumentationEvent( + ctx, + clientset, + namespace, + name, + "The controller's attempt to remove the Dash0 instrumentation from this resource has not been successful. Error message: Dash0 cannot remove the instrumentation from the existing job test-namespace/job2, since this type of resource is immutable.", + ) + VerifyModifiedJob(GetJob(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations) + }) + + It("should remove instrumentation labels from an existing uninstrumented job (which has been labelled by the controller without being instrumented)", func() { + // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom + // resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer. + // Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually + // happens in production. + triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request") + + name := JobName3 + By("Create a job with labels (dash0.instrumented=false)") + job := CreateJobWithInstrumentationLabels(ctx, k8sClient, namespace, name) + createdObjects = append(createdObjects, job) + + By("Queue the deletion of the Dash0 custom resource") + dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + + triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to remove the instrumentation labels from the job") + + VerifyAlreadyNotInstrumented(ctx, clientset, namespace, name, "Dash0 instrumentation was not present on this resource, no modification by the controller has been necessary.") + VerifyUnmodifiedJob(GetJob(ctx, k8sClient, namespace, name)) + }) + + It("should revert an instrumented orphan replica set", func() { + // We trigger one reconcile request before creating any workload and before deleting the Dash0 custom + // resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer. + // Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually + // happens in production. 
+			triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")
+
+			name := ReplicaSetName
+			By("Create an instrumented replica set")
+			replicaSet := CreateInstrumentedReplicaSet(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, replicaSet)
+
+			By("Queue the deletion of the Dash0 custom resource")
+			dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default)
+			Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())
+
+			triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload")
+
+			VerifySuccessfulUninstrumentationEvent(ctx, clientset, namespace, name, "controller")
+			VerifyUnmodifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name))
+		})
+
+		It("should leave existing uninstrumented replica sets owned by deployments alone", func() {
+			// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
+			// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
+			// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
+			// happens in production.
+			triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")
+
+			name := ReplicaSetName
+			By("Create a replica set owned by a deployment")
+			replicaSet := CreateReplicaSetOwnedByDeployment(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, replicaSet)
+
+			By("Queue the deletion of the Dash0 custom resource")
+			dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default)
+			Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())
+
+			triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request (the replica set owned by a deployment is expected to be left alone)")
+
+			VerifyNoEvents(ctx, clientset, namespace)
+			VerifyUnmodifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name))
+		})
+
+		It("should revert an instrumented stateful set", func() {
+			// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
+			// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
+			// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
+			// happens in production.
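+			// Note: in contrast to jobs, a stateful set's pod template is mutable, so the revert is expected
+			// to succeed and restore the original spec.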
+			triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")
+
+			name := StatefulSetName
+			By("Create an instrumented stateful set")
+			statefulSet := CreateInstrumentedStatefulSet(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, statefulSet)
+
+			By("Queue the deletion of the Dash0 custom resource")
+			dash0CustomResource := loadDash0CustomResourceOrFail(ctx, Default)
+			Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())
 
-		verifyStatusConditionAndSuccessEvent(ctx, namespace, name)
-		VerifyModifiedStatefulSet(GetStatefulSet(ctx, k8sClient, namespace, name), BasicPodSpecExpectations)
+			triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload")
+
+			VerifySuccessfulUninstrumentationEvent(ctx, clientset, namespace, name, "controller")
+			VerifyUnmodifiedStatefulSet(GetStatefulSet(ctx, k8sClient, namespace, name))
 		})
 	})
 })
 
-func verifyStatusConditionAndSuccessEvent(ctx context.Context, namespace string, name string) {
-	verifyStatusConditions(ctx, dash0CustomResourceQualifiedName)
-	VerifySuccessEvent(ctx, clientset, namespace, name, "controller")
+func triggerReconcileRequest(ctx context.Context, reconciler *Dash0Reconciler, stepMessage string) {
+	if stepMessage == "" {
+		stepMessage = "Trigger reconcile request"
+	}
+	By(stepMessage)
+	_, err := reconciler.Reconcile(ctx, reconcile.Request{
+		NamespacedName: dash0CustomResourceQualifiedName,
+	})
+	Expect(err).NotTo(HaveOccurred())
+}
+
+func loadDash0CustomResourceOrFail(ctx context.Context, g Gomega) *operatorv1alpha1.Dash0 {
+	return loadDash0CustomResource(ctx, g, true)
+}
+
+func loadDash0CustomResourceIfItExists(ctx context.Context) *operatorv1alpha1.Dash0 {
+	return loadDash0CustomResource(ctx, Default, false)
 }
 
-func verifyStatusConditions(ctx context.Context, typeNamespacedName types.NamespacedName) *metav1.Condition {
-	var available *metav1.Condition
+func loadDash0CustomResource(ctx context.Context, g Gomega, failTestsOnNonExists bool) *operatorv1alpha1.Dash0 {
+	dash0CustomResource := &operatorv1alpha1.Dash0{}
+	if err := k8sClient.Get(ctx, dash0CustomResourceQualifiedName, dash0CustomResource); err != nil {
+		if apierrors.IsNotFound(err) && !failTestsOnNonExists {
+			// The Dash0 custom resource is allowed to be absent in this case.
+			return nil
+		}
+		// Fail the test immediately: either the resource is unexpectedly absent, or reading it failed with
+		// an error other than IsNotFound.
+		g.Expect(err).NotTo(HaveOccurred())
+		return nil
+	}
+
+	return dash0CustomResource
+}
+
+func verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx context.Context, namespace string, name string) {
+	verifyDash0ResourceIsAvailable(ctx)
+	VerifySuccessfulInstrumentationEvent(ctx, clientset, namespace, name, "controller")
+}
+
+func verifyDash0ResourceIsAvailable(ctx context.Context) *metav1.Condition {
+	return verifyDash0ResourceStatus(ctx, metav1.ConditionTrue)
+}
+
+func verifyDash0ResourceStatus(ctx context.Context, expectedStatus metav1.ConditionStatus) *metav1.Condition {
+	var availableCondition *metav1.Condition
 	By("Verifying status conditions")
 	Eventually(func(g Gomega) {
-		dash0 := &operatorv1alpha1.Dash0{}
-		g.Expect(k8sClient.Get(ctx, typeNamespacedName, dash0)).To(Succeed())
-		available = meta.FindStatusCondition(dash0.Status.Conditions, string(util.ConditionTypeAvailable))
-		g.Expect(available).NotTo(BeNil())
-		g.Expect(available.Status).To(Equal(metav1.ConditionTrue))
-		degraded := meta.FindStatusCondition(dash0.Status.Conditions, string(util.ConditionTypeDegraded))
+		
dash0CustomResource := loadDash0CustomResourceOrFail(ctx, g) + availableCondition = meta.FindStatusCondition(dash0CustomResource.Status.Conditions, string(util.ConditionTypeAvailable)) + g.Expect(availableCondition).NotTo(BeNil()) + g.Expect(availableCondition.Status).To(Equal(expectedStatus)) + degraded := meta.FindStatusCondition(dash0CustomResource.Status.Conditions, string(util.ConditionTypeDegraded)) g.Expect(degraded).To(BeNil()) }, timeout, pollingInterval).Should(Succeed()) - return available + return availableCondition +} + +func removeFinalizer(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0) { + finalizerHasBeenRemoved := controllerutil.RemoveFinalizer(dash0CustomResource, util.FinalizerId) + if finalizerHasBeenRemoved { + Expect(k8sClient.Update(ctx, dash0CustomResource)).To(Succeed()) + } +} + +func deleteAllEvents( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, +) { + err := clientset.CoreV1().Events(namespace).DeleteCollection(ctx, metav1.DeleteOptions{ + GracePeriodSeconds: new(int64), // delete immediately + }, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) } diff --git a/internal/controller/status_conditions.go b/internal/controller/status_conditions.go deleted file mode 100644 index 2da590cd..00000000 --- a/internal/controller/status_conditions.go +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. -// SPDX-License-Identifier: Apache-2.0 - -package controller - -import ( - "github.com/dash0hq/dash0-operator/internal/util" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" -) - -func setAvailableConditionToUnknown(dash0CustomResource *operatorv1alpha1.Dash0) { - meta.SetStatusCondition( - &dash0CustomResource.Status.Conditions, - metav1.Condition{ - Type: string(util.ConditionTypeAvailable), - Status: metav1.ConditionUnknown, - Reason: "ReconcileStarted", - Message: "Dash0 has started resource reconciliation.", - }) -} - -func ensureResourceIsMarkedAsAvailable(dash0CustomResource *operatorv1alpha1.Dash0) { - // If the available status is already true, the status condition is not updated, except for Reason, Message and - // ObservedGeneration timestamp. In particular, LastTransitionTime is not updated. Thus, this operation is - // effectively idempotent. 
- meta.SetStatusCondition( - &dash0CustomResource.Status.Conditions, - metav1.Condition{ - Type: string(util.ConditionTypeAvailable), - Status: metav1.ConditionTrue, - Reason: "ReconcileFinished", - Message: "Dash0 is is active in this namespace now.", - }) - meta.RemoveStatusCondition(&dash0CustomResource.Status.Conditions, string(util.ConditionTypeDegraded)) -} diff --git a/internal/k8sresources/modify.go b/internal/k8sresources/resource_modifier.go similarity index 68% rename from internal/k8sresources/modify.go rename to internal/k8sresources/resource_modifier.go index f17dc050..53ca83f0 100644 --- a/internal/k8sresources/modify.go +++ b/internal/k8sresources/resource_modifier.go @@ -8,6 +8,7 @@ import ( "reflect" "slices" "strconv" + "strings" "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" @@ -79,15 +80,9 @@ func (m *ResourceModifier) AddLabelsToImmutableJob(job *batchv1.Job) { } func (m *ResourceModifier) ModifyReplicaSet(replicaSet *appsv1.ReplicaSet, namespace string) bool { - ownerReferences := replicaSet.ObjectMeta.OwnerReferences - if len(ownerReferences) > 0 { - for _, ownerReference := range ownerReferences { - if ownerReference.APIVersion == "apps/v1" && ownerReference.Kind == "Deployment" { - return false - } - } + if m.hasDeploymentOwnerReference(replicaSet) { + return false } - return m.modifyResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta, namespace) } @@ -305,3 +300,156 @@ func (m *ResourceModifier) addLabel(meta *metav1.ObjectMeta, key string, value s } meta.Labels[key] = value } + +func (m *ResourceModifier) RevertCronJob(cronJob *batchv1.CronJob) bool { + return m.revertResource(&cronJob.Spec.JobTemplate.Spec.Template, &cronJob.ObjectMeta) +} + +func (m *ResourceModifier) RevertDaemonSet(daemonSet *appsv1.DaemonSet) bool { + return m.revertResource(&daemonSet.Spec.Template, &daemonSet.ObjectMeta) +} + +func (m *ResourceModifier) RevertDeployment(deployment *appsv1.Deployment) bool { + return m.revertResource(&deployment.Spec.Template, &deployment.ObjectMeta) +} + +func (m *ResourceModifier) RevertJob(job *batchv1.Job) bool { + return m.revertResource(&job.Spec.Template, &job.ObjectMeta) +} + +func (m *ResourceModifier) RemoveLabelsFromImmutableJob(job *batchv1.Job) { + m.removeInstrumentationLabels(&job.ObjectMeta) +} + +func (m *ResourceModifier) RevertReplicaSet(replicaSet *appsv1.ReplicaSet) bool { + if m.hasDeploymentOwnerReference(replicaSet) { + return false + } + return m.revertResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta) +} + +func (m *ResourceModifier) RevertStatefulSet(statefulSet *appsv1.StatefulSet) bool { + return m.revertResource(&statefulSet.Spec.Template, &statefulSet.ObjectMeta) +} + +func (m *ResourceModifier) revertResource(podTemplateSpec *corev1.PodTemplateSpec, meta *metav1.ObjectMeta) bool { + if meta.GetLabels()[util.InstrumentedLabelKey] == "false" { + // resource has never been instrumented successfully, only remove labels + m.removeInstrumentationLabels(meta) + m.removeInstrumentationLabels(&podTemplateSpec.ObjectMeta) + return true + } + hasBeenModified := m.revertPodSpec(&podTemplateSpec.Spec) + if hasBeenModified { + m.removeInstrumentationLabels(meta) + m.removeInstrumentationLabels(&podTemplateSpec.ObjectMeta) + return true + } + return false +} + +func (m *ResourceModifier) revertPodSpec(podSpec *corev1.PodSpec) bool { + originalSpec := podSpec.DeepCopy() + m.removeInstrumentationVolume(podSpec) + m.removeInitContainer(podSpec) + for idx := range podSpec.Containers { + container := 
&podSpec.Containers[idx] + m.uninstrumentContainer(container) + } + + return !reflect.DeepEqual(originalSpec, podSpec) +} + +func (m *ResourceModifier) removeInstrumentationVolume(podSpec *corev1.PodSpec) { + if podSpec.Volumes == nil { + return + } + podSpec.Volumes = slices.DeleteFunc(podSpec.Volumes, func(c corev1.Volume) bool { + return c.Name == dash0VolumeName + }) +} + +func (m *ResourceModifier) removeInitContainer(podSpec *corev1.PodSpec) { + if podSpec.InitContainers == nil { + return + } + podSpec.InitContainers = slices.DeleteFunc(podSpec.InitContainers, func(c corev1.Container) bool { + return c.Name == initContainerName + }) +} + +func (m *ResourceModifier) uninstrumentContainer(container *corev1.Container) { + m.removeMount(container) + m.removeEnvironmentVariables(container) +} + +func (m *ResourceModifier) removeMount(container *corev1.Container) { + if container.VolumeMounts == nil { + return + } + container.VolumeMounts = slices.DeleteFunc(container.VolumeMounts, func(c corev1.VolumeMount) bool { + return c.Name == dash0VolumeName || c.MountPath == dash0InstrumentationDirectory + }) +} + +func (m *ResourceModifier) removeEnvironmentVariables(container *corev1.Container) { + m.removeNodeOptions(container) + m.removeEnvironmentVariable(container, envVarDash0CollectorBaseUrlName) +} + +func (m *ResourceModifier) removeNodeOptions(container *corev1.Container) { + if container.Env == nil { + return + } + idx := slices.IndexFunc(container.Env, func(c corev1.EnvVar) bool { + return c.Name == envVarNodeOptionsName + }) + + if idx < 0 { + return + } else { + envVar := container.Env[idx] + previousValue := envVar.Value + if previousValue == "" && envVar.ValueFrom != nil { + // Specified via ValueFrom, this has not been done by us, so we assume there is no Dash0-specific + // NODE_OPTIONS part. 
+			return
+		} else if previousValue == envVarNodeOptionsValue {
+			container.Env = slices.Delete(container.Env, idx, idx+1)
+			// The value consisted solely of our --require flag and the variable has been deleted entirely; do
+			// not fall through to the value rewrite below, since idx no longer points at NODE_OPTIONS.
+			return
+		}
+
+		container.Env[idx].Value = strings.Replace(previousValue, envVarNodeOptionsValue, "", -1)
+	}
+}
+
+func (m *ResourceModifier) removeEnvironmentVariable(container *corev1.Container, name string) {
+	if container.Env == nil {
+		return
+	}
+	container.Env = slices.DeleteFunc(container.Env, func(c corev1.EnvVar) bool {
+		return c.Name == name
+	})
+}
+
+func (m *ResourceModifier) removeInstrumentationLabels(meta *metav1.ObjectMeta) {
+	m.removeLabel(meta, util.InstrumentedLabelKey)
+	m.removeLabel(meta, util.OperatorVersionLabelKey)
+	m.removeLabel(meta, util.InitContainerImageVersionLabelKey)
+	m.removeLabel(meta, util.InstrumentedByLabelKey)
+}
+
+func (m *ResourceModifier) removeLabel(meta *metav1.ObjectMeta, key string) {
+	delete(meta.Labels, key)
+}
+
+func (m *ResourceModifier) hasDeploymentOwnerReference(replicaSet *appsv1.ReplicaSet) bool {
+	for _, ownerReference := range replicaSet.ObjectMeta.OwnerReferences {
+		if ownerReference.APIVersion == "apps/v1" && ownerReference.Kind == "Deployment" {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/k8sresources/modify_test.go b/internal/k8sresources/resource_modifier_test.go
similarity index 52%
rename from internal/k8sresources/modify_test.go
rename to internal/k8sresources/resource_modifier_test.go
index e8c90dc1..bc0e7da3 100644
--- a/internal/k8sresources/modify_test.go
+++ b/internal/k8sresources/resource_modifier_test.go
@@ -35,16 +35,16 @@ var _ = Describe("Dash0 Resource Modification", func() {
 	logger := log.FromContext(ctx)
 	resourceModifier := NewResourceModifier(instrumentationMetadata, &logger)
 
-	Context("when mutating new resources", func() {
-		It("should inject Dash0 into a new basic deployment", func() {
+	Context("when instrumenting resources", func() {
+		It("should add Dash0 to a basic deployment", func() {
 			deployment := BasicDeployment(TestNamespaceName, DeploymentName)
 			result := resourceModifier.ModifyDeployment(deployment, TestNamespaceName)
 
 			Expect(result).To(BeTrue())
-			VerifyModifiedDeployment(deployment, BasicPodSpecExpectations)
+			VerifyModifiedDeployment(deployment, BasicInstrumentedPodSpecExpectations)
 		})
 
-		It("should inject Dash0 into a new deployment that has multiple Containers, and already has Volumes and init Containers", func() {
+		It("should add Dash0 to a deployment that has multiple containers, and already has volumes and init containers", func() {
 			deployment := DeploymentWithMoreBellsAndWhistles(TestNamespaceName, DeploymentName)
 			result := resourceModifier.ModifyDeployment(deployment, TestNamespaceName)
 
@@ -75,7 +75,7 @@ var _ = Describe("Dash0 Resource Modification", func() {
 		})
 	})
 
-	It("should update existing Dash0 artifacts in a new deployment", func() {
+	It("should update existing Dash0 artifacts in a deployment", func() {
 		deployment := DeploymentWithExistingDash0Artifacts(TestNamespaceName, DeploymentName)
 		result := resourceModifier.ModifyDeployment(deployment, TestNamespaceName)
 
@@ -108,39 +108,39 @@ var _ = Describe("Dash0 Resource Modification", func() {
 		})
 	})
 
-	It("should inject Dash0 into a new basic cron job", func() {
+	It("should add Dash0 to a basic cron job", func() {
 		resource := BasicCronJob(TestNamespaceName, CronJobName)
 		result := resourceModifier.ModifyCronJob(resource, TestNamespaceName)
 
 		Expect(result).To(BeTrue())
-		VerifyModifiedCronJob(resource, 
BasicPodSpecExpectations)
+		VerifyModifiedCronJob(resource, BasicInstrumentedPodSpecExpectations)
 	})
 
-	It("should inject Dash0 into a new basic daemon set", func() {
+	It("should add Dash0 to a basic daemon set", func() {
 		resource := BasicDaemonSet(TestNamespaceName, DaemonSetName)
 		result := resourceModifier.ModifyDaemonSet(resource, TestNamespaceName)
 
 		Expect(result).To(BeTrue())
-		VerifyModifiedDaemonSet(resource, BasicPodSpecExpectations)
+		VerifyModifiedDaemonSet(resource, BasicInstrumentedPodSpecExpectations)
 	})
 
-	It("should inject Dash0 into a new basic job", func() {
-		resource := BasicJob(TestNamespaceName, JobName)
+	It("should add Dash0 to a basic job", func() {
+		resource := BasicJob(TestNamespaceName, JobName1)
 		result := resourceModifier.ModifyJob(resource, TestNamespaceName)
 
 		Expect(result).To(BeTrue())
-		VerifyModifiedJob(resource, BasicPodSpecExpectations)
+		VerifyModifiedJob(resource, BasicInstrumentedPodSpecExpectations)
 	})
 
-	It("should inject Dash0 into a new basic replica set", func() {
+	It("should add Dash0 to a basic replica set", func() {
 		resource := BasicReplicaSet(TestNamespaceName, ReplicaSetName)
 		result := resourceModifier.ModifyReplicaSet(resource, TestNamespaceName)
 
 		Expect(result).To(BeTrue())
-		VerifyModifiedReplicaSet(resource, BasicPodSpecExpectations)
+		VerifyModifiedReplicaSet(resource, BasicInstrumentedPodSpecExpectations)
 	})
 
-	It("should not inject Dash0 into a new basic replica set that is owned by a deployment", func() {
+	It("should not add Dash0 to a basic replica set that is owned by a deployment", func() {
 		resource := ReplicaSetOwnedByDeployment(TestNamespaceName, ReplicaSetName)
 		result := resourceModifier.ModifyReplicaSet(resource, TestNamespaceName)
 
@@ -148,12 +148,107 @@ var _ = Describe("Dash0 Resource Modification", func() {
 		VerifyUnmodifiedReplicaSet(resource)
 	})
 
-	It("should inject Dash0 into a new basic stateful set", func() {
+	It("should add Dash0 to a basic stateful set", func() {
 		resource := BasicStatefulSet(TestNamespaceName, StatefulSetName)
 		result := resourceModifier.ModifyStatefulSet(resource, TestNamespaceName)
 
 		Expect(result).To(BeTrue())
-		VerifyModifiedStatefulSet(resource, BasicPodSpecExpectations)
+		VerifyModifiedStatefulSet(resource, BasicInstrumentedPodSpecExpectations)
+		})
+	})
+
+	Context("when reverting resources", func() {
+		It("should remove Dash0 from an instrumented deployment", func() {
+			deployment := InstrumentedDeployment(TestNamespaceName, DeploymentName)
+			result := resourceModifier.RevertDeployment(deployment)
+
+			Expect(result).To(BeTrue())
+			VerifyUnmodifiedDeployment(deployment)
+		})
+
+		It("should only remove labels from a deployment that has dash0.instrumented=false", func() {
+			deployment := DeploymentWithInstrumentedFalseLabel(TestNamespaceName, DeploymentName)
+			result := resourceModifier.RevertDeployment(deployment)
+
+			Expect(result).To(BeTrue())
+			VerifyUnmodifiedDeployment(deployment)
+		})
+
+		It("should remove Dash0 from an instrumented deployment that has multiple containers, and already had volumes and init containers before being instrumented", func() {
+			deployment := InstrumentedDeploymentWithMoreBellsAndWhistles(TestNamespaceName, DeploymentName)
+			result := resourceModifier.RevertDeployment(deployment)
+
+			Expect(result).To(BeTrue())
+			VerifyRevertedDeployment(deployment, PodSpecExpectations{
+				Volumes:               2,
+				Dash0VolumeIdx:        -1,
+				InitContainers:        2,
+				Dash0InitContainerIdx: -1,
+				Containers: []ContainerExpectations{
+					{
+						VolumeMounts:                   1,
+						Dash0VolumeMountIdx:            -1,
+						EnvVars:                        1,
+						
NodeOptionsEnvVarIdx: -1, + Dash0CollectorBaseUrlEnvVarIdx: -1, + }, + { + VolumeMounts: 2, + Dash0VolumeMountIdx: -1, + EnvVars: 2, + NodeOptionsEnvVarIdx: -1, + Dash0CollectorBaseUrlEnvVarIdx: -1, + }, + }, + }) + }) + + It("should remove Dash0 from an instrumented cron job", func() { + resource := InstrumentedCronJob(TestNamespaceName, CronJobName) + result := resourceModifier.RevertCronJob(resource) + + Expect(result).To(BeTrue()) + VerifyUnmodifiedCronJob(resource) + }) + + It("should remove Dash0 from an instrumented daemon set", func() { + resource := InstrumentedDaemonSet(TestNamespaceName, DaemonSetName) + result := resourceModifier.RevertDaemonSet(resource) + + Expect(result).To(BeTrue()) + VerifyUnmodifiedDaemonSet(resource) + }) + + It("should remove Dash0 from an instrumented job", func() { + resource := InstrumentedJob(TestNamespaceName, JobName1) + result := resourceModifier.RevertJob(resource) + + Expect(result).To(BeTrue()) + VerifyUnmodifiedJob(resource) + }) + + It("should remove Dash0 from an instrumented replica set", func() { + resource := InstrumentedReplicaSet(TestNamespaceName, ReplicaSetName) + result := resourceModifier.RevertReplicaSet(resource) + + Expect(result).To(BeTrue()) + VerifyUnmodifiedReplicaSet(resource) + }) + + It("should not remove Dash0 from a replica set that is owned by a deployment", func() { + resource := InstrumentedReplicaSetOwnedByDeployment(TestNamespaceName, ReplicaSetName) + result := resourceModifier.RevertReplicaSet(resource) + + Expect(result).To(BeFalse()) + VerifyModifiedReplicaSet(resource, BasicInstrumentedPodSpecExpectations) + }) + + It("should remove Dash0 from an instrumented stateful set", func() { + resource := InstrumentedStatefulSet(TestNamespaceName, StatefulSetName) + result := resourceModifier.RevertStatefulSet(resource) + + Expect(result).To(BeTrue()) + VerifyUnmodifiedStatefulSet(resource) }) }) }) diff --git a/internal/util/constants.go b/internal/util/constants.go index 6a3922bc..6930bb04 100644 --- a/internal/util/constants.go +++ b/internal/util/constants.go @@ -8,4 +8,6 @@ const ( OperatorVersionLabelKey = "dash0.operator.version" InitContainerImageVersionLabelKey = "dash0.initcontainer.image.version" InstrumentedByLabelKey = "dash0.instrumented.by" + + FinalizerId = "operator.dash0.com/finalizer" ) diff --git a/internal/util/k8sevents.go b/internal/util/k8sevents.go index 28f8990a..ce9b8579 100644 --- a/internal/util/k8sevents.go +++ b/internal/util/k8sevents.go @@ -16,7 +16,7 @@ func QueueSuccessfulInstrumentationEvent(eventRecorder record.EventRecorder, res resource, corev1.EventTypeNormal, string(ReasonSuccessfulInstrumentation), - fmt.Sprintf("Dash0 instrumentation by %s has been successful.", eventSource), + fmt.Sprintf("Dash0 instrumentation of this resource by the %s has been successful.", eventSource), ) } @@ -24,8 +24,8 @@ func QueueAlreadyInstrumentedEvent(eventRecorder record.EventRecorder, resource eventRecorder.Event( resource, corev1.EventTypeNormal, - string(ReasonSuccessfulInstrumentation), - fmt.Sprintf("Dash0 instrumentation already present, no modification by %s is necessary.", eventSource), + string(ReasonAlreadyInstrumented), + fmt.Sprintf("Dash0 instrumentation was already present on this resource, no modification by the %s is necessary.", eventSource), ) } @@ -34,6 +34,33 @@ func QueueFailedInstrumentationEvent(eventRecorder record.EventRecorder, resourc resource, corev1.EventTypeWarning, string(ReasonFailedInstrumentation), - fmt.Sprintf("Dash0 instrumentation by %s has not been 
successful. Error message: %s", eventSource, err.Error()),
+		fmt.Sprintf("Dash0 instrumentation of this resource by the %s has not been successful. Error message: %s", eventSource, err.Error()),
+	)
+}
+
+func QueueSuccessfulUninstrumentationEvent(eventRecorder record.EventRecorder, resource runtime.Object, eventSource string) {
+	eventRecorder.Event(
+		resource,
+		corev1.EventTypeNormal,
+		string(ReasonSuccessfulUninstrumentation),
+		fmt.Sprintf("The %s successfully removed the Dash0 instrumentation from this resource.", eventSource),
+	)
+}
+
+func QueueAlreadyNotInstrumentedEvent(eventRecorder record.EventRecorder, resource runtime.Object, eventSource string) {
+	eventRecorder.Event(
+		resource,
+		corev1.EventTypeNormal,
+		string(ReasonAlreadyNotInstrumented),
+		fmt.Sprintf("Dash0 instrumentation was not present on this resource, no modification by the %s has been necessary.", eventSource),
+	)
+}
+
+func QueueFailedUninstrumentationEvent(eventRecorder record.EventRecorder, resource runtime.Object, eventSource string, err error) {
+	eventRecorder.Event(
+		resource,
+		corev1.EventTypeWarning,
+		string(ReasonFailedUninstrumentation),
+		fmt.Sprintf("The %s's attempt to remove the Dash0 instrumentation from this resource has not been successful. Error message: %s", eventSource, err.Error()),
 	)
 }
diff --git a/internal/util/types.go b/internal/util/types.go
index 5700d1ae..5335a37d 100644
--- a/internal/util/types.go
+++ b/internal/util/types.go
@@ -10,8 +10,12 @@ const (
 	ConditionTypeAvailable ConditionType = "Available"
 	ConditionTypeDegraded  ConditionType = "Degraded"
 
-	ReasonSuccessfulInstrumentation Reason = "SuccessfulInstrumentation"
-	ReasonFailedInstrumentation     Reason = "FailedInstrumentation"
+	ReasonSuccessfulInstrumentation   Reason = "SuccessfulInstrumentation"
+	ReasonAlreadyInstrumented         Reason = "AlreadyInstrumented"
+	ReasonFailedInstrumentation       Reason = "FailedInstrumentation"
+	ReasonSuccessfulUninstrumentation Reason = "SuccessfulUninstrumentation"
+	ReasonAlreadyNotInstrumented      Reason = "AlreadyNotInstrumented"
+	ReasonFailedUninstrumentation     Reason = "FailedUninstrumentation"
 )
 
 type Versions struct {
diff --git a/internal/webhook/dash0_webhook_test.go b/internal/webhook/dash0_webhook_test.go
index 24f42998..f1615114 100644
--- a/internal/webhook/dash0_webhook_test.go
+++ b/internal/webhook/dash0_webhook_test.go
@@ -22,7 +22,7 @@ var _ = Describe("Dash0 Webhook", func() {
 		_ = k8sClient.Delete(ctx, BasicCronJob(TestNamespaceName, CronJobName))
 		_ = k8sClient.Delete(ctx, BasicDaemonSet(TestNamespaceName, DaemonSetName))
 		_ = k8sClient.Delete(ctx, BasicDeployment(TestNamespaceName, DeploymentName))
-		_ = k8sClient.Delete(ctx, BasicJob(TestNamespaceName, JobName))
+		_ = k8sClient.Delete(ctx, BasicJob(TestNamespaceName, JobName1))
 		err := k8sClient.Delete(ctx, BasicReplicaSet(TestNamespaceName, ReplicaSetName))
 		if err != nil {
 			fmt.Fprintf(GinkgoWriter, "cannot delete replicaset: %v\n", err)
@@ -35,7 +35,7 @@ var _ = Describe("Dash0 Webhook", func() {
 	It("should inject Dash0 into a new basic deployment", func() {
 		CreateBasicDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName)
 		deployment := GetDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName)
-		VerifyModifiedDeployment(deployment, BasicPodSpecExpectations)
+		VerifyModifiedDeployment(deployment, BasicInstrumentedPodSpecExpectations)
 	})
 
 	It("should inject Dash0 into a new deployment that has multiple containers, and already has volumes and init containers", func() {
@@ -67,7 +67,7 @@ var _ = 
Describe("Dash0 Webhook", func() { }, }, }) - VerifySuccessEvent(ctx, clientset, TestNamespaceName, DeploymentName, "webhook") + VerifySuccessfulInstrumentationEvent(ctx, clientset, TestNamespaceName, DeploymentName, "webhook") }) It("should update existing Dash0 artifacts in a new deployment", func() { @@ -106,25 +106,25 @@ var _ = Describe("Dash0 Webhook", func() { It("should inject Dash0 into a new basic cron job", func() { CreateBasicCronJob(ctx, k8sClient, TestNamespaceName, CronJobName) cronJob := GetCronJob(ctx, k8sClient, TestNamespaceName, CronJobName) - VerifyModifiedCronJob(cronJob, BasicPodSpecExpectations) + VerifyModifiedCronJob(cronJob, BasicInstrumentedPodSpecExpectations) }) It("should inject Dash0 into a new basic daemon set", func() { CreateBasicDaemonSet(ctx, k8sClient, TestNamespaceName, DaemonSetName) daemonSet := GetDaemonSet(ctx, k8sClient, TestNamespaceName, DaemonSetName) - VerifyModifiedDaemonSet(daemonSet, BasicPodSpecExpectations) + VerifyModifiedDaemonSet(daemonSet, BasicInstrumentedPodSpecExpectations) }) It("should inject Dash0 into a new basic job", func() { - CreateBasicJob(ctx, k8sClient, TestNamespaceName, JobName) - job := GetJob(ctx, k8sClient, TestNamespaceName, JobName) - VerifyModifiedJob(job, BasicPodSpecExpectations) + CreateBasicJob(ctx, k8sClient, TestNamespaceName, JobName1) + job := GetJob(ctx, k8sClient, TestNamespaceName, JobName1) + VerifyModifiedJob(job, BasicInstrumentedPodSpecExpectations) }) It("should inject Dash0 into a new basic replica set", func() { CreateBasicReplicaSet(ctx, k8sClient, TestNamespaceName, ReplicaSetName) replicaSet := GetReplicaSet(ctx, k8sClient, TestNamespaceName, ReplicaSetName) - VerifyModifiedReplicaSet(replicaSet, BasicPodSpecExpectations) + VerifyModifiedReplicaSet(replicaSet, BasicInstrumentedPodSpecExpectations) }) It("should not inject Dash0 into a new replica set owned by a deployment", func() { @@ -136,7 +136,7 @@ var _ = Describe("Dash0 Webhook", func() { It("should inject Dash0 into a new basic stateful set", func() { CreateBasicStatefulSet(ctx, k8sClient, TestNamespaceName, StatefulSetName) statefulSet := GetStatefulSet(ctx, k8sClient, TestNamespaceName, StatefulSetName) - VerifyModifiedStatefulSet(statefulSet, BasicPodSpecExpectations) + VerifyModifiedStatefulSet(statefulSet, BasicInstrumentedPodSpecExpectations) }) }) }) diff --git a/test/util/helpers.go b/test/util/helpers.go deleted file mode 100644 index 7fb21105..00000000 --- a/test/util/helpers.go +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. -// SPDX-License-Identifier: Apache-2.0 - -package util - -import ( - "context" - "fmt" - - "github.com/dash0hq/dash0-operator/internal/util" - . 
"github.com/onsi/gomega" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" -) - -func VerifySuccessEvent( - ctx context.Context, - clientset *kubernetes.Clientset, - namespace string, - resourceName string, - eventSource string, -) { - verifyEvent( - ctx, - clientset, - namespace, - resourceName, - util.ReasonSuccessfulInstrumentation, - fmt.Sprintf("Dash0 instrumentation by %s has been successful.", eventSource), - ) -} - -func VerifyFailureEvent( - ctx context.Context, - clientset *kubernetes.Clientset, - namespace string, - resourceName string, - eventSource string, - message string, -) { - verifyEvent( - ctx, - clientset, - namespace, - resourceName, - util.ReasonFailedInstrumentation, - message, - ) -} - -func verifyEvent( - ctx context.Context, - clientset *kubernetes.Clientset, - namespace string, - resourceName string, - reason util.Reason, - message string, -) { - allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) - Expect(allEvents.Items).To(HaveLen(1)) - Expect(allEvents.Items).To( - ContainElement( - MatchEvent( - namespace, - resourceName, - reason, - message, - ))) -} - -func DeleteAllEvents( - ctx context.Context, - clientset *kubernetes.Clientset, - namespace string, -) { - err := clientset.CoreV1().Events(namespace).DeleteCollection(ctx, metav1.DeleteOptions{ - GracePeriodSeconds: new(int64), // delete immediately - }, metav1.ListOptions{}) - Expect(err).NotTo(HaveOccurred()) -} diff --git a/test/util/resources.go b/test/util/resources.go index e0b5c168..9445d953 100644 --- a/test/util/resources.go +++ b/test/util/resources.go @@ -5,15 +5,21 @@ package util import ( "context" + "fmt" + "strconv" - . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + + . 
"github.com/onsi/gomega" + + "github.com/dash0hq/dash0-operator/internal/util" ) const ( @@ -21,11 +27,41 @@ const ( CronJobName = "cronjob" DaemonSetName = "daemonset" DeploymentName = "deployment" - JobName = "job" + JobName1 = "job1" + JobName2 = "job2" + JobName3 = "job3" ReplicaSetName = "replicaset" StatefulSetName = "statefulset" ) +var ( + True = true + False = false + ArbitraryNumer int64 = 1302 + + instrumentationInitContainer = corev1.Container{ + Name: "dash0-instrumentation", + Image: "dash0-instrumentation:1.2.3", + Env: []corev1.EnvVar{{ + Name: "DASH0_INSTRUMENTATION_FOLDER_DESTINATION", + Value: "/opt/dash0", + }}, + SecurityContext: &corev1.SecurityContext{ + AllowPrivilegeEscalation: &False, + Privileged: &False, + ReadOnlyRootFilesystem: &True, + RunAsNonRoot: &False, + RunAsUser: &ArbitraryNumer, + RunAsGroup: &ArbitraryNumer, + }, + VolumeMounts: []corev1.VolumeMount{{ + Name: "dash0-instrumentation", + ReadOnly: false, + MountPath: "/opt/dash0", + }}, + } +) + func EnsureDash0CustomResourceExists( ctx context.Context, k8sClient client.Client, @@ -90,7 +126,22 @@ func CreateBasicCronJob( namespace string, name string, ) *batchv1.CronJob { - return create(ctx, k8sClient, BasicCronJob(namespace, name)).(*batchv1.CronJob) + return createResource(ctx, k8sClient, BasicCronJob(namespace, name)).(*batchv1.CronJob) +} + +func InstrumentedCronJob(namespace string, name string) *batchv1.CronJob { + resource := BasicCronJob(namespace, name) + simulateInstrumentedResource(&resource.Spec.JobTemplate.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedCronJob( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *batchv1.CronJob { + return createResource(ctx, k8sClient, InstrumentedCronJob(namespace, name)).(*batchv1.CronJob) } func BasicDaemonSet(namespace string, name string) *appsv1.DaemonSet { @@ -109,7 +160,22 @@ func CreateBasicDaemonSet( namespace string, name string, ) *appsv1.DaemonSet { - return create(ctx, k8sClient, BasicDaemonSet(namespace, name)).(*appsv1.DaemonSet) + return createResource(ctx, k8sClient, BasicDaemonSet(namespace, name)).(*appsv1.DaemonSet) +} + +func InstrumentedDaemonSet(namespace string, name string) *appsv1.DaemonSet { + resource := BasicDaemonSet(namespace, name) + simulateInstrumentedResource(&resource.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedDaemonSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.DaemonSet { + return createResource(ctx, k8sClient, InstrumentedDaemonSet(namespace, name)).(*appsv1.DaemonSet) } func BasicDeployment(namespace string, name string) *appsv1.Deployment { @@ -128,7 +194,28 @@ func CreateBasicDeployment( namespace string, name string, ) *appsv1.Deployment { - return create(ctx, k8sClient, BasicDeployment(namespace, name)).(*appsv1.Deployment) + return createResource(ctx, k8sClient, BasicDeployment(namespace, name)).(*appsv1.Deployment) +} + +func InstrumentedDeployment(namespace string, name string) *appsv1.Deployment { + resource := BasicDeployment(namespace, name) + simulateInstrumentedResource(&resource.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedDeployment( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.Deployment { + return createResource(ctx, k8sClient, InstrumentedDeployment(namespace, name)).(*appsv1.Deployment) +} + +func 
DeploymentWithInstrumentedFalseLabel(namespace string, name string) *appsv1.Deployment { + resource := BasicDeployment(namespace, name) + addInstrumentationLabels(&resource.ObjectMeta, false) + return resource } func BasicJob(namespace string, name string) *batchv1.Job { @@ -147,7 +234,37 @@ func CreateBasicJob( namespace string, name string, ) *batchv1.Job { - return create(ctx, k8sClient, BasicJob(namespace, name)).(*batchv1.Job) + return createResource(ctx, k8sClient, BasicJob(namespace, name)).(*batchv1.Job) +} + +func InstrumentedJob(namespace string, name string) *batchv1.Job { + resource := BasicJob(namespace, name) + simulateInstrumentedResource(&resource.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedJob( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *batchv1.Job { + return createResource(ctx, k8sClient, InstrumentedJob(namespace, name)).(*batchv1.Job) +} + +func JobWithInstrumentationLabels(namespace string, name string) *batchv1.Job { + resource := BasicJob(namespace, name) + addInstrumentationLabels(&resource.ObjectMeta, false) + return resource +} + +func CreateJobWithInstrumentationLabels( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *batchv1.Job { + return createResource(ctx, k8sClient, JobWithInstrumentationLabels(namespace, name)).(*batchv1.Job) } func BasicReplicaSet(namespace string, name string) *appsv1.ReplicaSet { @@ -166,7 +283,22 @@ func CreateBasicReplicaSet( namespace string, name string, ) *appsv1.ReplicaSet { - return create(ctx, k8sClient, BasicReplicaSet(namespace, name)).(*appsv1.ReplicaSet) + return createResource(ctx, k8sClient, BasicReplicaSet(namespace, name)).(*appsv1.ReplicaSet) +} + +func InstrumentedReplicaSet(namespace string, name string) *appsv1.ReplicaSet { + resource := BasicReplicaSet(namespace, name) + simulateInstrumentedResource(&resource.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedReplicaSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.ReplicaSet { + return createResource(ctx, k8sClient, InstrumentedReplicaSet(namespace, name)).(*appsv1.ReplicaSet) } func ReplicaSetOwnedByDeployment(namespace string, name string) *appsv1.ReplicaSet { @@ -190,7 +322,22 @@ func CreateReplicaSetOwnedByDeployment( namespace string, name string, ) *appsv1.ReplicaSet { - return create(ctx, k8sClient, ReplicaSetOwnedByDeployment(namespace, name)).(*appsv1.ReplicaSet) + return createResource(ctx, k8sClient, ReplicaSetOwnedByDeployment(namespace, name)).(*appsv1.ReplicaSet) +} + +func InstrumentedReplicaSetOwnedByDeployment(namespace string, name string) *appsv1.ReplicaSet { + resource := ReplicaSetOwnedByDeployment(namespace, name) + simulateInstrumentedResource(&resource.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedReplicaSetOwnedByDeployment( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.ReplicaSet { + return createResource(ctx, k8sClient, InstrumentedReplicaSetOwnedByDeployment(namespace, name)).(*appsv1.ReplicaSet) } func BasicStatefulSet(namespace string, name string) *appsv1.StatefulSet { @@ -209,7 +356,22 @@ func CreateBasicStatefulSet( namespace string, name string, ) *appsv1.StatefulSet { - return create(ctx, k8sClient, BasicStatefulSet(namespace, name)).(*appsv1.StatefulSet) + return createResource(ctx, k8sClient, 
BasicStatefulSet(namespace, name)).(*appsv1.StatefulSet) +} + +func InstrumentedStatefulSet(namespace string, name string) *appsv1.StatefulSet { + resource := BasicStatefulSet(namespace, name) + simulateInstrumentedResource(&resource.Spec.Template, &resource.ObjectMeta, namespace) + return resource +} + +func CreateInstrumentedStatefulSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.StatefulSet { + return createResource(ctx, k8sClient, InstrumentedStatefulSet(namespace, name)).(*appsv1.StatefulSet) } func basicPodSpecTemplate() corev1.PodTemplateSpec { @@ -228,7 +390,7 @@ func createSelector() *metav1.LabelSelector { return selector } -func create(ctx context.Context, k8sClient client.Client, resource client.Object) client.Object { +func createResource(ctx context.Context, k8sClient client.Client, resource client.Object) client.Object { Expect(k8sClient.Create(ctx, resource)).Should(Succeed()) return resource } @@ -339,10 +501,7 @@ func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1 Value: "value", }}, }, - { - Name: "dash0-instrumentation", - Image: "ubuntu", - }, + instrumentationInitContainer, { Name: "test-init-container-2", Image: "ubuntu", @@ -431,6 +590,156 @@ func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1 return deployment } +func InstrumentedDeploymentWithMoreBellsAndWhistles(namespace string, name string) *appsv1.Deployment { + deployment := DeploymentWithMoreBellsAndWhistles(namespace, name) + podSpec := &deployment.Spec.Template.Spec + podSpec.Volumes = []corev1.Volume{ + { + Name: "test-volume-0", + VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}, + }, + { + Name: "test-volume-1", + VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}, + }, + { + Name: "dash0-instrumentation", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: resource.NewScaledQuantity(150, resource.Mega), + }, + }, + }, + } + podSpec.InitContainers = []corev1.Container{ + { + Name: "test-init-container-0", + Image: "ubuntu", + Env: []corev1.EnvVar{{ + Name: "TEST_INIT_0", + Value: "value", + }}, + }, + { + Name: "test-init-container-1", + Image: "ubuntu", + Env: []corev1.EnvVar{ + { + Name: "TEST_INIT_0", + Value: "value", + }, + { + Name: "TEST_INIT_1", + Value: "value", + }, + }, + }, + instrumentationInitContainer, + } + podSpec.Containers = []corev1.Container{ + { + Name: "test-container-0", + Image: "ubuntu", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test-volume-0", + MountPath: "/test-1", + }, + { + Name: "dash0-instrumentation", + MountPath: "/opt/dash0", + }, + }, + Env: []corev1.EnvVar{ + { + Name: "TEST0", + Value: "value", + }, + { + Name: "NODE_OPTIONS", + Value: "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0/opentelemetry/src/index.js", + }, + { + Name: "DASH0_OTEL_COLLECTOR_BASE_URL", + Value: fmt.Sprintf("http://dash0-opentelemetry-collector-daemonset.%s.svc.cluster.local:4318", namespace), + }, + }, + }, + { + Name: "test-container-1", + Image: "ubuntu", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test-volume-0", + MountPath: "/test-0", + }, + { + Name: "test-volume-1", + MountPath: "/test-1", + }, + { + Name: "dash0-instrumentation", + MountPath: "/opt/dash0", + }, + }, + Env: []corev1.EnvVar{ + { + Name: "TEST0", + Value: "value", + }, + { + Name: "TEST1", + ValueFrom: &corev1.EnvVarSource{FieldRef: &corev1.ObjectFieldSelector{FieldPath: 
"metadata.namespace"}}, + }, + { + Name: "NODE_OPTIONS", + Value: "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0/opentelemetry/src/index.js", + }, + { + Name: "DASH0_OTEL_COLLECTOR_BASE_URL", + Value: fmt.Sprintf("http://dash0-opentelemetry-collector-daemonset.%s.svc.cluster.local:4318", namespace), + }, + }, + }, + } + + return deployment +} + +func simulateInstrumentedResource(podTemplateSpec *corev1.PodTemplateSpec, meta *metav1.ObjectMeta, namespace string) { + podSpec := &podTemplateSpec.Spec + podSpec.Volumes = []corev1.Volume{ + { + Name: "dash0-instrumentation", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + SizeLimit: resource.NewScaledQuantity(150, resource.Mega), + }, + }, + }, + } + podSpec.InitContainers = []corev1.Container{instrumentationInitContainer} + + container := &podSpec.Containers[0] + container.VolumeMounts = []corev1.VolumeMount{{ + Name: "dash0-instrumentation", + MountPath: "/opt/dash0", + }} + container.Env = []corev1.EnvVar{ + { + Name: "NODE_OPTIONS", + Value: "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0/opentelemetry/src/index.js", + }, + { + Name: "DASH0_OTEL_COLLECTOR_BASE_URL", + Value: fmt.Sprintf("http://dash0-opentelemetry-collector-daemonset.%s.svc.cluster.local:4318", namespace), + }, + } + + addInstrumentationLabels(meta, true) + addInstrumentationLabels(&podTemplateSpec.ObjectMeta, true) +} + func GetCronJob( ctx context.Context, k8sClient client.Client, @@ -520,3 +829,17 @@ func GetStatefulSet( ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) return resource } + +func addInstrumentationLabels(meta *metav1.ObjectMeta, instrumented bool) { + addLabel(meta, util.InstrumentedLabelKey, strconv.FormatBool(instrumented)) + addLabel(meta, util.OperatorVersionLabelKey, "1.2.3") + addLabel(meta, util.InitContainerImageVersionLabelKey, "4.5.6") + addLabel(meta, util.InstrumentedByLabelKey, "someone") +} + +func addLabel(meta *metav1.ObjectMeta, key string, value string) { + if meta.Labels == nil { + meta.Labels = make(map[string]string, 1) + } + meta.Labels[key] = value +} diff --git a/test/util/verification.go b/test/util/verification.go index e43a6df2..ee63d132 100644 --- a/test/util/verification.go +++ b/test/util/verification.go @@ -4,6 +4,7 @@ package util import ( + "context" "fmt" . 
"github.com/onsi/gomega" @@ -12,6 +13,9 @@ import ( batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + "github.com/dash0hq/dash0-operator/internal/util" ) type ContainerExpectations struct { @@ -34,7 +38,7 @@ type PodSpecExpectations struct { } var ( - BasicPodSpecExpectations = PodSpecExpectations{ + BasicInstrumentedPodSpecExpectations = PodSpecExpectations{ Volumes: 1, Dash0VolumeIdx: 0, InitContainers: 1, @@ -52,37 +56,67 @@ var ( ) func VerifyModifiedCronJob(resource *batchv1.CronJob, expectations PodSpecExpectations) { - verifyModifiedPodSpec(resource.Spec.JobTemplate.Spec.Template.Spec, expectations) + verifyPodSpec(resource.Spec.JobTemplate.Spec.Template.Spec, expectations) verifyLabelsAfterSuccessfulModification(resource.Spec.JobTemplate.Spec.Template.ObjectMeta) verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) } +func VerifyUnmodifiedCronJob(resource *batchv1.CronJob) { + verifyUnmodifiedPodSpec(resource.Spec.JobTemplate.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.JobTemplate.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + func VerifyModifiedDaemonSet(resource *appsv1.DaemonSet, expectations PodSpecExpectations) { - verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyPodSpec(resource.Spec.Template.Spec, expectations) verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) } +func VerifyUnmodifiedDaemonSet(resource *appsv1.DaemonSet) { + verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + func VerifyModifiedDeployment(resource *appsv1.Deployment, expectations PodSpecExpectations) { - verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyPodSpec(resource.Spec.Template.Spec, expectations) verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) } +func VerifyUnmodifiedDeployment(resource *appsv1.Deployment) { + verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + +func VerifyRevertedDeployment(resource *appsv1.Deployment, expectations PodSpecExpectations) { + verifyPodSpec(resource.Spec.Template.Spec, expectations) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + func VerifyModifiedJob(resource *batchv1.Job, expectations PodSpecExpectations) { - verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyPodSpec(resource.Spec.Template.Spec, expectations) verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) } -func VerifyUnmodifiedJob(resource *batchv1.Job) { +func VerifyImmutableJobCouldNotBeModified(resource *batchv1.Job) { verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) verifyLabelsAfterFailureToModify(resource.ObjectMeta) } +func VerifyUnmodifiedJob(resource *batchv1.Job) { + verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + func VerifyModifiedReplicaSet(resource *appsv1.ReplicaSet, expectations PodSpecExpectations) { - 
verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyPodSpec(resource.Spec.Template.Spec, expectations) verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) } @@ -94,12 +128,18 @@ func VerifyUnmodifiedReplicaSet(resource *appsv1.ReplicaSet) { } func VerifyModifiedStatefulSet(resource *appsv1.StatefulSet, expectations PodSpecExpectations) { - verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyPodSpec(resource.Spec.Template.Spec, expectations) verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) } -func verifyModifiedPodSpec(podSpec corev1.PodSpec, expectations PodSpecExpectations) { +func VerifyUnmodifiedStatefulSet(resource *appsv1.StatefulSet) { + verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + +func verifyPodSpec(podSpec corev1.PodSpec, expectations PodSpecExpectations) { Expect(podSpec.Volumes).To(HaveLen(expectations.Volumes)) for i, volume := range podSpec.Volumes { if i == expectations.Dash0VolumeIdx { @@ -179,16 +219,135 @@ func verifyLabelsAfterSuccessfulModification(meta metav1.ObjectMeta) { Expect(meta.Labels["dash0.instrumented"]).To(Equal("true")) Expect(meta.Labels["dash0.operator.version"]).To(Equal("1.2.3")) Expect(meta.Labels["dash0.initcontainer.image.version"]).To(Equal("4.5.6")) + Expect(meta.Labels["dash0.instrumented.by"]).NotTo(Equal("")) } func verifyLabelsAfterFailureToModify(meta metav1.ObjectMeta) { Expect(meta.Labels["dash0.instrumented"]).To(Equal("false")) Expect(meta.Labels["dash0.operator.version"]).To(Equal("1.2.3")) Expect(meta.Labels["dash0.initcontainer.image.version"]).To(Equal("4.5.6")) + Expect(meta.Labels["dash0.instrumented.by"]).NotTo(Equal("")) } func verifyNoDash0Labels(meta metav1.ObjectMeta) { Expect(meta.Labels["dash0.instrumented"]).To(Equal("")) Expect(meta.Labels["dash0.operator.version"]).To(Equal("")) Expect(meta.Labels["dash0.initcontainer.image.version"]).To(Equal("")) + Expect(meta.Labels["dash0.instrumented.by"]).To(Equal("")) +} + +func VerifyNoEvents( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, +) { + allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(allEvents.Items).To(BeEmpty()) +} + +func VerifySuccessfulInstrumentationEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + eventSource string, +) { + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonSuccessfulInstrumentation, + fmt.Sprintf("Dash0 instrumentation of this resource by the %s has been successful.", eventSource), + ) +} + +func VerifyFailedInstrumentationEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + message string, +) { + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonFailedInstrumentation, + message, + ) +} + +func VerifySuccessfulUninstrumentationEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + eventSource string, +) { + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonSuccessfulUninstrumentation, + fmt.Sprintf("The %s successfully removed the Dash0 instrumentation from this resource.", 
eventSource), + ) +} + +func VerifyFailedUninstrumentationEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + message string, +) { + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonFailedUninstrumentation, + message, + ) +} + +func VerifyAlreadyNotInstrumented( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + message string, +) { + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonAlreadyNotInstrumented, + message, + ) +} + +func verifyEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + reason util.Reason, + message string, +) { + allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(allEvents.Items).To(HaveLen(1)) + Expect(allEvents.Items).To( + ContainElement( + MatchEvent( + namespace, + resourceName, + reason, + message, + ))) }
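
A minimal, self-contained sketch (not part of the diff; the helper name stripDash0NodeOptions is made up for illustration) of the NODE_OPTIONS handling that removeNodeOptions in internal/k8sresources/resource_modifier.go implements, using the exact --require flag value quoted in the diff above:

package main

import (
	"fmt"
	"strings"
)

// The --require flag injected during instrumentation, as quoted in the diff above.
const dash0NodeOptionsValue = "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0/opentelemetry/src/index.js"

// stripDash0NodeOptions distinguishes the same two cases as removeNodeOptions:
// a NODE_OPTIONS value consisting solely of the Dash0 flag means the whole
// environment variable should be removed; any other value is kept, with the
// Dash0 flag stripped out of it.
func stripDash0NodeOptions(value string) (newValue string, removeEntirely bool) {
	if value == dash0NodeOptionsValue {
		return "", true
	}
	return strings.Replace(value, dash0NodeOptionsValue, "", -1), false
}

func main() {
	// Case 1: NODE_OPTIONS was added entirely by the operator -> drop the variable.
	fmt.Println(stripDash0NodeOptions(dash0NodeOptionsValue))

	// Case 2: the user had their own NODE_OPTIONS value -> keep it, minus the Dash0 flag.
	fmt.Println(stripDash0NodeOptions("--max-old-space-size=4096 " + dash0NodeOptionsValue))
}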