From 6f57bad6c167e60330a62738a1963810b9822b78 Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Thu, 23 May 2024 15:41:47 +0200 Subject: [PATCH] feat: add support for more resource types * CronJob * DaemonSet * Job * ReplicaSet * StatefulSet --- .gitignore | 4 +- .prettierrc | 8 + api/v1alpha1/dash0_types.go | 12 - cmd/main.go | 17 +- internal/controller/dash0_controller.go | 337 +++++++++++++++- internal/controller/dash0_controller_test.go | 274 +++++++++---- internal/controller/status_conditions.go | 10 +- internal/k8sresources/modify.go | 148 ++++--- internal/k8sresources/modify_test.go | 108 ++++-- internal/util/k8sevents.go | 8 +- internal/util/types.go | 25 ++ internal/webhook/dash0_webhook.go | 232 +++++++++-- internal/webhook/dash0_webhook_test.go | 81 ++-- internal/webhook/webhook_suite_test.go | 10 +- test-resources/bin/render-templates.sh | 30 ++ test-resources/bin/test-cleanup.sh | 14 +- .../bin/test-roundtrip-01-aum-cr-operator.sh | 7 +- .../bin/test-roundtrip-02-aum-operator-cr.sh | 7 +- .../bin/test-roundtrip-03-operator-cr-aum.sh | 5 +- test-resources/collector/.gitignore | 3 +- test-resources/collector/deploy.sh | 18 +- ...yaml.template => e2e.values.yaml.template} | 23 +- .../collector/manual.values.yaml.template | 57 +++ .../collector-received-data/.gitignore | 1 + .../e2e-test-volumes/test-uuid/.gitignore | 1 + test-resources/node.js/express/.gitignore | 6 + test-resources/node.js/express/app.js | 44 ++- .../node.js/express/cronjob.yaml.template | 35 ++ .../node.js/express/daemonset.yaml.template | 39 ++ .../{build-and-deploy.sh => deploy.sh} | 9 +- .../{deploy.yaml => deployment.yaml.template} | 2 +- .../node.js/express/job.yaml.template | 32 ++ .../node.js/express/replicaset.yaml.template | 40 ++ .../node.js/express/statefulset.yaml.template | 41 ++ test-resources/node.js/express/undeploy.sh | 3 +- test/e2e/e2e_helpers.go | 366 ++++++++++++++---- test/e2e/e2e_suite_test.go | 2 +- test/e2e/e2e_test.go | 124 ++++-- test/util/helpers.go | 68 +++- test/util/matchers.go | 5 +- test/util/resources.go | 272 +++++++++++-- test/util/verification.go | 99 ++++- 42 files changed, 2148 insertions(+), 479 deletions(-) create mode 100644 .prettierrc create mode 100644 internal/util/types.go create mode 100755 test-resources/bin/render-templates.sh rename test-resources/collector/{values.yaml.template => e2e.values.yaml.template} (64%) create mode 100644 test-resources/collector/manual.values.yaml.template create mode 100644 test-resources/e2e-test-volumes/collector-received-data/.gitignore create mode 100644 test-resources/e2e-test-volumes/test-uuid/.gitignore create mode 100644 test-resources/node.js/express/cronjob.yaml.template create mode 100644 test-resources/node.js/express/daemonset.yaml.template rename test-resources/node.js/express/{build-and-deploy.sh => deploy.sh} (60%) rename test-resources/node.js/express/{deploy.yaml => deployment.yaml.template} (98%) create mode 100644 test-resources/node.js/express/job.yaml.template create mode 100644 test-resources/node.js/express/replicaset.yaml.template create mode 100644 test-resources/node.js/express/statefulset.yaml.template diff --git a/.gitignore b/.gitignore index 1b4545d9..8f51885d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ *.dll *.so *.dylib -bin/* +/bin/* Dockerfile.cross # Test binary, built with `go test -c` @@ -26,5 +26,3 @@ go.work *.swp *.swo *~ - -e2e-test-received-data/ diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 00000000..82f96a0a --- /dev/null +++ b/.prettierrc @@ -0,0 +1,8 @@ 
+{
+  "arrowParens": "avoid",
+  "quoteProps": "as-needed",
+  "printWidth": 120,
+  "semi": true,
+  "singleQuote": true,
+  "trailingComma": "all"
+}
diff --git a/api/v1alpha1/dash0_types.go b/api/v1alpha1/dash0_types.go
index 18c59c39..be2f0a00 100644
--- a/api/v1alpha1/dash0_types.go
+++ b/api/v1alpha1/dash0_types.go
@@ -37,18 +37,6 @@ type Dash0List struct {
 	Items []Dash0 `json:"items"`
 }
 
-type ConditionType string
-type Reason string
-
-const (
-	ConditionTypeAvailable ConditionType = "Available"
-	ConditionTypeDegraded ConditionType = "Degraded"
-
-	ReasonSuccessfulInstrumentation Reason = "SuccessfulInstrumentation"
-	ReasonAlreadyInstrumented Reason = "AlreadyInstrumented"
-	ReasonFailedInstrumentation Reason = "FailedInstrumentation"
-)
-
 func init() {
 	SchemeBuilder.Register(&Dash0{}, &Dash0List{})
 }
diff --git a/cmd/main.go b/cmd/main.go
index f6abcd66..52e3f70a 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -25,7 +25,7 @@ import (
 
 	operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1"
 	"github.com/dash0hq/dash0-operator/internal/controller"
-	"github.com/dash0hq/dash0-operator/internal/k8sresources"
+	"github.com/dash0hq/dash0-operator/internal/util"
 	dash0webhook "github.com/dash0hq/dash0-operator/internal/webhook"
 	//+kubebuilder:scaffold:imports
 )
@@ -155,15 +155,17 @@ func startOperatorManager(
 		initContainerImageVersion,
 	)
 
+	versions := util.Versions{
+		OperatorVersion: operatorVersion,
+		InitContainerImageVersion: initContainerImageVersion,
+	}
+
 	if err = (&controller.Dash0Reconciler{
 		Client: mgr.GetClient(),
 		ClientSet: clientSet,
 		Scheme: mgr.GetScheme(),
 		Recorder: mgr.GetEventRecorderFor("dash0-controller"),
-		Versions: k8sresources.Versions{
-			OperatorVersion: operatorVersion,
-			InitContainerImageVersion: initContainerImageVersion,
-		},
+		Versions: versions,
 	}).SetupWithManager(mgr); err != nil {
 		return fmt.Errorf("unable to set up the Dash0 reconciler: %w", err)
 	}
@@ -172,10 +174,7 @@ func startOperatorManager(
 	if os.Getenv("ENABLE_WEBHOOKS") != "false" {
 		if err = (&dash0webhook.Handler{
 			Recorder: mgr.GetEventRecorderFor("dash0-webhook"),
-			Versions: k8sresources.Versions{
-				OperatorVersion: operatorVersion,
-				InitContainerImageVersion: initContainerImageVersion,
-			},
+			Versions: versions,
 		}).SetupWebhookWithManager(mgr); err != nil {
 			return fmt.Errorf("unable to create the Dash0 webhook: %w", err)
 		}
diff --git a/internal/controller/dash0_controller.go b/internal/controller/dash0_controller.go
index 19c0de47..1691c77e 100644
--- a/internal/controller/dash0_controller.go
+++ b/internal/controller/dash0_controller.go
@@ -5,10 +5,12 @@ package controller
 
 import (
 	"context"
+	"errors"
 	"fmt"
 
 	"github.com/go-logr/logr"
 	appsv1 "k8s.io/api/apps/v1"
+	batchv1 "k8s.io/api/batch/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -24,18 +26,42 @@ import (
 	"github.com/dash0hq/dash0-operator/internal/util"
 )
 
+const (
+	resourceTypeLabel = "resource type"
+	resourceNamespaceLabel = "resource namespace"
+	resourceNameLabel = "resource name"
+)
+
 type Dash0Reconciler struct {
 	client.Client
 	ClientSet *kubernetes.Clientset
 	Scheme *runtime.Scheme
 	Recorder record.EventRecorder
-	Versions k8sresources.Versions
+	Versions util.Versions
+}
+
+type ImmutableResourceError struct {
+	resourceType string
+	resource string
 }
 
+func (e ImmutableResourceError) Error() string {
+	return fmt.Sprintf(
+		"Dash0 cannot instrument the existing %s %s, since this type of resource is immutable.",
+		e.resourceType,
+
e.resource, + ) +} + +var ( + labelNotSetFilter = metav1.ListOptions{ + LabelSelector: fmt.Sprintf("!%s", util.Dash0AutoInstrumentationLabel), + } +) + func (r *Dash0Reconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&operatorv1alpha1.Dash0{}). - // Watches(&source.Kind{Type: &appsv1.Deployment{}}, handler.EnqueueRequestsFromMapFunc(r.enqueueIfApplicable)). Owns(&appsv1.Deployment{}). Complete(r) } @@ -111,7 +137,7 @@ func (r *Dash0Reconciler) initStatusConditions(ctx context.Context, dash0CustomR setAvailableConditionToUnknown(dash0CustomResource) firstReconcile = true needsRefresh = true - } else if availableCondition := meta.FindStatusCondition(dash0CustomResource.Status.Conditions, string(operatorv1alpha1.ConditionTypeAvailable)); availableCondition == nil { + } else if availableCondition := meta.FindStatusCondition(dash0CustomResource.Status.Conditions, string(util.ConditionTypeAvailable)); availableCondition == nil { setAvailableConditionToUnknown(dash0CustomResource) needsRefresh = true } @@ -169,52 +195,321 @@ func (r *Dash0Reconciler) refreshStatus(ctx context.Context, dash0CustomResource func (r *Dash0Reconciler) modifyExistingResources(ctx context.Context, dash0CustomResource *operatorv1alpha1.Dash0) error { namespace := dash0CustomResource.Namespace - listOptions := metav1.ListOptions{ - LabelSelector: fmt.Sprintf("!%s", util.Dash0AutoInstrumentationLabel), + errCronJobs := r.findAndModifyCronJobs(ctx, namespace) + errDaemonSets := r.findAndModifyDaemonSets(ctx, namespace) + errDeployments := r.findAndModifyDeployments(ctx, namespace) + errJobs := r.findAndHandleJobs(ctx, namespace) + errReplicaSets := r.findAndModifyReplicaSets(ctx, namespace) + errStatefulSets := r.findAndModifyStatefulSets(ctx, namespace) + combinedErrors := errors.Join( + errCronJobs, + errDaemonSets, + errDeployments, + errJobs, + errReplicaSets, + errStatefulSets, + ) + if combinedErrors != nil { + return combinedErrors } + return nil +} - deploymentsInNamespace, err := r.ClientSet.AppsV1().Deployments(namespace).List(ctx, listOptions) +func (r *Dash0Reconciler) findAndModifyCronJobs(ctx context.Context, namespace string) error { + matchingResourcesInNamespace, err := r.ClientSet.BatchV1().CronJobs(namespace).List(ctx, labelNotSetFilter) if err != nil { - return fmt.Errorf("error when querying deployments: %w", err) + return fmt.Errorf("error when querying cron jobs: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.modifyCronJob(ctx, resource) + } + return nil +} + +func (r *Dash0Reconciler) modifyCronJob(ctx context.Context, cronJob batchv1.CronJob) { + if cronJob.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := log.FromContext(ctx).WithValues( + resourceTypeLabel, + "CronJob", + resourceNamespaceLabel, + cronJob.GetNamespace(), + resourceNameLabel, + cronJob.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("modifying cron job", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: cronJob.GetNamespace(), + Name: cronJob.GetName(), + }, &cronJob); err != nil { + return fmt.Errorf("error when fetching cron job %s/%s: %w", cronJob.GetNamespace(), cronJob.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).ModifyCronJob(&cronJob, cronJob.GetNamespace()) + if hasBeenModified { + return r.Client.Update(ctx, &cronJob) + } else { + return nil + } + }, &logger) + + r.postProcess(&cronJob, hasBeenModified, logger, 
retryErr) +} + +func (r *Dash0Reconciler) findAndModifyDaemonSets(ctx context.Context, namespace string) error { + matchingResourcesInNamespace, err := r.ClientSet.AppsV1().DaemonSets(namespace).List(ctx, labelNotSetFilter) + if err != nil { + return fmt.Errorf("error when querying daemon sets: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.modifyDaemonSet(ctx, resource) } + return nil +} + +func (r *Dash0Reconciler) modifyDaemonSet(ctx context.Context, daemonSet appsv1.DaemonSet) { + if daemonSet.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := log.FromContext(ctx).WithValues( + resourceTypeLabel, + "DaemonSet", + resourceNamespaceLabel, + daemonSet.GetNamespace(), + resourceNameLabel, + daemonSet.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("modifying daemon set", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: daemonSet.GetNamespace(), + Name: daemonSet.GetName(), + }, &daemonSet); err != nil { + return fmt.Errorf("error when fetching daemon set %s/%s: %w", daemonSet.GetNamespace(), daemonSet.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).ModifyDaemonSet(&daemonSet, daemonSet.GetNamespace()) + if hasBeenModified { + return r.Client.Update(ctx, &daemonSet) + } else { + return nil + } + }, &logger) + + r.postProcess(&daemonSet, hasBeenModified, logger, retryErr) +} - for _, deployment := range deploymentsInNamespace.Items { - r.modifySingleResource(ctx, deployment) +func (r *Dash0Reconciler) findAndModifyDeployments(ctx context.Context, namespace string) error { + matchingResourcesInNamespace, err := r.ClientSet.AppsV1().Deployments(namespace).List(ctx, labelNotSetFilter) + if err != nil { + return fmt.Errorf("error when querying deployments: %w", err) + } + for _, resource := range matchingResourcesInNamespace.Items { + r.modifyDeployment(ctx, resource) } return nil } -func (r *Dash0Reconciler) modifySingleResource(ctx context.Context, deployment appsv1.Deployment) { - logger := log.FromContext(ctx).WithValues("resource type", "deployment", "resource namespace", deployment.GetNamespace(), "resource name", deployment.GetName()) +func (r *Dash0Reconciler) modifyDeployment(ctx context.Context, deployment appsv1.Deployment) { + if deployment.DeletionTimestamp != nil { + // do not modify resources that are being deleted + return + } + logger := log.FromContext(ctx).WithValues( + resourceTypeLabel, + "Deployment", + resourceNamespaceLabel, + deployment.GetNamespace(), + resourceNameLabel, + deployment.GetName(), + ) hasBeenModified := false - retryErr := util.Retry("Modifying deployment", func() error { + retryErr := util.Retry("modifying deployment", func() error { if err := r.Client.Get(ctx, client.ObjectKey{ Namespace: deployment.GetNamespace(), Name: deployment.GetName(), }, &deployment); err != nil { return fmt.Errorf("error when fetching deployment %s/%s: %w", deployment.GetNamespace(), deployment.GetName(), err) } - hasBeenModified = k8sresources.ModifyDeployment( - &deployment, - deployment.GetNamespace(), - r.Versions, + hasBeenModified = r.newResourceModifier(&logger).ModifyDeployment(&deployment, deployment.GetNamespace()) + if hasBeenModified { + return r.Client.Update(ctx, &deployment) + } else { + return nil + } + }, &logger) + + r.postProcess(&deployment, hasBeenModified, logger, retryErr) +} + +func (r *Dash0Reconciler) findAndHandleJobs(ctx context.Context, namespace string) error { + matchingResourcesInNamespace, 
err := r.ClientSet.BatchV1().Jobs(namespace).List(ctx, labelNotSetFilter)
+	if err != nil {
+		return fmt.Errorf("error when querying jobs: %w", err)
+	}
+
+	for _, job := range matchingResourcesInNamespace.Items {
+		r.handleJob(ctx, job)
+	}
+	return nil
+}
+
+func (r *Dash0Reconciler) handleJob(ctx context.Context, job batchv1.Job) {
+	if job.DeletionTimestamp != nil {
+		// do not modify resources that are being deleted
+		return
+	}
+	logger := log.FromContext(ctx).WithValues(
+		resourceTypeLabel,
+		"Job",
+		resourceNamespaceLabel,
+		job.GetNamespace(),
+		resourceNameLabel,
+		job.GetName(),
+	)
+	retryErr := util.Retry("labelling immutable job", func() error {
+		if err := r.Client.Get(ctx, client.ObjectKey{
+			Namespace: job.GetNamespace(),
+			Name: job.GetName(),
+		}, &job); err != nil {
+			return fmt.Errorf("error when fetching job %s/%s: %w", job.GetNamespace(), job.GetName(), err)
+		}
+		r.newResourceModifier(&logger).AddLabelsToImmutableJob(&job)
+		return r.Client.Update(ctx, &job)
+	}, &logger)
+
+	if retryErr != nil {
+		r.postProcess(&job, false, logger, retryErr)
+	} else {
+		r.postProcess(
+			&job,
+			false, logger,
+			ImmutableResourceError{
+				resourceType: "job",
+				resource: fmt.Sprintf("%s/%s", job.GetNamespace(), job.GetName()),
+			},
 		)
+	}
+}
+
+func (r *Dash0Reconciler) findAndModifyReplicaSets(ctx context.Context, namespace string) error {
+	matchingResourcesInNamespace, err := r.ClientSet.AppsV1().ReplicaSets(namespace).List(ctx, labelNotSetFilter)
+	if err != nil {
+		return fmt.Errorf("error when querying replica sets: %w", err)
+	}
+	for _, resource := range matchingResourcesInNamespace.Items {
+		r.modifyReplicaSet(ctx, resource)
+	}
+	return nil
+}
+
+func (r *Dash0Reconciler) modifyReplicaSet(ctx context.Context, replicaSet appsv1.ReplicaSet) {
+	if replicaSet.DeletionTimestamp != nil {
+		// do not modify resources that are being deleted
+		return
+	}
+	logger := log.FromContext(ctx).WithValues(
+		resourceTypeLabel,
+		"ReplicaSet",
+		resourceNamespaceLabel,
+		replicaSet.GetNamespace(),
+		resourceNameLabel,
+		replicaSet.GetName(),
+	)
+	hasBeenModified := false
+	retryErr := util.Retry("modifying replicaset", func() error {
+		if err := r.Client.Get(ctx, client.ObjectKey{
+			Namespace: replicaSet.GetNamespace(),
+			Name: replicaSet.GetName(),
+		}, &replicaSet); err != nil {
+			return fmt.Errorf("error when fetching replicaset %s/%s: %w", replicaSet.GetNamespace(), replicaSet.GetName(), err)
+		}
+		hasBeenModified = r.newResourceModifier(&logger).ModifyReplicaSet(&replicaSet, replicaSet.GetNamespace())
 		if hasBeenModified {
-			return r.Client.Update(ctx, &deployment)
+			return r.Client.Update(ctx, &replicaSet)
 		} else {
 			return nil
 		}
 	}, &logger)
+	r.postProcess(&replicaSet, hasBeenModified, logger, retryErr)
+}
+
+func (r *Dash0Reconciler) findAndModifyStatefulSets(ctx context.Context, namespace string) error {
+	matchingResourcesInNamespace, err := r.ClientSet.AppsV1().StatefulSets(namespace).List(ctx, labelNotSetFilter)
+	if err != nil {
+		return fmt.Errorf("error when querying stateful sets: %w", err)
+	}
+	for _, resource := range matchingResourcesInNamespace.Items {
+		r.modifyStatefulSet(ctx, resource)
+	}
+	return nil
+}
+
+func (r *Dash0Reconciler) modifyStatefulSet(ctx context.Context, statefulSet appsv1.StatefulSet) {
+	if statefulSet.DeletionTimestamp != nil {
+		// do not modify resources that are being deleted
+		return
+	}
+	logger := log.FromContext(ctx).WithValues(
+		resourceTypeLabel,
+		"StatefulSet",
+		resourceNamespaceLabel,
+		statefulSet.GetNamespace(),
+
resourceNameLabel, + statefulSet.GetName(), + ) + hasBeenModified := false + retryErr := util.Retry("modifying stateful set", func() error { + if err := r.Client.Get(ctx, client.ObjectKey{ + Namespace: statefulSet.GetNamespace(), + Name: statefulSet.GetName(), + }, &statefulSet); err != nil { + return fmt.Errorf("error when fetching stateful set %s/%s: %w", statefulSet.GetNamespace(), statefulSet.GetName(), err) + } + hasBeenModified = r.newResourceModifier(&logger).ModifyStatefulSet(&statefulSet, statefulSet.GetNamespace()) + if hasBeenModified { + return r.Client.Update(ctx, &statefulSet) + } else { + return nil + } + }, &logger) + + r.postProcess(&statefulSet, hasBeenModified, logger, retryErr) +} + +func (r *Dash0Reconciler) newResourceModifier(logger *logr.Logger) *k8sresources.ResourceModifier { + return k8sresources.NewResourceModifier( + util.InstrumentationMetadata{ + Versions: r.Versions, + InstrumentedBy: "controller", + }, + logger, + ) +} + +func (r *Dash0Reconciler) postProcess( + resource runtime.Object, + hasBeenModified bool, + logger logr.Logger, + retryErr error, +) { if retryErr != nil { - logger.Error(retryErr, "Dash0 instrumentation by controller has not been successful.") - util.QueueFailedInstrumentationEvent(r.Recorder, &deployment, "controller", retryErr) + e := &ImmutableResourceError{} + if errors.As(retryErr, e) { + logger.Info(e.Error()) + } else { + logger.Error(retryErr, "Dash0 instrumentation by controller has not been successful.") + } + util.QueueFailedInstrumentationEvent(r.Recorder, resource, "controller", retryErr) } else if !hasBeenModified { logger.Info("Dash0 instrumentation already present, no modification by controller is necessary.") - util.QueueAlreadyInstrumentedEvent(r.Recorder, &deployment, "controller") + util.QueueAlreadyInstrumentedEvent(r.Recorder, resource, "controller") } else { logger.Info("The controller has added Dash0 instrumentation to the resource.") - util.QueueSuccessfulInstrumentationEvent(r.Recorder, &deployment, "controller") + util.QueueSuccessfulInstrumentationEvent(r.Recorder, resource, "controller") } } diff --git a/internal/controller/dash0_controller_test.go b/internal/controller/dash0_controller_test.go index b9a2cfa8..e43631a9 100644 --- a/internal/controller/dash0_controller_test.go +++ b/internal/controller/dash0_controller_test.go @@ -7,26 +7,36 @@ import ( "context" "time" - "k8s.io/apimachinery/pkg/api/meta" - . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - . "github.com/dash0hq/dash0-operator/test/util" - + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" - "github.com/dash0hq/dash0-operator/internal/k8sresources" + "github.com/dash0hq/dash0-operator/internal/util" + + . 
"github.com/dash0hq/dash0-operator/test/util" +) + +const ( + dash0CustomResourceName = "dash0-test-resource" ) var ( + dash0CustomResourceQualifiedName = types.NamespacedName{ + Namespace: TestNamespaceName, + Name: dash0CustomResourceName, + } + namespace = dash0CustomResourceQualifiedName.Namespace + timeout = 15 * time.Second pollingInterval = 50 * time.Millisecond - versions = k8sresources.Versions{ + versions = util.Versions{ OperatorVersion: "1.2.3", InitContainerImageVersion: "4.5.6", } @@ -34,33 +44,27 @@ var ( var _ = Describe("Dash0 Controller", func() { Context("When reconciling a resource", func() { - const resourceName = "dash0-test-resource" ctx := context.Background() + var createdObjects []client.Object - dash0ResourceName := types.NamespacedName{ - Namespace: TestNamespaceName, - Name: resourceName, - } - namespace := dash0ResourceName.Namespace var reconciler *Dash0Reconciler BeforeEach(func() { - By("creating the custom resource for the Kind Dash0", func() { - EnsureTestNamespaceExists(ctx, k8sClient, namespace) - EnsureResourceExists( - ctx, - k8sClient, - dash0ResourceName, - &operatorv1alpha1.Dash0{}, - &operatorv1alpha1.Dash0{ - ObjectMeta: metav1.ObjectMeta{ - Name: dash0ResourceName.Name, - Namespace: dash0ResourceName.Namespace, - }, + By("creating the custom resource for the Kind Dash0") + EnsureTestNamespaceExists(ctx, k8sClient, namespace) + EnsureDash0CustomResourceExists( + ctx, + k8sClient, + dash0CustomResourceQualifiedName, + &operatorv1alpha1.Dash0{}, + &operatorv1alpha1.Dash0{ + ObjectMeta: metav1.ObjectMeta{ + Name: dash0CustomResourceQualifiedName.Name, + Namespace: dash0CustomResourceQualifiedName.Namespace, }, - ) - }) + }, + ) reconciler = &Dash0Reconciler{ Client: k8sClient, @@ -69,98 +73,200 @@ var _ = Describe("Dash0 Controller", func() { Scheme: k8sClient.Scheme(), Versions: versions, } + createdObjects = make([]client.Object, 0) }) AfterEach(func() { - By("Cleanup the Dash0 resource instance", func() { - resource := &operatorv1alpha1.Dash0{} - err := k8sClient.Get(ctx, dash0ResourceName, resource) - Expect(err).NotTo(HaveOccurred()) - Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) - }) + By("Cleanup the Dash0 resource instance") + dash0CustomResource := &operatorv1alpha1.Dash0{} + err := k8sClient.Get(ctx, dash0CustomResourceQualifiedName, dash0CustomResource) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed()) + + for _, object := range createdObjects { + Expect(k8sClient.Delete(ctx, object, &client.DeleteOptions{ + GracePeriodSeconds: new(int64), + })).To(Succeed()) + } + + DeleteAllEvents(ctx, clientset, namespace) + allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(allEvents.Items).To(BeEmpty()) }) It("should successfully run the first reconcile (no modifiable resources exist)", func() { - By("Reconciling the created resource", func() { - _, err := reconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: dash0ResourceName, - }) - Expect(err).NotTo(HaveOccurred()) + By("Reconciling the created resource") + _, err := reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: dash0CustomResourceQualifiedName, }) + Expect(err).NotTo(HaveOccurred()) - verifyStatusConditions(ctx, dash0ResourceName) + verifyStatusConditions(ctx, dash0CustomResourceQualifiedName) }) It("should successfully run multiple reconciles (no modifiable resources exist)", func() { - By("First reconcile request", func() { - _, 
err := reconciler.Reconcile(ctx, reconcile.Request{
-					NamespacedName: dash0ResourceName,
-				})
-				Expect(err).NotTo(HaveOccurred())
+			By("First reconcile request")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
 			})
+			Expect(err).NotTo(HaveOccurred())
 
-			firstAvailableStatusCondition := verifyStatusConditions(ctx, dash0ResourceName)
+			firstAvailableStatusCondition := verifyStatusConditions(ctx, dash0CustomResourceQualifiedName)
 			originalTransitionTimestamp := firstAvailableStatusCondition.LastTransitionTime.Time
 
 			time.Sleep(50 * time.Millisecond)
 
-			By("Second reconcile request", func() {
-				_, err := reconciler.Reconcile(ctx, reconcile.Request{
-					NamespacedName: dash0ResourceName,
-				})
-				Expect(err).NotTo(HaveOccurred())
+			By("Second reconcile request")
+			_, err = reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
 			})
+			Expect(err).NotTo(HaveOccurred())
 
 			// The LastTransitionTime should not change with subsequent reconciliations.
-			secondAvailableCondition := verifyStatusConditions(ctx, dash0ResourceName)
+			secondAvailableCondition := verifyStatusConditions(ctx, dash0CustomResourceQualifiedName)
 			Expect(secondAvailableCondition.LastTransitionTime.Time).To(Equal(originalTransitionTimestamp))
 		})
 
+		It("should modify an existing cron job", func() {
+			name := CronJobName
+			By("Initialize a cron job")
+			cronJob := CreateBasicCronJob(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, cronJob)
+
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
+			})
+			Expect(err).NotTo(HaveOccurred())
+
+			verifyStatusConditionAndSuccessEvent(ctx, namespace, name)
+			VerifyModifiedCronJob(GetCronJob(ctx, k8sClient, namespace, name), BasicPodSpecExpectations)
+		})
+
+		It("should modify an existing daemon set", func() {
+			name := DaemonSetName
+			By("Initialize a daemon set")
+			daemonSet := CreateBasicDaemonSet(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, daemonSet)
+
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
+			})
+			Expect(err).NotTo(HaveOccurred())
+
+			verifyStatusConditionAndSuccessEvent(ctx, namespace, name)
+			VerifyModifiedDaemonSet(GetDaemonSet(ctx, k8sClient, namespace, name), BasicPodSpecExpectations)
+		})
+
 		It("should modify an existing deployment", func() {
-			deploymentName := DeploymentNameExisting
-			By("Inititalize a deployment", func() {
-				CreateBasicDeployment(ctx, k8sClient, namespace, deploymentName)
+			name := DeploymentName
+			By("Initialize a deployment")
+			deployment := CreateBasicDeployment(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, deployment)
+
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
 			})
+			Expect(err).NotTo(HaveOccurred())
+
+			verifyStatusConditionAndSuccessEvent(ctx, namespace, name)
+			VerifyModifiedDeployment(GetDeployment(ctx, k8sClient, namespace, name), BasicPodSpecExpectations)
+		})
+
+		It("should record a failure event for an existing job and add labels", func() {
+			name := JobName
+			By("Initialize a job")
+			job := CreateBasicJob(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, job)
 
-			By("Reconciling the created resource", func() {
-				_, err := reconciler.Reconcile(ctx, 
reconcile.Request{
-					NamespacedName: dash0ResourceName,
-				})
-				Expect(err).NotTo(HaveOccurred())
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
 			})
+			Expect(err).NotTo(HaveOccurred())
 
-			verifyStatusConditions(ctx, dash0ResourceName)
-			VerifyModifiedDeployment(GetDeployment(ctx, k8sClient, namespace, deploymentName), DeploymentExpectations{
-				Volumes: 1,
-				Dash0VolumeIdx: 0,
-				InitContainers: 1,
-				Dash0InitContainerIdx: 0,
-				Containers: []ContainerExpectations{{
-					VolumeMounts: 1,
-					Dash0VolumeMountIdx: 0,
-					EnvVars: 2,
-					NodeOptionsEnvVarIdx: 0,
-					Dash0CollectorBaseUrlEnvVarIdx: 1,
-					Dash0CollectorBaseUrlEnvVarExpectedValue: "http://dash0-opentelemetry-collector-daemonset.test-namespace.svc.cluster.local:4318",
-				}},
+			verifyStatusConditions(ctx, dash0CustomResourceQualifiedName)
+			VerifyFailureEvent(
+				ctx,
+				clientset,
+				namespace,
+				name,
+				"controller",
+				"Dash0 instrumentation by controller has not been successful. Error message: Dash0 cannot"+
+					" instrument the existing job test-namespace/job, since this type of resource is immutable.",
+			)
+			VerifyUnmodifiedJob(GetJob(ctx, k8sClient, namespace, name))
+		})
+
+		It("should modify an existing orphan replicaset", func() {
+			name := DeploymentName
+			By("Initialize a replicaset")
+			replicaSet := CreateBasicReplicaSet(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, replicaSet)
+
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
 			})
-			VerifySuccessEvent(ctx, clientset, namespace, deploymentName, "controller")
+			Expect(err).NotTo(HaveOccurred())
+
+			verifyStatusConditionAndSuccessEvent(ctx, namespace, name)
+			VerifyModifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name), BasicPodSpecExpectations)
+		})
+
+		It("should not modify an existing replicaset with an owner", func() {
+			name := DeploymentName
+			By("Initialize a replicaset")
+			replicaSet := CreateReplicaSetOwnedByDeployment(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, replicaSet)
+
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
+			})
+			Expect(err).NotTo(HaveOccurred())
+
+			verifyStatusConditions(ctx, dash0CustomResourceQualifiedName)
+			VerifyUnmodifiedReplicaSet(GetReplicaSet(ctx, k8sClient, namespace, name))
+		})
+
+		It("should modify an existing stateful set", func() {
+			name := StatefulSetName
+			By("Initialize a stateful set")
+			statefulSet := CreateBasicStatefulSet(ctx, k8sClient, namespace, name)
+			createdObjects = append(createdObjects, statefulSet)
+
+			By("Reconciling the created resource")
+			_, err := reconciler.Reconcile(ctx, reconcile.Request{
+				NamespacedName: dash0CustomResourceQualifiedName,
+			})
+			Expect(err).NotTo(HaveOccurred())
+
+			verifyStatusConditionAndSuccessEvent(ctx, namespace, name)
+			VerifyModifiedStatefulSet(GetStatefulSet(ctx, k8sClient, namespace, name), BasicPodSpecExpectations)
+		})
 	})
 })
 
+func verifyStatusConditionAndSuccessEvent(ctx context.Context, namespace string, name string) {
+	verifyStatusConditions(ctx, dash0CustomResourceQualifiedName)
+	VerifySuccessEvent(ctx, clientset, namespace, name, "controller")
+}
+
 func verifyStatusConditions(ctx context.Context, typeNamespacedName types.NamespacedName) *metav1.Condition {
 	var available *metav1.Condition
-	By("Verifying status conditions", 
func() { - Eventually(func(g Gomega) { - dash0 := &operatorv1alpha1.Dash0{} - g.Expect(k8sClient.Get(ctx, typeNamespacedName, dash0)).To(Succeed()) - available = meta.FindStatusCondition(dash0.Status.Conditions, string(operatorv1alpha1.ConditionTypeAvailable)) - g.Expect(available).NotTo(BeNil()) - g.Expect(available.Status).To(Equal(metav1.ConditionTrue)) - degraded := meta.FindStatusCondition(dash0.Status.Conditions, string(operatorv1alpha1.ConditionTypeDegraded)) - g.Expect(degraded).To(BeNil()) - }, timeout, pollingInterval).Should(Succeed()) - }) + By("Verifying status conditions") + Eventually(func(g Gomega) { + dash0 := &operatorv1alpha1.Dash0{} + g.Expect(k8sClient.Get(ctx, typeNamespacedName, dash0)).To(Succeed()) + available = meta.FindStatusCondition(dash0.Status.Conditions, string(util.ConditionTypeAvailable)) + g.Expect(available).NotTo(BeNil()) + g.Expect(available.Status).To(Equal(metav1.ConditionTrue)) + degraded := meta.FindStatusCondition(dash0.Status.Conditions, string(util.ConditionTypeDegraded)) + g.Expect(degraded).To(BeNil()) + }, timeout, pollingInterval).Should(Succeed()) return available } diff --git a/internal/controller/status_conditions.go b/internal/controller/status_conditions.go index c312b6f8..2da590cd 100644 --- a/internal/controller/status_conditions.go +++ b/internal/controller/status_conditions.go @@ -4,16 +4,18 @@ package controller import ( - operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" + "github.com/dash0hq/dash0-operator/internal/util" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" ) func setAvailableConditionToUnknown(dash0CustomResource *operatorv1alpha1.Dash0) { meta.SetStatusCondition( &dash0CustomResource.Status.Conditions, metav1.Condition{ - Type: string(operatorv1alpha1.ConditionTypeAvailable), + Type: string(util.ConditionTypeAvailable), Status: metav1.ConditionUnknown, Reason: "ReconcileStarted", Message: "Dash0 has started resource reconciliation.", @@ -27,10 +29,10 @@ func ensureResourceIsMarkedAsAvailable(dash0CustomResource *operatorv1alpha1.Das meta.SetStatusCondition( &dash0CustomResource.Status.Conditions, metav1.Condition{ - Type: string(operatorv1alpha1.ConditionTypeAvailable), + Type: string(util.ConditionTypeAvailable), Status: metav1.ConditionTrue, Reason: "ReconcileFinished", Message: "Dash0 is is active in this namespace now.", }) - meta.RemoveStatusCondition(&dash0CustomResource.Status.Conditions, string(operatorv1alpha1.ConditionTypeDegraded)) + meta.RemoveStatusCondition(&dash0CustomResource.Status.Conditions, string(util.ConditionTypeDegraded)) } diff --git a/internal/k8sresources/modify.go b/internal/k8sresources/modify.go index 5d2db217..3958dc41 100644 --- a/internal/k8sresources/modify.go +++ b/internal/k8sresources/modify.go @@ -7,12 +7,16 @@ import ( "fmt" "reflect" "slices" + "strconv" "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/dash0hq/dash0-operator/internal/util" ) const ( @@ -25,14 +29,15 @@ const ( dash0InstrumentationDirectory = "/opt/dash0/instrumentation" // envVarLdPreloadName = "LD_PRELOAD" // envVarLdPreloadValue = "/opt/dash0/preload/inject.so" - envVarNodeOptionsName = "NODE_OPTIONS" - envVarNodeOptionsValue = "--require 
/opt/dash0/instrumentation/node.js/node_modules/@dash0/opentelemetry/src/index.js" - envVarDash0CollectorBaseUrlName = "DASH0_OTEL_COLLECTOR_BASE_URL" - envVarDash0CollectorBaseUrlNameValueTemplate = "http://dash0-opentelemetry-collector-daemonset.%s.svc.cluster.local:4318" + envVarNodeOptionsName = "NODE_OPTIONS" + envVarNodeOptionsValue = "--require /opt/dash0/instrumentation/node.js/node_modules/@dash0/opentelemetry/src/index.js" + envVarDash0CollectorBaseUrlName = "DASH0_OTEL_COLLECTOR_BASE_URL" + envVarDash0CollectorBaseUrlValueTemplate = "http://dash0-opentelemetry-collector-daemonset.%s.svc.cluster.local:4318" instrumentedLabelKey = "dash0.instrumented" operatorVersionLabelKey = "dash0.operator.version" initContainerImageVersionLabelKey = "dash0.initcontainer.image.version" + instrumentedByLabelKey = "dash0.instrumented.by" ) var ( @@ -43,44 +48,83 @@ var ( initContainerReadOnlyRootFilesystem = true ) -type Versions struct { - OperatorVersion string - InitContainerImageVersion string +type ResourceModifier struct { + instrumentationMetadata util.InstrumentationMetadata + logger *logr.Logger +} + +func NewResourceModifier( + instrumentationMetadata util.InstrumentationMetadata, + logger *logr.Logger, +) *ResourceModifier { + return &ResourceModifier{ + instrumentationMetadata: instrumentationMetadata, + logger: logger, + } +} + +func (m *ResourceModifier) ModifyCronJob(cronJob *batchv1.CronJob, namespace string) bool { + return m.modifyResource(&cronJob.Spec.JobTemplate.Spec.Template, &cronJob.ObjectMeta, namespace) +} + +func (m *ResourceModifier) ModifyDaemonSet(daemonSet *appsv1.DaemonSet, namespace string) bool { + return m.modifyResource(&daemonSet.Spec.Template, &daemonSet.ObjectMeta, namespace) +} + +func (m *ResourceModifier) ModifyDeployment(deployment *appsv1.Deployment, namespace string) bool { + return m.modifyResource(&deployment.Spec.Template, &deployment.ObjectMeta, namespace) +} + +func (m *ResourceModifier) ModifyJob(job *batchv1.Job, namespace string) bool { + return m.modifyResource(&job.Spec.Template, &job.ObjectMeta, namespace) +} + +func (m *ResourceModifier) AddLabelsToImmutableJob(job *batchv1.Job) { + m.addInstrumentationLabels(&job.ObjectMeta, false) } -func ModifyDeployment( - deployment *appsv1.Deployment, - namespace string, - versions Versions, - logger logr.Logger, -) bool { - podTemplateSpec := &deployment.Spec.Template - hasBeenModified := modifyPodSpec( +func (m *ResourceModifier) ModifyReplicaSet(replicaSet *appsv1.ReplicaSet, namespace string) bool { + ownerReferences := replicaSet.ObjectMeta.OwnerReferences + if len(ownerReferences) > 0 { + for _, ownerReference := range ownerReferences { + if ownerReference.APIVersion == "apps/v1" && ownerReference.Kind == "Deployment" { + return false + } + } + } + + return m.modifyResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta, namespace) +} + +func (m *ResourceModifier) ModifyStatefulSet(statefulSet *appsv1.StatefulSet, namespace string) bool { + return m.modifyResource(&statefulSet.Spec.Template, &statefulSet.ObjectMeta, namespace) +} + +func (m *ResourceModifier) modifyResource(podTemplateSpec *corev1.PodTemplateSpec, meta *metav1.ObjectMeta, namespace string) bool { + hasBeenModified := m.modifyPodSpec( &podTemplateSpec.Spec, - namespace, - versions.InitContainerImageVersion, - logger, + fmt.Sprintf(envVarDash0CollectorBaseUrlValueTemplate, namespace), ) if hasBeenModified { - addInstrumentationLabels(&deployment.ObjectMeta, versions) - addInstrumentationLabels(&podTemplateSpec.ObjectMeta, 
versions) + m.addInstrumentationLabels(meta, true) + m.addInstrumentationLabels(&podTemplateSpec.ObjectMeta, true) } return hasBeenModified } -func modifyPodSpec(podSpec *corev1.PodSpec, namespace string, initContainerImageVersion string, logger logr.Logger) bool { +func (m *ResourceModifier) modifyPodSpec(podSpec *corev1.PodSpec, dash0CollectorBaseUrl string) bool { originalSpec := podSpec.DeepCopy() - addInstrumentationVolume(podSpec) - addInitContainer(podSpec, initContainerImageVersion) + m.addInstrumentationVolume(podSpec) + m.addInitContainer(podSpec) for idx := range podSpec.Containers { container := &podSpec.Containers[idx] - instrumentContainer(container, namespace, logger) + m.instrumentContainer(container, dash0CollectorBaseUrl) } return !reflect.DeepEqual(originalSpec, podSpec) } -func addInstrumentationVolume(podSpec *corev1.PodSpec) { +func (m *ResourceModifier) addInstrumentationVolume(podSpec *corev1.PodSpec) { if podSpec.Volumes == nil { podSpec.Volumes = make([]corev1.Volume, 0) } @@ -103,7 +147,7 @@ func addInstrumentationVolume(podSpec *corev1.PodSpec) { } } -func addInitContainer(podSpec *corev1.PodSpec, initContainerImageVersion string) { +func (m *ResourceModifier) addInitContainer(podSpec *corev1.PodSpec) { // The init container has all the instrumentation packages (e.g. the Dash0 Node.js distribution etc.), stored under // /dash0/instrumentation. Its main responsibility is to copy these files to the Kubernetes volume created and mounted in // addInstrumentationVolume (mounted at /opt/dash0/instrumentation in the init container and also in the target containers). @@ -114,7 +158,7 @@ func addInitContainer(podSpec *corev1.PodSpec, initContainerImageVersion string) idx := slices.IndexFunc(podSpec.InitContainers, func(c corev1.Container) bool { return c.Name == initContainerName }) - initContainer := createInitContainer(podSpec, initContainerImageVersion) + initContainer := m.createInitContainer(podSpec) if idx < 0 { podSpec.InitContainers = append(podSpec.InitContainers, *initContainer) } else { @@ -122,7 +166,7 @@ func addInitContainer(podSpec *corev1.PodSpec, initContainerImageVersion string) } } -func createInitContainer(podSpec *corev1.PodSpec, initContainerImageVersion string) *corev1.Container { +func (m *ResourceModifier) createInitContainer(podSpec *corev1.PodSpec) *corev1.Container { initContainerUser := &defaultInitContainerUser initContainerGroup := &defaultInitContainerGroup @@ -137,7 +181,7 @@ func createInitContainer(podSpec *corev1.PodSpec, initContainerImageVersion stri return &corev1.Container{ Name: initContainerName, - Image: fmt.Sprintf(initContainerImageTemplate, initContainerImageVersion), + Image: fmt.Sprintf(initContainerImageTemplate, m.instrumentationMetadata.InitContainerImageVersion), Env: []corev1.EnvVar{ { Name: dash0DirectoryEnvVarName, @@ -162,13 +206,13 @@ func createInitContainer(podSpec *corev1.PodSpec, initContainerImageVersion stri } } -func instrumentContainer(container *corev1.Container, namespace string, logger logr.Logger) { - logger = logger.WithValues("container", container.Name) - addMount(container) - addEnvironmentVariables(container, namespace, logger) +func (m *ResourceModifier) instrumentContainer(container *corev1.Container, dash0CollectorBaseUrl string) { + perContainerLogger := m.logger.WithValues("container", container.Name) + m.addMount(container) + m.addEnvironmentVariables(container, dash0CollectorBaseUrl, perContainerLogger) } -func addMount(container *corev1.Container) { +func (m *ResourceModifier) 
addMount(container *corev1.Container) { if container.VolumeMounts == nil { container.VolumeMounts = make([]corev1.VolumeMount, 0) } @@ -187,17 +231,23 @@ func addMount(container *corev1.Container) { } } -func addEnvironmentVariables(container *corev1.Container, namespace string, logger logr.Logger) { +func (m *ResourceModifier) addEnvironmentVariables(container *corev1.Container, dash0CollectorBaseUrl string, perContainerLogger logr.Logger) { // For now, we directly modify NODE_OPTIONS. Consider migrating to an LD_PRELOAD hook at some point. - addOrPrependToEnvironmentVariable(container, envVarNodeOptionsName, envVarNodeOptionsValue, logger) + m.addOrPrependToEnvironmentVariable(container, envVarNodeOptionsName, envVarNodeOptionsValue, perContainerLogger) - addOrReplaceEnvironmentVariable( + m.addOrReplaceEnvironmentVariable( container, envVarDash0CollectorBaseUrlName, - fmt.Sprintf(envVarDash0CollectorBaseUrlNameValueTemplate, namespace)) + dash0CollectorBaseUrl, + ) } -func addOrPrependToEnvironmentVariable(container *corev1.Container, name string, value string, logger logr.Logger) { +func (m *ResourceModifier) addOrPrependToEnvironmentVariable( + container *corev1.Container, + name string, + value string, + perContainerLogger logr.Logger, +) { if container.Env == nil { container.Env = make([]corev1.EnvVar, 0) } @@ -214,7 +264,7 @@ func addOrPrependToEnvironmentVariable(container *corev1.Container, name string, envVar := container.Env[idx] previousValue := envVar.Value if previousValue == "" && envVar.ValueFrom != nil { - logger.Info( + perContainerLogger.Info( fmt.Sprintf( "Dash0 cannot prepend anything to the environment variable %s as it is specified via "+ "ValueFrom. This container will not be instrumented.", @@ -225,7 +275,7 @@ func addOrPrependToEnvironmentVariable(container *corev1.Container, name string, } } -func addOrReplaceEnvironmentVariable(container *corev1.Container, name string, value string) { +func (m *ResourceModifier) addOrReplaceEnvironmentVariable(container *corev1.Container, name string, value string) { if container.Env == nil { container.Env = make([]corev1.EnvVar, 0) } @@ -244,13 +294,17 @@ func addOrReplaceEnvironmentVariable(container *corev1.Container, name string, v } } -func addInstrumentationLabels(meta *v1.ObjectMeta, labelInformation Versions) { - addLabel(meta, instrumentedLabelKey, "true") - addLabel(meta, operatorVersionLabelKey, labelInformation.OperatorVersion) - addLabel(meta, initContainerImageVersionLabelKey, labelInformation.InitContainerImageVersion) +func (m *ResourceModifier) addInstrumentationLabels( + meta *metav1.ObjectMeta, + hasBeenInstrumented bool, +) { + m.addLabel(meta, instrumentedLabelKey, strconv.FormatBool(hasBeenInstrumented)) + m.addLabel(meta, operatorVersionLabelKey, m.instrumentationMetadata.OperatorVersion) + m.addLabel(meta, initContainerImageVersionLabelKey, m.instrumentationMetadata.InitContainerImageVersion) + m.addLabel(meta, instrumentedByLabelKey, m.instrumentationMetadata.InstrumentedBy) } -func addLabel(meta *v1.ObjectMeta, key string, value string) { +func (m *ResourceModifier) addLabel(meta *metav1.ObjectMeta, key string, value string) { if meta.Labels == nil { meta.Labels = make(map[string]string, 1) } diff --git a/internal/k8sresources/modify_test.go b/internal/k8sresources/modify_test.go index d15c070d..e8c90dc1 100644 --- a/internal/k8sresources/modify_test.go +++ b/internal/k8sresources/modify_test.go @@ -11,6 +11,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" + 
"github.com/dash0hq/dash0-operator/internal/util" + . "github.com/dash0hq/dash0-operator/test/util" ) @@ -19,54 +21,35 @@ import ( // be used to verify external effects (recording events etc.) that cannot be covered in this test. var ( - versions = Versions{ - OperatorVersion: "1.2.3", - InitContainerImageVersion: "4.5.6", + instrumentationMetadata = util.InstrumentationMetadata{ + Versions: util.Versions{OperatorVersion: "1.2.3", + InitContainerImageVersion: "4.5.6", + }, + InstrumentedBy: "modify_test", } ) var _ = Describe("Dash0 Resource Modification", func() { ctx := context.Background() + logger := log.FromContext(ctx) + resourceModifier := NewResourceModifier(instrumentationMetadata, &logger) - Context("when mutating new deployments", func() { - It("should inject Dash into a new basic deployment", func() { + Context("when mutating new resources", func() { + It("should inject Dash0 into a new basic deployment", func() { deployment := BasicDeployment(TestNamespaceName, DeploymentName) - result := ModifyDeployment( - deployment, - TestNamespaceName, - versions, - log.FromContext(ctx), - ) + result := resourceModifier.ModifyDeployment(deployment, TestNamespaceName) Expect(result).To(BeTrue()) - VerifyModifiedDeployment(deployment, DeploymentExpectations{ - Volumes: 1, - Dash0VolumeIdx: 0, - InitContainers: 1, - Dash0InitContainerIdx: 0, - Containers: []ContainerExpectations{{ - VolumeMounts: 1, - Dash0VolumeMountIdx: 0, - EnvVars: 2, - NodeOptionsEnvVarIdx: 0, - Dash0CollectorBaseUrlEnvVarIdx: 1, - Dash0CollectorBaseUrlEnvVarExpectedValue: "http://dash0-opentelemetry-collector-daemonset.test-namespace.svc.cluster.local:4318", - }}, - }) + VerifyModifiedDeployment(deployment, BasicPodSpecExpectations) }) - It("should inject Dash into a new deployment that has multiple Containers, and already has Volumes and init Containers", func() { + It("should inject Dash0 into a new deployment that has multiple Containers, and already has Volumes and init Containers", func() { deployment := DeploymentWithMoreBellsAndWhistles(TestNamespaceName, DeploymentName) - result := ModifyDeployment( - deployment, - TestNamespaceName, - versions, - log.FromContext(ctx), - ) + result := resourceModifier.ModifyDeployment(deployment, TestNamespaceName) Expect(result).To(BeTrue()) - VerifyModifiedDeployment(deployment, DeploymentExpectations{ + VerifyModifiedDeployment(deployment, PodSpecExpectations{ Volumes: 3, Dash0VolumeIdx: 2, InitContainers: 3, @@ -92,17 +75,12 @@ var _ = Describe("Dash0 Resource Modification", func() { }) }) - It("should update existing Dash artifacts in a new deployment", func() { + It("should update existing Dash0 artifacts in a new deployment", func() { deployment := DeploymentWithExistingDash0Artifacts(TestNamespaceName, DeploymentName) - result := ModifyDeployment( - deployment, - TestNamespaceName, - versions, - log.FromContext(ctx), - ) + result := resourceModifier.ModifyDeployment(deployment, TestNamespaceName) Expect(result).To(BeTrue()) - VerifyModifiedDeployment(deployment, DeploymentExpectations{ + VerifyModifiedDeployment(deployment, PodSpecExpectations{ Volumes: 3, Dash0VolumeIdx: 1, InitContainers: 3, @@ -129,5 +107,53 @@ var _ = Describe("Dash0 Resource Modification", func() { }, }) }) + + It("should inject Dash0 into a new basic cron job", func() { + resource := BasicCronJob(TestNamespaceName, CronJobName) + result := resourceModifier.ModifyCronJob(resource, TestNamespaceName) + + Expect(result).To(BeTrue()) + VerifyModifiedCronJob(resource, BasicPodSpecExpectations) + }) + + 
It("should inject Dash0 into a new basic daemon set", func() { + resource := BasicDaemonSet(TestNamespaceName, DaemonSetName) + result := resourceModifier.ModifyDaemonSet(resource, TestNamespaceName) + + Expect(result).To(BeTrue()) + VerifyModifiedDaemonSet(resource, BasicPodSpecExpectations) + }) + + It("should inject Dash0 into a new basic job", func() { + resource := BasicJob(TestNamespaceName, JobName) + result := resourceModifier.ModifyJob(resource, TestNamespaceName) + + Expect(result).To(BeTrue()) + VerifyModifiedJob(resource, BasicPodSpecExpectations) + }) + + It("should inject Dash0 into a new basic replica set", func() { + resource := BasicReplicaSet(TestNamespaceName, ReplicaSetName) + result := resourceModifier.ModifyReplicaSet(resource, TestNamespaceName) + + Expect(result).To(BeTrue()) + VerifyModifiedReplicaSet(resource, BasicPodSpecExpectations) + }) + + It("should not inject Dash0 into a new basic replica set that is owned by a deployment", func() { + resource := ReplicaSetOwnedByDeployment(TestNamespaceName, ReplicaSetName) + result := resourceModifier.ModifyReplicaSet(resource, TestNamespaceName) + + Expect(result).To(BeFalse()) + VerifyUnmodifiedReplicaSet(resource) + }) + + It("should inject Dash0 into a new basic stateful set", func() { + resource := BasicStatefulSet(TestNamespaceName, StatefulSetName) + result := resourceModifier.ModifyStatefulSet(resource, TestNamespaceName) + + Expect(result).To(BeTrue()) + VerifyModifiedStatefulSet(resource, BasicPodSpecExpectations) + }) }) }) diff --git a/internal/util/k8sevents.go b/internal/util/k8sevents.go index 5d117feb..28f8990a 100644 --- a/internal/util/k8sevents.go +++ b/internal/util/k8sevents.go @@ -9,15 +9,13 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" - - operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1" ) func QueueSuccessfulInstrumentationEvent(eventRecorder record.EventRecorder, resource runtime.Object, eventSource string) { eventRecorder.Event( resource, corev1.EventTypeNormal, - string(operatorv1alpha1.ReasonSuccessfulInstrumentation), + string(ReasonSuccessfulInstrumentation), fmt.Sprintf("Dash0 instrumentation by %s has been successful.", eventSource), ) } @@ -26,7 +24,7 @@ func QueueAlreadyInstrumentedEvent(eventRecorder record.EventRecorder, resource eventRecorder.Event( resource, corev1.EventTypeNormal, - string(operatorv1alpha1.ReasonSuccessfulInstrumentation), + string(ReasonSuccessfulInstrumentation), fmt.Sprintf("Dash0 instrumentation already present, no modification by %s is necessary.", eventSource), ) } @@ -35,7 +33,7 @@ func QueueFailedInstrumentationEvent(eventRecorder record.EventRecorder, resourc eventRecorder.Event( resource, corev1.EventTypeWarning, - string(operatorv1alpha1.ReasonFailedInstrumentation), + string(ReasonFailedInstrumentation), fmt.Sprintf("Dash0 instrumentation by %s has not been successful. Error message: %s", eventSource, err.Error()), ) } diff --git a/internal/util/types.go b/internal/util/types.go new file mode 100644 index 00000000..5700d1ae --- /dev/null +++ b/internal/util/types.go @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +package util + +type ConditionType string +type Reason string + +const ( + ConditionTypeAvailable ConditionType = "Available" + ConditionTypeDegraded ConditionType = "Degraded" + + ReasonSuccessfulInstrumentation Reason = "SuccessfulInstrumentation" + ReasonFailedInstrumentation Reason = "FailedInstrumentation" +) + +type Versions struct { + OperatorVersion string + InitContainerImageVersion string +} + +type InstrumentationMetadata struct { + Versions + InstrumentedBy string +} diff --git a/internal/webhook/dash0_webhook.go b/internal/webhook/dash0_webhook.go index efd962ae..a86b5202 100644 --- a/internal/webhook/dash0_webhook.go +++ b/internal/webhook/dash0_webhook.go @@ -9,27 +9,69 @@ import ( "fmt" "net/http" - "github.com/dash0hq/dash0-operator/internal/k8sresources" - "github.com/dash0hq/dash0-operator/internal/util" - + "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" -) -var ( - log = logf.Log.WithName("dash0-webhook") - decoder = scheme.Codecs.UniversalDecoder() + "github.com/dash0hq/dash0-operator/internal/k8sresources" + "github.com/dash0hq/dash0-operator/internal/util" ) type Handler struct { Recorder record.EventRecorder - Versions k8sresources.Versions + Versions util.Versions } +type resourceHandler func(h *Handler, request admission.Request, gvkLabel string, logger *logr.Logger) admission.Response +type routing map[string]map[string]map[string]resourceHandler + +var ( + log = logf.Log.WithName("dash0-webhook") + decoder = scheme.Codecs.UniversalDecoder() + + routes = routing{ + "batch": { + "CronJob": { + "v1": (*Handler).handleCronJob, + }, + "Job": { + "v1": (*Handler).handleJob, + }, + }, + "apps": { + "DaemonSet": { + "v1": (*Handler).handleDaemonSet, + }, + "Deployment": { + "v1": (*Handler).handleDeployment, + }, + "ReplicaSet": { + "v1": (*Handler).handleReplicaSet, + }, + "StatefulSet": { + "v1": (*Handler).handleStatefulSet, + }, + }, + } + + fallbackRoute resourceHandler = func( + h *Handler, + request admission.Request, + gvkLabel string, + logger *logr.Logger, + ) admission.Response { + msg := fmt.Sprintf("resource type not supported: %s", gvkLabel) + logger.Info(msg) + return admission.Allowed(msg) + } +) + func (h *Handler) SetupWebhookWithManager(mgr ctrl.Manager) error { webhook := &admission.Webhook{ Handler: h, @@ -52,33 +94,153 @@ func (h *Handler) Handle(_ context.Context, request admission.Request) admission group := gkv.Group version := gkv.Version kind := gkv.Kind + gvkLabel := fmt.Sprintf("%s/%s.%s", group, version, kind) + + return routes.routeFor(group, kind, version)(h, request, gvkLabel, &logger) +} + +func (h *Handler) handleCronJob( + request admission.Request, + gvkLabel string, + logger *logr.Logger, +) admission.Response { + cronJob := &batchv1.CronJob{} + responseIfFailed, failed := h.preProcess(request, gvkLabel, cronJob) + if failed { + return responseIfFailed + } + hasBeenModified := h.newResourceModifier(logger).ModifyCronJob(cronJob, request.Namespace) + return h.postProcess(request, cronJob, hasBeenModified, logger) +} + +func (h *Handler) handleDaemonSet( + request admission.Request, + gvkLabel string, + logger *logr.Logger, +) admission.Response { + daemonSet := &appsv1.DaemonSet{} + responseIfFailed, failed := 
h.preProcess(request, gvkLabel, daemonSet) + if failed { + return responseIfFailed + } + hasBeenModified := h.newResourceModifier(logger).ModifyDaemonSet(daemonSet, request.Namespace) + return h.postProcess(request, daemonSet, hasBeenModified, logger) +} - if group == "apps" && version == "v1" && kind == "Deployment" { - deployment := &appsv1.Deployment{} - if _, _, err := decoder.Decode(request.Object.Raw, nil, deployment); err != nil { - err := fmt.Errorf("cannot parse resource into a %s/%s.%s: %w", group, version, kind, err) - util.QueueFailedInstrumentationEvent(h.Recorder, deployment, "webhook", err) - return admission.Errored(http.StatusInternalServerError, err) - } - - hasBeenModified := k8sresources.ModifyDeployment(deployment, request.Namespace, h.Versions, logger) - if !hasBeenModified { - logger.Info("Dash0 instrumentation already present, no modification by webhook is necessary.") - util.QueueAlreadyInstrumentedEvent(h.Recorder, deployment, "webhook") - return admission.Allowed("no changes") - } - - marshalled, err := json.Marshal(deployment) - if err != nil { - util.QueueFailedInstrumentationEvent(h.Recorder, deployment, "webhook", err) - return admission.Allowed(fmt.Errorf("error when marshalling modfied resource to JSON: %w", err).Error()) - } - - logger.Info("The webhook has added Dash0 instrumentation to the resource.") - util.QueueSuccessfulInstrumentationEvent(h.Recorder, deployment, "webhook") - return admission.PatchResponseFromRaw(request.Object.Raw, marshalled) - } else { - logger.Info("resource type not supported", "group", group, "version", version, "kind", kind) - return admission.Allowed("unknown resource type") +func (h *Handler) handleDeployment( + request admission.Request, + gvkLabel string, + logger *logr.Logger, +) admission.Response { + deployment := &appsv1.Deployment{} + responseIfFailed, failed := h.preProcess(request, gvkLabel, deployment) + if failed { + return responseIfFailed + } + hasBeenModified := h.newResourceModifier(logger).ModifyDeployment(deployment, request.Namespace) + return h.postProcess(request, deployment, hasBeenModified, logger) +} + +func (h *Handler) handleJob( + request admission.Request, + gvkLabel string, + logger *logr.Logger, +) admission.Response { + job := &batchv1.Job{} + responseIfFailed, failed := h.preProcess(request, gvkLabel, job) + if failed { + return responseIfFailed + } + hasBeenModified := h.newResourceModifier(logger).ModifyJob(job, request.Namespace) + return h.postProcess(request, job, hasBeenModified, logger) +} + +func (h *Handler) handleReplicaSet( + request admission.Request, + gvkLabel string, + logger *logr.Logger, +) admission.Response { + replicaSet := &appsv1.ReplicaSet{} + responseIfFailed, failed := h.preProcess(request, gvkLabel, replicaSet) + if failed { + return responseIfFailed + } + hasBeenModified := h.newResourceModifier(logger).ModifyReplicaSet(replicaSet, request.Namespace) + return h.postProcess(request, replicaSet, hasBeenModified, logger) +} + +func (h *Handler) handleStatefulSet( + request admission.Request, + gvkLabel string, + logger *logr.Logger, +) admission.Response { + statefulSet := &appsv1.StatefulSet{} + responseIfFailed, failed := h.preProcess(request, gvkLabel, statefulSet) + if failed { + return responseIfFailed + } + hasBeenModified := h.newResourceModifier(logger).ModifyStatefulSet(statefulSet, request.Namespace) + return h.postProcess(request, statefulSet, hasBeenModified, logger) +} + +func (h *Handler) preProcess( + request admission.Request, + gvkLabel string, + 
resource runtime.Object,
+) (admission.Response, bool) {
+	if _, _, err := decoder.Decode(request.Object.Raw, nil, resource); err != nil {
+		err := fmt.Errorf("cannot parse resource into a %s: %w", gvkLabel, err)
+		util.QueueFailedInstrumentationEvent(h.Recorder, resource, "webhook", err)
+		return admission.Errored(http.StatusInternalServerError, err), true
+	}
+	return admission.Response{}, false
+}
+
+func (h *Handler) postProcess(
+	request admission.Request,
+	resource runtime.Object,
+	hasBeenModified bool,
+	logger *logr.Logger,
+) admission.Response {
+	if !hasBeenModified {
+		logger.Info("Dash0 instrumentation already present, no modification by webhook is necessary.")
+		util.QueueAlreadyInstrumentedEvent(h.Recorder, resource, "webhook")
+		return admission.Allowed("no changes")
+	}
+
+	marshalled, err := json.Marshal(resource)
+	if err != nil {
+		util.QueueFailedInstrumentationEvent(h.Recorder, resource, "webhook", err)
+		return admission.Allowed(fmt.Errorf("error when marshalling modified resource to JSON: %w", err).Error())
+	}
+
+	logger.Info("The webhook has added Dash0 instrumentation to the resource.")
+	util.QueueSuccessfulInstrumentationEvent(h.Recorder, resource, "webhook")
+	return admission.PatchResponseFromRaw(request.Object.Raw, marshalled)
+}
+
+func (h *Handler) newResourceModifier(logger *logr.Logger) *k8sresources.ResourceModifier {
+	return k8sresources.NewResourceModifier(
+		util.InstrumentationMetadata{
+			Versions:       h.Versions,
+			InstrumentedBy: "webhook",
+		},
+		logger,
+	)
+}
+
+func (r *routing) routeFor(group, kind, version string) resourceHandler {
+	routesForGroup := (*r)[group]
+	if routesForGroup == nil {
+		return fallbackRoute
+	}
+	routesForKind := routesForGroup[kind]
+	if routesForKind == nil {
+		return fallbackRoute
+	}
+	routesForVersion := routesForKind[version]
+	if routesForVersion == nil {
+		return fallbackRoute
 	}
+	return routesForVersion
 }
diff --git a/internal/webhook/dash0_webhook_test.go b/internal/webhook/dash0_webhook_test.go
index 1a2ebb1c..24f42998 100644
--- a/internal/webhook/dash0_webhook_test.go
+++ b/internal/webhook/dash0_webhook_test.go
@@ -4,9 +4,12 @@
 package webhook
 
 import (
-	. "github.com/dash0hq/dash0-operator/test/util"
+	"fmt"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+
+	. "github.com/dash0hq/dash0-operator/test/util"
 )
 
 // Maintenance note: There is some overlap of test cases between this file and k8sresources/modify_test.go. 
This is @@ -16,37 +19,31 @@ import ( var _ = Describe("Dash0 Webhook", func() { AfterEach(func() { + _ = k8sClient.Delete(ctx, BasicCronJob(TestNamespaceName, CronJobName)) + _ = k8sClient.Delete(ctx, BasicDaemonSet(TestNamespaceName, DaemonSetName)) _ = k8sClient.Delete(ctx, BasicDeployment(TestNamespaceName, DeploymentName)) + _ = k8sClient.Delete(ctx, BasicJob(TestNamespaceName, JobName)) + err := k8sClient.Delete(ctx, BasicReplicaSet(TestNamespaceName, ReplicaSetName)) + if err != nil { + fmt.Fprintf(GinkgoWriter, "cannot delete replicaset: %v\n", err) + } + _ = k8sClient.Delete(ctx, BasicStatefulSet(TestNamespaceName, StatefulSetName)) + }) Context("when mutating new deployments", func() { - It("should inject Dash into a new basic deployment", func() { - deployment := BasicDeployment(TestNamespaceName, DeploymentName) - Expect(k8sClient.Create(ctx, deployment)).Should(Succeed()) - - deployment = GetDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName) - VerifyModifiedDeployment(deployment, DeploymentExpectations{ - Volumes: 1, - Dash0VolumeIdx: 0, - InitContainers: 1, - Dash0InitContainerIdx: 0, - Containers: []ContainerExpectations{{ - VolumeMounts: 1, - Dash0VolumeMountIdx: 0, - EnvVars: 2, - NodeOptionsEnvVarIdx: 0, - Dash0CollectorBaseUrlEnvVarIdx: 1, - Dash0CollectorBaseUrlEnvVarExpectedValue: "http://dash0-opentelemetry-collector-daemonset.test-namespace.svc.cluster.local:4318", - }}, - }) + It("should inject Dash0 into a new basic deployment", func() { + CreateBasicDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName) + deployment := GetDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName) + VerifyModifiedDeployment(deployment, BasicPodSpecExpectations) }) - It("should inject Dash into a new deployment that has multiple Containers, and already has Volumes and init Containers", func() { + It("should inject Dash0 into a new deployment that has multiple containers, and already has volumes and init containers", func() { deployment := DeploymentWithMoreBellsAndWhistles(TestNamespaceName, DeploymentName) Expect(k8sClient.Create(ctx, deployment)).Should(Succeed()) deployment = GetDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName) - VerifyModifiedDeployment(deployment, DeploymentExpectations{ + VerifyModifiedDeployment(deployment, PodSpecExpectations{ Volumes: 3, Dash0VolumeIdx: 2, InitContainers: 3, @@ -73,12 +70,12 @@ var _ = Describe("Dash0 Webhook", func() { VerifySuccessEvent(ctx, clientset, TestNamespaceName, DeploymentName, "webhook") }) - It("should update existing Dash artifacts in a new deployment", func() { + It("should update existing Dash0 artifacts in a new deployment", func() { deployment := DeploymentWithExistingDash0Artifacts(TestNamespaceName, DeploymentName) Expect(k8sClient.Create(ctx, deployment)).Should(Succeed()) deployment = GetDeployment(ctx, k8sClient, TestNamespaceName, DeploymentName) - VerifyModifiedDeployment(deployment, DeploymentExpectations{ + VerifyModifiedDeployment(deployment, PodSpecExpectations{ Volumes: 3, Dash0VolumeIdx: 1, InitContainers: 3, @@ -105,5 +102,41 @@ var _ = Describe("Dash0 Webhook", func() { }, }) }) + + It("should inject Dash0 into a new basic cron job", func() { + CreateBasicCronJob(ctx, k8sClient, TestNamespaceName, CronJobName) + cronJob := GetCronJob(ctx, k8sClient, TestNamespaceName, CronJobName) + VerifyModifiedCronJob(cronJob, BasicPodSpecExpectations) + }) + + It("should inject Dash0 into a new basic daemon set", func() { + CreateBasicDaemonSet(ctx, k8sClient, TestNamespaceName, 
DaemonSetName)
+			daemonSet := GetDaemonSet(ctx, k8sClient, TestNamespaceName, DaemonSetName)
+			VerifyModifiedDaemonSet(daemonSet, BasicPodSpecExpectations)
+		})
+
+		It("should inject Dash0 into a new basic job", func() {
+			CreateBasicJob(ctx, k8sClient, TestNamespaceName, JobName)
+			job := GetJob(ctx, k8sClient, TestNamespaceName, JobName)
+			VerifyModifiedJob(job, BasicPodSpecExpectations)
+		})
+
+		It("should inject Dash0 into a new basic replica set", func() {
+			CreateBasicReplicaSet(ctx, k8sClient, TestNamespaceName, ReplicaSetName)
+			replicaSet := GetReplicaSet(ctx, k8sClient, TestNamespaceName, ReplicaSetName)
+			VerifyModifiedReplicaSet(replicaSet, BasicPodSpecExpectations)
+		})
+
+		It("should not inject Dash0 into a new replica set owned by a deployment", func() {
+			CreateReplicaSetOwnedByDeployment(ctx, k8sClient, TestNamespaceName, ReplicaSetName)
+			replicaSet := GetReplicaSet(ctx, k8sClient, TestNamespaceName, ReplicaSetName)
+			VerifyUnmodifiedReplicaSet(replicaSet)
+		})
+
+		It("should inject Dash0 into a new basic stateful set", func() {
+			CreateBasicStatefulSet(ctx, k8sClient, TestNamespaceName, StatefulSetName)
+			statefulSet := GetStatefulSet(ctx, k8sClient, TestNamespaceName, StatefulSetName)
+			VerifyModifiedStatefulSet(statefulSet, BasicPodSpecExpectations)
+		})
 	})
 })
diff --git a/internal/webhook/webhook_suite_test.go b/internal/webhook/webhook_suite_test.go
index e9b17b88..5631c663 100644
--- a/internal/webhook/webhook_suite_test.go
+++ b/internal/webhook/webhook_suite_test.go
@@ -14,12 +14,9 @@ import (
 	"testing"
 	"time"
 
-	. "github.com/dash0hq/dash0-operator/test/util"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 
-	operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1"
-	"github.com/dash0hq/dash0-operator/internal/k8sresources"
 	admissionv1 "k8s.io/api/admission/v1"
 	apimachineryruntime "k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/kubernetes"
@@ -32,6 +29,11 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
 	"sigs.k8s.io/controller-runtime/pkg/webhook"
+
+	operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1"
+	"github.com/dash0hq/dash0-operator/internal/util"
+
+	. "github.com/dash0hq/dash0-operator/test/util"
 )
 
 var (
@@ -42,7 +44,7 @@ var (
 	ctx       context.Context
 	cancel    context.CancelFunc
 
-	versions = k8sresources.Versions{
+	versions = util.Versions{
 		OperatorVersion:           "1.2.3",
 		InitContainerImageVersion: "4.5.6",
 	}
diff --git a/test-resources/bin/render-templates.sh b/test-resources/bin/render-templates.sh
new file mode 100755
index 00000000..e59181a5
--- /dev/null
+++ b/test-resources/bin/render-templates.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc.
+# SPDX-License-Identifier: Apache-2.0
+
+set -euo pipefail
+
+cd "$(dirname ${BASH_SOURCE})"/../..
+
+resource_types=( cronjob daemonset deployment job replicaset statefulset )
+
+for resource_type in "${resource_types[@]}"; do
+  cat test-resources/node.js/express/${resource_type}.yaml.template | envsubst > test-resources/node.js/express/${resource_type}.yaml
+done
+
+if [[ ${1:-} == manual-testing ]]; then
+  # Generate collector values file for manual testing:
+  if [[ -n ${DASH0_AUTHORIZATION_TOKEN:-} ]] && [[ -n ${OTEL_EXPORTER_OTLP_ENDPOINT:-} ]]; then
+    echo "Using DASH0_AUTHORIZATION_TOKEN and OTEL_EXPORTER_OTLP_ENDPOINT to render the template test-resources/collector/manual.values.yaml.template. 
The rendered file test-resources/collector/manual.values.yaml can be used for manual testing and actually reporting data to Dash0." + cat test-resources/collector/manual.values.yaml.template | envsubst > test-resources/collector/manual.values.yaml + elif [[ -f test-resources/collector/manual.values.yaml ]]; then + echo "The file test-resources/collector/manual.values.yaml already exists, the file will be reused. Set DASH0_AUTHORIZATION_TOKEN and OTEL_EXPORTER_OTLP_ENDPOINT to generate a new file." + else + echo "The file test-resources/collector/manual.values.yaml does not exist and either DASH0_AUTHORIZATION_TOKEN or OTEL_EXPORTER_OTLP_ENDPOINT are not set (or both). Both environment variables are required when running this script for the first time, to generate a valid manual.values.yaml file for the OpenTelemetry collector. They can be omitted on subsequent runs, unless test-resources/collector/manual.values.yaml is deleted and needs to be regenerated." + exit 1 + fi +else + # Generate collector values file for end-to-end tests: + cat test-resources/collector/e2e.values.yaml.template | envsubst > test-resources/collector/e2e.values.yaml +fi diff --git a/test-resources/bin/test-cleanup.sh b/test-resources/bin/test-cleanup.sh index 9e05bdd2..8652c3d3 100755 --- a/test-resources/bin/test-cleanup.sh +++ b/test-resources/bin/test-cleanup.sh @@ -5,7 +5,6 @@ set -euo pipefail - cd "$(dirname ${BASH_SOURCE})"/../.. target_namespace=${1:-default} @@ -13,5 +12,14 @@ target_namespace=${1:-default} kubectl delete -n ${target_namespace} -k config/samples || true make uninstall || true make undeploy || true -test-resources/node.js/express/undeploy.sh ${target_namespace} -test-resources/collector/undeploy.sh ${target_namespace} \ No newline at end of file + +resource_types=( cronjob daemonset deployment job replicaset statefulset ) +for resource_type in "${resource_types[@]}"; do + test-resources/node.js/express/undeploy.sh ${target_namespace} ${resource_type} +done + +test-resources/collector/undeploy.sh ${target_namespace} + +if [[ "${target_namespace}" != "default" ]]; then + kubectl delete ns ${target_namespace} +fi diff --git a/test-resources/bin/test-roundtrip-01-aum-cr-operator.sh b/test-resources/bin/test-roundtrip-01-aum-cr-operator.sh index a1d2eeac..023dffd7 100755 --- a/test-resources/bin/test-roundtrip-01-aum-cr-operator.sh +++ b/test-resources/bin/test-roundtrip-01-aum-cr-operator.sh @@ -13,6 +13,9 @@ if ! kubectl get ns cert-manager &> /dev/null; then fi target_namespace=${1:-default} +kind=${2:-deployment} + +test-resources/bin/render-templates.sh manual-testing echo "STEP 1: creating target namespace (if necessary)" test-resources/bin/ensure-namespace-exists.sh ${target_namespace} @@ -33,7 +36,7 @@ echo echo echo "STEP 5: deploy application under monitoring" -test-resources/node.js/express/build-and-deploy.sh ${target_namespace} +test-resources/node.js/express/deploy.sh ${target_namespace} ${kind} echo echo @@ -52,4 +55,4 @@ echo sleep 5 echo "STEP 8: deploy the Dash0 operator" -make deploy \ No newline at end of file +make deploy diff --git a/test-resources/bin/test-roundtrip-02-aum-operator-cr.sh b/test-resources/bin/test-roundtrip-02-aum-operator-cr.sh index c5125bc6..a12bd482 100755 --- a/test-resources/bin/test-roundtrip-02-aum-operator-cr.sh +++ b/test-resources/bin/test-roundtrip-02-aum-operator-cr.sh @@ -13,6 +13,9 @@ if ! 
kubectl get ns cert-manager &> /dev/null; then
 fi
 
 target_namespace=${1:-default}
+kind=${2:-deployment}
+
+test-resources/bin/render-templates.sh manual-testing
 
 echo "STEP 1: creating target namespace (if necessary)"
 test-resources/bin/ensure-namespace-exists.sh ${target_namespace}
@@ -33,7 +36,7 @@ echo
 echo
 
 echo "STEP 5: deploy application under monitoring"
-test-resources/node.js/express/build-and-deploy.sh ${target_namespace}
+test-resources/node.js/express/deploy.sh ${target_namespace} ${kind}
 echo
 echo
@@ -52,4 +55,4 @@ echo
 sleep 5
 
 echo "STEP 8: deploy the Dash0 custom resource to namespace ${target_namespace}"
-kubectl apply -n ${target_namespace} -k config/samples
\ No newline at end of file
+kubectl apply -n ${target_namespace} -k config/samples
diff --git a/test-resources/bin/test-roundtrip-03-operator-cr-aum.sh b/test-resources/bin/test-roundtrip-03-operator-cr-aum.sh
index fe0968e8..2ff48d48 100755
--- a/test-resources/bin/test-roundtrip-03-operator-cr-aum.sh
+++ b/test-resources/bin/test-roundtrip-03-operator-cr-aum.sh
@@ -13,6 +13,9 @@ if ! kubectl get ns cert-manager &> /dev/null; then
 fi
 
 target_namespace=${1:-default}
+kind=${2:-deployment}
+
+test-resources/bin/render-templates.sh manual-testing
 
 echo "STEP 1: creating target namespace (if necessary)"
 test-resources/bin/ensure-namespace-exists.sh ${target_namespace}
@@ -54,4 +57,4 @@ echo
 sleep 5
 
 echo "STEP 8: deploy application under monitoring"
-test-resources/node.js/express/build-and-deploy.sh ${target_namespace}
\ No newline at end of file
+test-resources/node.js/express/deploy.sh ${target_namespace} ${kind}
diff --git a/test-resources/collector/.gitignore b/test-resources/collector/.gitignore
index 7f47975f..f0e74d5e 100644
--- a/test-resources/collector/.gitignore
+++ b/test-resources/collector/.gitignore
@@ -1 +1,2 @@
-values.yaml
+e2e.values.yaml
+manual.values.yaml
diff --git a/test-resources/collector/deploy.sh b/test-resources/collector/deploy.sh
index b2220e8d..73b73ec5 100755
--- a/test-resources/collector/deploy.sh
+++ b/test-resources/collector/deploy.sh
@@ -7,14 +7,18 @@ set -euo pipefail
 
 cd "$(dirname ${BASH_SOURCE})"
 
-if [[ ! -f values.yaml ]]; then
-  echo Please copy $(pwd)/values.yaml.template to $(pwd)/values.yaml and provide the auth token and the ingress endpoint.
-  exit 1
-fi
-
 target_namespace=${1:-default}
 
-# helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
+if [[ ! $(helm repo list | grep open-telemetry) ]]; then
+  echo "The helm repo for open-telemetry has not been found, adding it now."
+  helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts --force-update
+  echo "Running helm repo update." 
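+  # 'helm repo update' (below) refreshes the locally cached chart index so that the freshly added
+  # open-telemetry repo can actually be used by the helm install further down in this script.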
+ helm repo update +fi + +if [[ -f manual.values.yaml ]]; then + ../bin/render-templates.sh manual-testing +fi ./undeploy.sh ${target_namespace} @@ -22,5 +26,5 @@ helm install \ dash0-opentelemetry-collector-daemonset \ open-telemetry/opentelemetry-collector \ --namespace ${target_namespace} \ - --values values.yaml \ + --values manual.values.yaml \ --set image.repository="otel/opentelemetry-collector-k8s" \ No newline at end of file diff --git a/test-resources/collector/values.yaml.template b/test-resources/collector/e2e.values.yaml.template similarity index 64% rename from test-resources/collector/values.yaml.template rename to test-resources/collector/e2e.values.yaml.template index 272056f4..c8bb9da3 100644 --- a/test-resources/collector/values.yaml.template +++ b/test-resources/collector/e2e.values.yaml.template @@ -5,11 +5,6 @@ resources: limits: memory: 500Mi config: - extensions: - bearertokenauth/dash0: - scheme: "Bearer" - token: "TODO provide token" - receivers: jaeger: null prometheus: null @@ -20,43 +15,35 @@ config: exporters: file/traces: - path: /e2e-test-received-data/traces.jsonl + path: /collector-received-data/traces.jsonl flush_interval: 100ms file/metrics: - path: /e2e-test-received-data/metrics.jsonl + path: /collector-received-data/metrics.jsonl flush_interval: 100ms file/logs: - path: /e2e-test-received-data/logs.jsonl + path: /collector-received-data/logs.jsonl flush_interval: 100ms - otlp: - auth: - authenticator: bearertokenauth/dash0 - endpoint: "TODO provide endpoint" service: extensions: - health_check - memory_ballast - - bearertokenauth/dash0 pipelines: traces: receivers: - otlp exporters: - file/traces - - otlp metrics: receivers: - otlp exporters: - file/metrics - - otlp logs: receivers: - otlp exporters: - file/logs - - otlp ports: jaeger-compact: @@ -71,8 +58,8 @@ ports: extraVolumes: - name: telemetry-file-export hostPath: - path: {TODO provide absolute path to repo}/e2e-test-received-data + path: $PWD/test-resources/e2e-test-volumes/collector-received-data type: DirectoryOrCreate extraVolumeMounts: - name: telemetry-file-export - mountPath: /e2e-test-received-data \ No newline at end of file + mountPath: /collector-received-data \ No newline at end of file diff --git a/test-resources/collector/manual.values.yaml.template b/test-resources/collector/manual.values.yaml.template new file mode 100644 index 00000000..483a057e --- /dev/null +++ b/test-resources/collector/manual.values.yaml.template @@ -0,0 +1,57 @@ +mode: daemonset +service: + enabled: true +resources: + limits: + memory: 500Mi +config: + extensions: + bearertokenauth/dash0: + scheme: "Bearer" + token: "$DASH0_AUTHORIZATION_TOKEN" + + receivers: + jaeger: null + prometheus: null + zipkin: null + + processors: + batch: {} + + exporters: + otlp: + auth: + authenticator: bearertokenauth/dash0 + endpoint: "$OTEL_EXPORTER_OTLP_ENDPOINT" + + service: + extensions: + - health_check + - memory_ballast + - bearertokenauth/dash0 + pipelines: + traces: + receivers: + - otlp + exporters: + - otlp + metrics: + receivers: + - otlp + exporters: + - otlp + logs: + receivers: + - otlp + exporters: + - otlp + +ports: + jaeger-compact: + enabled: false + jaeger-thrift: + enabled: false + jaeger-grpc: + enabled: false + zipkin: + enabled: false \ No newline at end of file diff --git a/test-resources/e2e-test-volumes/collector-received-data/.gitignore b/test-resources/e2e-test-volumes/collector-received-data/.gitignore new file mode 100644 index 00000000..72e8ffc0 --- /dev/null +++ 
b/test-resources/e2e-test-volumes/collector-received-data/.gitignore @@ -0,0 +1 @@ +* diff --git a/test-resources/e2e-test-volumes/test-uuid/.gitignore b/test-resources/e2e-test-volumes/test-uuid/.gitignore new file mode 100644 index 00000000..72e8ffc0 --- /dev/null +++ b/test-resources/e2e-test-volumes/test-uuid/.gitignore @@ -0,0 +1 @@ +* diff --git a/test-resources/node.js/express/.gitignore b/test-resources/node.js/express/.gitignore index c2658d7d..95a02a08 100644 --- a/test-resources/node.js/express/.gitignore +++ b/test-resources/node.js/express/.gitignore @@ -1 +1,7 @@ node_modules/ +cronjob.yaml +daemonset.yaml +deployment.yaml +job.yaml +replicaset.yaml +statefulset.yaml diff --git a/test-resources/node.js/express/app.js b/test-resources/node.js/express/app.js index e66d2a65..794adde1 100644 --- a/test-resources/node.js/express/app.js +++ b/test-resources/node.js/express/app.js @@ -1,16 +1,48 @@ // SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. // SPDX-License-Identifier: Apache-2.0 -const express = require( "express"); +const crypto = require('node:crypto'); +const fs = require('node:fs/promises'); +const express = require('express'); -const port = parseInt(process.env.PORT || "1207"); +const port = parseInt(process.env.PORT || '1207'); const app = express(); -app.get("/ohai", (req, res) => { - console.log("processing request"); - res.json({ message: "We make Observability easy for every developer." }); +app.get('/dash0-k8s-operator-test', (req, res) => { + console.log('processing request'); + res.json({ message: 'We make Observability easy for every developer.' }); }); app.listen(port, () => { - console.log(`listening on port ${port}`); + console.log(`listening on port ${port}`); }); + +if (process.env.TRIGGER_SELF_AND_EXIT) { + const testId = crypto.randomUUID(); + + (async function () { + const testIdFile = process.env.TEST_ID_FILE || '/test-uuid/test.id'; + try { + await fs.writeFile(testIdFile, testId); + console.log(`Test ID ${testId} has been written to file ${testIdFile}.`); + } catch (err) { + console.error( + `Unable to write test ID ${testId} to file ${testIdFile}, matching spans in the end-to-end test will not work.`, + err, + ); + } + + for (let i = 0; i < 120; i++) { + await fetch(`http://localhost:${port}/dash0-k8s-operator-test?id=${testId}`); + await delay(500); + } + + process.exit(0); + })(); +} + +function delay(ms) { + return new Promise(resolve => { + setTimeout(resolve, ms); + }); +} diff --git a/test-resources/node.js/express/cronjob.yaml.template b/test-resources/node.js/express/cronjob.yaml.template new file mode 100644 index 00000000..bb1dab7a --- /dev/null +++ b/test-resources/node.js/express/cronjob.yaml.template @@ -0,0 +1,35 @@ +# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1 +kind: CronJob +metadata: + name: dash0-operator-nodejs-20-express-test-cronjob +spec: + schedule: "* * * * *" + jobTemplate: + spec: + template: + spec: + restartPolicy: Never + containers: + - name: dash0-operator-nodejs-20-express-test-app + image: "dash0-operator-nodejs-20-express-test-app:latest" + env: + - name: DASH0_DEBUG + value: "true" + - name: TRIGGER_SELF_AND_EXIT + value: "true" + - name: TEST_ID_FILE + value: "/test-uuid/cronjob.test.id" + ports: + - containerPort: 1207 + imagePullPolicy: Never + volumeMounts: + - name: test-uuid + mountPath: /test-uuid + volumes: + - name: test-uuid + hostPath: + path: $PWD/test-resources/e2e-test-volumes/test-uuid + type: DirectoryOrCreate diff --git a/test-resources/node.js/express/daemonset.yaml.template b/test-resources/node.js/express/daemonset.yaml.template new file mode 100644 index 00000000..7d3635d9 --- /dev/null +++ b/test-resources/node.js/express/daemonset.yaml.template @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: dash0-operator-nodejs-20-express-test-service +spec: + selector: + app: dash0-operator-nodejs-20-express-test-app + ports: + - port: 1207 + targetPort: 1207 + type: LoadBalancer +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: dash0-operator-nodejs-20-express-test-daemonset + labels: + app: dash0-operator-nodejs-20-express-test-app +spec: + selector: + matchLabels: + app: dash0-operator-nodejs-20-express-test-app + template: + metadata: + labels: + app: dash0-operator-nodejs-20-express-test-app + spec: + containers: + - name: dash0-operator-nodejs-20-express-test-app + image: "dash0-operator-nodejs-20-express-test-app:latest" + env: + - name: DASH0_DEBUG + value: "true" + ports: + - containerPort: 1207 + imagePullPolicy: Never \ No newline at end of file diff --git a/test-resources/node.js/express/build-and-deploy.sh b/test-resources/node.js/express/deploy.sh similarity index 60% rename from test-resources/node.js/express/build-and-deploy.sh rename to test-resources/node.js/express/deploy.sh index 6ef644bc..ac25e524 100755 --- a/test-resources/node.js/express/build-and-deploy.sh +++ b/test-resources/node.js/express/deploy.sh @@ -8,10 +8,15 @@ set -euo pipefail cd "$(dirname ${BASH_SOURCE})" target_namespace=${1:-default} +kind=${2:-deployment} if [[ -z ${SKIP_DOCKER_BUILD:-} ]]; then docker build . 
-t dash0-operator-nodejs-20-express-test-app fi -./undeploy.sh ${target_namespace} -kubectl apply -n ${target_namespace} -f deploy.yaml +if [[ -f ${kind}.yaml ]]; then + ../../bin/render-templates.sh manual-testing +fi + +./undeploy.sh ${target_namespace} ${kind} +kubectl apply -n ${target_namespace} -f ${kind}.yaml diff --git a/test-resources/node.js/express/deploy.yaml b/test-resources/node.js/express/deployment.yaml.template similarity index 98% rename from test-resources/node.js/express/deploy.yaml rename to test-resources/node.js/express/deployment.yaml.template index 632362ea..c86b7155 100644 --- a/test-resources/node.js/express/deploy.yaml +++ b/test-resources/node.js/express/deployment.yaml.template @@ -20,7 +20,7 @@ metadata: labels: app: dash0-operator-nodejs-20-express-test-app spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: dash0-operator-nodejs-20-express-test-app diff --git a/test-resources/node.js/express/job.yaml.template b/test-resources/node.js/express/job.yaml.template new file mode 100644 index 00000000..ec781b50 --- /dev/null +++ b/test-resources/node.js/express/job.yaml.template @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: batch/v1 +kind: Job +metadata: + name: dash0-operator-nodejs-20-express-test-job +spec: + template: + spec: + restartPolicy: Never + containers: + - name: dash0-operator-nodejs-20-express-test-app + image: "dash0-operator-nodejs-20-express-test-app:latest" + env: + - name: DASH0_DEBUG + value: "true" + - name: TRIGGER_SELF_AND_EXIT + value: "true" + - name: TEST_ID_FILE + value: "/test-uuid/job.test.id" + ports: + - containerPort: 1207 + imagePullPolicy: Never + volumeMounts: + - name: test-uuid + mountPath: /test-uuid + volumes: + - name: test-uuid + hostPath: + path: $PWD/test-resources/e2e-test-volumes/test-uuid + type: DirectoryOrCreate diff --git a/test-resources/node.js/express/replicaset.yaml.template b/test-resources/node.js/express/replicaset.yaml.template new file mode 100644 index 00000000..be92caa7 --- /dev/null +++ b/test-resources/node.js/express/replicaset.yaml.template @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: dash0-operator-nodejs-20-express-test-service +spec: + selector: + app: dash0-operator-nodejs-20-express-test-app + ports: + - port: 1207 + targetPort: 1207 + type: LoadBalancer +--- +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: dash0-operator-nodejs-20-express-test-replicaset + labels: + app: dash0-operator-nodejs-20-express-test-app +spec: + replicas: 1 + selector: + matchLabels: + app: dash0-operator-nodejs-20-express-test-app + template: + metadata: + labels: + app: dash0-operator-nodejs-20-express-test-app + spec: + containers: + - name: dash0-operator-nodejs-20-express-test-app + image: "dash0-operator-nodejs-20-express-test-app:latest" + env: + - name: DASH0_DEBUG + value: "true" + ports: + - containerPort: 1207 + imagePullPolicy: Never diff --git a/test-resources/node.js/express/statefulset.yaml.template b/test-resources/node.js/express/statefulset.yaml.template new file mode 100644 index 00000000..0b11aa6c --- /dev/null +++ b/test-resources/node.js/express/statefulset.yaml.template @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc. 
+# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: dash0-operator-nodejs-20-express-test-service +spec: + selector: + app: dash0-operator-nodejs-20-express-test-app + ports: + - port: 1207 + targetPort: 1207 + type: LoadBalancer +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: dash0-operator-nodejs-20-express-test-statefulset +spec: + selector: + matchLabels: + # has to match .spec.template.metadata.labels + app: dash0-operator-nodejs-20-express-test-app + serviceName: "dash0-operator-nodejs-20-express-test-service" + replicas: 1 + template: + metadata: + labels: + app: dash0-operator-nodejs-20-express-test-app + spec: + terminationGracePeriodSeconds: 3 + containers: + - name: dash0-operator-nodejs-20-express-test-app + image: "dash0-operator-nodejs-20-express-test-app:latest" + env: + - name: DASH0_DEBUG + value: "true" + ports: + - containerPort: 1207 + imagePullPolicy: Never diff --git a/test-resources/node.js/express/undeploy.sh b/test-resources/node.js/express/undeploy.sh index 2074a892..e46f6ec3 100755 --- a/test-resources/node.js/express/undeploy.sh +++ b/test-resources/node.js/express/undeploy.sh @@ -8,5 +8,6 @@ set -euo pipefail cd "$(dirname ${BASH_SOURCE})" target_namespace=${1:-default} +kind=${2:-deployment} -kubectl delete -n ${target_namespace} -f deploy.yaml || true +kubectl delete -n ${target_namespace} -f ${kind}.yaml --ignore-not-found || true diff --git a/test/e2e/e2e_helpers.go b/test/e2e/e2e_helpers.go index dd720be1..d75a6c41 100644 --- a/test/e2e/e2e_helpers.go +++ b/test/e2e/e2e_helpers.go @@ -10,19 +10,21 @@ import ( "io" "os" "os/exec" + "strconv" "strings" "time" . "github.com/onsi/ginkgo/v2" //nolint:golint,revive . "github.com/onsi/gomega" + "github.com/google/uuid" "go.opentelemetry.io/collector/pdata/ptrace" ) const ( certmanagerVersion = "v1.14.5" tracesJsonMaxLineLength = 1_048_576 - verifyTelemetryTimeout = 60 * time.Second + verifyTelemetryTimeout = 90 * time.Second verifyTelemetryPollingInterval = 500 * time.Millisecond ) @@ -30,6 +32,11 @@ var ( traceUnmarshaller = &ptrace.JSONUnmarshaler{} ) +func RenderTemplates() { + By("render yaml templates") + ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command("test-resources/bin/render-templates.sh"))).To(Succeed()) +} + func EnsureNamespaceExists(namespace string) bool { err := RunAndIgnoreOutput(exec.Command("kubectl", "get", "ns", namespace), false) if err != nil { @@ -181,10 +188,34 @@ func uninstallCertManager() { func ReinstallCollectorAndClearExportedTelemetry(namespace string) error { _ = UninstallCollector(namespace) - _ = os.Remove("e2e-test-received-data/traces.jsonl") - _ = os.Remove("e2e-test-received-data/metrics.jsonl") - _ = os.Remove("e2e-test-received-data/logs.jsonl") - err := RunAndIgnoreOutput( + _ = os.Remove("test-resources/e2e-test-volumes/collector-received-data/traces.jsonl") + _ = os.Remove("test-resources/e2e-test-volumes/collector-received-data/metrics.jsonl") + _ = os.Remove("test-resources/e2e-test-volumes/collector-received-data/logs.jsonl") + + repoList, err := Run(exec.Command("helm", "repo", "list")) + if err != nil { + return err + } + if !strings.Contains(string(repoList), "open-telemetry") { + fmt.Fprintf(GinkgoWriter, "The helm repo for open-telemetry has not been found, adding it now.\n") + if err := RunAndIgnoreOutput( + exec.Command( + "helm", + "repo", + "add", + "open-telemetry", + "https://open-telemetry.github.io/opentelemetry-helm-charts", + "--force-update", + )); err != nil { + return err + } + 
fmt.Fprintf(GinkgoWriter, "Running helm repo update.\n") + if err = RunAndIgnoreOutput(exec.Command("helm", "repo", "update")); err != nil { + return err + } + } + + err = RunAndIgnoreOutput( exec.Command( "helm", "install", @@ -193,7 +224,7 @@ func ReinstallCollectorAndClearExportedTelemetry(namespace string) error { "--namespace", namespace, "--values", - "test-resources/collector/values.yaml", + "test-resources/collector/e2e.values.yaml", "--set", "image.repository=otel/opentelemetry-collector-k8s", )) @@ -209,7 +240,7 @@ func ReinstallCollectorAndClearExportedTelemetry(namespace string) error { "--namespace", namespace, "--timeout", - "30s", + "60s", )) } @@ -269,8 +300,9 @@ func UndeployOperator(namespace string) { By("undeploying the controller-manager") ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command("make", "undeploy"))).To(Succeed()) - // We need to wait until the namespace is really gone, otherwise the next test/suite that tries to create the operator - // will run into issues when trying to recreate the namespace which is still in the process of being deleted. + // We need to wait until the namespace is really gone, otherwise the next test case/suite that tries to create the + // operator will run into issues when trying to recreate the namespace which is still in the process of being + // deleted. ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command( "kubectl", "wait", @@ -292,17 +324,44 @@ func UndeployDash0Resource(namespace string) { "kubectl", "delete", "-n", namespace, "-k", "config/samples"))).To(Succeed()) } -func InstallNodeJsDeployment(namespace string) error { - imageName := "dash0-operator-nodejs-20-express-test-app" - err := RunMultipleFromStrings([][]string{ - {"docker", "build", "test-resources/node.js/express", "-t", imageName}, - {"kubectl", "apply", "-f", "test-resources/node.js/express/deploy.yaml", "--namespace", namespace}, - }) - if err != nil { - return err - } +func InstallNodeJsCronJob(namespace string) error { + return installNodeJsApplication( + namespace, + "cronjob", + nil, + ) +} - return RunAndIgnoreOutput( +func UninstallNodeJsCronJob(namespace string) error { + return uninstallNodeJsApplication(namespace, "cronjob") +} + +func InstallNodeJsDaemonSet(namespace string) error { + return installNodeJsApplication( + namespace, + "daemonset", + exec.Command( + "kubectl", + "rollout", + "status", + "daemonset", + "dash0-operator-nodejs-20-express-test-daemonset", + "--namespace", + namespace, + "--timeout", + "60s", + ), + ) +} + +func UninstallNodeJsDaemonSet(namespace string) error { + return uninstallNodeJsApplication(namespace, "daemonset") +} + +func InstallNodeJsDeployment(namespace string) error { + return installNodeJsApplication( + namespace, + "deployment", exec.Command( "kubectl", "wait", @@ -312,76 +371,243 @@ func InstallNodeJsDeployment(namespace string) error { "--namespace", namespace, "--timeout", - "30s", - )) + "60s", + ), + ) } func UninstallNodeJsDeployment(namespace string) error { + return uninstallNodeJsApplication(namespace, "deployment") +} + +func InstallNodeJsJob(namespace string) error { + return installNodeJsApplication( + namespace, + "job", + nil, + ) +} + +func UninstallNodeJsJob(namespace string) error { + return uninstallNodeJsApplication(namespace, "job") +} + +func InstallNodeJsReplicaSet(namespace string) error { + return installNodeJsApplication( + namespace, + "replicaset", + exec.Command( + "kubectl", + "wait", + "pod", + "--namespace", + namespace, + "--selector", + 
"app=dash0-operator-nodejs-20-express-test-app", + "--for", + "condition=ContainersReady", + "--timeout", + "60s", + ), + ) +} + +func UninstallNodeJsReplicaSet(namespace string) error { + return uninstallNodeJsApplication(namespace, "replicaset") +} + +func InstallNodeJsStatefulSet(namespace string) error { + return installNodeJsApplication( + namespace, + "statefulset", + exec.Command( + "kubectl", + "rollout", + "status", + "statefulset", + "dash0-operator-nodejs-20-express-test-statefulset", + "--namespace", + namespace, + "--timeout", + "60s", + ), + ) +} + +func UninstallNodeJsStatefulSet(namespace string) error { + return uninstallNodeJsApplication(namespace, "statefulset") +} + +func RemoveAllTestApplications(namespace string) { + By("uninstalling the test applications") + ExpectWithOffset(1, UninstallNodeJsCronJob(namespace)).To(Succeed()) + ExpectWithOffset(1, UninstallNodeJsDaemonSet(namespace)).To(Succeed()) + ExpectWithOffset(1, UninstallNodeJsDeployment(namespace)).To(Succeed()) + ExpectWithOffset(1, UninstallNodeJsJob(namespace)).To(Succeed()) + ExpectWithOffset(1, UninstallNodeJsReplicaSet(namespace)).To(Succeed()) + ExpectWithOffset(1, UninstallNodeJsStatefulSet(namespace)).To(Succeed()) +} + +func installNodeJsApplication(namespace string, kind string, waitCommand *exec.Cmd) error { + imageName := "dash0-operator-nodejs-20-express-test-app" + err := RunMultipleFromStrings([][]string{ + {"docker", "build", "test-resources/node.js/express", "-t", imageName}, + { + "kubectl", + "apply", + "--namespace", + namespace, + "-f", + fmt.Sprintf("test-resources/node.js/express/%s.yaml", kind), + }, + }) + if err != nil { + return err + } + if waitCommand == nil { + return nil + } + return RunAndIgnoreOutput(waitCommand) +} + +func uninstallNodeJsApplication(namespace string, kind string) error { return RunAndIgnoreOutput( exec.Command( "kubectl", "delete", "--namespace", namespace, + "--ignore-not-found", "-f", - "test-resources/node.js/express/deploy.yaml", + fmt.Sprintf("test-resources/node.js/express/%s.yaml", kind), )) } -func SendRequestsAndVerifySpansHaveBeenProduced(namespace string) { - timestampLowerBound := time.Now() +func DeleteTestIdFiles() { + _ = os.Remove("test-resources/e2e-test-volumes/test-uuid/cronjob.test.id") + _ = os.Remove("test-resources/e2e-test-volumes/test-uuid/job.test.id") +} - By("verify that the resource has been instrumented and is sending telemetry", func() { +func VerifyThatSpansAreCaptured( + namespace string, + kind string, + sendRequests bool, + restartPodsManually bool, + instrumentationBy string, +) { + By("verify that the resource has been instrumented and is sending telemetry") + + var testId string + if sendRequests { + // For resource types that are available as a service (daemonset, deployment etc.) we send HTTP requests with + // a unique ID as a query parameter. When checking the produced spans that the OTel collector writes to disk via + // the file exporter, we can verify that the span is actually from the currently running test case by inspecting + // the http.target span attribute. This guarantees that we do not accidentally pass the test due to a span from + // a previous test case. 
+		testIdUuid := uuid.New()
+		testId = testIdUuid.String()
+	} else {
+		By(fmt.Sprintf("waiting for the test ID file to be written by the %s under test", kind))
 		Eventually(func(g Gomega) {
-			verifyLabels(g, namespace)
-			response, err := Run(exec.Command("curl", "http://localhost:1207/ohai"), false)
-			g.ExpectWithOffset(1, err).NotTo(HaveOccurred())
-			g.ExpectWithOffset(1, string(response)).To(ContainSubstring(
-				"We make Observability easy for every developer."))
-			fileHandle, err := os.Open("e2e-test-received-data/traces.jsonl")
-			g.ExpectWithOffset(1, err).NotTo(HaveOccurred())
-			defer func() {
-				_ = fileHandle.Close()
-			}()
-			scanner := bufio.NewScanner(fileHandle)
-			scanner.Buffer(make([]byte, tracesJsonMaxLineLength), tracesJsonMaxLineLength)
-
-			// read file line by line
-			spansFound := false
-			for scanner.Scan() {
-				resourceSpanBytes := scanner.Bytes()
-				traces, err := traceUnmarshaller.UnmarshalTraces(resourceSpanBytes)
-				if err != nil {
-					// ignore lines that cannot be parsed
-					continue
-				}
-				if spansFound = hasMatchingSpans(traces, timestampLowerBound, isHttpServerSpanWithRoute("/ohai")); spansFound {
-					break
-				}
-			}
-			g.Expect(scanner.Err()).NotTo(HaveOccurred())
-			g.Expect(spansFound).To(BeTrue(), "expected to find an HTTP server span")
-		}, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed())
-	})
+			// For resource types like batch jobs/cron jobs, the application under test generates the test ID and writes it
+			// to a volume that maps to a host path. We read the test ID from the host path and use it to verify the spans.
+			testIdBytes, err := os.ReadFile(fmt.Sprintf("test-resources/e2e-test-volumes/test-uuid/%s.test.id", kind))
+			g.Expect(err).NotTo(HaveOccurred())
+			testId = string(testIdBytes)
+		}, 80*time.Second, 200*time.Millisecond).Should(Succeed())
+	}
+
+	httpPathWithQuery := fmt.Sprintf("/dash0-k8s-operator-test?id=%s", testId)
+
+	By("waiting for the workload to be modified/checking labels")
+	Eventually(func(g Gomega) {
+		verifyLabels(g, namespace, kind, true, instrumentationBy)
+	}, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed())
+
+	if restartPodsManually {
+		restartAllPods(namespace)
+	}
+
+	By("waiting for spans to be captured")
+	Eventually(func(g Gomega) {
+		verifySpans(g, sendRequests, httpPathWithQuery)
+	}, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed())
+	By("matching spans have been received")
+}
+
+func restartAllPods(namespace string) {
+	// The pods of replicasets are not restarted automatically when the template changes (in contrast to
+	// deployments, daemonsets etc.). For now we expect the user to restart the pods of the replicaset manually,
+	// and we simulate this in the e2e tests.
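+	// The manual equivalent of what this helper automates would be roughly:
+	//   kubectl delete pod --namespace <target-namespace> --selector app=dash0-operator-nodejs-20-express-test-app
+	// (that app label selector is the one used by all Node.js test workload templates).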
+ By("restarting pods manually") + Expect( + RunAndIgnoreOutput( + exec.Command( + "kubectl", + "delete", + "pod", + "--namespace", + namespace, + "--selector", + "app=dash0-operator-nodejs-20-express-test-app", + ))).To(Succeed()) + +} + +func verifySpans(g Gomega, sendRequests bool, httpPathWithQuery string) { + if sendRequests { + response, err := Run(exec.Command("curl", fmt.Sprintf("http://localhost:1207%s", httpPathWithQuery)), false) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(string(response)).To(ContainSubstring( + "We make Observability easy for every developer.")) + } + fileHandle, err := os.Open("test-resources/e2e-test-volumes/collector-received-data/traces.jsonl") + g.Expect(err).NotTo(HaveOccurred()) + defer func() { + _ = fileHandle.Close() + }() + scanner := bufio.NewScanner(fileHandle) + scanner.Buffer(make([]byte, tracesJsonMaxLineLength), tracesJsonMaxLineLength) + + // read file line by line + spansFound := false + for scanner.Scan() { + resourceSpanBytes := scanner.Bytes() + traces, err := traceUnmarshaller.UnmarshalTraces(resourceSpanBytes) + if err != nil { + // ignore lines that cannot be parsed + continue + } + if spansFound = hasMatchingSpans( + traces, + isHttpServerSpanWithHttpTarget(httpPathWithQuery), + ); spansFound { + break + } + } + g.Expect(scanner.Err()).NotTo(HaveOccurred()) + g.Expect(spansFound).To(BeTrue(), "expected to find an HTTP server span") } -func verifyLabels(g Gomega, namespace string) { - instrumented := readLabel(g, namespace, "dash0.instrumented") - g.ExpectWithOffset(1, instrumented).To(Equal("true")) - operatorVersion := readLabel(g, namespace, "dash0.operator.version") +func verifyLabels(g Gomega, namespace string, kind string, hasBeenInstrumented bool, instrumentationBy string) { + instrumented := readLabel(g, namespace, kind, "dash0.instrumented") + g.ExpectWithOffset(1, instrumented).To(Equal(strconv.FormatBool(hasBeenInstrumented))) + operatorVersion := readLabel(g, namespace, kind, "dash0.operator.version") g.ExpectWithOffset(1, operatorVersion).To(MatchRegexp("\\d+\\.\\d+\\.\\d+")) - initContainerImageVersion := readLabel(g, namespace, "dash0.initcontainer.image.version") + initContainerImageVersion := readLabel(g, namespace, kind, "dash0.initcontainer.image.version") g.ExpectWithOffset(1, initContainerImageVersion).To(MatchRegexp("\\d+\\.\\d+\\.\\d+")) + instrumentedBy := readLabel(g, namespace, kind, "dash0.instrumented.by") + g.ExpectWithOffset(1, instrumentedBy).To(Equal(instrumentationBy)) } -func readLabel(g Gomega, namespace string, labelKey string) string { +func readLabel(g Gomega, namespace string, kind string, labelKey string) string { labelValue, err := Run(exec.Command( "kubectl", "get", - "deployment", + kind, "--namespace", namespace, - "dash0-operator-nodejs-20-express-test-deployment", + fmt.Sprintf("dash0-operator-nodejs-20-express-test-%s", kind), "-o", fmt.Sprintf("jsonpath={.metadata.labels['%s']}", strings.ReplaceAll(labelKey, ".", "\\.")), ), false) @@ -389,14 +615,14 @@ func readLabel(g Gomega, namespace string, labelKey string) string { return string(labelValue) } -func hasMatchingSpans(traces ptrace.Traces, timestampLowerBound time.Time, matchFn func(span ptrace.Span) bool) bool { +func hasMatchingSpans(traces ptrace.Traces, matchFn func(span ptrace.Span) bool) bool { for i := 0; i < traces.ResourceSpans().Len(); i++ { resourceSpan := traces.ResourceSpans().At(i) for j := 0; j < resourceSpan.ScopeSpans().Len(); j++ { scopeSpan := resourceSpan.ScopeSpans().At(j) for k := 0; k < 
scopeSpan.Spans().Len(); k++ { span := scopeSpan.Spans().At(k) - if span.StartTimestamp().AsTime().After(timestampLowerBound) && matchFn(span) { + if matchFn(span) { return true } } @@ -405,12 +631,14 @@ func hasMatchingSpans(traces ptrace.Traces, timestampLowerBound time.Time, match return false } -func isHttpServerSpanWithRoute(expectedRoute string) func(span ptrace.Span) bool { +func isHttpServerSpanWithHttpTarget(expectedTarget string) func(span ptrace.Span) bool { return func(span ptrace.Span) bool { if span.Kind() == ptrace.SpanKindServer { - route, hasRoute := span.Attributes().Get("http.route") - if hasRoute && route.Str() == expectedRoute { - return true + target, hasTarget := span.Attributes().Get("http.target") + if hasTarget { + if target.Str() == expectedTarget { + return true + } } } return false diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 8da2bc57..c78125f9 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -14,6 +14,6 @@ import ( // Run e2e tests using the Ginkgo runner. func TestE2E(t *testing.T) { RegisterFailHandler(Fail) - fmt.Fprintf(GinkgoWriter, "Starting dash0-operator suite\n") + fmt.Fprint(GinkgoWriter, "Starting dash0-operator suite\n") RunSpecs(t, "e2e suite") } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index c0b4af13..c2fe9dbf 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -18,7 +18,7 @@ const ( operatorNamespace = "dash0-operator-system" operatorImage = "dash0-operator-controller:latest" - applicationUnderTestNamespace = "application-under-test-namespace" + applicationUnderTestNamespace = "e2e-application-under-test-namespace" managerYaml = "config/manager/manager.yaml" managerYamlBackup = managerYaml + ".backup" @@ -40,7 +40,9 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { workingDir := strings.TrimSpace(string(pwdOutput)) fmt.Fprintf(GinkgoWriter, "workingDir: %s\n", workingDir) - By("Reading current imagePullPolicy") + RenderTemplates() + + By("reading current imagePullPolicy") yqOutput, err := Run(exec.Command( "yq", "e", @@ -92,9 +94,6 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { }) AfterAll(func() { - By("uninstalling the Node.js deployment") - ExpectWithOffset(1, UninstallNodeJsDeployment(applicationUnderTestNamespace)).To(Succeed()) - if managerYamlNeedsRevert { By("reverting changes to " + managerYaml) err := CopyFile(managerYamlBackup, managerYaml) @@ -124,39 +123,110 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { fmt.Fprint(GinkgoWriter, string(output)) }) - Context("the Dash0 operator's webhook", func() { + BeforeEach(func() { + DeleteTestIdFiles() + }) - BeforeAll(func() { - DeployOperator(operatorNamespace, operatorImage) - fmt.Fprint(GinkgoWriter, "waiting 10 seconds to give the webook some time to get ready\n") - time.Sleep(10 * time.Second) - }) + AfterEach(func() { + // As an alternative to undeploying all applications under test (deployment, daemonset, cronjob, ...) we could + // also delete the whole namespace for the application under test to after each test case get rid of all + // applications (and then recreate the namespace before each test). However, this would mean we also need to + // deploy the OpenTelemetry collector to the target namespace again for each test case, which would slow down + // tests a bit more. 
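+		// RemoveAllTestApplications (defined in e2e_helpers.go) uninstalls every workload flavor the test cases may
+		// have deployed: cronjob, daemonset, deployment, job, replicaset and statefulset.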
+ RemoveAllTestApplications(applicationUnderTestNamespace) - AfterAll(func() { + DeleteTestIdFiles() + }) + + Describe("controller", func() { + AfterEach(func() { + UndeployDash0Resource(applicationUnderTestNamespace) UndeployOperator(operatorNamespace) }) - It("should modify new deployments", func() { - By("installing the Node.js deployment") - Expect(InstallNodeJsDeployment(applicationUnderTestNamespace)).To(Succeed()) - By("verifying that the Node.js deployment has been instrumented by the webhook") - SendRequestsAndVerifySpansHaveBeenProduced(applicationUnderTestNamespace) + DescribeTable( + "when instrumenting existing resources", + func( + resourceType string, + installResource func(string) error, + sendRequests bool, + restartPodsManually bool, + ) { + By(fmt.Sprintf("installing the Node.js %s", resourceType)) + Expect(installResource(applicationUnderTestNamespace)).To(Succeed()) + By("deploy the operator and the Dash0 custom resource") + DeployOperator(operatorNamespace, operatorImage) + DeployDash0Resource(applicationUnderTestNamespace) + By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the controller", resourceType)) + VerifyThatSpansAreCaptured( + applicationUnderTestNamespace, + resourceType, + sendRequests, + restartPodsManually, + "controller", + ) + }, + Entry("should modify existing cron jobs", "cronjob", InstallNodeJsCronJob, false, false), + Entry("should modify existing daemon sets", "daemonset", InstallNodeJsDaemonSet, true, false), + Entry("should modify existing deployments", "deployment", InstallNodeJsDeployment, true, false), + Entry("should modify existing replica set", "replicaset", InstallNodeJsReplicaSet, true, true), + Entry("should modify existing stateful set", "statefulset", InstallNodeJsStatefulSet, true, false), + ) + + Describe("when it detects existing immutable jobs", func() { + It("should label them accordingly", func() { + By("installing the Node.js job") + Expect(InstallNodeJsJob(applicationUnderTestNamespace)).To(Succeed()) + By("deploy the operator and the Dash0 custom resource") + DeployOperator(operatorNamespace, operatorImage) + DeployDash0Resource(applicationUnderTestNamespace) + By("verifying that the Node.js job has been labelled by the controller") + Eventually(func(g Gomega) { + verifyLabels(g, applicationUnderTestNamespace, "job", false, "controller") + }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) + }) }) }) - Context("the Dash0 operator controller", func() { + Describe("webhook", func() { + + BeforeAll(func() { + DeployOperator(operatorNamespace, operatorImage) + + // Deliberately not deploying the Dash0 custom resource here: as of now, the webhook does not rely on the + // presence of the Dash0 resource. This is subject to change though. (If changed, it also needs to be + // undeployed in the AfterAll hook.) + // + // DeployDash0Resource(applicationUnderTestNamespace) + + fmt.Fprint(GinkgoWriter, "waiting 10 seconds to give the webhook some time to get ready\n") + time.Sleep(10 * time.Second) + }) + AfterAll(func() { - UndeployDash0Resource(applicationUnderTestNamespace) + // See comment in BeforeAll hook. 
+ // UndeployDash0Resource(applicationUnderTestNamespace) UndeployOperator(operatorNamespace) }) - It("should modify existing deployments", func() { - By("installing the Node.js deployment") - Expect(InstallNodeJsDeployment(applicationUnderTestNamespace)).To(Succeed()) - DeployOperator(operatorNamespace, operatorImage) - DeployDash0Resource(applicationUnderTestNamespace) - By("verifying that the Node.js deployment has been instrumented by the controller") - SendRequestsAndVerifySpansHaveBeenProduced(applicationUnderTestNamespace) - }) + DescribeTable( + "when instrumenting new resources", + func( + resourceType string, + installResource func(string) error, + sendRequests bool, + ) { + By(fmt.Sprintf("installing the Node.js %s", resourceType)) + Expect(installResource(applicationUnderTestNamespace)).To(Succeed()) + By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the webhook", resourceType)) + VerifyThatSpansAreCaptured(applicationUnderTestNamespace, resourceType, sendRequests, false, "webhook") + }, + Entry("should modify new cron jobs", "cronjob", InstallNodeJsCronJob, false), + Entry("should modify new daemon sets", "daemonset", InstallNodeJsDaemonSet, true), + Entry("should modify new deployments", "deployment", InstallNodeJsDeployment, true), + Entry("should modify new jobs", "job", InstallNodeJsJob, false), + Entry("should modify new replica sets", "replicaset", InstallNodeJsReplicaSet, true), + Entry("should modify new stateful sets", "statefulset", InstallNodeJsStatefulSet, true), + ) }) }) diff --git a/test/util/helpers.go b/test/util/helpers.go index fadc7978..7fb21105 100644 --- a/test/util/helpers.go +++ b/test/util/helpers.go @@ -7,9 +7,10 @@ import ( "context" "fmt" - "github.com/dash0hq/dash0-operator/api/v1alpha1" - "github.com/onsi/gomega" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/dash0hq/dash0-operator/internal/util" + . 
"github.com/onsi/gomega" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" ) @@ -20,15 +21,62 @@ func VerifySuccessEvent( resourceName string, eventSource string, ) { - allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, v1.ListOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(allEvents.Items).To(gomega.HaveLen(1)) - gomega.Expect(allEvents.Items).To( - gomega.ContainElement( + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonSuccessfulInstrumentation, + fmt.Sprintf("Dash0 instrumentation by %s has been successful.", eventSource), + ) +} + +func VerifyFailureEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + eventSource string, + message string, +) { + verifyEvent( + ctx, + clientset, + namespace, + resourceName, + util.ReasonFailedInstrumentation, + message, + ) +} + +func verifyEvent( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + resourceName string, + reason util.Reason, + message string, +) { + allEvents, err := clientset.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(allEvents.Items).To(HaveLen(1)) + Expect(allEvents.Items).To( + ContainElement( MatchEvent( namespace, resourceName, - v1alpha1.ReasonSuccessfulInstrumentation, - fmt.Sprintf("Dash0 instrumentation by %s has been successful.", eventSource), + reason, + message, ))) } + +func DeleteAllEvents( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, +) { + err := clientset.CoreV1().Events(namespace).DeleteCollection(ctx, metav1.DeleteOptions{ + GracePeriodSeconds: new(int64), // delete immediately + }, metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) +} diff --git a/test/util/matchers.go b/test/util/matchers.go index 013791b9..7872b5a5 100644 --- a/test/util/matchers.go +++ b/test/util/matchers.go @@ -7,11 +7,10 @@ import ( "fmt" "strings" + "github.com/dash0hq/dash0-operator/internal/util" "github.com/onsi/gomega" "github.com/onsi/gomega/format" corev1 "k8s.io/api/core/v1" - - "github.com/dash0hq/dash0-operator/api/v1alpha1" ) func MatchEnvVar(name string, value string, args ...interface{}) gomega.OmegaMatcher { @@ -87,7 +86,7 @@ func (matcher *MatchVolumeMountMatcher) NegatedFailureMessage(actual interface{} func MatchEvent( namespace string, resourceName string, - reason v1alpha1.Reason, + reason util.Reason, message string, args ...interface{}, ) gomega.OmegaMatcher { diff --git a/test/util/resources.go b/test/util/resources.go index 810d0515..e0b5c168 100644 --- a/test/util/resources.go +++ b/test/util/resources.go @@ -7,8 +7,8 @@ import ( "context" . 
"github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -17,12 +17,16 @@ import ( ) const ( - TestNamespaceName = "test-namespace" - DeploymentName = "deployment" - DeploymentNameExisting = "existing-deployment" + TestNamespaceName = "test-namespace" + CronJobName = "cronjob" + DaemonSetName = "daemonset" + DeploymentName = "deployment" + JobName = "job" + ReplicaSetName = "replicaset" + StatefulSetName = "statefulset" ) -func EnsureResourceExists( +func EnsureDash0CustomResourceExists( ctx context.Context, k8sClient client.Client, qualifiedName types.NamespacedName, @@ -59,7 +63,7 @@ func EnsureTestNamespaceExists( k8sClient client.Client, name string, ) *corev1.Namespace { - object := EnsureResourceExists( + object := EnsureDash0CustomResourceExists( ctx, k8sClient, types.NamespacedName{Name: name}, @@ -69,20 +73,53 @@ func EnsureTestNamespaceExists( return object.(*corev1.Namespace) } +func BasicCronJob(namespace string, name string) *batchv1.CronJob { + resource := &batchv1.CronJob{} + resource.Namespace = namespace + resource.Name = name + resource.Spec = batchv1.CronJobSpec{} + resource.Spec.Schedule = "*/1 * * * *" + resource.Spec.JobTemplate.Spec.Template = basicPodSpecTemplate() + resource.Spec.JobTemplate.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever + return resource +} + +func CreateBasicCronJob( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *batchv1.CronJob { + return create(ctx, k8sClient, BasicCronJob(namespace, name)).(*batchv1.CronJob) +} + +func BasicDaemonSet(namespace string, name string) *appsv1.DaemonSet { + resource := &appsv1.DaemonSet{} + resource.Namespace = namespace + resource.Name = name + resource.Spec = appsv1.DaemonSetSpec{} + resource.Spec.Template = basicPodSpecTemplate() + resource.Spec.Selector = createSelector() + return resource +} + +func CreateBasicDaemonSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.DaemonSet { + return create(ctx, k8sClient, BasicDaemonSet(namespace, name)).(*appsv1.DaemonSet) +} + func BasicDeployment(namespace string, name string) *appsv1.Deployment { - deployment := &appsv1.Deployment{} - deployment.Namespace = namespace - deployment.Name = name - deployment.Spec = appsv1.DeploymentSpec{} - deployment.Spec.Template = corev1.PodTemplateSpec{} - deployment.Spec.Template.Labels = map[string]string{"app": "test"} - deployment.Spec.Template.Spec.Containers = []corev1.Container{{ - Name: "test-container-0", - Image: "ubuntu", - }} - deployment.Spec.Selector = &metav1.LabelSelector{} - deployment.Spec.Selector.MatchLabels = map[string]string{"app": "test"} - return deployment + resource := &appsv1.Deployment{} + resource.Namespace = namespace + resource.Name = name + resource.Spec = appsv1.DeploymentSpec{} + resource.Spec.Template = basicPodSpecTemplate() + resource.Spec.Selector = createSelector() + return resource } func CreateBasicDeployment( @@ -91,14 +128,114 @@ func CreateBasicDeployment( namespace string, name string, ) *appsv1.Deployment { - deployment := BasicDeployment(namespace, name) - Expect(k8sClient.Create(ctx, deployment)).Should(Succeed()) - return deployment + return create(ctx, k8sClient, BasicDeployment(namespace, name)).(*appsv1.Deployment) +} + +func BasicJob(namespace string, name string) *batchv1.Job { + resource := &batchv1.Job{} + resource.Namespace = 
namespace + resource.Name = name + resource.Spec = batchv1.JobSpec{} + resource.Spec.Template = basicPodSpecTemplate() + resource.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever + return resource +} + +func CreateBasicJob( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *batchv1.Job { + return create(ctx, k8sClient, BasicJob(namespace, name)).(*batchv1.Job) +} + +func BasicReplicaSet(namespace string, name string) *appsv1.ReplicaSet { + resource := &appsv1.ReplicaSet{} + resource.Namespace = namespace + resource.Name = name + resource.Spec = appsv1.ReplicaSetSpec{} + resource.Spec.Template = basicPodSpecTemplate() + resource.Spec.Selector = createSelector() + return resource +} + +func CreateBasicReplicaSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.ReplicaSet { + return create(ctx, k8sClient, BasicReplicaSet(namespace, name)).(*appsv1.ReplicaSet) +} + +func ReplicaSetOwnedByDeployment(namespace string, name string) *appsv1.ReplicaSet { + resource := BasicReplicaSet(namespace, name) + resource.ObjectMeta = metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "deployment", + UID: "1234", + }}, + } + return resource +} + +func CreateReplicaSetOwnedByDeployment( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.ReplicaSet { + return create(ctx, k8sClient, ReplicaSetOwnedByDeployment(namespace, name)).(*appsv1.ReplicaSet) +} + +func BasicStatefulSet(namespace string, name string) *appsv1.StatefulSet { + resource := &appsv1.StatefulSet{} + resource.Namespace = namespace + resource.Name = name + resource.Spec = appsv1.StatefulSetSpec{} + resource.Spec.Template = basicPodSpecTemplate() + resource.Spec.Selector = createSelector() + return resource +} + +func CreateBasicStatefulSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.StatefulSet { + return create(ctx, k8sClient, BasicStatefulSet(namespace, name)).(*appsv1.StatefulSet) +} + +func basicPodSpecTemplate() corev1.PodTemplateSpec { + podSpecTemplate := corev1.PodTemplateSpec{} + podSpecTemplate.Labels = map[string]string{"app": "test"} + podSpecTemplate.Spec.Containers = []corev1.Container{{ + Name: "test-container-0", + Image: "ubuntu", + }} + return podSpecTemplate +} + +func createSelector() *metav1.LabelSelector { + selector := &metav1.LabelSelector{} + selector.MatchLabels = map[string]string{"app": "test"} + return selector +} + +func create(ctx context.Context, k8sClient client.Client, resource client.Object) client.Object { + Expect(k8sClient.Create(ctx, resource)).Should(Succeed()) + return resource } func DeploymentWithMoreBellsAndWhistles(namespace string, name string) *appsv1.Deployment { - deployment := BasicDeployment(namespace, name) - podSpec := &deployment.Spec.Template.Spec + resource := BasicDeployment(namespace, name) + podSpec := &resource.Spec.Template.Spec podSpec.Volumes = []corev1.Volume{ { Name: "test-volume-0", @@ -172,7 +309,7 @@ func DeploymentWithMoreBellsAndWhistles(namespace string, name string) *appsv1.D }, } - return deployment + return resource } func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1.Deployment { @@ -294,17 +431,92 @@ func DeploymentWithExistingDash0Artifacts(namespace string, name string) *appsv1 return deployment } +func GetCronJob( + ctx context.Context, + 
k8sClient client.Client, + namespace string, + name string, +) *batchv1.CronJob { + resource := &batchv1.CronJob{} + namespacedName := types.NamespacedName{ + Namespace: namespace, + Name: name, + } + ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) + return resource +} + +func GetDaemonSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.DaemonSet { + resource := &appsv1.DaemonSet{} + namespacedName := types.NamespacedName{ + Namespace: namespace, + Name: name, + } + ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) + return resource +} + func GetDeployment( ctx context.Context, k8sClient client.Client, namespace string, - deploymentName string, + name string, ) *appsv1.Deployment { - deployment := &appsv1.Deployment{} + resource := &appsv1.Deployment{} namespacedName := types.NamespacedName{ Namespace: namespace, - Name: deploymentName, + Name: name, } - ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, deployment)).Should(Succeed()) - return deployment + ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) + return resource +} + +func GetJob( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *batchv1.Job { + resource := &batchv1.Job{} + namespacedName := types.NamespacedName{ + Namespace: namespace, + Name: name, + } + ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) + return resource +} + +func GetReplicaSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.ReplicaSet { + resource := &appsv1.ReplicaSet{} + namespacedName := types.NamespacedName{ + Namespace: namespace, + Name: name, + } + ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) + return resource +} + +func GetStatefulSet( + ctx context.Context, + k8sClient client.Client, + namespace string, + name string, +) *appsv1.StatefulSet { + resource := &appsv1.StatefulSet{} + namespacedName := types.NamespacedName{ + Namespace: namespace, + Name: name, + } + ExpectWithOffset(1, k8sClient.Get(ctx, namespacedName, resource)).Should(Succeed()) + return resource } diff --git a/test/util/verification.go b/test/util/verification.go index 1d43f0ac..e43a6df2 100644 --- a/test/util/verification.go +++ b/test/util/verification.go @@ -7,7 +7,10 @@ import ( "fmt" . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -22,7 +25,7 @@ type ContainerExpectations struct { Dash0CollectorBaseUrlEnvVarExpectedValue string } -type DeploymentExpectations struct { +type PodSpecExpectations struct { Volumes int Dash0VolumeIdx int InitContainers int @@ -30,9 +33,73 @@ type DeploymentExpectations struct { Containers []ContainerExpectations } -func VerifyModifiedDeployment(deployment *appsv1.Deployment, expectations DeploymentExpectations) { - podSpec := deployment.Spec.Template.Spec +var ( + BasicPodSpecExpectations = PodSpecExpectations{ + Volumes: 1, + Dash0VolumeIdx: 0, + InitContainers: 1, + Dash0InitContainerIdx: 0, + Containers: []ContainerExpectations{{ + VolumeMounts: 1, + Dash0VolumeMountIdx: 0, + EnvVars: 2, + NodeOptionsEnvVarIdx: 0, + Dash0CollectorBaseUrlEnvVarIdx: 1, + Dash0CollectorBaseUrlEnvVarExpectedValue:// + "http://dash0-opentelemetry-collector-daemonset.test-namespace.svc.cluster.local:4318", + }}, + } +) + +func VerifyModifiedCronJob(resource *batchv1.CronJob, expectations PodSpecExpectations) { + verifyModifiedPodSpec(resource.Spec.JobTemplate.Spec.Template.Spec, expectations) + verifyLabelsAfterSuccessfulModification(resource.Spec.JobTemplate.Spec.Template.ObjectMeta) + verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) +} + +func VerifyModifiedDaemonSet(resource *appsv1.DaemonSet, expectations PodSpecExpectations) { + verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) + verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) +} + +func VerifyModifiedDeployment(resource *appsv1.Deployment, expectations PodSpecExpectations) { + verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) + verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) +} + +func VerifyModifiedJob(resource *batchv1.Job, expectations PodSpecExpectations) { + verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) + verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) +} + +func VerifyUnmodifiedJob(resource *batchv1.Job) { + verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyLabelsAfterFailureToModify(resource.ObjectMeta) +} + +func VerifyModifiedReplicaSet(resource *appsv1.ReplicaSet, expectations PodSpecExpectations) { + verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) + verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) +} +func VerifyUnmodifiedReplicaSet(resource *appsv1.ReplicaSet) { + verifyUnmodifiedPodSpec(resource.Spec.Template.Spec) + verifyNoDash0Labels(resource.Spec.Template.ObjectMeta) + verifyNoDash0Labels(resource.ObjectMeta) +} + +func VerifyModifiedStatefulSet(resource *appsv1.StatefulSet, expectations PodSpecExpectations) { + verifyModifiedPodSpec(resource.Spec.Template.Spec, expectations) + verifyLabelsAfterSuccessfulModification(resource.Spec.Template.ObjectMeta) + verifyLabelsAfterSuccessfulModification(resource.ObjectMeta) +} + +func verifyModifiedPodSpec(podSpec corev1.PodSpec, expectations PodSpecExpectations) { Expect(podSpec.Volumes).To(HaveLen(expectations.Volumes)) for i, volume 
:= range podSpec.Volumes { if i == expectations.Dash0VolumeIdx { @@ -95,13 +162,33 @@ func VerifyModifiedDeployment(deployment *appsv1.Deployment, expectations Deploy } } } +} - verifyLabels(deployment.Spec.Template.ObjectMeta) - verifyLabels(deployment.ObjectMeta) +func verifyUnmodifiedPodSpec(podSpec corev1.PodSpec) { + Expect(podSpec.Volumes).To(BeEmpty()) + Expect(podSpec.InitContainers).To(BeEmpty()) + Expect(podSpec.Containers).To(HaveLen(1)) + for i, container := range podSpec.Containers { + Expect(container.Name).To(Equal(fmt.Sprintf("test-container-%d", i))) + Expect(container.VolumeMounts).To(BeEmpty()) + Expect(container.Env).To(BeEmpty()) + } } -func verifyLabels(meta metav1.ObjectMeta) { +func verifyLabelsAfterSuccessfulModification(meta metav1.ObjectMeta) { Expect(meta.Labels["dash0.instrumented"]).To(Equal("true")) Expect(meta.Labels["dash0.operator.version"]).To(Equal("1.2.3")) Expect(meta.Labels["dash0.initcontainer.image.version"]).To(Equal("4.5.6")) } + +func verifyLabelsAfterFailureToModify(meta metav1.ObjectMeta) { + Expect(meta.Labels["dash0.instrumented"]).To(Equal("false")) + Expect(meta.Labels["dash0.operator.version"]).To(Equal("1.2.3")) + Expect(meta.Labels["dash0.initcontainer.image.version"]).To(Equal("4.5.6")) +} + +func verifyNoDash0Labels(meta metav1.ObjectMeta) { + Expect(meta.Labels["dash0.instrumented"]).To(Equal("")) + Expect(meta.Labels["dash0.operator.version"]).To(Equal("")) + Expect(meta.Labels["dash0.initcontainer.image.version"]).To(Equal("")) +}
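
Reviewer note, not part of the patch itself: a minimal usage sketch of how the new workload helpers and the PodSpecExpectations-based verification are meant to compose in a test. The test/util import path is inferred from the module path used elsewhere in this patch; k8sClient, the Ginkgo v2 wiring, and the step that actually triggers instrumentation are placeholders for whatever the individual suite provides.

// Usage sketch only. Assumes the package in test/util is importable as "util"
// and that k8sClient is initialized by the suite setup.
package example

import (
	"context"

	. "github.com/onsi/ginkgo/v2"

	"sigs.k8s.io/controller-runtime/pkg/client"

	"github.com/dash0hq/dash0-operator/test/util"
)

// assumed to be initialized in the suite's BeforeSuite
var k8sClient client.Client

var _ = Describe("instrumenting a cron job (sketch)", func() {
	It("adds the Dash0 init container, env vars and labels", func() {
		ctx := context.Background()

		// Create the workload with the new helper; the returned object is the
		// resource exactly as submitted, before any modification.
		util.CreateBasicCronJob(ctx, k8sClient, util.TestNamespaceName, util.CronJobName)

		// ... trigger the instrumentation under test here (webhook or
		// controller reconcile); intentionally omitted in this sketch ...

		// Re-read the resource and check volumes, init containers, env vars and
		// labels against the shared expectations introduced in this patch.
		cronJob := util.GetCronJob(ctx, k8sClient, util.TestNamespaceName, util.CronJobName)
		util.VerifyModifiedCronJob(cronJob, util.BasicPodSpecExpectations)
	})
})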
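
Likewise a sketch for the event helpers, under the same assumptions; the event source "webhook" and the failure message below are purely illustrative values, real tests pass whatever the component under test actually emits.

// Usage sketch only. Shows the intended flow around VerifySuccessEvent,
// VerifyFailureEvent and DeleteAllEvents; clientset is assumed to come from
// the suite setup.
package example

import (
	"context"

	"k8s.io/client-go/kubernetes"

	"github.com/dash0hq/dash0-operator/test/util"
)

func checkInstrumentationEvents(ctx context.Context, clientset *kubernetes.Clientset, failed bool) {
	if failed {
		// hypothetical event source and message, for illustration only
		util.VerifyFailureEvent(ctx, clientset, util.TestNamespaceName, util.JobName,
			"webhook", "Dash0 instrumentation by webhook has not been successful.")
	} else {
		util.VerifySuccessEvent(ctx, clientset, util.TestNamespaceName, util.JobName, "webhook")
	}

	// verifyEvent asserts that exactly one event exists in the namespace, so
	// clean up between test cases to keep that assumption valid.
	util.DeleteAllEvents(ctx, clientset, util.TestNamespaceName)
}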