From ef50d2bb63acb027f1866503e2fa75072775ec57 Mon Sep 17 00:00:00 2001 From: Bastian Krol Date: Wed, 29 May 2024 16:40:14 +0200 Subject: [PATCH] test(e2e): e2e test for reverting the instrumentation --- Makefile | 9 +- test/e2e/e2e_helpers.go | 230 +++++++++++++++++++++++++++---------- test/e2e/e2e_suite_test.go | 2 +- test/e2e/e2e_test.go | 133 +++++++++++++++------ 4 files changed, 271 insertions(+), 103 deletions(-) diff --git a/Makefile b/Makefile index 62add015..0193aa7e 100644 --- a/Makefile +++ b/Makefile @@ -115,10 +115,13 @@ vet: ## Run go vet against code. test: manifests generate fmt vet envtest ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out -# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. -.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. +# Invoking ginkgo via go run makes sure we use the version from go.mod and not a version installed globally, which +# would be used when simply running `ginkgo -v test/e2e`. An alternative would be to invoke ginkgo via go test, that +# is, `go test ./test/e2e/ -v -ginkgo.v`, but that would require us to manage go test's timeout (via the -timeout +# flag), and ginkgo's own timeout. +.PHONY: test-e2e test-e2e: - go test ./test/e2e/ -v -ginkgo.v + go run github.com/onsi/ginkgo/v2/ginkgo -v test/e2e GOLANGCI_LINT = $(shell pwd)/bin/golangci-lint GOLANGCI_LINT_VERSION ?= v1.54.2 diff --git a/test/e2e/e2e_helpers.go b/test/e2e/e2e_helpers.go index fb99b56c..49b08575 100644 --- a/test/e2e/e2e_helpers.go +++ b/test/e2e/e2e_helpers.go @@ -74,15 +74,25 @@ func RenderTemplates() { ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command("test-resources/bin/render-templates.sh"))).To(Succeed()) } -func EnsureNamespaceExists(namespace string) bool { - err := RunAndIgnoreOutput(exec.Command("kubectl", "get", "ns", namespace), false) +func RecreateNamespace(namespace string) { + By(fmt.Sprintf("(re)creating namespace %s", namespace)) + output, err := Run(exec.Command("kubectl", "get", "ns", namespace)) if err != nil { - By(fmt.Sprintf("creating namespace %s", namespace)) + if strings.Contains(string(output), "(NotFound)") { + // The namespace does not exist, that's fine, we will create it further down. + } else { + Fail(fmt.Sprintf("kubectl get ns %s failed with unexpected error: %v", namespace, err)) + } + } else { ExpectWithOffset(1, - RunAndIgnoreOutput(exec.Command("kubectl", "create", "ns", namespace))).To(Succeed()) - return true + RunAndIgnoreOutput(exec.Command("kubectl", "delete", "ns", namespace))).To(Succeed()) + ExpectWithOffset(1, + RunAndIgnoreOutput( + exec.Command("kubectl", "wait", "--for=delete", "ns", namespace, "--timeout=60s"))).To(Succeed()) } - return false + + ExpectWithOffset(1, + RunAndIgnoreOutput(exec.Command("kubectl", "create", "ns", namespace))).To(Succeed()) } func EnsureCertManagerIsInstalled() bool { @@ -356,9 +366,28 @@ func DeployDash0Resource(namespace string) { } func UndeployDash0Resource(namespace string) { + // remove the finalizer from the resource to allow immediate deletion + _ = RunAndIgnoreOutput(exec.Command( + "kubectl", + "patch", + "dash0/dash0-sample", + "--namespace", + namespace, + "--type", + "json", + "--patch='[{\"op\":\"remove\",\"path\":\"/metadata/finalizers\"}]'", + )) + // remove the resource ExpectWithOffset(1, RunAndIgnoreOutput(exec.Command( - "kubectl", "delete", "-n", namespace, "-k", "config/samples"))).To(Succeed()) + "kubectl", + "delete", + "--namespace", + namespace, + "-k", + "config/samples", + "--ignore-not-found", + ))).To(Succeed()) } func InstallNodeJsCronJob(namespace string) error { @@ -525,17 +554,36 @@ func DeleteTestIdFiles() { _ = os.Remove("test-resources/e2e-test-volumes/test-uuid/job.test.id") } -func VerifyThatSpansAreCaptured( +func VerifyThatWorkloadHasBeenInstrumented( namespace string, - kind string, - sendRequests bool, + workloadType string, + isBatch bool, restartPodsManually bool, instrumentationBy string, -) { - By("verify that the workload has been instrumented and is sending telemetry") +) string { + By("waiting for the workload to get instrumented (polling its labels to check)") + Eventually(func(g Gomega) { + verifyLabels(g, namespace, workloadType, true, instrumentationBy) + }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) + + if restartPodsManually { + restartAllPods(namespace) + } + By("waiting for spans to be captured") var testId string - if sendRequests { + if isBatch { + By(fmt.Sprintf("waiting for the test ID file to be written by the %s under test", workloadType)) + Eventually(func(g Gomega) { + // For resource types like batch jobs/cron jobs, the application under test generates the test ID and writes it + // to a volume that maps to a host path. We read the test ID from the host path and use it to verify the spans. + testIdBytes, err := os.ReadFile(fmt.Sprintf("test-resources/e2e-test-volumes/test-uuid/%s.test.id", workloadType)) + g.Expect(err).NotTo(HaveOccurred()) + testId = string(testIdBytes) + // Also, cronjob pods are only scheduled once a minute, so we might need to wait a while for the ID to + // become available, hence the 80 second timeout for the surrounding Eventually. + }, 80*time.Second, 200*time.Millisecond).Should(Succeed()) + } else { // For resource types that are available as a service (daemonset, deployment etc.) we send HTTP requests with // a unique ID as a query parameter. When checking the produced spans that the OTel collector writes to disk via // the file exporter, we can verify that the span is actually from the currently running test case by inspecting @@ -543,33 +591,86 @@ func VerifyThatSpansAreCaptured( // a previous test case. testIdUuid := uuid.New() testId = testIdUuid.String() - } else { - By(fmt.Sprintf("waiting for the test ID file to be written by the %s under test", kind)) - Eventually(func(g Gomega) { - // For resource types like batch jobs/cron jobs, the application under test generates the test ID and writes it - // to a volume that maps to a host path. We read the test ID from the host path and use it to verify the spans. - testIdBytes, err := os.ReadFile(fmt.Sprintf("test-resources/e2e-test-volumes/test-uuid/%s.test.id", kind)) - g.Expect(err).NotTo(HaveOccurred()) - testId = string(testIdBytes) - }, 80*time.Second, 200*time.Millisecond).Should(Succeed()) } httpPathWithQuery := fmt.Sprintf("/dash0-k8s-operator-test?id=%s", testId) + Eventually(func(g Gomega) { + verifySpans(g, isBatch, httpPathWithQuery) + }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) + By("matchin spans have been received") + return testId +} - By("waiting for the workload to be modified/checking labels") +func VerifyThatInstrumentationHasBeenReverted( + namespace string, + workloadType string, + isBatch bool, + restartPodsManually bool, + testId string, +) { + By("waiting for the instrumentation to get removed from the workload (polling its labels to check)") Eventually(func(g Gomega) { - verifyLabels(g, namespace, kind, true, instrumentationBy) + verifyLabelsHaveBeenRemoved(g, namespace, workloadType) }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) if restartPodsManually { restartAllPods(namespace) } - By("waiting for spans to be captured") - Eventually(func(g Gomega) { - verifySpans(g, sendRequests, httpPathWithQuery) - }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) - By("matchin spans have been received") + // Add some buffer time between the workloads being restarted and verifying that no spans are produced/captured. + time.Sleep(10 * time.Second) + + secondsToCheckForSpans := 20 + if workloadType == "cronjob" { + // Pod for cron jobs only get scheduled once a minute, since the cronjob schedule format does not allow for jobs + // starting every second. Thus, to make the test valid, we need to monitor for spans a little bit longer than + // for appsv1 workloads. + secondsToCheckForSpans = 80 + } + httpPathWithQuery := fmt.Sprintf("/dash0-k8s-operator-test?id=%s", testId) + By(fmt.Sprintf("verifying that spans are no longer being captured (checking for %d seconds)", secondsToCheckForSpans)) + Consistently(func(g Gomega) { + verifyNoSpans(isBatch, httpPathWithQuery) + }, time.Duration(secondsToCheckForSpans)*time.Second, 1*time.Second).Should(Succeed()) + + By("matching spans are no longer captured") +} + +func verifyLabels(g Gomega, namespace string, kind string, hasBeenInstrumented bool, instrumentationBy string) { + instrumented := readLabel(g, namespace, kind, "dash0.instrumented") + g.ExpectWithOffset(1, instrumented).To(Equal(strconv.FormatBool(hasBeenInstrumented))) + operatorVersion := readLabel(g, namespace, kind, "dash0.operator.version") + g.ExpectWithOffset(1, operatorVersion).To(MatchRegexp("\\d+\\.\\d+\\.\\d+")) + initContainerImageVersion := readLabel(g, namespace, kind, "dash0.initcontainer.image.version") + g.ExpectWithOffset(1, initContainerImageVersion).To(MatchRegexp("\\d+\\.\\d+\\.\\d+")) + instrumentedBy := readLabel(g, namespace, kind, "dash0.instrumented.by") + g.ExpectWithOffset(1, instrumentedBy).To(Equal(instrumentationBy)) +} + +func verifyLabelsHaveBeenRemoved(g Gomega, namespace string, kind string) { + instrumented := readLabel(g, namespace, kind, "dash0.instrumented") + g.ExpectWithOffset(1, instrumented).To(Equal("")) + operatorVersion := readLabel(g, namespace, kind, "dash0.operator.version") + g.ExpectWithOffset(1, operatorVersion).To(Equal("")) + initContainerImageVersion := readLabel(g, namespace, kind, "dash0.initcontainer.image.version") + g.ExpectWithOffset(1, initContainerImageVersion).To(Equal("")) + instrumentedBy := readLabel(g, namespace, kind, "dash0.instrumented.by") + g.ExpectWithOffset(1, instrumentedBy).To(Equal("")) +} + +func readLabel(g Gomega, namespace string, kind string, labelKey string) string { + labelValue, err := Run(exec.Command( + "kubectl", + "get", + kind, + "--namespace", + namespace, + fmt.Sprintf("dash0-operator-nodejs-20-express-test-%s", kind), + "-o", + fmt.Sprintf("jsonpath={.metadata.labels['%s']}", strings.ReplaceAll(labelKey, ".", "\\.")), + ), false) + g.ExpectWithOffset(1, err).NotTo(HaveOccurred()) + return string(labelValue) } func restartAllPods(namespace string) { @@ -591,13 +692,40 @@ func restartAllPods(namespace string) { } -func verifySpans(g Gomega, sendRequests bool, httpPathWithQuery string) { - if sendRequests { +func verifySpans(g Gomega, isBatch bool, httpPathWithQuery string) { + spansFound := sendRequestAndFindMatchingSpans(g, isBatch, httpPathWithQuery, true, nil) + g.Expect(spansFound).To(BeTrue(), "expected to find at least one matching HTTP server span") +} + +func verifyNoSpans(isBatch bool, httpPathWithQuery string) { + timestampLowerBound := time.Now() + spansFound := sendRequestAndFindMatchingSpans(Default, isBatch, httpPathWithQuery, false, ×tampLowerBound) + Expect(spansFound).To(BeFalse(), "expected to find no matching HTTP server span") +} + +func sendRequestAndFindMatchingSpans( + g Gomega, + isBatch bool, + httpPathWithQuery string, + requestsMustNotFail bool, + timestampLowerBound *time.Time, +) bool { + sendRequest(g, isBatch, httpPathWithQuery, requestsMustNotFail) + return fileHasMatchingSpan(g, httpPathWithQuery, timestampLowerBound) +} + +func sendRequest(g Gomega, isBatch bool, httpPathWithQuery string, mustNotFail bool) { + if !isBatch { response, err := Run(exec.Command("curl", fmt.Sprintf("http://localhost:1207%s", httpPathWithQuery)), false) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(string(response)).To(ContainSubstring( - "We make Observability easy for every developer.")) + if mustNotFail { + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(string(response)).To(ContainSubstring( + "We make Observability easy for every developer.")) + } } +} + +func fileHasMatchingSpan(g Gomega, httpPathWithQuery string, timestampLowerBound *time.Time) bool { fileHandle, err := os.Open("test-resources/e2e-test-volumes/collector-received-data/traces.jsonl") g.Expect(err).NotTo(HaveOccurred()) defer func() { @@ -618,48 +746,26 @@ func verifySpans(g Gomega, sendRequests bool, httpPathWithQuery string) { if spansFound = hasMatchingSpans( traces, isHttpServerSpanWithHttpTarget(httpPathWithQuery), + timestampLowerBound, ); spansFound { break } } - g.Expect(scanner.Err()).NotTo(HaveOccurred()) - g.Expect(spansFound).To(BeTrue(), "expected to find an HTTP server span") -} -func verifyLabels(g Gomega, namespace string, kind string, hasBeenInstrumented bool, instrumentationBy string) { - instrumented := readLabel(g, namespace, kind, "dash0.instrumented") - g.ExpectWithOffset(1, instrumented).To(Equal(strconv.FormatBool(hasBeenInstrumented))) - operatorVersion := readLabel(g, namespace, kind, "dash0.operator.version") - g.ExpectWithOffset(1, operatorVersion).To(MatchRegexp("\\d+\\.\\d+\\.\\d+")) - initContainerImageVersion := readLabel(g, namespace, kind, "dash0.initcontainer.image.version") - g.ExpectWithOffset(1, initContainerImageVersion).To(MatchRegexp("\\d+\\.\\d+\\.\\d+")) - instrumentedBy := readLabel(g, namespace, kind, "dash0.instrumented.by") - g.ExpectWithOffset(1, instrumentedBy).To(Equal(instrumentationBy)) -} + g.Expect(scanner.Err()).NotTo(HaveOccurred()) -func readLabel(g Gomega, namespace string, kind string, labelKey string) string { - labelValue, err := Run(exec.Command( - "kubectl", - "get", - kind, - "--namespace", - namespace, - fmt.Sprintf("dash0-operator-nodejs-20-express-test-%s", kind), - "-o", - fmt.Sprintf("jsonpath={.metadata.labels['%s']}", strings.ReplaceAll(labelKey, ".", "\\.")), - ), false) - g.ExpectWithOffset(1, err).NotTo(HaveOccurred()) - return string(labelValue) + return spansFound } -func hasMatchingSpans(traces ptrace.Traces, matchFn func(span ptrace.Span) bool) bool { +func hasMatchingSpans(traces ptrace.Traces, matchFn func(span ptrace.Span) bool, timestampLowerBound *time.Time) bool { for i := 0; i < traces.ResourceSpans().Len(); i++ { resourceSpan := traces.ResourceSpans().At(i) for j := 0; j < resourceSpan.ScopeSpans().Len(); j++ { scopeSpan := resourceSpan.ScopeSpans().At(j) for k := 0; k < scopeSpan.Spans().Len(); k++ { span := scopeSpan.Spans().At(k) - if matchFn(span) { + if (timestampLowerBound == nil || span.StartTimestamp().AsTime().After(*timestampLowerBound)) && + matchFn(span) { return true } } diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index c78125f9..9117accf 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -15,5 +15,5 @@ import ( func TestE2E(t *testing.T) { RegisterFailHandler(Fail) fmt.Fprint(GinkgoWriter, "Starting dash0-operator suite\n") - RunSpecs(t, "e2e suite") + RunSpecs(t, "Dash0 Kubernetes operator end-to-end test suite") } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index e0292394..cd9e9b3a 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -26,8 +26,7 @@ const ( ) var ( - applicationNamespaceHasBeenCreated = false - certManagerHasBeenInstalled = false + certManagerHasBeenInstalled = false originalKubeContext string managerYamlNeedsRevert bool @@ -85,7 +84,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { certManagerHasBeenInstalled = EnsureCertManagerIsInstalled() - applicationNamespaceHasBeenCreated = EnsureNamespaceExists(applicationUnderTestNamespace) + RecreateNamespace(applicationUnderTestNamespace) By("(re)installing the collector") ExpectWithOffset(1, ReinstallCollectorAndClearExportedTelemetry(applicationUnderTestNamespace)).To(Succeed()) @@ -126,7 +125,7 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { _ = RunAndIgnoreOutput(exec.Command("kubectl", "delete", "ns", operatorNamespace)) } - if applicationNamespaceHasBeenCreated && applicationUnderTestNamespace != "default" { + if applicationUnderTestNamespace != "default" { By("removing namespace for application under test") _ = RunAndIgnoreOutput(exec.Command("kubectl", "delete", "ns", applicationUnderTestNamespace)) } @@ -163,33 +162,63 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { UndeployOperator(operatorNamespace) }) + type controllerTest struct { + workloadType string + installWorkload func(string) error + isBatch bool + restartPodsManually bool + } + DescribeTable( "when instrumenting existing workloads", - func( - resourceType string, - installResource func(string) error, - sendRequests bool, - restartPodsManually bool, - ) { - By(fmt.Sprintf("installing the Node.js %s", resourceType)) - Expect(installResource(applicationUnderTestNamespace)).To(Succeed()) + func(config controllerTest) { + By(fmt.Sprintf("installing the Node.js %s", config.workloadType)) + Expect(config.installWorkload(applicationUnderTestNamespace)).To(Succeed()) By("deploy the operator and the Dash0 custom resource") + DeployOperator(operatorNamespace, operatorImage) DeployDash0Resource(applicationUnderTestNamespace) - By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the controller", resourceType)) - VerifyThatSpansAreCaptured( + By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the controller", config.workloadType)) + testId := VerifyThatWorkloadHasBeenInstrumented( applicationUnderTestNamespace, - resourceType, - sendRequests, - restartPodsManually, + config.workloadType, + config.isBatch, + config.restartPodsManually, "controller", ) + + UndeployDash0Resource(applicationUnderTestNamespace) + + VerifyThatInstrumentationHasBeenReverted( + applicationUnderTestNamespace, + config.workloadType, + config.isBatch, + config.restartPodsManually, + testId, + ) }, - Entry("should modify existing cron jobs", "cronjob", InstallNodeJsCronJob, false, false), - Entry("should modify existing daemon sets", "daemonset", InstallNodeJsDaemonSet, true, false), - Entry("should modify existing deployments", "deployment", InstallNodeJsDeployment, true, false), - Entry("should modify existing replica set", "replicaset", InstallNodeJsReplicaSet, true, true), - Entry("should modify existing stateful set", "statefulset", InstallNodeJsStatefulSet, true, false), + Entry("should instrument and uninstrument existing cron jobs", controllerTest{ + workloadType: "cronjob", + installWorkload: InstallNodeJsCronJob, + isBatch: true, + }), + Entry("should instrument and uninstrument existing daemon sets", controllerTest{ + workloadType: "daemonset", + installWorkload: InstallNodeJsDaemonSet, + }), + Entry("should instrument and uninstrument existing deployments", controllerTest{ + workloadType: "deployment", + installWorkload: InstallNodeJsDeployment, + }), + Entry("should instrument and uninstrument existing replica set", controllerTest{ + workloadType: "replicaset", + installWorkload: InstallNodeJsReplicaSet, + restartPodsManually: true, + }), + Entry("should instrument and uninstrument existing stateful set", controllerTest{ + workloadType: "statefulset", + installWorkload: InstallNodeJsStatefulSet, + }), ) Describe("when it detects existing immutable jobs", func() { @@ -203,6 +232,8 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { Eventually(func(g Gomega) { verifyLabels(g, applicationUnderTestNamespace, "job", false, "controller") }, verifyTelemetryTimeout, verifyTelemetryPollingInterval).Should(Succeed()) + + UndeployDash0Resource(applicationUnderTestNamespace) }) }) }) @@ -228,24 +259,52 @@ var _ = Describe("Dash0 Kubernetes Operator", Ordered, func() { UndeployOperator(operatorNamespace) }) + type webhookTest struct { + workloadType string + installWorkload func(string) error + isBatch bool + } + DescribeTable( "when instrumenting new workloads", - func( - resourceType string, - installResource func(string) error, - sendRequests bool, - ) { - By(fmt.Sprintf("installing the Node.js %s", resourceType)) - Expect(installResource(applicationUnderTestNamespace)).To(Succeed()) - By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the webhook", resourceType)) - VerifyThatSpansAreCaptured(applicationUnderTestNamespace, resourceType, sendRequests, false, "webhook") + func(config webhookTest) { + By(fmt.Sprintf("installing the Node.js %s", config.workloadType)) + Expect(config.installWorkload(applicationUnderTestNamespace)).To(Succeed()) + By(fmt.Sprintf("verifying that the Node.js %s has been instrumented by the webhook", config.workloadType)) + VerifyThatWorkloadHasBeenInstrumented( + applicationUnderTestNamespace, + config.workloadType, + config.isBatch, + false, + "webhook", + ) }, - Entry("should modify new cron jobs", "cronjob", InstallNodeJsCronJob, false), - Entry("should modify new daemon sets", "daemonset", InstallNodeJsDaemonSet, true), - Entry("should modify new deployments", "deployment", InstallNodeJsDeployment, true), - Entry("should modify new jobs", "job", InstallNodeJsJob, false), - Entry("should modify new replica sets", "replicaset", InstallNodeJsReplicaSet, true), - Entry("should modify new stateful sets", "statefulset", InstallNodeJsStatefulSet, true), + Entry("should modify new cron jobs", webhookTest{ + workloadType: "cronjob", + installWorkload: InstallNodeJsCronJob, + isBatch: true, + }), + Entry("should modify new daemon sets", webhookTest{ + workloadType: "daemonset", + installWorkload: InstallNodeJsDaemonSet, + }), + Entry("should modify new deployments", webhookTest{ + workloadType: "deployment", + installWorkload: InstallNodeJsDeployment, + }), + Entry("should modify new jobs", webhookTest{ + workloadType: "job", + installWorkload: InstallNodeJsJob, + isBatch: true, + }), + Entry("should modify new replica sets", webhookTest{ + workloadType: "replicaset", + installWorkload: InstallNodeJsReplicaSet, + }), + Entry("should modify new stateful sets", webhookTest{ + workloadType: "statefulset", + installWorkload: InstallNodeJsStatefulSet, + }), ) }) })