Skip to content

Commit

Permalink
feat(webhook): instrument new ownerless pods
Browse files Browse the repository at this point in the history
  • Loading branch information
basti1302 committed Jun 18, 2024
1 parent a847b63 commit 209335e
Show file tree
Hide file tree
Showing 18 changed files with 460 additions and 79 deletions.
20 changes: 14 additions & 6 deletions config/webhook/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,22 @@ webhooks:
- replicasets
- statefulsets
- apiGroups:
- batch
- batch
apiVersions:
- v1
- v1
operations:
- CREATE
- UPDATE
- CREATE
- UPDATE
resources:
- cronjobs
- jobs
- apiGroups: [""]
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- cronjobs
- jobs
- pods
sideEffects: None
timeoutSeconds: 5
8 changes: 8 additions & 0 deletions helm-chart/dash0-operator/templates/operator/webhook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,13 @@ webhooks:
resources:
- cronjobs
- jobs
- apiGroups: [""]
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- pods
sideEffects: None
timeoutSeconds: 5
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,14 @@ webhook should match snapshot:
resources:
- cronjobs
- jobs
- apiGroups:
- ""
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- pods
sideEffects: None
timeoutSeconds: 5
10 changes: 10 additions & 0 deletions helm-chart/dash0-operator/tests/update-snapshots.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

cd "$(dirname ${BASH_SOURCE})"/..

helm unittest -f 'tests/**/*.yaml' --update-snapshot .
2 changes: 1 addition & 1 deletion internal/controller/dash0_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ func (r *Dash0Reconciler) findAndInstrumentReplicaSets(
matchingWorkloadsInNamespace, err :=
r.ClientSet.AppsV1().ReplicaSets(namespace).List(ctx, util.WorkloadsWithoutDash0InstrumentedLabelFilter)
if err != nil {
return fmt.Errorf("error when querying deployments: %w", err)
return fmt.Errorf("error when querying replica sets: %w", err)
}
for _, resource := range matchingWorkloadsInNamespace.Items {
r.instrumentReplicaSet(ctx, resource, logger)
Expand Down
82 changes: 77 additions & 5 deletions internal/controller/dash0_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,35 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyImmutableJobCouldNotBeModified(GetJob(ctx, k8sClient, namespace, name))
})

It("should instrument an existing orphan replicaset", func() {
It("should not instrument an existing ownerless pod", func() {
name := UniqueName(PodNamePrefix)
By("Inititalize a pod")
pod := CreateBasicPod(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

triggerReconcileRequest(ctx, reconciler, "")

// We do not instrument existing pods via the controller, since they cannot be restarted.
// We only instrument new pods via the webhook.
verifyDash0ResourceIsAvailable(ctx)
VerifyNoEvents(ctx, clientset, namespace)
VerifyUnmodifiedPod(GetPod(ctx, k8sClient, namespace, name))
})

It("should not instrument an existing pod owned by a replicaset", func() {
name := UniqueName(PodNamePrefix)
By("Inititalize a pod")
pod := CreatePodOwnedByReplicaSet(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

triggerReconcileRequest(ctx, reconciler, "")

verifyDash0ResourceIsAvailable(ctx)
VerifyNoEvents(ctx, clientset, namespace)
VerifyUnmodifiedPod(GetPod(ctx, k8sClient, namespace, name))
})

It("should instrument an existing ownerless replicaset", func() {
name := UniqueName(ReplicaSetNamePrefix)
By("Inititalize a replicaset")
replicaSet := CreateBasicReplicaSet(ctx, k8sClient, namespace, name)
Expand Down Expand Up @@ -223,7 +251,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyJobWithOptOutLabel(GetJob(ctx, k8sClient, namespace, name))
})

It("should not instrument an existing orphan replicaset with the opt-out label", func() {
It("should not instrument an existing ownerless replicaset with the opt-out label", func() {
name := UniqueName(ReplicaSetNamePrefix)
By("Inititalize a replicaset")
replicaSet := CreateReplicaSetWithOptOutLabel(ctx, k8sClient, namespace, name)
Expand Down Expand Up @@ -373,7 +401,51 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyUnmodifiedJob(GetJob(ctx, k8sClient, namespace, name))
})

It("should revert an instrumented orphan replica set", func() {
It("should not revert an instrumented ownerless pod", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
// happens in production.
triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")

name := UniqueName(PodNamePrefix)
By("Create an instrumented pod")
pod := CreateInstrumentedPod(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

By("Queue the deletion of the Dash0 custom resource")
dash0CustomResource := LoadDash0CustomResourceOrFail(ctx, k8sClient, Default)
Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())

triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload")

VerifyNoEvents(ctx, clientset, namespace)
VerifyModifiedPod(GetPod(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations)
})

It("should leave existing uninstrumented pod owned by a replica set alone", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
// happens in production.
triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")

name := UniqueName(PodNamePrefix)
By("Create an instrumented pod owned by a deployment")
pod := CreatePodOwnedByReplicaSet(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

By("Queue the deletion of the Dash0 custom resource")
dash0CustomResource := LoadDash0CustomResourceOrFail(ctx, k8sClient, Default)
Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())

triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload")

VerifyNoEvents(ctx, clientset, namespace)
VerifyUnmodifiedPod(GetPod(ctx, k8sClient, namespace, name))
})

It("should revert an instrumented ownerless replica set", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
Expand All @@ -397,7 +469,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyWebhookIgnoreOnceLabelIsPresent(&replicaSet.ObjectMeta)
})

It("should leave existing uninstrumented replica sets owned by deployments alone", func() {
It("should leave existing uninstrumented replica sets owned by deployment alone", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
Expand Down Expand Up @@ -541,7 +613,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyWebhookIgnoreOnceLabelIsAbesent(&job.ObjectMeta)
})

It("should not attempt to revert an orphan replica set that has the opt-out label", func() {
It("should not attempt to revert an ownerless replica set that has the opt-out label", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
Expand Down
27 changes: 27 additions & 0 deletions internal/webhook/dash0_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -45,6 +46,11 @@ var (
decoder = scheme.Codecs.UniversalDecoder()

routes = routing{
"": {
"Pod": {
"v1": (*Handler).handlePod,
},
},
"batch": {
"CronJob": {
"v1": (*Handler).handleCronJob,
Expand Down Expand Up @@ -248,6 +254,27 @@ func (h *Handler) handleJob(
return h.postProcess(request, job, hasBeenModified, false, logger)
}

func (h *Handler) handlePod(
request admission.Request,
gvkLabel string,
logger *logr.Logger,
) admission.Response {
pod := &corev1.Pod{}
responseIfFailed, failed := h.preProcess(request, gvkLabel, pod)
if failed {
return responseIfFailed
}
if util.CheckAndDeleteIgnoreOnceLabel(&pod.ObjectMeta) {
return h.postProcess(request, pod, false, true, logger)
}
if util.HasOptedOutOfInstrumenationForWorkload(&pod.ObjectMeta) {
logger.Info(optOutAdmissionAllowedMessage)
return admission.Allowed(optOutAdmissionAllowedMessage)
}
hasBeenModified := h.newWorkloadModifier(logger).ModifyPod(pod)
return h.postProcess(request, pod, hasBeenModified, false, logger)
}

func (h *Handler) handleReplicaSet(
request admission.Request,
gvkLabel string,
Expand Down
52 changes: 46 additions & 6 deletions internal/webhook/dash0_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ var _ = Describe("The Dash0 webhook", func() {
})

It("should instrument a new basic daemon set", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(DaemonSetNamePrefix)
workload := CreateBasicDaemonSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetDaemonSet(ctx, k8sClient, TestNamespaceName, name)
Expand All @@ -132,16 +132,34 @@ var _ = Describe("The Dash0 webhook", func() {
})

It("should instrument a new basic job", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(JobNamePrefix)
workload := CreateBasicJob(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetJob(ctx, k8sClient, TestNamespaceName, name)
VerifyModifiedJob(workload, BasicInstrumentedPodSpecExpectations)
VerifySuccessfulInstrumentationEvent(ctx, clientset, TestNamespaceName, name, "webhook")
})

It("should instrument a new basic ownerless pod", func() {
name := UniqueName(PodNamePrefix)
workload := CreateBasicPod(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyModifiedPod(workload, BasicInstrumentedPodSpecExpectations)
VerifySuccessfulInstrumentationEvent(ctx, clientset, TestNamespaceName, name, "webhook")
})

It("should not instrument a new pod owned by a replica set", func() {
name := UniqueName(PodNamePrefix)
workload := CreatePodOwnedByReplicaSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyUnmodifiedPod(workload)
VerifyNoInstrumentationNecessaryEvent(ctx, clientset, TestNamespaceName, name, "webhook")
})

It("should instrument a new basic replica set", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(ReplicaSetNamePrefix)
workload := CreateBasicReplicaSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetReplicaSet(ctx, k8sClient, TestNamespaceName, name)
Expand All @@ -159,7 +177,7 @@ var _ = Describe("The Dash0 webhook", func() {
})

It("should instrument a new basic stateful set", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(StatefulSetNamePrefix)
workload := CreateBasicStatefulSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetStatefulSet(ctx, k8sClient, TestNamespaceName, name)
Expand Down Expand Up @@ -209,7 +227,17 @@ var _ = Describe("The Dash0 webhook", func() {
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an orphan replica set that has opted out of instrumentation", func() {
It("should not instrument an ownerless pod that has opted out of instrumentation", func() {
name := UniqueName(PodNamePrefix)
workload := PodWithOptOutLabel(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
CreateWorkload(ctx, k8sClient, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyPodWithOptOutLabel(workload)
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an ownerless replica set that has opted out of instrumentation", func() {
name := UniqueName(ReplicaSetNamePrefix)
workload := ReplicaSetWithOptOutLabel(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
Expand Down Expand Up @@ -279,7 +307,19 @@ var _ = Describe("The Dash0 webhook", func() {
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an orphan replica set that has the label, but remove the label", func() {
It("should not instrument an ownerless pod that has the label, but remove the label", func() {
name := UniqueName(PodNamePrefix)
workload := BasicPod(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
AddLabel(&workload.ObjectMeta, "dash0.com/webhook-ignore-once", "true")
CreateWorkload(ctx, k8sClient, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyUnmodifiedPod(workload)
VerifyWebhookIgnoreOnceLabelIsAbesent(&workload.ObjectMeta)
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an ownerless replica set that has the label, but remove the label", func() {
name := UniqueName(ReplicaSetNamePrefix)
workload := BasicReplicaSet(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
Expand Down
28 changes: 16 additions & 12 deletions internal/workloads/workload_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/dash0hq/dash0-operator/internal/util"
)
Expand Down Expand Up @@ -76,8 +77,19 @@ func (m *ResourceModifier) AddLabelsToImmutableJob(job *batchv1.Job) {
util.AddInstrumentationLabels(&job.ObjectMeta, false, m.instrumentationMetadata)
}

func (m *ResourceModifier) ModifyPod(pod *corev1.Pod) bool {
if m.hasOwnerReference(pod) {
return false
}
hasBeenModified := m.modifyPodSpec(&pod.Spec)
if hasBeenModified {
util.AddInstrumentationLabels(&pod.ObjectMeta, true, m.instrumentationMetadata)
}
return hasBeenModified
}

func (m *ResourceModifier) ModifyReplicaSet(replicaSet *appsv1.ReplicaSet) bool {
if m.hasDeploymentOwnerReference(replicaSet) {
if m.hasOwnerReference(replicaSet) {
return false
}
return m.modifyResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta)
Expand Down Expand Up @@ -304,7 +316,7 @@ func (m *ResourceModifier) RemoveLabelsFromImmutableJob(job *batchv1.Job) {
}

func (m *ResourceModifier) RevertReplicaSet(replicaSet *appsv1.ReplicaSet) bool {
if m.hasDeploymentOwnerReference(replicaSet) {
if m.hasOwnerReference(replicaSet) {
return false
}
return m.revertResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta)
Expand Down Expand Up @@ -413,14 +425,6 @@ func (m *ResourceModifier) removeEnvironmentVariable(container *corev1.Container
})
}

func (m *ResourceModifier) hasDeploymentOwnerReference(replicaSet *appsv1.ReplicaSet) bool {
ownerReferences := replicaSet.ObjectMeta.OwnerReferences
if len(ownerReferences) > 0 {
for _, ownerReference := range ownerReferences {
if ownerReference.APIVersion == "apps/v1" && ownerReference.Kind == "Deployment" {
return true
}
}
}
return false
func (m *ResourceModifier) hasOwnerReference(workload client.Object) bool {
return len(workload.GetOwnerReferences()) > 0
}
Loading

0 comments on commit 209335e

Please sign in to comment.