Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(webhook): instrument new ownerless pods #23

Merged
merged 1 commit into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions config/webhook/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,22 @@ webhooks:
- replicasets
- statefulsets
- apiGroups:
- batch
- batch
apiVersions:
- v1
- v1
operations:
- CREATE
- UPDATE
- CREATE
- UPDATE
resources:
- cronjobs
- jobs
- apiGroups: [""]
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- cronjobs
- jobs
- pods
sideEffects: None
timeoutSeconds: 5
8 changes: 8 additions & 0 deletions helm-chart/dash0-operator/templates/operator/webhook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,13 @@ webhooks:
resources:
- cronjobs
- jobs
- apiGroups: [""]
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- pods
sideEffects: None
timeoutSeconds: 5
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,14 @@ webhook should match snapshot:
resources:
- cronjobs
- jobs
- apiGroups:
- ""
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- pods
sideEffects: None
timeoutSeconds: 5
10 changes: 10 additions & 0 deletions helm-chart/dash0-operator/tests/update-snapshots.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# SPDX-FileCopyrightText: Copyright 2024 Dash0 Inc.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

cd "$(dirname ${BASH_SOURCE})"/..

helm unittest -f 'tests/**/*.yaml' --update-snapshot .
2 changes: 1 addition & 1 deletion internal/controller/dash0_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ func (r *Dash0Reconciler) findAndInstrumentReplicaSets(
matchingWorkloadsInNamespace, err :=
r.ClientSet.AppsV1().ReplicaSets(namespace).List(ctx, util.WorkloadsWithoutDash0InstrumentedLabelFilter)
if err != nil {
return fmt.Errorf("error when querying deployments: %w", err)
return fmt.Errorf("error when querying replica sets: %w", err)
}
for _, resource := range matchingWorkloadsInNamespace.Items {
r.instrumentReplicaSet(ctx, resource, logger)
Expand Down
82 changes: 77 additions & 5 deletions internal/controller/dash0_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,35 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyImmutableJobCouldNotBeModified(GetJob(ctx, k8sClient, namespace, name))
})

It("should instrument an existing orphan replicaset", func() {
It("should not instrument an existing ownerless pod", func() {
name := UniqueName(PodNamePrefix)
By("Inititalize a pod")
pod := CreateBasicPod(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

triggerReconcileRequest(ctx, reconciler, "")

// We do not instrument existing pods via the controller, since they cannot be restarted.
// We only instrument new pods via the webhook.
verifyDash0ResourceIsAvailable(ctx)
VerifyNoEvents(ctx, clientset, namespace)
VerifyUnmodifiedPod(GetPod(ctx, k8sClient, namespace, name))
})

It("should not instrument an existing pod owned by a replicaset", func() {
name := UniqueName(PodNamePrefix)
By("Inititalize a pod")
pod := CreatePodOwnedByReplicaSet(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

triggerReconcileRequest(ctx, reconciler, "")

verifyDash0ResourceIsAvailable(ctx)
VerifyNoEvents(ctx, clientset, namespace)
VerifyUnmodifiedPod(GetPod(ctx, k8sClient, namespace, name))
})

It("should instrument an existing ownerless replicaset", func() {
name := UniqueName(ReplicaSetNamePrefix)
By("Inititalize a replicaset")
replicaSet := CreateBasicReplicaSet(ctx, k8sClient, namespace, name)
Expand Down Expand Up @@ -223,7 +251,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyJobWithOptOutLabel(GetJob(ctx, k8sClient, namespace, name))
})

It("should not instrument an existing orphan replicaset with the opt-out label", func() {
It("should not instrument an existing ownerless replicaset with the opt-out label", func() {
name := UniqueName(ReplicaSetNamePrefix)
By("Inititalize a replicaset")
replicaSet := CreateReplicaSetWithOptOutLabel(ctx, k8sClient, namespace, name)
Expand Down Expand Up @@ -373,7 +401,51 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyUnmodifiedJob(GetJob(ctx, k8sClient, namespace, name))
})

It("should revert an instrumented orphan replica set", func() {
It("should not revert an instrumented ownerless pod", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
// happens in production.
triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")

name := UniqueName(PodNamePrefix)
By("Create an instrumented pod")
pod := CreateInstrumentedPod(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

By("Queue the deletion of the Dash0 custom resource")
dash0CustomResource := LoadDash0CustomResourceOrFail(ctx, k8sClient, Default)
Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())

triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload")

VerifyNoEvents(ctx, clientset, namespace)
VerifyModifiedPod(GetPod(ctx, k8sClient, namespace, name), BasicInstrumentedPodSpecExpectations)
})

It("should leave existing uninstrumented pod owned by a replica set alone", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
// happens in production.
triggerReconcileRequest(ctx, reconciler, "Trigger first reconcile request")

name := UniqueName(PodNamePrefix)
By("Create an instrumented pod owned by a deployment")
pod := CreatePodOwnedByReplicaSet(ctx, k8sClient, namespace, name)
createdObjects = append(createdObjects, pod)

By("Queue the deletion of the Dash0 custom resource")
dash0CustomResource := LoadDash0CustomResourceOrFail(ctx, k8sClient, Default)
Expect(k8sClient.Delete(ctx, dash0CustomResource)).To(Succeed())

triggerReconcileRequest(ctx, reconciler, "Trigger a reconcile request to revert the instrumented workload")

VerifyNoEvents(ctx, clientset, namespace)
VerifyUnmodifiedPod(GetPod(ctx, k8sClient, namespace, name))
})

It("should revert an instrumented ownerless replica set", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
Expand All @@ -397,7 +469,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyWebhookIgnoreOnceLabelIsPresent(&replicaSet.ObjectMeta)
})

It("should leave existing uninstrumented replica sets owned by deployments alone", func() {
It("should leave existing uninstrumented replica sets owned by deployment alone", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
Expand Down Expand Up @@ -541,7 +613,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
VerifyWebhookIgnoreOnceLabelIsAbesent(&job.ObjectMeta)
})

It("should not attempt to revert an orphan replica set that has the opt-out label", func() {
It("should not attempt to revert an ownerless replica set that has the opt-out label", func() {
// We trigger one reconcile request before creating any workload and before deleting the Dash0 custom
// resource, just to get the `isFirstReconcile` logic out of the way and to add the finalizer.
// Alternatively, we could just add the finalizer here directly, but this approach is closer to what usually
Expand Down
27 changes: 27 additions & 0 deletions internal/webhook/dash0_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -45,6 +46,11 @@ var (
decoder = scheme.Codecs.UniversalDecoder()

routes = routing{
"": {
"Pod": {
"v1": (*Handler).handlePod,
},
},
"batch": {
"CronJob": {
"v1": (*Handler).handleCronJob,
Expand Down Expand Up @@ -248,6 +254,27 @@ func (h *Handler) handleJob(
return h.postProcess(request, job, hasBeenModified, false, logger)
}

func (h *Handler) handlePod(
request admission.Request,
gvkLabel string,
logger *logr.Logger,
) admission.Response {
pod := &corev1.Pod{}
responseIfFailed, failed := h.preProcess(request, gvkLabel, pod)
if failed {
return responseIfFailed
}
if util.CheckAndDeleteIgnoreOnceLabel(&pod.ObjectMeta) {
return h.postProcess(request, pod, false, true, logger)
}
if util.HasOptedOutOfInstrumenationForWorkload(&pod.ObjectMeta) {
logger.Info(optOutAdmissionAllowedMessage)
return admission.Allowed(optOutAdmissionAllowedMessage)
}
hasBeenModified := h.newWorkloadModifier(logger).ModifyPod(pod)
return h.postProcess(request, pod, hasBeenModified, false, logger)
}

func (h *Handler) handleReplicaSet(
request admission.Request,
gvkLabel string,
Expand Down
52 changes: 46 additions & 6 deletions internal/webhook/dash0_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ var _ = Describe("The Dash0 webhook", func() {
})

It("should instrument a new basic daemon set", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(DaemonSetNamePrefix)
workload := CreateBasicDaemonSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetDaemonSet(ctx, k8sClient, TestNamespaceName, name)
Expand All @@ -132,16 +132,34 @@ var _ = Describe("The Dash0 webhook", func() {
})

It("should instrument a new basic job", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(JobNamePrefix)
workload := CreateBasicJob(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetJob(ctx, k8sClient, TestNamespaceName, name)
VerifyModifiedJob(workload, BasicInstrumentedPodSpecExpectations)
VerifySuccessfulInstrumentationEvent(ctx, clientset, TestNamespaceName, name, "webhook")
})

It("should instrument a new basic ownerless pod", func() {
name := UniqueName(PodNamePrefix)
workload := CreateBasicPod(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyModifiedPod(workload, BasicInstrumentedPodSpecExpectations)
VerifySuccessfulInstrumentationEvent(ctx, clientset, TestNamespaceName, name, "webhook")
})

It("should not instrument a new pod owned by a replica set", func() {
name := UniqueName(PodNamePrefix)
workload := CreatePodOwnedByReplicaSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyUnmodifiedPod(workload)
VerifyNoInstrumentationNecessaryEvent(ctx, clientset, TestNamespaceName, name, "webhook")
})

It("should instrument a new basic replica set", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(ReplicaSetNamePrefix)
workload := CreateBasicReplicaSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetReplicaSet(ctx, k8sClient, TestNamespaceName, name)
Expand All @@ -159,7 +177,7 @@ var _ = Describe("The Dash0 webhook", func() {
})

It("should instrument a new basic stateful set", func() {
name := UniqueName(CronJobNamePrefix)
name := UniqueName(StatefulSetNamePrefix)
workload := CreateBasicStatefulSet(ctx, k8sClient, TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
workload = GetStatefulSet(ctx, k8sClient, TestNamespaceName, name)
Expand Down Expand Up @@ -209,7 +227,17 @@ var _ = Describe("The Dash0 webhook", func() {
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an orphan replica set that has opted out of instrumentation", func() {
It("should not instrument an ownerless pod that has opted out of instrumentation", func() {
name := UniqueName(PodNamePrefix)
workload := PodWithOptOutLabel(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
CreateWorkload(ctx, k8sClient, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyPodWithOptOutLabel(workload)
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an ownerless replica set that has opted out of instrumentation", func() {
name := UniqueName(ReplicaSetNamePrefix)
workload := ReplicaSetWithOptOutLabel(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
Expand Down Expand Up @@ -279,7 +307,19 @@ var _ = Describe("The Dash0 webhook", func() {
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an orphan replica set that has the label, but remove the label", func() {
It("should not instrument an ownerless pod that has the label, but remove the label", func() {
name := UniqueName(PodNamePrefix)
workload := BasicPod(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
AddLabel(&workload.ObjectMeta, "dash0.com/webhook-ignore-once", "true")
CreateWorkload(ctx, k8sClient, workload)
workload = GetPod(ctx, k8sClient, TestNamespaceName, name)
VerifyUnmodifiedPod(workload)
VerifyWebhookIgnoreOnceLabelIsAbesent(&workload.ObjectMeta)
VerifyNoEvents(ctx, clientset, TestNamespaceName)
})

It("should not instrument an ownerless replica set that has the label, but remove the label", func() {
name := UniqueName(ReplicaSetNamePrefix)
workload := BasicReplicaSet(TestNamespaceName, name)
createdObjects = append(createdObjects, workload)
Expand Down
28 changes: 16 additions & 12 deletions internal/workloads/workload_modifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/dash0hq/dash0-operator/internal/util"
)
Expand Down Expand Up @@ -76,8 +77,19 @@ func (m *ResourceModifier) AddLabelsToImmutableJob(job *batchv1.Job) {
util.AddInstrumentationLabels(&job.ObjectMeta, false, m.instrumentationMetadata)
}

func (m *ResourceModifier) ModifyPod(pod *corev1.Pod) bool {
if m.hasOwnerReference(pod) {
return false
}
hasBeenModified := m.modifyPodSpec(&pod.Spec)
if hasBeenModified {
util.AddInstrumentationLabels(&pod.ObjectMeta, true, m.instrumentationMetadata)
}
return hasBeenModified
}

func (m *ResourceModifier) ModifyReplicaSet(replicaSet *appsv1.ReplicaSet) bool {
if m.hasDeploymentOwnerReference(replicaSet) {
if m.hasOwnerReference(replicaSet) {
return false
}
return m.modifyResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta)
Expand Down Expand Up @@ -304,7 +316,7 @@ func (m *ResourceModifier) RemoveLabelsFromImmutableJob(job *batchv1.Job) {
}

func (m *ResourceModifier) RevertReplicaSet(replicaSet *appsv1.ReplicaSet) bool {
if m.hasDeploymentOwnerReference(replicaSet) {
if m.hasOwnerReference(replicaSet) {
return false
}
return m.revertResource(&replicaSet.Spec.Template, &replicaSet.ObjectMeta)
Expand Down Expand Up @@ -413,14 +425,6 @@ func (m *ResourceModifier) removeEnvironmentVariable(container *corev1.Container
})
}

func (m *ResourceModifier) hasDeploymentOwnerReference(replicaSet *appsv1.ReplicaSet) bool {
ownerReferences := replicaSet.ObjectMeta.OwnerReferences
if len(ownerReferences) > 0 {
for _, ownerReference := range ownerReferences {
if ownerReference.APIVersion == "apps/v1" && ownerReference.Kind == "Deployment" {
return true
}
}
}
return false
func (m *ResourceModifier) hasOwnerReference(workload client.Object) bool {
return len(workload.GetOwnerReferences()) > 0
}
Loading