Skip to content

Commit

Permalink
feat(controller): handle the presence of multiple Dash0 custom resources
Browse files Browse the repository at this point in the history
  • Loading branch information
basti1302 committed Jun 18, 2024
1 parent 67e9d33 commit 6b03423
Show file tree
Hide file tree
Showing 4 changed files with 293 additions and 33 deletions.
24 changes: 19 additions & 5 deletions api/v1alpha1/dash0_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,21 @@ func (d *Dash0) EnsureResourceIsMarkedAsAvailable() {
meta.RemoveStatusCondition(&d.Status.Conditions, string(util.ConditionTypeDegraded))
}

func (d *Dash0) EnsureResourceIsMarkedAsUnavailable() {
// EnsureResourceIsMarkedAsAboutToBeDeleted flags the resource as degraded by delegating to
// EnsureResourceIsMarkedAsDegraded. The same reason, "Dash0CustomResourceHasBeenRemoved", is passed for both the
// available and the degraded condition; only the two messages differ.
func (d *Dash0) EnsureResourceIsMarkedAsAboutToBeDeleted() {
	const removedReason = "Dash0CustomResourceHasBeenRemoved"
	d.EnsureResourceIsMarkedAsDegraded(
		removedReason,
		"Dash0 is inactive in this namespace now.",
		removedReason,
		"Dash0 is about to be deleted.",
	)
}

func (d *Dash0) EnsureResourceIsMarkedAsDegraded(
availableReason string,
availableMessage string,
degradedReason string,
degradedMessage string,
) {
// If the available status is already false, the status condition is not updated, except for Reason, Message and
// ObservedGeneration timestamp. In particular, LastTransitionTime is not updated. Thus, this operation is
// effectively idempotent.
Expand All @@ -152,16 +166,16 @@ func (d *Dash0) EnsureResourceIsMarkedAsUnavailable() {
metav1.Condition{
Type: string(util.ConditionTypeAvailable),
Status: metav1.ConditionFalse,
Reason: "Dash0CustomResourceHasBeenRemoved",
Message: "Dash0 is inactive in this namespace now.",
Reason: availableReason,
Message: availableMessage,
})
meta.SetStatusCondition(
&d.Status.Conditions,
metav1.Condition{
Type: string(util.ConditionTypeDegraded),
Status: metav1.ConditionTrue,
Reason: "Dash0CustomResourceHasBeenRemoved",
Message: "Dash0 is about to be deleted.",
Reason: degradedReason,
Message: degradedMessage,
})
}

Expand Down
136 changes: 122 additions & 14 deletions internal/controller/dash0_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"context"
"errors"
"fmt"
"sort"

"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
Expand All @@ -31,6 +32,8 @@ const (
workkloadTypeLabel = "workload type"
workloadNamespaceLabel = "workload namespace"
workloadNameLabel = "workload name"

updateStatusFailedMessage = "Failed to update Dash0 status conditions, requeuing reconcile request."
)

type Dash0Reconciler struct {
Expand Down Expand Up @@ -116,19 +119,11 @@ func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, nil
}

// Check whether the Dash0 custom resource exists.
dash0CustomResource := &operatorv1alpha1.Dash0{}
err = r.Get(ctx, req.NamespacedName, dash0CustomResource)
dash0CustomResource, stopReconcile, err := r.verifyUniqueDash0CustomResourceExists(ctx, req, logger)
if err != nil {
if apierrors.IsNotFound(err) {
logger.Info(
"The Dash0 custom resource has not been found, either it hasn't been installed or it has been " +
"deleted. Ignoring the reconcile request.")
// stop the reconciliation
return ctrl.Result{}, nil
}
logger.Error(err, "Failed to get the Dash0 custom resource, requeuing reconcile request.")
return ctrl.Result{}, err
} else if stopReconcile {
return ctrl.Result{}, nil
}

isFirstReconcile, err := r.initStatusConditions(ctx, dash0CustomResource, &logger)
Expand Down Expand Up @@ -156,7 +151,7 @@ func (r *Dash0Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl

dash0CustomResource.EnsureResourceIsMarkedAsAvailable()
if err := r.Status().Update(ctx, dash0CustomResource); err != nil {
logger.Error(err, "Failed to update Dash0 status conditions, requeuing reconcile request.")
logger.Error(err, updateStatusFailedMessage)
return ctrl.Result{}, err
}

Expand All @@ -180,6 +175,107 @@ func (r *Dash0Reconciler) checkIfNamespaceExists(
return true, nil
}

// verifyUniqueDash0CustomResourceExists runs two checks for a reconcile request: it first loads the Dash0 custom
// resource named in the request and then, only if that check asked neither to stop nor reported an error, checks
// whether that resource is the only one in its namespace. It returns the loaded resource (nil if the first check did
// not yield one), a flag telling the caller to stop the reconciliation, and the error reported by the check that
// ended the sequence, if any.
func (r *Dash0Reconciler) verifyUniqueDash0CustomResourceExists(
	ctx context.Context,
	req ctrl.Request,
	logger logr.Logger,
) (*operatorv1alpha1.Dash0, bool, error) {
	resource, stop, err := r.verifyThatDash0CustomResourceExists(ctx, req, &logger)
	if stop || err != nil {
		// The existence check already decided the outcome; hand it to the caller unchanged.
		return resource, stop, err
	}

	stop, err = r.verifyThatDash0CustomResourceIsUniqe(ctx, req, resource, &logger)
	return resource, stop, err
}

// verifyThatDash0CustomResourceExists fetches the Dash0 custom resource that the current reconcile request refers
// to. There are three possible outcomes:
//   - the resource has been found: the function returns (resource, false, nil) and the reconciliation can proceed;
//   - the resource does not exist: the function logs a message and returns (nil, true, nil); the caller is expected
//     to stop the reconciliation without requeuing it;
//   - fetching the resource failed for any other reason: the function logs the error and returns (nil, true, err);
//     the caller is expected to requeue the reconciliation.
func (r *Dash0Reconciler) verifyThatDash0CustomResourceExists(
	ctx context.Context,
	req ctrl.Request,
	logger *logr.Logger,
) (*operatorv1alpha1.Dash0, bool, error) {
	resource := &operatorv1alpha1.Dash0{}
	switch err := r.Get(ctx, req.NamespacedName, resource); {
	case err == nil:
		return resource, false, nil
	case apierrors.IsNotFound(err):
		logger.Info(
			"The Dash0 custom resource has not been found, either it hasn't been installed or it has been " +
				"deleted. Ignoring the reconcile request.")
		// Stop the reconciliation and do not requeue it (the caller returns (ctrl.Result{}, nil)).
		return nil, true, nil
	default:
		logger.Error(err, "Failed to get the Dash0 custom resource, requeuing reconcile request.")
		// Requeue the reconciliation (the caller returns (ctrl.Result{}, err)).
		return nil, true, err
	}
}

// verifyThatDash0CustomResourceIsUniqe checks whether there are any additional Dash0 custom resources in the
// namespace, besides the one that the current reconcile request applies to.
//
// The returned bool tells the caller whether to stop the reconciliation:
//   - (false, nil): the reconciled resource is the only one in the namespace, or it is the most recently created
//     one; the reconciliation continues;
//   - (true, nil): a more recently created resource exists; the reconciled resource has been marked as degraded and
//     its status has been updated; the reconciliation stops without being requeued;
//   - (true, err): listing the resources or updating the status failed; the error has already been logged.
func (r *Dash0Reconciler) verifyThatDash0CustomResourceIsUniqe(
	ctx context.Context,
	req ctrl.Request,
	dash0CustomResource *operatorv1alpha1.Dash0,
	logger *logr.Logger,
) (bool, error) {
	allDash0CustomResourcesNamespace := &operatorv1alpha1.Dash0List{}
	if err := r.Client.List(
		ctx,
		allDash0CustomResourcesNamespace,
		&client.ListOptions{
			Namespace: req.Namespace,
		},
	); err != nil {
		logger.Error(err, "Failed to list all Dash0 custom resources, requeuing reconcile request.")
		return true, err
	}

	items := allDash0CustomResourcesNamespace.Items
	if len(items) <= 1 {
		// No competing resource in this namespace, nothing to arbitrate.
		return false, nil
	}

	// There are multiple instances of the Dash0 custom resource in this namespace. If the resource that is
	// currently being reconciled is the one that has been most recently created, we assume that this is the source
	// of truth in terms of configuration settings etc., and we ignore the other instances in this reconcile request
	// (they will be handled when they are being reconciled). If the currently reconciled resource is not the most
	// recent one, we set its status to degraded.
	//
	// sort.Stable is used instead of sort.Sort because sort.Sort does not guarantee a stable order: resources with
	// identical creation timestamps keep the order in which the List call returned them, so the choice of the
	// "most recent" resource does not depend on the sorting algorithm.
	sort.Stable(SortByCreationTimestamp(items))
	mostRecentResource := &items[len(items)-1]
	if mostRecentResource.UID == dash0CustomResource.UID {
		logger.Info(
			"At least one other Dash0 custom resource exists in this namespace. This Dash0 custom " +
				"resource is the most recent one. The state of the other resource(s) will be set to degraded.")
		// Continue with the reconcile request for this resource, let the reconcile requests for the other
		// offending resources handle the situation for those resources.
		return false, nil
	}

	logger.Info(
		"At least one other Dash0 custom resource exists in this namespace, and at least one other Dash0 "+
			"custom resource has been created more recently than this one. Setting the state of this resource "+
			"to degraded.",
		"most recently created Dash0 custom resource",
		fmt.Sprintf("%s (%s)", mostRecentResource.Name, mostRecentResource.UID),
	)

	// The available and the degraded condition carry the same reason and message.
	const (
		newerResourceReason  = "NewerDash0CustomResourceIsPresent"
		newerResourceMessage = "There is a more recently created Dash0 custom resource in this namespace, please " +
			"remove all but one resource instance."
	)
	dash0CustomResource.EnsureResourceIsMarkedAsDegraded(
		newerResourceReason,
		newerResourceMessage,
		newerResourceReason,
		newerResourceMessage,
	)
	if err := r.Status().Update(ctx, dash0CustomResource); err != nil {
		logger.Error(err, updateStatusFailedMessage)
		return true, err
	}
	// Stop the reconciliation, and do not requeue it.
	return true, nil
}

func (r *Dash0Reconciler) initStatusConditions(
ctx context.Context,
dash0CustomResource *operatorv1alpha1.Dash0,
Expand Down Expand Up @@ -603,9 +699,9 @@ func (r *Dash0Reconciler) runCleanupActions(
// The Dash0 custom resource will be deleted after this reconcile finished, so updating the status is
// probably unnecessary. But for due process we do it anyway. In particular, if deleting it should fail
// for any reason or take a while, the resource is no longer marked as available.
dash0CustomResource.EnsureResourceIsMarkedAsUnavailable()
dash0CustomResource.EnsureResourceIsMarkedAsAboutToBeDeleted()
if err = r.Status().Update(ctx, dash0CustomResource); err != nil {
logger.Error(err, "Failed to update Dash0 status conditions, requeuing reconcile request.")
logger.Error(err, updateStatusFailedMessage)
return err
}

Expand Down Expand Up @@ -971,3 +1067,15 @@ func newWorkloadModifier(images util.Images, otelCollectorBaseUrl string, logger
logger,
)
}

// SortByCreationTimestamp provides the Len, Less and Swap methods needed to sort a slice of Dash0 custom resources
// with the sort package. Resources are ordered by ascending creation timestamp, that is, oldest first.
type SortByCreationTimestamp []operatorv1alpha1.Dash0

// Len reports the number of resources in the slice.
func (s SortByCreationTimestamp) Len() int {
	return len(s)
}

// Less reports whether the resource at index i has been created before the resource at index j.
func (s SortByCreationTimestamp) Less(i, j int) bool {
	created, other := &s[i].CreationTimestamp, &s[j].CreationTimestamp
	return created.Before(other)
}

// Swap exchanges the resources at the indexes i and j.
func (s SortByCreationTimestamp) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}
106 changes: 99 additions & 7 deletions internal/controller/dash0_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1"
"github.com/dash0hq/dash0-operator/internal/util"

. "github.com/dash0hq/dash0-operator/test/util"
Expand All @@ -25,14 +27,16 @@ import (
var (
namespace = TestNamespaceName

timeout = 15 * time.Second
timeout = 10 * time.Second
pollingInterval = 50 * time.Millisecond

images = util.Images{
OperatorImage: "some-registry.com:1234/dash0-operator-controller:1.2.3",
InitContainerImage: "some-registry.com:1234/dash0-instrumentation:4.5.6",
InitContainerImagePullPolicy: corev1.PullAlways,
}

extraDash0CustomResourceNames = []types.NamespacedName{}
)

var _ = Describe("The Dash0 controller", Ordered, func() {
Expand Down Expand Up @@ -70,6 +74,9 @@ var _ = Describe("The Dash0 controller", Ordered, func() {

AfterEach(func() {
RemoveDash0CustomResource(ctx, k8sClient)
for _, name := range extraDash0CustomResourceNames {
RemoveDash0CustomResourceByName(ctx, k8sClient, name)
}
})

Describe("when reconciling", func() {
Expand All @@ -93,6 +100,68 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
secondAvailableCondition := verifyDash0ResourceIsAvailable(ctx)
Expect(secondAvailableCondition.LastTransitionTime.Time).To(Equal(originalTransitionTimestamp))
})

It("should mark only the most recent resource as available and the other ones as degraded when multiple "+
"resources exist", func() {
firstDash0CustomResource := &operatorv1alpha1.Dash0{}
Expect(k8sClient.Get(ctx, Dash0CustomResourceQualifiedName, firstDash0CustomResource)).To(Succeed())
time.Sleep(10 * time.Millisecond)
secondName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-test-resource-2"}
extraDash0CustomResourceNames = append(extraDash0CustomResourceNames, secondName)
CreateDash0CustomResource(ctx, k8sClient, secondName)
time.Sleep(10 * time.Millisecond)
thirdName := types.NamespacedName{Namespace: TestNamespaceName, Name: "dash0-test-resource-3"}
extraDash0CustomResourceNames = append(extraDash0CustomResourceNames, thirdName)
CreateDash0CustomResource(ctx, k8sClient, thirdName)

triggerReconcileRequestForName(ctx, reconciler, "", Dash0CustomResourceQualifiedName)
triggerReconcileRequestForName(ctx, reconciler, "", secondName)
triggerReconcileRequestForName(ctx, reconciler, "", thirdName)

Eventually(func(g Gomega) {
resource1Available := loadCondition(ctx, Dash0CustomResourceQualifiedName, util.ConditionTypeAvailable)
resource1Degraded := loadCondition(ctx, Dash0CustomResourceQualifiedName, util.ConditionTypeDegraded)
resource2Available := loadCondition(ctx, secondName, util.ConditionTypeAvailable)
resource2Degraded := loadCondition(ctx, secondName, util.ConditionTypeDegraded)
resource3Available := loadCondition(ctx, thirdName, util.ConditionTypeAvailable)
resource3Degraded := loadCondition(ctx, thirdName, util.ConditionTypeDegraded)

// The first two resources should have been marked as degraded.
verifyCondition(
g,
resource1Available,
metav1.ConditionFalse,
"NewerDash0CustomResourceIsPresent",
"There is a more recently created Dash0 custom resource in this namespace, please remove all "+
"but one resource instance.",
)
verifyCondition(
g,
resource1Degraded,
metav1.ConditionTrue,
"NewerDash0CustomResourceIsPresent",
"There is a more recently created Dash0 custom resource in this namespace, please remove all "+
"but one resource instance.",
)
verifyCondition(g, resource2Available, metav1.ConditionFalse, "NewerDash0CustomResourceIsPresent",
"There is a more recently created Dash0 custom resource in this namespace, please remove all "+
"but one resource instance.")
verifyCondition(g, resource2Degraded, metav1.ConditionTrue, "NewerDash0CustomResourceIsPresent",
"There is a more recently created Dash0 custom resource in this namespace, please remove all "+
"but one resource instance.")

// The third (and most recent) resource should have been marked as available.
verifyCondition(
g,
resource3Available,
metav1.ConditionTrue,
"ReconcileFinished",
"Dash0 is active in this namespace now.",
)
g.Expect(resource3Degraded).To(BeNil())

}, timeout, pollingInterval).Should(Succeed())
})
})

Describe("when instrumenting existing workloads", func() {
Expand Down Expand Up @@ -747,12 +816,21 @@ func verifyThatDeploymentIsNotBeingInstrumented(ctx context.Context, reconciler
}

// triggerReconcileRequest runs a reconcile request for the Dash0 custom resource identified by
// Dash0CustomResourceQualifiedName by delegating to triggerReconcileRequestForName. stepMessage is passed through
// unchanged.
func triggerReconcileRequest(ctx context.Context, reconciler *Dash0Reconciler, stepMessage string) {
triggerReconcileRequestForName(ctx, reconciler, stepMessage, Dash0CustomResourceQualifiedName)
}

// triggerReconcileRequestForName runs a single reconcile request for the Dash0 custom resource with the given
// namespaced name and expects the reconciler to return no error. If stepMessage is empty, the step description
// "Trigger reconcile request" is reported via By instead.
func triggerReconcileRequestForName(
	ctx context.Context,
	reconciler *Dash0Reconciler,
	stepMessage string,
	dash0CustomResourceName types.NamespacedName,
) {
	if stepMessage == "" {
		stepMessage = "Trigger reconcile request"
	}
	By(stepMessage)
	_, err := reconciler.Reconcile(ctx, reconcile.Request{
		// Target the resource requested by the caller. The field must only be set once; a second, hard-coded
		// NamespacedName entry is a duplicate field in the literal and would ignore the parameter.
		NamespacedName: dash0CustomResourceName,
	})
	Expect(err).NotTo(HaveOccurred())
}
Expand All @@ -763,19 +841,33 @@ func verifyStatusConditionAndSuccessfulInstrumentationEvent(ctx context.Context,
}

// verifyDash0ResourceIsAvailable delegates to verifyDash0ResourceStatus with the expected status
// metav1.ConditionTrue and returns the condition that verifyDash0ResourceStatus returns.
func verifyDash0ResourceIsAvailable(ctx context.Context) *metav1.Condition {
return verifyDash0ResourceStatus(ctx, metav1.ConditionTrue)
}

// verifyDash0ResourceStatus repeatedly loads the Dash0 custom resource (via LoadDash0CustomResourceOrFail) until
// its status conditions meet the expectations below or timeout elapses, polling every pollingInterval. It returns
// the Available condition found by the last attempt.
func verifyDash0ResourceStatus(ctx context.Context, expectedStatus metav1.ConditionStatus) *metav1.Condition {
var availableCondition *metav1.Condition
By("Verifying status conditions")
Eventually(func(g Gomega) {
dash0CustomResource := LoadDash0CustomResourceOrFail(ctx, k8sClient, g)
availableCondition = meta.FindStatusCondition(dash0CustomResource.Status.Conditions, string(util.ConditionTypeAvailable))
// The Available condition must be present and carry the expected status.
g.Expect(availableCondition).NotTo(BeNil())
g.Expect(availableCondition.Status).To(Equal(expectedStatus))
// NOTE(review): the status is additionally asserted against the fixed value metav1.ConditionTrue, so this helper
// can only succeed when expectedStatus is metav1.ConditionTrue. One of the two assertions looks redundant; confirm
// which one is intended.
g.Expect(availableCondition.Status).To(Equal(metav1.ConditionTrue))
// The resource must not carry a Degraded condition at all.
degraded := meta.FindStatusCondition(dash0CustomResource.Status.Conditions, string(util.ConditionTypeDegraded))
g.Expect(degraded).To(BeNil())
}, timeout, pollingInterval).Should(Succeed())
return availableCondition
}

// loadCondition loads the Dash0 custom resource with the given name (via LoadDash0CustomResourceByNameOrFail) and
// looks up the status condition of the requested type among its conditions. The result of
// meta.FindStatusCondition is returned as is.
func loadCondition(
	ctx context.Context,
	dash0CustomResourceName types.NamespacedName,
	conditionType util.ConditionType,
) *metav1.Condition {
	resource := LoadDash0CustomResourceByNameOrFail(ctx, k8sClient, Default, dash0CustomResourceName)
	conditions := resource.Status.Conditions
	return meta.FindStatusCondition(conditions, string(conditionType))
}

// verifyCondition asserts, using the given Gomega instance, that condition is not nil and that its Status, Reason
// and Message are equal to the expected values.
func verifyCondition(
g Gomega,
condition *metav1.Condition,
expectedStatus metav1.ConditionStatus,
expectedReason string,
expectedMessage string,
) {
// The nil check comes first because the following assertions dereference condition.
g.Expect(condition).NotTo(BeNil())
g.Expect(condition.Status).To(Equal(expectedStatus))
g.Expect(condition.Reason).To(Equal(expectedReason))
g.Expect(condition.Message).To(Equal(expectedMessage))
}
Loading

0 comments on commit 6b03423

Please sign in to comment.