From 9eda835e07553e3c7c022dace40bbb178f74fbb3 Mon Sep 17 00:00:00 2001
From: Bastian Krol
Date: Thu, 26 Sep 2024 12:21:30 +0100
Subject: [PATCH] fix(backendconnection): add annotation to prevent ArgoCD pruning

For clusters managed by ArgoCD, we need to prevent ArgoCD from pruning
the Kubernetes resources necessary for the OTel collector managed by
the operator. If pruning is enabled in ArgoCD, it will automatically
delete resources which have no owner reference. This applies to all
cluster-scoped resources which the operator creates (since
cluster-scoped resources cannot be owned by namespace-scoped
resources). In particular, this affects the cluster role & cluster
role binding.

References:
* https://github.com/argoproj/argo-cd/issues/4764#issuecomment-722661940 --
  this indicates that only top-level resources are pruned (which is
  basically the same as resources without an owner reference).
* The docs for preventing this on a per-resource level:
  https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/#no-prune-resources
---
 .../otelcolresources/desired_state.go      | 54 +++++++++++++------
 .../otelcolresources/desired_state_test.go | 25 +++++----
 .../otelcolresources/otelcol_resources.go  |  6 ++-
 3 files changed, 58 insertions(+), 27 deletions(-)
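The following is a minimal, self-contained sketch (not part of the diff
below) of the metadata that ends up on the managed resources once this
change is deployed, using a cluster role as an example. The resource name
is a placeholder, not the operator's actual naming; the real logic lives
in addCommonMetadata in desired_state.go.

    package main

    import (
        "fmt"

        rbacv1 "k8s.io/api/rbac/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "sigs.k8s.io/yaml"
    )

    func main() {
        // Placeholder name for illustration only; the operator derives the
        // real resource names from its name prefix.
        clusterRole := rbacv1.ClusterRole{
            TypeMeta: metav1.TypeMeta{APIVersion: "rbac.authorization.k8s.io/v1", Kind: "ClusterRole"},
            ObjectMeta: metav1.ObjectMeta{
                Name: "example-opentelemetry-collector",
                // The annotation added by addCommonMetadata; it tells ArgoCD not to
                // prune the resource even though it has no owner reference.
                Annotations: map[string]string{"argocd.argoproj.io/sync-options": "Prune=false"},
            },
        }
        manifest, err := yaml.Marshal(clusterRole)
        if err != nil {
            panic(err)
        }
        fmt.Println(string(manifest))
    }

ArgoCD evaluates the argocd.argoproj.io/sync-options annotation per
resource, so Prune=false exempts exactly these objects from pruning
without changing the Application-level sync policy.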
diff --git a/internal/backendconnection/otelcolresources/desired_state.go b/internal/backendconnection/otelcolresources/desired_state.go
index a2fc4db..8968390 100644
--- a/internal/backendconnection/otelcolresources/desired_state.go
+++ b/internal/backendconnection/otelcolresources/desired_state.go
@@ -37,6 +37,11 @@ type collectorConfigurationTemplateValues struct {
 	DevelopmentMode bool
 }
 
+// This type just exists to ensure all created objects go through addCommonMetadata.
+type clientObject struct {
+	object client.Object
+}
+
 const (
 	OtlpGrpcHostPort = 40317
 	OtlpHttpHostPort = 40318
@@ -150,39 +155,39 @@ var (
 	deploymentReplicas int32 = 1
 )
 
-func assembleDesiredState(config *oTelColConfig, forDeletion bool) ([]client.Object, error) {
-	var desiredState []client.Object
-	desiredState = append(desiredState, assembleServiceAccountForDaemonSet(config))
+func assembleDesiredState(config *oTelColConfig, forDeletion bool) ([]clientObject, error) {
+	var desiredState []clientObject
+	desiredState = append(desiredState, addCommonMetadata(assembleServiceAccountForDaemonSet(config)))
 	daemonSetCollectorConfigMap, err := assembleDaemonSetCollectorConfigMap(config, forDeletion)
 	if err != nil {
 		return desiredState, err
 	}
-	desiredState = append(desiredState, daemonSetCollectorConfigMap)
-	desiredState = append(desiredState, assembleFilelogOffsetsConfigMap(config))
-	desiredState = append(desiredState, assembleClusterRoleForDaemonSet(config))
-	desiredState = append(desiredState, assembleClusterRoleBindingForDaemonSet(config))
-	desiredState = append(desiredState, assembleRole(config))
-	desiredState = append(desiredState, assembleRoleBinding(config))
-	desiredState = append(desiredState, assembleService(config))
+	desiredState = append(desiredState, addCommonMetadata(daemonSetCollectorConfigMap))
+	desiredState = append(desiredState, addCommonMetadata(assembleFilelogOffsetsConfigMap(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleClusterRoleForDaemonSet(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleClusterRoleBindingForDaemonSet(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleRole(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleRoleBinding(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleService(config)))
 
 	collectorDaemonSet, err := assembleCollectorDaemonSet(config)
 	if err != nil {
 		return desiredState, err
 	}
-	desiredState = append(desiredState, collectorDaemonSet)
+	desiredState = append(desiredState, addCommonMetadata(collectorDaemonSet))
 
-	desiredState = append(desiredState, assembleServiceAccountForDeployment(config))
-	desiredState = append(desiredState, assembleClusterRoleForDeployment(config))
-	desiredState = append(desiredState, assembleClusterRoleBindingForDeployment(config))
+	desiredState = append(desiredState, addCommonMetadata(assembleServiceAccountForDeployment(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleClusterRoleForDeployment(config)))
+	desiredState = append(desiredState, addCommonMetadata(assembleClusterRoleBindingForDeployment(config)))
 
 	deploymentCollectorConfigMap, err := assembleDeploymentCollectorConfigMap(config, forDeletion)
 	if err != nil {
 		return desiredState, err
 	}
-	desiredState = append(desiredState, deploymentCollectorConfigMap)
+	desiredState = append(desiredState, addCommonMetadata(deploymentCollectorConfigMap))
 
 	collectorDeployment, err := assembleCollectorDeployment(config)
 	if err != nil {
 		return desiredState, err
 	}
-	desiredState = append(desiredState, collectorDeployment)
+	desiredState = append(desiredState, addCommonMetadata(collectorDeployment))
 
 	return desiredState, nil
 }
@@ -1011,6 +1016,23 @@ func labels(addOptOutLabel bool) map[string]string {
 	return lbls
 }
 
+func addCommonMetadata(object client.Object) clientObject {
+	// For clusters managed by ArgoCD, we need to prevent ArgoCD from pruning resources that have no owner reference,
+	// that is, all cluster-scoped resources, like cluster roles & cluster role bindings. We could add the annotation
+	// only to the cluster-scoped resources, but instead we just apply it to all resources we manage.
+	// * https://github.com/argoproj/argo-cd/issues/4764#issuecomment-722661940 -- this is where they say that only top
+	//   level resources are pruned (which is basically the same as resources without an owner reference).
+	// * The docs for preventing this on a resource level are here:
+	//   https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/#no-prune-resources
+	if object.GetAnnotations() == nil {
+		object.SetAnnotations(map[string]string{})
+	}
+	object.GetAnnotations()["argocd.argoproj.io/sync-options"] = "Prune=false"
+	return clientObject{
+		object: object,
+	}
+}
+
 func compileObsoleteResources(namespace string, namePrefix string) []client.Object {
 	openTelemetryCollectorSuffix := "opentelemetry-collector"
 	openTelemetryCollectorAgentSuffix := "opentelemetry-collector-agent"
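One observation on addCommonMetadata above (not a change in this patch):
writing into the map returned by GetAnnotations() works because the
ObjectMeta-based types assembled here return the underlying annotations
map rather than a copy. A hedged sketch of an equivalent variant that does
not rely on that detail; addPruneFalseAnnotation is an illustrative name
only, not an identifier from this diff:

    package otelcolresources

    import (
        "sigs.k8s.io/controller-runtime/pkg/client"
    )

    // addPruneFalseAnnotation is an illustrative alternative to the in-place
    // mutation in addCommonMetadata: it reads the annotations, modifies the
    // map, and writes it back via SetAnnotations, so it does not depend on
    // GetAnnotations() returning the live map.
    func addPruneFalseAnnotation(object client.Object) client.Object {
        annotations := object.GetAnnotations()
        if annotations == nil {
            annotations = map[string]string{}
        }
        annotations["argocd.argoproj.io/sync-options"] = "Prune=false"
        object.SetAnnotations(annotations)
        return object
    }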
diff --git a/internal/backendconnection/otelcolresources/desired_state_test.go b/internal/backendconnection/otelcolresources/desired_state_test.go
index 397e942..652aa46 100644
--- a/internal/backendconnection/otelcolresources/desired_state_test.go
+++ b/internal/backendconnection/otelcolresources/desired_state_test.go
@@ -9,7 +9,6 @@ import (
 
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
-	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	dash0v1alpha1 "github.com/dash0hq/dash0-operator/api/dash0monitoring/v1alpha1"
 	"github.com/dash0hq/dash0-operator/internal/dash0/selfmonitoring"
@@ -53,6 +52,14 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f
 			Expect(err).ToNot(HaveOccurred())
 
 			Expect(desiredState).To(HaveLen(14))
+
+			for _, wrapper := range desiredState {
+				object := wrapper.object
+				annotations := object.GetAnnotations()
+				Expect(annotations).To(HaveLen(1))
+				Expect(annotations["argocd.argoproj.io/sync-options"]).To(Equal("Prune=false"))
+			}
+
 			collectorConfigConfigMapContent := getCollectorConfigConfigMapContent(desiredState)
 			Expect(collectorConfigConfigMapContent).To(ContainSubstring(fmt.Sprintf("endpoint: %s", EndpointDash0TestQuoted)))
 			Expect(collectorConfigConfigMapContent).NotTo(ContainSubstring("file/traces"))
@@ -264,37 +271,37 @@ var _ = Describe("The desired state of the OpenTelemetry Collector resources", f
 	})
 })
 
-func getConfigMap(desiredState []client.Object, name string) *corev1.ConfigMap {
+func getConfigMap(desiredState []clientObject, name string) *corev1.ConfigMap {
 	for _, object := range desiredState {
-		if cm, ok := object.(*corev1.ConfigMap); ok && cm.Name == name {
+		if cm, ok := object.object.(*corev1.ConfigMap); ok && cm.Name == name {
 			return cm
 		}
 	}
 	return nil
 }
 
-func getCollectorConfigConfigMapContent(desiredState []client.Object) string {
+func getCollectorConfigConfigMapContent(desiredState []clientObject) string {
 	cm := getConfigMap(desiredState, ExpectedDaemonSetCollectorConfigMapName)
 	return cm.Data["config.yaml"]
 }
 
-func getFileOffsetConfigMapContent(desiredState []client.Object) string {
+func getFileOffsetConfigMapContent(desiredState []clientObject) string {
 	cm := getConfigMap(desiredState, ExpectedDaemonSetFilelogOffsetSynchConfigMapName)
 	return cm.Data["config.yaml"]
 }
 
-func getDaemonSet(desiredState []client.Object) *appsv1.DaemonSet {
+func getDaemonSet(desiredState []clientObject) *appsv1.DaemonSet {
 	for _, object := range desiredState {
-		if ds, ok := object.(*appsv1.DaemonSet); ok {
+		if ds, ok := object.object.(*appsv1.DaemonSet); ok {
 			return ds
 		}
 	}
 	return nil
 }
 
-func getDeployment(desiredState []client.Object) *appsv1.Deployment {
+func getDeployment(desiredState []clientObject) *appsv1.Deployment {
 	for _, object := range desiredState {
-		if d, ok := object.(*appsv1.Deployment); ok {
+		if d, ok := object.object.(*appsv1.Deployment); ok {
 			return d
 		}
 	}
diff --git a/internal/backendconnection/otelcolresources/otelcol_resources.go b/internal/backendconnection/otelcolresources/otelcol_resources.go
index 0b594d9..b183d07 100644
--- a/internal/backendconnection/otelcolresources/otelcol_resources.go
+++ b/internal/backendconnection/otelcolresources/otelcol_resources.go
@@ -102,7 +102,8 @@ func (m *OTelColResourceManager) CreateOrUpdateOpenTelemetryCollectorResources(
 	}
 	resourcesHaveBeenCreated := false
 	resourcesHaveBeenUpdated := false
-	for _, desiredResource := range desiredState {
+	for _, wrapper := range desiredState {
+		desiredResource := wrapper.object
 		isNew, isChanged, err := m.createOrUpdateResource(
 			ctx,
 			desiredResource,
@@ -346,7 +347,8 @@ func (m *OTelColResourceManager) DeleteResources(
 		return err
 	}
 	var allErrors []error
-	for _, desiredResource := range desiredResources {
+	for _, wrapper := range desiredResources {
+		desiredResource := wrapper.object
 		err = m.Client.Delete(ctx, desiredResource)
 		if err != nil {
 			if apierrors.IsNotFound(err) {
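A standalone sketch for verifying the effect on a running cluster after the
operator has reconciled: it fetches one of the cluster-scoped resources with
a controller-runtime client and prints the ArgoCD sync-options annotation.
The cluster role name is a placeholder; substitute the name the operator
actually creates in your cluster.

    package main

    import (
        "context"
        "fmt"

        rbacv1 "k8s.io/api/rbac/v1"

        "sigs.k8s.io/controller-runtime/pkg/client"
        "sigs.k8s.io/controller-runtime/pkg/client/config"
    )

    func main() {
        // Load the kubeconfig / in-cluster configuration the same way controller-runtime does.
        restConfig, err := config.GetConfig()
        if err != nil {
            panic(err)
        }
        k8sClient, err := client.New(restConfig, client.Options{})
        if err != nil {
            panic(err)
        }

        // "example-opentelemetry-collector" is a placeholder; use the actual name of
        // one of the cluster roles created by the operator.
        var clusterRole rbacv1.ClusterRole
        if err := k8sClient.Get(
            context.Background(),
            client.ObjectKey{Name: "example-opentelemetry-collector"},
            &clusterRole,
        ); err != nil {
            panic(err)
        }

        // Expected output once this patch is deployed: Prune=false
        fmt.Println(clusterRole.Annotations["argocd.argoproj.io/sync-options"])
    }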