Skip to content

Commit

Permalink
feat: revert instrumentation when operator is uninstalled
Browse files Browse the repository at this point in the history
  • Loading branch information
basti1302 committed Jun 20, 2024
1 parent 24f2ce2 commit 6d1e93c
Show file tree
Hide file tree
Showing 21 changed files with 883 additions and 126 deletions.
31 changes: 30 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
// to ensure that exec-entrypoint and run can make use of them.
_ "k8s.io/client-go/plugin/pkg/client/auth"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand All @@ -27,6 +28,7 @@ import (

operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1"
"github.com/dash0hq/dash0-operator/internal/controller"
"github.com/dash0hq/dash0-operator/internal/removal"
"github.com/dash0hq/dash0-operator/internal/util"
dash0webhook "github.com/dash0hq/dash0-operator/internal/webhook"
//+kubebuilder:scaffold:imports
Expand Down Expand Up @@ -55,18 +57,23 @@ func init() {
}

func main() {
var uninstrumentAll bool
var metricsAddr string
var enableLeaderElection bool
var probeAddr string
var secureMetrics bool
var enableHTTP2 bool
flag.BoolVar(&uninstrumentAll, "uninstrument-all", false,
"If set, the process will remove all Dash0 custom resources from all namespaces in the cluster. This will "+
"trigger the Dash0 custom resources' finalizers in each namespace, which in turn will revert the "+
"instrumentation of all workloads in all namespaces.")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&secureMetrics, "metrics-secure", false,
"If set the metrics endpoint is served securely")
"If set, the metrics endpoint is served securely")
flag.BoolVar(&enableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")

Expand All @@ -88,6 +95,14 @@ func main() {

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

if uninstrumentAll {
if err := deleteCustomResourcesInAllNamespaces(&setupLog); err != nil {
setupLog.Error(err, "deleting the Dash0 custom resources in all namespaces failed")
os.Exit(1)
}
os.Exit(0)
}

// if the enable-http2 flag is false (the default), http/2 should be disabled
// due to its vulnerabilities. More specifically, disabling http/2 removes
// the exposure to the HTTP/2 Stream Cancelation and
Expand Down Expand Up @@ -261,3 +276,17 @@ func readEnvironmentVariables() (string, string, string, corev1.PullPolicy, erro

return otelCollectorBaseUrl, operatorImage, initContainerImage, initContainerImagePullPolicy, nil
}

// deleteCustomResourcesInAllNamespaces creates an operator pre-delete handler
// via the removal package and asks it to delete all Dash0 custom resources.
//
// Any error from creating the handler or from the deletion is logged on the
// given logger and then returned unchanged to the caller; nil is returned when
// both steps succeed.
func deleteCustomResourcesInAllNamespaces(logger *logr.Logger) error {
	preDeleteHandler, createErr := removal.NewOperatorPreDeleteHandler()
	if createErr != nil {
		logger.Error(createErr, "Failed to create the OperatorPreDeleteHandler.")
		return createErr
	}

	if deleteErr := preDeleteHandler.DeleteAllDash0CustomResources(); deleteErr != nil {
		logger.Error(deleteErr, "Failed to delete all Dash0 custom resources.")
		return deleteErr
	}

	return nil
}
12 changes: 11 additions & 1 deletion helm-chart/dash0-operator/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/part-of: {{ include "dash0-operator.chartName" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
helm.sh/chart: {{ include "dash0-operator.chartNameWithVersion" . }}
{{- include "dash0-operator.additionalLabels" . }}
{{- end }}
Expand Down Expand Up @@ -54,4 +54,14 @@ helm.sh/chart: {{ include "dash0-operator.chartNameWithVersion" . }}
{{/* the init container image */}}
{{- define "dash0-operator.initContainerImage" -}}
{{- printf "%s:%s" .Values.operator.initContainerImage.repository .Values.operator.initContainerImage.tag }}
{{- end }}

{{/*
a restrictive container-level securityContext: no privilege escalation, read-only root filesystem, non-root user, all
capabilities dropped; callers are expected to indent the result (e.g. via nindent) to the container level
*/}}
{{- define "dash0-operator.restrictiveContainerSecurityContext" -}}
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
capabilities:
drop:
- ALL
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ rules:
verbs:
- create
- delete
- deletecollection
- get
- list
- patch
Expand Down
8 changes: 3 additions & 5 deletions helm-chart/dash0-operator/templates/operator/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ metadata:
app.kubernetes.io/component: manager
{{- include "dash0-operator.labels" . | nindent 4 }}
control-plane: controller-manager
dash0.com/enable: "false"
{{- with .Values.operator.deploymentAnnotations }}
annotations:
{{- toYaml . | nindent 4 }}
Expand Down Expand Up @@ -76,11 +77,7 @@ spec:
protocol: TCP
resources:
{{- toYaml .Values.operator.managerPodResources | nindent 10 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
{{ include "dash0-operator.restrictiveContainerSecurityContext" . | nindent 8 }}
volumeMounts:
- mountPath: /tmp/k8s-webhook-server/serving-certs
name: cert
Expand Down Expand Up @@ -124,6 +121,7 @@ spec:
seccompProfile:
type: RuntimeDefault
serviceAccountName: {{ template "dash0-operator.serviceAccountName" . }}
automountServiceAccountToken: true
terminationGracePeriodSeconds: 10
volumes:
- name: cert
Expand Down
39 changes: 39 additions & 0 deletions helm-chart/dash0-operator/templates/operator/pre-delete-hook.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Helm pre-delete hook: a Job that runs the operator's manager binary with --uninstrument-all when the release is
# being deleted.
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "dash0-operator.chartName" . }}-pre-delete
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/name: job
app.kubernetes.io/instance: pre-delete-hook
app.kubernetes.io/component: pre-delete
{{- include "dash0-operator.labels" . | nindent 4 }}
# NOTE(review): presumably opts this Job out of Dash0 instrumentation -- confirm against the webhook/controller.
dash0.com/enable: "false"
annotations:
# Register the Job as a pre-delete hook and have Helm remove it again once it has succeeded.
"helm.sh/hook": pre-delete
"helm.sh/hook-delete-policy": hook-succeeded
spec:
template:
metadata:
name: {{ .Release.Name }}-pre-delete-job
labels:
app.kubernetes.io/instance: pre-delete-hook
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
helm.sh/chart: {{ include "dash0-operator.chartNameWithVersion" . }}
spec:
restartPolicy: OnFailure
containers:
- name: pre-delete-job
# Same image as the operator; only the command differs.
image: {{ include "dash0-operator.image" . | quote }}
imagePullPolicy: {{ .Values.operator.image.pullPolicy }}
command:
- /manager
- "--uninstrument-all"
{{ include "dash0-operator.restrictiveContainerSecurityContext" . | nindent 10 }}
resources:
{{- toYaml .Values.operator.managerPodResources | nindent 12 }}
securityContext:
runAsNonRoot: true
# Uses the operator's service account; the token is mounted explicitly.
serviceAccountName: {{ template "dash0-operator.serviceAccountName" . }}
automountServiceAccountToken: true
backoffLimit: 2
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ cluster roles should match snapshot:
verbs:
- create
- delete
- deletecollection
- get
- list
- patch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ deployment should match snapshot (default values):
app.kubernetes.io/part-of: dash0-operator
app.kubernetes.io/version: 1.0.0
control-plane: controller-manager
dash0.com/enable: "false"
helm.sh/chart: dash0-operator-1.0.0
name: dash0-operator-controller-manager
namespace: NAMESPACE
Expand All @@ -26,6 +27,7 @@ deployment should match snapshot (default values):
labels:
control-plane: controller-manager
spec:
automountServiceAccountToken: true
containers:
- args:
- --health-probe-bind-address=:8081
Expand Down Expand Up @@ -72,6 +74,8 @@ deployment should match snapshot (default values):
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /tmp/k8s-webhook-server/serving-certs
name: cert
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
pre-delete hook job should match snapshot:
1: |
apiVersion: batch/v1
kind: Job
metadata:
annotations:
helm.sh/hook: pre-delete
helm.sh/hook-delete-policy: hook-succeeded
labels:
app.kubernetes.io/component: pre-delete
app.kubernetes.io/instance: pre-delete-hook
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: job
app.kubernetes.io/part-of: dash0-operator
app.kubernetes.io/version: 1.0.0
dash0.com/enable: "false"
helm.sh/chart: dash0-operator-1.0.0
name: dash0-operator-pre-delete
namespace: NAMESPACE
spec:
backoffLimit: 2
template:
metadata:
labels:
app.kubernetes.io/instance: pre-delete-hook
app.kubernetes.io/managed-by: Helm
helm.sh/chart: dash0-operator-1.0.0
name: RELEASE-NAME-pre-delete-job
spec:
automountServiceAccountToken: true
containers:
- command:
- /manager
- --uninstrument-all
image: dash0-operator-controller:1.0.0
imagePullPolicy: null
name: pre-delete-job
resources:
limits:
cpu: 500m
ephemeral-storage: 500Mi
memory: 128Mi
requests:
cpu: 10m
ephemeral-storage: 500Mi
memory: 64Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: OnFailure
securityContext:
runAsNonRoot: true
serviceAccountName: dash0-operator-controller-manager
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
suite: test pre-delete hook job
templates:
- operator/pre-delete-hook.yaml
tests:
- it: pre-delete hook job should match snapshot
asserts:
- matchSnapshot: {}
6 changes: 3 additions & 3 deletions internal/controller/controller_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ import (
"runtime"
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
Expand All @@ -22,6 +19,9 @@ import (
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

operatorv1alpha1 "github.com/dash0hq/dash0-operator/api/v1alpha1"
//+kubebuilder:scaffold:imports
)
Expand Down
10 changes: 5 additions & 5 deletions internal/controller/dash0_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ func (r *Dash0Reconciler) verifyThatDash0CustomResourceIsUniqe(
dash0CustomResource *operatorv1alpha1.Dash0,
logger *logr.Logger,
) (bool, error) {
allDash0CustomResourcesNamespace := &operatorv1alpha1.Dash0List{}
allDash0CustomResourcesInNamespace := &operatorv1alpha1.Dash0List{}
if err := r.Client.List(
ctx,
allDash0CustomResourcesNamespace,
allDash0CustomResourcesInNamespace,
&client.ListOptions{
Namespace: req.Namespace,
},
Expand All @@ -234,14 +234,14 @@ func (r *Dash0Reconciler) verifyThatDash0CustomResourceIsUniqe(
return true, err
}

if len(allDash0CustomResourcesNamespace.Items) > 1 {
if len(allDash0CustomResourcesInNamespace.Items) > 1 {
// There are multiple instances of the Dash0 custom resource in this namespace. If the resource that is
// currently being reconciled is the one that has been most recently created, we assume that this is the source
// of truth in terms of configuration settings etc., and we ignore the other instances in this reconcile request
// (they will be handled when they are being reconciled). If the currently reconciled resource is not the most
// recent one, we set its status to degraded.
sort.Sort(SortByCreationTimestamp(allDash0CustomResourcesNamespace.Items))
mostRecentResource := allDash0CustomResourcesNamespace.Items[len(allDash0CustomResourcesNamespace.Items)-1]
sort.Sort(SortByCreationTimestamp(allDash0CustomResourcesInNamespace.Items))
mostRecentResource := allDash0CustomResourcesInNamespace.Items[len(allDash0CustomResourcesInNamespace.Items)-1]
if mostRecentResource.UID == dash0CustomResource.UID {
logger.Info(
"At least one other Dash0 custom resource exists in this namespace. This Dash0 custom " +
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/dash0_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ var _ = Describe("The Dash0 controller", Ordered, func() {
AfterEach(func() {
RemoveDash0CustomResource(ctx, k8sClient)
for _, name := range extraDash0CustomResourceNames {
RemoveDash0CustomResourceByName(ctx, k8sClient, name)
RemoveDash0CustomResourceByName(ctx, k8sClient, name, true)
}
})

Expand Down
Loading

0 comments on commit 6d1e93c

Please sign in to comment.