From b1bb0443823ed741d52f8fc55739d93dd6dc0ef6 Mon Sep 17 00:00:00 2001 From: Ido Heyvi Date: Mon, 28 Oct 2024 15:14:05 +0200 Subject: [PATCH] adding sriov operator config cleanup binary, to be used under helm uninstall pre-delete hook Signed-off-by: Ido Heyvi --- Dockerfile | 2 + Makefile | 2 +- .../cleanup.go | 83 ++++++++ .../cleanup_test.go | 177 ++++++++++++++++++ .../main.go | 38 ++++ .../suite_test.go | 121 ++++++++++++ .../templates/pre-delete-webooks.yaml | 27 +++ 7 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 cmd/sriov-network-operator-config-cleanup/cleanup.go create mode 100644 cmd/sriov-network-operator-config-cleanup/cleanup_test.go create mode 100644 cmd/sriov-network-operator-config-cleanup/main.go create mode 100644 cmd/sriov-network-operator-config-cleanup/suite_test.go create mode 100644 deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml diff --git a/Dockerfile b/Dockerfile index 2b26247e8..7735bef7b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,9 +2,11 @@ FROM golang:1.22 AS builder WORKDIR /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator COPY . . RUN make _build-manager BIN_PATH=build/_output/cmd +RUN make _build-sriov-network-operator-config-cleanup BIN_PATH=build/_output/cmd FROM quay.io/centos/centos:stream9 COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/manager /usr/bin/sriov-network-operator +COPY --from=builder /go/src/github.com/k8snetworkplumbingwg/sriov-network-operator/build/_output/cmd/sriov-network-operator-config-cleanup /usr/bin/sriov-network-operator-config-cleanup COPY bindata /bindata ENV OPERATOR_NAME=sriov-network-operator CMD ["/usr/bin/sriov-network-operator"] diff --git a/Makefile b/Makefile index 3718b75bd..310f1dc52 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ GOLANGCI_LINT_VER = v1.55.2 all: generate lint build -build: manager _build-sriov-network-config-daemon _build-webhook +build: manager _build-sriov-network-config-daemon _build-webhook _build-sriov-network-operator-config-cleanup _build-%: WHAT=$* hack/build-go.sh diff --git a/cmd/sriov-network-operator-config-cleanup/cleanup.go b/cmd/sriov-network-operator-config-cleanup/cleanup.go new file mode 100644 index 000000000..e53deba34 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/cleanup.go @@ -0,0 +1,83 @@ +package main + +import ( + "context" + "time" + + "github.com/spf13/cobra" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/log" + + snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" + + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/watch" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/typed/sriovnetwork/v1" +) + +var ( + namespace string + watchTO int +) + +func init() { + rootCmd.Flags().StringVarP(&namespace, "namespace", "n", "", "designated SriovOperatorConfig namespace") + rootCmd.Flags().IntVarP(&watchTO, "watch-timeout", "w", 10, "sriov-operator config post-delete watch timeout ") +} + +func runCleanupCmd(cmd *cobra.Command, args []string) error { + // init logger + snolog.InitLog() + setupLog := log.Log.WithName("sriov-network-operator-config-cleanup") + setupLog.Info("Run sriov-network-operator-config-cleanup") + + // adding context timeout although client-go Delete should be non-blocking by default + ctx, timeoutFunc := context.WithTimeout(context.Background(), time.Second*time.Duration(watchTO)) + defer timeoutFunc() + + restConfig := ctrl.GetConfigOrDie() + sriovcs, err := sriovnetworkv1.NewForConfig(restConfig) + if err != nil { + setupLog.Error(err, "failed to create 'sriovnetworkv1' clientset") + } + + err = sriovcs.SriovOperatorConfigs(namespace).Delete(context.Background(), "default", metav1.DeleteOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + setupLog.Error(err, "failed to delete SriovOperatorConfig") + return err + } + + // watching 'default' config deletion with context timeout, in case sriov-operator fails to delete 'default' config + watcher, err := sriovcs.SriovOperatorConfigs(namespace).Watch(ctx, metav1.ListOptions{Watch: true}) + if err != nil { + setupLog.Error(err, "failed creating 'default' SriovOperatorConfig object watcher") + return err + } + defer watcher.Stop() + for { + select { + case event := <-watcher.ResultChan(): + if event.Type == watch.Deleted { + setupLog.Info("'default' SriovOperatorConfig is deleted") + return nil + } + + case <-ctx.Done(): + // check whether object might has been deleted before watch event triggered + _, err := sriovcs.SriovOperatorConfigs(namespace).Get(context.Background(), "default", metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil + } + } + err = ctx.Err() + setupLog.Error(err, "timeout has occurred for 'default' SriovOperatorConfig deletion") + return err + } + } +} diff --git a/cmd/sriov-network-operator-config-cleanup/cleanup_test.go b/cmd/sriov-network-operator-config-cleanup/cleanup_test.go new file mode 100644 index 000000000..f7926d834 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/cleanup_test.go @@ -0,0 +1,177 @@ +package main + +import ( + "context" + "sync" + + "github.com/golang/mock/gomock" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/manager" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/controllers" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" + mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" +) + +type configController struct { + k8sManager manager.Manager + ctx context.Context + cancel context.CancelFunc + wg *sync.WaitGroup +} + +var ( + controller *configController + testNamespace string = "sriov-network-operator" + defaultSriovOperatorSpec = sriovnetworkv1.SriovOperatorConfigSpec{ + EnableInjector: true, + EnableOperatorWebhook: true, + LogLevel: 2, + FeatureGates: nil, + } +) + +var _ = Describe("cleanup", Ordered, func() { + BeforeAll(func() { + By("Create SriovOperatorConfig controller k8s objs") + config := getDefaultSriovOperatorConfig() + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + + somePolicy := &sriovnetworkv1.SriovNetworkNodePolicy{} + somePolicy.SetNamespace(testNamespace) + somePolicy.SetName("some-policy") + somePolicy.Spec = sriovnetworkv1.SriovNetworkNodePolicySpec{ + NumVfs: 5, + NodeSelector: map[string]string{"foo": "bar"}, + NicSelector: sriovnetworkv1.SriovNetworkNicSelector{}, + Priority: 20, + } + Expect(k8sClient.Create(context.Background(), somePolicy)).ToNot(HaveOccurred()) + DeferCleanup(func() { + err := k8sClient.Delete(context.Background(), somePolicy) + Expect(err).ToNot(HaveOccurred()) + }) + + controller = newConfigController() + + }) + + It("test webhook cleanup flow", func() { + controller.start() + defer controller.stop() + + cmd := &cobra.Command{} + namespace = testNamespace + // verify that finalizer has been added, by controller, upon object creation + config := &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + Expect(runCleanupCmd(cmd, []string{})).Should(Succeed()) + config = &sriovnetworkv1.SriovOperatorConfig{} + err := util.WaitForNamespacedObjectDeleted(config, k8sClient, testNamespace, "default", util.RetryInterval, util.APITimeout) + Expect(err).NotTo(HaveOccurred()) + + }) + + It("test 'default' config cleanup timeout", func() { + // in this test case sriov-operator controller has been scaled down. + // we are testing returned ctx timeout error, for not being able to delete 'default' config object + config := getDefaultSriovOperatorConfig() + config.Finalizers = []string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME} + Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) + + cmd := &cobra.Command{} + namespace = testNamespace + // verify that finalizer has been added, by controller, upon object creation + config = &sriovnetworkv1.SriovOperatorConfig{} + Eventually(func() []string { + // wait for SriovOperatorConfig flags to get updated + err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config) + if err != nil { + return nil + } + return config.Finalizers + }, util.APITimeout, util.RetryInterval).Should(Equal([]string{sriovnetworkv1.OPERATORCONFIGFINALIZERNAME})) + + watchTO = 1 + err := runCleanupCmd(cmd, []string{}) + Expect(err.Error()).To(ContainSubstring("context deadline exceeded")) + }) +}) + +func getDefaultSriovOperatorConfig() *sriovnetworkv1.SriovOperatorConfig { + return &sriovnetworkv1.SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + Namespace: testNamespace, + }, + Spec: defaultSriovOperatorSpec, + } +} + +func newConfigController() *configController { + // setup controller manager + By("Setup controller manager") + k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + }) + Expect(err).ToNot(HaveOccurred()) + + t := GinkgoT() + mockCtrl := gomock.NewController(t) + platformHelper := mock_platforms.NewMockInterface(mockCtrl) + platformHelper.EXPECT().GetFlavor().Return(openshift.OpenshiftFlavorDefault).AnyTimes() + platformHelper.EXPECT().IsOpenshiftCluster().Return(false).AnyTimes() + platformHelper.EXPECT().IsHypershift().Return(false).AnyTimes() + + err = (&controllers.SriovOperatorConfigReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + PlatformHelper: platformHelper, + FeatureGate: featuregate.New(), + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + ctx, cancel := context.WithCancel(context.Background()) + wg := sync.WaitGroup{} + controller = &configController{ + k8sManager: k8sManager, + ctx: ctx, + cancel: cancel, + wg: &wg, + } + + return controller +} + +func (c *configController) start() { + c.wg.Add(1) + go func() { + defer c.wg.Done() + defer GinkgoRecover() + By("Start controller manager") + err := c.k8sManager.Start(c.ctx) + Expect(err).ToNot(HaveOccurred()) + }() +} + +func (c *configController) stop() { + c.cancel() + c.wg.Wait() +} diff --git a/cmd/sriov-network-operator-config-cleanup/main.go b/cmd/sriov-network-operator-config-cleanup/main.go new file mode 100644 index 000000000..51874e54e --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/main.go @@ -0,0 +1,38 @@ +package main + +import ( + "flag" + "os" + + "github.com/spf13/cobra" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/log" + + snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" +) + +const ( + componentName = "sriov-network-operator-config-cleanup" +) + +var ( + rootCmd = &cobra.Command{ + Use: componentName, + Short: "Removes 'default' SriovOperatorConfig", + Long: `Removes 'default' SriovOperatorConfig in order to cleanup non-namespaced objects e.g clusterroles/clusterrolebinding/validating/mutating webhooks + +Example: sriov-network-operator-config-cleanup -n `, + RunE: runCleanupCmd, + } +) + +func main() { + klog.InitFlags(nil) + snolog.BindFlags(flag.CommandLine) + rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine) + + if err := rootCmd.Execute(); err != nil { + log.Log.Error(err, "Error executing sriov-network-operator-config-cleanup") + os.Exit(1) + } +} diff --git a/cmd/sriov-network-operator-config-cleanup/suite_test.go b/cmd/sriov-network-operator-config-cleanup/suite_test.go new file mode 100644 index 000000000..ee1815ff7 --- /dev/null +++ b/cmd/sriov-network-operator-config-cleanup/suite_test.go @@ -0,0 +1,121 @@ +package main + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "go.uber.org/zap/zapcore" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + //+kubebuilder:scaffold:imports + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util" + + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/log/zap" +) + +var ( + k8sClient client.Client + testEnv *envtest.Environment + cfg *rest.Config + kubecfgPath string +) + +var _ = BeforeSuite(func() { + + logf.SetLogger(zap.New( + zap.WriteTo(GinkgoWriter), + zap.UseDevMode(true), + func(o *zap.Options) { + o.TimeEncoder = zapcore.RFC3339NanoTimeEncoder + })) + + // Go to project root directory + err := os.Chdir("../..") + Expect(err).NotTo(HaveOccurred()) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("config", "crd", "bases"), filepath.Join("test", "util", "crds")}, + ErrorIfCRDPathMissing: true, + } + + testEnv.ControlPlane.GetAPIServer().Configure().Set("disable-admission-plugins", "MutatingAdmissionWebhook", "ValidatingAdmissionWebhook") + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + apiserverDir := testEnv.ControlPlane.GetAPIServer().CertDir + kubecfgPath = findKubecfg(apiserverDir, ".kubecfg") + err = os.Setenv("KUBECONFIG", kubecfgPath) + Expect(err).NotTo(HaveOccurred()) + + By("registering schemes") + err = sriovnetworkv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + vars.Config = cfg + vars.Scheme = scheme.Scheme + vars.Namespace = testNamespace + + By("creating K8s client") + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + By("creating default/common k8s objects for tests") + // Create test namespace + ns := &corev1.Namespace{ + TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Name: testNamespace, + }, + Spec: corev1.NamespaceSpec{}, + Status: corev1.NamespaceStatus{}, + } + ctx := context.Background() + Expect(k8sClient.Create(ctx, ns)).Should(Succeed()) +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + if testEnv != nil { + Eventually(func() error { + return testEnv.Stop() + }, util.APITimeout, time.Second).ShouldNot(HaveOccurred()) + } +}) + +func findKubecfg(path, ext string) string { + var cfg string + filepath.WalkDir(path, func(s string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if filepath.Ext(d.Name()) == ext { + cfg = s + } + return nil + }) + return cfg +} + +func TestAPIs(t *testing.T) { + _, reporterConfig := GinkgoConfiguration() + + RegisterFailHandler(Fail) + + RunSpecs(t, "operator-webhook Suite", reporterConfig) +} diff --git a/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml new file mode 100644 index 000000000..8fc7fa06b --- /dev/null +++ b/deployment/sriov-network-operator-chart/templates/pre-delete-webooks.yaml @@ -0,0 +1,27 @@ +# The following job will be used as Helm pre-delete hook. It executes a small go-client binary +# which intent to delete 'default' SriovOperatorConfig, that triggers operator removal of generated cluster objects +# e.g. mutating/validating webhooks, within operator's recoinciling loop and +# preventing operator cluster object remainings while using helm uninstall +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "sriov-network-operator.fullname" . }}-pre-delete-hook + namespace: {{ .Release.Namespace }} + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": hook-succeeded,hook-failed +spec: + template: + spec: + serviceAccountName: {{ include "sriov-network-operator.fullname" . }} + containers: + - name: cleanup + image: {{ .Values.images.operator }} + command: + - sriov-network-operator-config-cleanup + args: + - --namespace + - {{ .Release.Namespace }} + restartPolicy: Never + backoffLimit: 2 +