From b85e0a447ff3f34be191ed9883b03246317befcd Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Wed, 12 Apr 2023 11:07:42 +0300 Subject: [PATCH] Remove whereabouts IP reconcile CronJob IP reconciliation logic is now built in whereabouts, and we don't need to deploy CronJob separately - remove manifest to prevent deployment for new installations - add migration handler, which will remove CronJob deployed by the previous operator version Signed-off-by: Yury Kulazhenkov --- main.go | 19 ++++- .../0060-ip-reconciler-job.yaml | 59 ---------------- pkg/config/config.go | 2 + pkg/migrate/migrate.go | 70 +++++++++++++++++++ 4 files changed, 90 insertions(+), 60 deletions(-) delete mode 100644 manifests/stage-whereabouts-cni/0060-ip-reconciler-job.yaml create mode 100644 pkg/migrate/migrate.go diff --git a/main.go b/main.go index 593c9a65..2e59fc27 100644 --- a/main.go +++ b/main.go @@ -27,6 +27,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -38,6 +39,7 @@ import ( mellanoxcomv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1" "github.com/Mellanox/network-operator/controllers" "github.com/Mellanox/network-operator/pkg/consts" + "github.com/Mellanox/network-operator/pkg/migrate" // +kubebuilder:scaffold:imports ) @@ -111,6 +113,8 @@ func main() { clientConf := ctrl.GetConfigOrDie() clientConf.UserAgent = consts.KubernetesClientUserAgent + stopCtx := ctrl.SetupSignalHandler() + mgr, err := ctrl.NewManager(clientConf, ctrl.Options{ Scheme: scheme, MetricsBindAddress: metricsAddr, @@ -124,6 +128,19 @@ func main() { os.Exit(1) } + directClient, err := client.New(clientConf, + client.Options{Scheme: mgr.GetScheme(), Mapper: mgr.GetRESTMapper()}) + if err != nil { + 
setupLog.Error(err, "failed to create direct client") + os.Exit(1) + } + + // run migration logic before controllers start + if err := migrate.Migrate(stopCtx, setupLog.WithName("migrate"), directClient); err != nil { + setupLog.Error(err, "failed to run migration logic") + os.Exit(1) + } + err = setupCRDControllers(mgr) if err != nil { os.Exit(1) @@ -170,7 +187,7 @@ func main() { } setupLog.Info("starting manager") - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + if err := mgr.Start(stopCtx); err != nil { setupLog.Error(err, "problem running manager") os.Exit(1) } diff --git a/manifests/stage-whereabouts-cni/0060-ip-reconciler-job.yaml b/manifests/stage-whereabouts-cni/0060-ip-reconciler-job.yaml deleted file mode 100644 index ef9998fd..00000000 --- a/manifests/stage-whereabouts-cni/0060-ip-reconciler-job.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright 2020 NVIDIA -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -apiVersion: batch/v1 -kind: CronJob -metadata: - name: whereabouts-ip-reconciler - namespace: {{ .RuntimeSpec.Namespace }} - labels: - tier: node - app: whereabouts -spec: - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 0 - schedule: "*/5 * * * *" - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - labels: - app: whereabouts - spec: - priorityClassName: "system-node-critical" - serviceAccountName: whereabouts - {{- if .CrSpec.ImagePullSecrets }} - imagePullSecrets: - {{- range .CrSpec.ImagePullSecrets }} - - name: {{ . 
}} - {{- end }} - {{- end }} - containers: - - name: whereabouts - image: {{ .CrSpec.Repository }}/{{ .CrSpec.Image }}:{{ .CrSpec.Version }} - resources: - requests: - cpu: "100m" - memory: "50Mi" - command: - - /ip-reconciler - - -log-level=error - volumeMounts: - - name: cni-net-dir - mountPath: /host/etc/cni/net.d - volumes: - - name: cni-net-dir - hostPath: - path: /etc/cni/net.d - restartPolicy: OnFailure diff --git a/pkg/config/config.go b/pkg/config/config.go index 1c5dfcea..73495b53 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -29,6 +29,8 @@ var operatorConfig *OperatorConfig type OperatorConfig struct { State StateConfig Controller ControllerConfig + // disable migration logic in the operator. + DisableMigration bool `env:"DISABLE_MIGRATION" envDefault:"false"` } // state related configurations diff --git a/pkg/migrate/migrate.go b/pkg/migrate/migrate.go new file mode 100644 index 00000000..8a862e49 --- /dev/null +++ b/pkg/migrate/migrate.go @@ -0,0 +1,70 @@ +/* +2023 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package migrate
+
+import (
+	"context"
+
+	"github.com/go-logr/logr"
+	v1 "k8s.io/api/batch/v1"
+	apiErrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/Mellanox/network-operator/pkg/config"
+	"github.com/Mellanox/network-operator/pkg/consts"
+)
+
+// Migrate runs one-off upgrade logic that has to finish before the controllers start,
+// e.g. converting an old data format to a new one or removing unneeded resources from the cluster.
+// When DisableMigration is set in the operator config it only logs a message and returns nil.
+func Migrate(ctx context.Context, log logr.Logger, c client.Client) error {
+	if !config.FromEnv().DisableMigration {
+		return migrate(ctx, log, c)
+	}
+	log.Info("migration logic is disabled for the operator")
+	return nil
+}
+
+// migrate executes the migration steps; a failing step is reported and nil is always returned.
+func migrate(ctx context.Context, log logr.Logger, c client.Client) error {
+	// not critical for the operator operation, safer to ignore the failure
+	if removeWhereaboutsIPReconcileCronJob(ctx, log, c) != nil {
+		log.V(consts.LogLevelWarning).Info("ignore error during whereabouts CronJob removal")
+	}
+	return nil
+}
+
+// removeWhereaboutsIPReconcileCronJob removes the whereabouts-ip-reconciler CronJob
+// from the network-operator namespace.
+// reason: update whereabouts to version v0.6.1 in network-operator v23.4.0.
+// IP reconciliation logic is now built into whereabouts, so new deployments no longer get the CronJob,
+// and the one which was deployed by the previous network-operator version has to be removed.
+func removeWhereaboutsIPReconcileCronJob(ctx context.Context, log logr.Logger, c client.Client) error {
+	ns := config.FromEnv().State.NetworkOperatorResourceNamespace
+	target := &v1.CronJob{ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: "whereabouts-ip-reconciler"}}
+	// a NotFound response matches neither case below and reaches the final return:
+	// a CronJob that is already absent needs no cleanup
+	switch err := c.Delete(ctx, target); {
+	case err == nil:
+		log.V(consts.LogLevelDebug).Info("whereabouts IP reconciler CronJob removed")
+	case !apiErrors.IsNotFound(err):
+		log.V(consts.LogLevelError).Error(err, "failed to remove whereabouts IP reconciler CronJob")
+		return err
+	}
+	return nil
+}