diff --git a/api/v1alpha1/utils.go b/api/v1alpha1/utils.go index 28eb3d2a6..486be6bf3 100644 --- a/api/v1alpha1/utils.go +++ b/api/v1alpha1/utils.go @@ -16,7 +16,13 @@ limitations under the License. package v1alpha1 -import upgradeApi "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" +import ( + "fmt" + + upgradeApi "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" + + "github.com/Mellanox/network-operator/pkg/consts" +) func GetDriverUpgradePolicy( ofedUpgradePolicy *DriverUpgradePolicySpec) *upgradeApi.DriverUpgradePolicySpec { @@ -33,6 +39,19 @@ func GetDriverUpgradePolicy( driverUpgradePolicy.WaitForCompletion = getWaitForCompletionSpec(ofedUpgradePolicy.WaitForCompletion) driverUpgradePolicy.DrainSpec = getDrainSpec(ofedUpgradePolicy.DrainSpec) + if driverUpgradePolicy.DrainSpec != nil && driverUpgradePolicy.DrainSpec.Enable { + // We want to skip operator itself during the drain because the upgrade process might hang + // if the operator is evicted and can't be rescheduled to any other node, e.g. in a single-node cluster. + // It's safe to do because the goal of the node draining during the upgrade is to + // evict pods that might use driver and operator doesn't use in its own pod. + if driverUpgradePolicy.DrainSpec.PodSelector == "" { + driverUpgradePolicy.DrainSpec.PodSelector = consts.OfedDriverSkipDrainLabel + } else { + driverUpgradePolicy.DrainSpec.PodSelector = + fmt.Sprintf("%s,%s", driverUpgradePolicy.DrainSpec.PodSelector, consts.OfedDriverSkipDrainLabel) + } + } + return &driverUpgradePolicy } diff --git a/api/v1alpha1/utils_test.go b/api/v1alpha1/utils_test.go index ea280a33e..412f41d45 100644 --- a/api/v1alpha1/utils_test.go +++ b/api/v1alpha1/utils_test.go @@ -80,7 +80,7 @@ var _ = Describe("API utils tests", func() { result := getDrainSpec(input) Expect(result.Enable).To(Equal(input.Enable)) Expect(result.Force).To(Equal(input.Force)) - Expect(result.PodSelector).To(Equal(input.PodSelector)) + Expect(result.PodSelector).To(ContainSubstring(input.PodSelector)) Expect(result.TimeoutSecond).To(Equal(input.TimeoutSecond)) Expect(result.DeleteEmptyDir).To(Equal(input.DeleteEmptyDir)) }) diff --git a/controllers/upgrade_controller.go b/controllers/upgrade_controller.go index b621180a7..04cae26df 100644 --- a/controllers/upgrade_controller.go +++ b/controllers/upgrade_controller.go @@ -18,7 +18,6 @@ package controllers import ( "context" - "fmt" "time" corev1 "k8s.io/api/core/v1" @@ -35,7 +34,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" - "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -49,9 +47,8 @@ import ( // UpgradeReconciler reconciles OFED Daemon Sets for upgrade type UpgradeReconciler struct { client.Client - Scheme *runtime.Scheme - StateManager *upgrade.ClusterUpgradeStateManager - NodeUpgradeStateProvider upgrade.NodeUpgradeStateProvider + Scheme *runtime.Scheme + StateManager upgrade.ClusterUpgradeStateManager } const plannedRequeueInterval = time.Minute * 2 @@ -102,7 +99,9 @@ func (r *UpgradeReconciler) Reconcile(ctx context.Context, _ ctrl.Request) (ctrl upgradePolicy := nicClusterPolicy.Spec.OFEDDriver.OfedUpgradePolicy - state, err := r.BuildState(ctx) + state, err := r.StateManager.BuildState(ctx, + config.FromEnv().State.NetworkOperatorResourceNamespace, + map[string]string{consts.OfedDriverLabel: ""}) if err != nil { reqLogger.V(consts.LogLevelError).Error(err, "Failed to build cluster upgrade state") return ctrl.Result{}, err @@ -184,125 +183,6 @@ func (r *UpgradeReconciler) removeNodeUpgradeStateAnnotations(ctx context.Contex return nil } -// BuildState creates a snapshot of the current OFED upgrade state in the cluster (upgrade.ClusterUpgradeState) -// It creates mappings between nodes and their upgrade state -// Nodes are grouped together with the driver POD running on them and the daemon set, controlling this pod -// This state is then used as an input for the upgrade.ClusterUpgradeStateManager -func (r *UpgradeReconciler) BuildState(ctx context.Context) (*upgrade.ClusterUpgradeState, error) { - reqLogger := log.FromContext(ctx) - reqLogger.V(consts.LogLevelInfo).Info("Building state") - - upgradeState := upgrade.NewClusterUpgradeState() - - daemonSets, err := r.getDriverDaemonSets(ctx) - if err != nil { - reqLogger.V(consts.LogLevelError).Error(err, "Failed to get driver daemon set list") - return nil, err - } - - reqLogger.V(consts.LogLevelDebug).Info("Got driver daemon sets", "length", len(daemonSets)) - - // Get list of driver pods - podList := &corev1.PodList{} - - err = r.List(ctx, podList, - client.InNamespace(config.FromEnv().State.NetworkOperatorResourceNamespace), - client.MatchingLabels{consts.OfedDriverLabel: ""}) - if err != nil { - return nil, err - } - - filteredPodList := []corev1.Pod{} - for _, ds := range daemonSets { - dsPods := r.getPodsOwnedbyDs(ds, podList.Items, reqLogger) - if int(ds.Status.DesiredNumberScheduled) != len(dsPods) { - reqLogger.V(consts.LogLevelInfo).Info("Driver daemon set has Unscheduled pods", "name", ds.Name) - return nil, fmt.Errorf("DS should not have Unscheduled pods") - } - filteredPodList = append(filteredPodList, dsPods...) - } - - upgradeStateLabel := upgrade.GetUpgradeStateLabelKey() - - for i := range filteredPodList { - pod := &filteredPodList[i] - ownerDaemonSet := daemonSets[pod.OwnerReferences[0].UID] - nodeState, err := r.buildNodeUpgradeState(ctx, pod, ownerDaemonSet) - if err != nil { - reqLogger.V(consts.LogLevelError).Error(err, "Failed to build node upgrade state for pod", "pod", pod) - return nil, err - } - nodeStateLabel := nodeState.Node.Labels[upgradeStateLabel] - upgradeState.NodeStates[nodeStateLabel] = append( - upgradeState.NodeStates[nodeStateLabel], nodeState) - } - - return &upgradeState, nil -} - -// buildNodeUpgradeState creates a mapping between a node, -// the driver POD running on them and the daemon set, controlling this pod -func (r *UpgradeReconciler) buildNodeUpgradeState( - ctx context.Context, pod *corev1.Pod, ds *appsv1.DaemonSet) (*upgrade.NodeUpgradeState, error) { - reqLogger := log.FromContext(ctx) - node, err := r.NodeUpgradeStateProvider.GetNode(ctx, pod.Spec.NodeName) - if err != nil { - reqLogger.V(consts.LogLevelError).Error(err, "Failed to get node", "node", pod.Spec.NodeName) - return nil, err - } - - upgradeStateLabel := upgrade.GetUpgradeStateLabelKey() - reqLogger.V(consts.LogLevelInfo).Info("Node hosting a driver pod", - "node", node.Name, "state", node.Labels[upgradeStateLabel]) - - return &upgrade.NodeUpgradeState{Node: node, DriverPod: pod, DriverDaemonSet: ds}, nil -} - -// getDriverDaemonSets retrieves DaemonSets labeled with OfedDriverLabel and returns UID->DaemonSet map -func (r *UpgradeReconciler) getDriverDaemonSets(ctx context.Context) (map[types.UID]*appsv1.DaemonSet, error) { - reqLogger := log.FromContext(ctx) - // Get list of driver pods - daemonSetList := &appsv1.DaemonSetList{} - - err := r.List(ctx, daemonSetList, - client.InNamespace(config.FromEnv().State.NetworkOperatorResourceNamespace), - client.MatchingLabels{consts.OfedDriverLabel: ""}) - if err != nil { - reqLogger.V(consts.LogLevelError).Error(err, "Failed to get daemon set list") - return nil, err - } - - daemonSetMap := make(map[types.UID]*appsv1.DaemonSet) - for i := range daemonSetList.Items { - daemonSet := &daemonSetList.Items[i] - daemonSetMap[daemonSet.UID] = daemonSet - } - - return daemonSetMap, nil -} - -// getPodsOwnedbyDs gets a list of pods return a list of the pods owned by the specified DaemonSet -func (r *UpgradeReconciler) getPodsOwnedbyDs( - ds *appsv1.DaemonSet, pods []corev1.Pod, reqLogger logr.Logger) []corev1.Pod { - dsPodList := []corev1.Pod{} - for i := range pods { - pod := &pods[i] - if pod.OwnerReferences == nil || len(pod.OwnerReferences) < 1 { - reqLogger.V(consts.LogLevelWarning).Info("OFED Driver Pod has no owner DaemonSet", "pod", pod) - continue - } - reqLogger.V(consts.LogLevelDebug).Info("Pod", "pod", pod.Name, "owner", pod.OwnerReferences[0].Name) - - if ds.UID == pod.OwnerReferences[0].UID { - dsPodList = append(dsPodList, *pod) - } else { - reqLogger.V(consts.LogLevelWarning).Info("OFED Driver Pod is not owned by an OFED Driver DaemonSet", - "pod", pod, "actual owner", pod.OwnerReferences[0]) - } - } - return dsPodList -} - // SetupWithManager sets up the controller with the Manager. // //nolint:dupl diff --git a/controllers/upgrade_controller_test.go b/controllers/upgrade_controller_test.go index 5284f1b9b..3ecbb6ca2 100644 --- a/controllers/upgrade_controller_test.go +++ b/controllers/upgrade_controller_test.go @@ -23,12 +23,10 @@ import ( "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/log" mellanoxv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1" "github.com/Mellanox/network-operator/pkg/consts" @@ -97,68 +95,6 @@ var _ = Describe("Upgrade Controller", func() { Expect(present).To(Equal(false)) } }) - - It("should only retrieve pods owned by a given daemonset", func() { - upgrade.SetDriverName("ofed") - - ds := &appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-daemonset", - Namespace: "default", - UID: "ds-uid", - }, - } - - // Create a Pod owned by the DaemonSet - dsPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds-pod", - Namespace: "default", - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: "apps/v1", - Kind: "DaemonSet", - Name: ds.Name, - UID: ds.UID, - }, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{Image: "test", Name: "test"}}, - }, - } - err := k8sClient.Create(goctx.TODO(), dsPod) - Expect(err).NotTo(HaveOccurred()) - - // Create a Pod NOT owned by the DaemonSet - nonDsPod := &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "non-ds-pod", - Namespace: "default", - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{Image: "test", Name: "test"}}, - }, - } - err = k8sClient.Create(goctx.TODO(), nonDsPod) - Expect(err).NotTo(HaveOccurred()) - - podList := &corev1.PodList{} - err = k8sClient.List(goctx.TODO(), podList) - Expect(podList.Items).To(HaveLen(2)) - Expect(err).NotTo(HaveOccurred()) - - upgradeReconciler := &UpgradeReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), - } - - pods := upgradeReconciler.getPodsOwnedbyDs(ds, podList.Items, log.Log.WithName("test-log")) - - // Verify that the returned Pods are owned by the DaemonSet - Expect(pods).To(HaveLen(1)) - Expect(pods[0].Name).To(Equal(dsPod.Name)) - }) }) }) diff --git a/go.mod b/go.mod index 6cf7965eb..41aca8640 100644 --- a/go.mod +++ b/go.mod @@ -110,4 +110,7 @@ require ( sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect ) -replace github.com/emicklei/go-restful => github.com/emicklei/go-restful v2.16.0+incompatible +replace ( + github.com/NVIDIA/k8s-operator-libs => gitlab.com/nvidia/cloud-native/k8s-operator-libs v0.0.0-20230913095933-ed4c9d6bb9e4 + github.com/emicklei/go-restful => github.com/emicklei/go-restful v2.16.0+incompatible +) diff --git a/go.sum b/go.sum index 474c9ef11..aaed13bfe 100644 --- a/go.sum +++ b/go.sum @@ -39,8 +39,6 @@ github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/Masterminds/semver/v3 v3.1.1 h1:hLg3sBzpNErnxhQtUy/mmLR2I9foDujNK030IGemrRc= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= -github.com/NVIDIA/k8s-operator-libs v0.0.0-20230504161004-dd428b908f5a h1:k8HalV8rdGshw0hCPxVLt8FRzpd9hnXfjggc/ePAn3A= -github.com/NVIDIA/k8s-operator-libs v0.0.0-20230504161004-dd428b908f5a/go.mod h1:o7UeOk3V0xlOX87NnSaGNDje23VKCKPdfN6aSIZ9BwM= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= @@ -385,6 +383,8 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +gitlab.com/nvidia/cloud-native/k8s-operator-libs v0.0.0-20230913095933-ed4c9d6bb9e4 h1:RzPeXJ+YHt30+kPIFrlwiATo3D8vFgvB7rxEFtjOu5I= +gitlab.com/nvidia/cloud-native/k8s-operator-libs v0.0.0-20230913095933-ed4c9d6bb9e4/go.mod h1:o7UeOk3V0xlOX87NnSaGNDje23VKCKPdfN6aSIZ9BwM= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/main.go b/main.go index b90415d96..40846ccda 100644 --- a/main.go +++ b/main.go @@ -170,9 +170,6 @@ func main() { upgradeLogger := ctrl.Log.WithName("controllers").WithName("Upgrade") - nodeUpgradeStateProvider := upgrade.NewNodeUpgradeStateProvider( - mgr.GetClient(), upgradeLogger.WithName("nodeUpgradeStateProvider"), nil) - clusterUpdateStateManager, err := upgrade.NewClusterUpgradeStateManager( upgradeLogger.WithName("clusterUpgradeManager"), config.GetConfigOrDie(), nil) @@ -182,10 +179,9 @@ func main() { } if err = (&controllers.UpgradeReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - StateManager: clusterUpdateStateManager, - NodeUpgradeStateProvider: nodeUpgradeStateProvider, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + StateManager: clusterUpdateStateManager, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Upgrade") os.Exit(1) diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 473b6b655..cb05cffa1 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -32,4 +32,7 @@ const ( NicClusterPolicyResourceName = "nic-cluster-policy" OfedDriverLabel = "nvidia.com/ofed-driver" StateLabel = "nvidia.network-operator.state" + // OfedDriverSkipDrainLabel contains name of the label which is used to indicate + // that OFED pod should be skipped during the drain operation which is executed by the upgrade controller. + OfedDriverSkipDrainLabel = "nvidia.com/ofed-driver-upgrade-drain.skip" )