From 5ad4ae9af8a18108bda70f119973a25574b24293 Mon Sep 17 00:00:00 2001 From: Yury Kulazhenkov Date: Mon, 28 Oct 2024 17:01:15 +0200 Subject: [PATCH] feat: Update controller logic to handle stale SriovNetworkNodeState CRs with delay - Changed the logic in the sriov-network-operator controller to handle stale SriovNetworkNodeState CRs (those with no matching Nodes with daemon). - Introduced a delay (30 minutes by default) before removing stale state CRs to manage scenarios where the user temporarily removes the daemon from the node but does not want to lose the state stored in the SriovNetworkNodeState. - Added the `STALE_NODE_STATE_CLEANUP_DELAY_MINUTES` environment variable to configure the required delay in minutes (default is 30 minutes). --- api/v1/helper.go | 41 ++++++++++ .../sriovnetworknodepolicy_controller.go | 66 ++++++++++++++-- .../sriovnetworknodepolicy_controller_test.go | 78 +++++++++++++++++-- .../templates/operator.yaml | 2 + .../sriov-network-operator-chart/values.yaml | 4 + pkg/consts/constants.go | 8 ++ 6 files changed, 188 insertions(+), 11 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index 300992acb..1f30be618 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -12,6 +12,7 @@ import ( "sort" "strconv" "strings" + "time" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1005,3 +1006,43 @@ func GenerateBridgeName(iface *InterfaceExt) string { func NeedToUpdateBridges(bridgeSpec, bridgeStatus *Bridges) bool { return !reflect.DeepEqual(bridgeSpec, bridgeStatus) } + +// SetKeepUntilTime sets an annotation to hold the "keep until time" for the node’s state. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +func (s *SriovNetworkNodeState) SetKeepUntilTime(t time.Time) { + ts := t.Format(time.RFC3339) + annotations := s.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[consts.NodeStateKeepUntilAnnotation] = ts + s.SetAnnotations(annotations) +} + +// GetKeepUntilTime returns the value that is stored in the "keep until time" annotation. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Return zero time instant if annotaion is not found on the object or if it has a wrong format. +func (s *SriovNetworkNodeState) GetKeepUntilTime() time.Time { + t, err := time.Parse(time.RFC3339, s.GetAnnotations()[consts.NodeStateKeepUntilAnnotation]) + if err != nil { + return time.Time{} + } + return t +} + +// ResetKeepUntilTime removes "keep until time" annotation from the state object. +// The "keep until time" specifies the earliest time at which the state object can be removed +// if the daemon's pod is not found on the node. +// Returns true if the value was removed, false otherwise. +func (s *SriovNetworkNodeState) ResetKeepUntilTime() bool { + annotations := s.GetAnnotations() + _, exist := annotations[consts.NodeStateKeepUntilAnnotation] + if !exist { + return false + } + delete(annotations, consts.NodeStateKeepUntilAnnotation) + s.SetAnnotations(annotations) + return true +} diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go index 29438b176..f8811ed97 100644 --- a/controllers/sriovnetworknodepolicy_controller.go +++ b/controllers/sriovnetworknodepolicy_controller.go @@ -20,8 +20,10 @@ import ( "context" "encoding/json" "fmt" + "os" "reflect" "sort" + "strconv" "strings" "time" @@ -338,10 +340,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name) return err } - logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) - err = r.Delete(ctx, &ns, &client.DeleteOptions{}) - if err != nil { - logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName()) + if err := r.handleStaleNodeState(ctx, &ns); err != nil { return err } } @@ -350,6 +349,56 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con return nil } +// handleStaleNodeState handles stale SriovNetworkNodeState CR (the CR which no longer have a corresponding node with the daemon). +// If the CR has the "keep until time" annotation, indicating the earliest time the state object can be removed, +// this function will compare it to the current time to determine if deletion is permissible and do deletion if allowed. +// If the annotation is absent, the function will create one with a timestamp in future, using either the default or a configured offset. +// If STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable is set to 0, removes the CR immediately +func (r *SriovNetworkNodePolicyReconciler) handleStaleNodeState(ctx context.Context, ns *sriovnetworkv1.SriovNetworkNodeState) error { + logger := log.Log.WithName("handleStaleNodeState") + + var delayMinutes int + var err error + + envValue, found := os.LookupEnv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES") + if found { + delayMinutes, err = strconv.Atoi(envValue) + if err != nil || delayMinutes < 0 { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + logger.Error(err, "invalid value in STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable, use default delay", + "delay", delayMinutes) + } + } else { + delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes + } + + if delayMinutes != 0 { + now := time.Now().UTC() + keepUntilTime := ns.GetKeepUntilTime() + if keepUntilTime.IsZero() { + keepUntilTime = now.Add(time.Minute * time.Duration(delayMinutes)) + logger.V(2).Info("SriovNetworkNodeState has no matching node, configure cleanup delay for the state object", + "nodeStateName", ns.Name, "delay", delayMinutes, "keepUntilTime", keepUntilTime.String()) + ns.SetKeepUntilTime(keepUntilTime) + if err := r.Update(ctx, ns); err != nil { + logger.Error(err, "Fail to update SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil + } + if now.Before(keepUntilTime) { + return nil + } + } + // remove the object if delayMinutes is 0 or if keepUntilTime is already passed + logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name) + if err := r.Delete(ctx, ns, &client.DeleteOptions{}); err != nil { + logger.Error(err, "Fail to delete SriovNetworkNodeState CR", "name", ns.GetName()) + return err + } + return nil +} + func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context.Context, dc *sriovnetworkv1.SriovOperatorConfig, npl *sriovnetworkv1.SriovNetworkNodePolicyList, @@ -375,9 +424,16 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context return fmt.Errorf("failed to get SriovNetworkNodeState: %v", err) } } else { + keepUntilAnnotationUpdated := found.ResetKeepUntilTime() + if len(found.Status.Interfaces) == 0 { logger.Info("SriovNetworkNodeState Status Interfaces are empty. Skip update of policies in spec", "namespace", ns.Namespace, "name", ns.Name) + if keepUntilAnnotationUpdated { + if err := r.Update(ctx, found); err != nil { + return fmt.Errorf("couldn't update SriovNetworkNodeState: %v", err) + } + } return nil } @@ -420,7 +476,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context // Note(adrianc): we check same ownerReferences since SriovNetworkNodeState // was owned by a default SriovNetworkNodePolicy. if we encounter a descripancy // we need to update. - if reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && + if !keepUntilAnnotationUpdated && reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) && equality.Semantic.DeepEqual(newVersion.Spec, found.Spec) { logger.V(1).Info("SriovNetworkNodeState did not change, not updating") return nil diff --git a/controllers/sriovnetworknodepolicy_controller_test.go b/controllers/sriovnetworknodepolicy_controller_test.go index abdddbc91..7a7b7a8ec 100644 --- a/controllers/sriovnetworknodepolicy_controller_test.go +++ b/controllers/sriovnetworknodepolicy_controller_test.go @@ -3,6 +3,7 @@ package controllers import ( "context" "encoding/json" + "os" "sync" "testing" "time" @@ -11,18 +12,17 @@ import ( . "github.com/onsi/gomega" "github.com/google/go-cmp/cmp" + dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types" - sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" @@ -48,7 +48,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testVirtioVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVirtio, @@ -67,7 +67,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { }, { tname: "testVhostVdpaVirtio", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", DeviceType: consts.DeviceTypeNetDevice, VdpaType: consts.VdpaTypeVhost, @@ -87,7 +87,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) { { tname: "testExcludeTopology", policy: sriovnetworkv1.SriovNetworkNodePolicy{ - Spec: v1.SriovNetworkNodePolicySpec{ + Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{ ResourceName: "resourceName", ExcludeTopology: true, }, @@ -138,6 +138,10 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { var ctx context.Context BeforeAll(func() { + // disable stale state cleanup delay to check that the controller can cleanup state objects + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + By("Create SriovOperatorConfig controller k8s objs") config := makeDefaultSriovOpConfig() Expect(k8sClient.Create(context.Background(), config)).Should(Succeed()) @@ -261,3 +265,65 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() { }) }) }) + +var _ = Describe("SriovNetworkNodePolicyReconciler", Ordered, func() { + Context("handleStaleNodeState", func() { + var ( + ctx context.Context + r *SriovNetworkNodePolicyReconciler + nodeState *sriovnetworkv1.SriovNetworkNodeState + ) + + BeforeEach(func() { + ctx = context.Background() + scheme := runtime.NewScheme() + utilruntime.Must(sriovnetworkv1.AddToScheme(scheme)) + nodeState = &sriovnetworkv1.SriovNetworkNodeState{ObjectMeta: metav1.ObjectMeta{Name: "node1"}} + r = &SriovNetworkNodePolicyReconciler{Client: fake.NewClientBuilder().WithObjects(nodeState).Build()} + }) + It("should set default delay", func() { + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Now().UTC().Before(nodeState.GetKeepUntilTime())).To(BeTrue()) + }) + It("should remove CR if wait time expired", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(-time.Minute)) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + It("should keep existing wait time if already set", func() { + nodeState := nodeState.DeepCopy() + nodeState.SetKeepUntilTime(time.Now().UTC().Add(time.Minute)) + testTime := nodeState.GetKeepUntilTime() + r.Update(ctx, nodeState) + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(nodeState.GetKeepUntilTime()).To(Equal(testTime)) + }) + It("non default dealy", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "60") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 30*time.Minute).To(BeTrue()) + }) + It("invalid non default delay - should use default", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "-20") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred()) + Expect(time.Until(nodeState.GetKeepUntilTime()) > 20*time.Minute).To(BeTrue()) + }) + It("should remove CR if delay is zero", func() { + DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")) + os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0") + nodeState := nodeState.DeepCopy() + Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred()) + Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue()) + }) + }) +}) diff --git a/deployment/sriov-network-operator-chart/templates/operator.yaml b/deployment/sriov-network-operator-chart/templates/operator.yaml index 0e89d1959..c2a813fc8 100644 --- a/deployment/sriov-network-operator-chart/templates/operator.yaml +++ b/deployment/sriov-network-operator-chart/templates/operator.yaml @@ -112,6 +112,8 @@ spec: value: {{ .Values.operator.cniBinPath }} - name: CLUSTER_TYPE value: {{ .Values.operator.clusterType }} + - name: STALE_NODE_STATE_CLEANUP_DELAY_MINUTES + value: "{{ .Values.operator.staleNodeStateCleanupDelayMinutes }}" {{- if .Values.operator.admissionControllers.enabled }} - name: ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_SECRET_NAME value: {{ .Values.operator.admissionControllers.certificates.secretNames.operator }} diff --git a/deployment/sriov-network-operator-chart/values.yaml b/deployment/sriov-network-operator-chart/values.yaml index c70d6e323..ec9323bf7 100644 --- a/deployment/sriov-network-operator-chart/values.yaml +++ b/deployment/sriov-network-operator-chart/values.yaml @@ -27,6 +27,10 @@ operator: resourcePrefix: "openshift.io" cniBinPath: "/opt/cni/bin" clusterType: "kubernetes" + # minimal amount of time (in minutes) the operator will wait before removing + # stale SriovNetworkNodeState objects (objects that doesn't match node with the daemon) + # "0" means no extra delay, in this case the CR will be removed by the next reconcilation cycle (may take up to 5 minutes) + staleNodeStateCleanupDelayMinutes: "30" metricsExporter: port: "9110" certificates: diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index 4ce478730..6aadef648 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -93,6 +93,14 @@ const ( MCPPauseAnnotationState = "sriovnetwork.openshift.io/state" MCPPauseAnnotationTime = "sriovnetwork.openshift.io/time" + // NodeStateKeepUntilAnnotation contains name of the "keep until time" annotation for SriovNetworkNodeState object. + // The "keep until time" specifies the earliest time at which the state object can be removed + // if the daemon's pod is not found on the node. + NodeStateKeepUntilAnnotation = "sriovnetwork.openshift.io/keep-state-until" + // DefaultNodeStateCleanupDelayMinutes contains default delay before removing stale SriovNetworkNodeState CRs + // (the CRs that no longer have a corresponding node with the daemon). + DefaultNodeStateCleanupDelayMinutes = 30 + CheckpointFileName = "sno-initial-node-state.json" Unknown = "Unknown"