Skip to content

Commit

Permalink
feat: Update controller logic to handle stale SriovNetworkNodeState C…
Browse files Browse the repository at this point in the history
…Rs with delay

- Changed the logic in the sriov-network-operator controller to handle stale SriovNetworkNodeState CRs (those with no matching Nodes with daemon).
- Introduced a delay (30 minutes by default) before removing stale state CRs to manage scenarios where the user temporarily removes the daemon from the node but does not want to lose the state stored in the SriovNetworkNodeState.
- Added the `STALE_NODE_STATE_CLEANUP_DELAY_MINUTES` environment variable to configure the required delay in minutes (default is 30 minutes).
  • Loading branch information
ykulazhenkov committed Dec 2, 2024
1 parent 5f492e5 commit 4ea6ce0
Show file tree
Hide file tree
Showing 6 changed files with 184 additions and 11 deletions.
41 changes: 41 additions & 0 deletions api/v1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"sort"
"strconv"
"strings"
"time"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -1005,3 +1006,43 @@ func GenerateBridgeName(iface *InterfaceExt) string {
func NeedToUpdateBridges(bridgeSpec, bridgeStatus *Bridges) bool {
return !reflect.DeepEqual(bridgeSpec, bridgeStatus)
}

// SetKeepUntilTime sets an annotation to hold the "keep until time" for the node’s state.
// The "keep until time" specifies the earliest time at which the state object can be removed
// if the daemon's pod is not found on the node.
func (s *SriovNetworkNodeState) SetKeepUntilTime(t time.Time) {
ts := t.Format(time.RFC3339)
annotations := s.GetAnnotations()
if annotations == nil {
annotations = map[string]string{}
}
annotations[consts.NodeStateKeepUntilAnnotation] = ts
s.SetAnnotations(annotations)
}

// GetKeepUntilTime returns the value that is stored in the "keep until time" annotation.
// The "keep until time" specifies the earliest time at which the state object can be removed
// if the daemon's pod is not found on the node.
// Return zero time instant if annotaion is not found on the object or if it has a wrong format.
func (s *SriovNetworkNodeState) GetKeepUntilTime() time.Time {
t, err := time.Parse(time.RFC3339, s.GetAnnotations()[consts.NodeStateKeepUntilAnnotation])
if err != nil {
return time.Time{}
}
return t
}

// ResetKeepUntilTime removes "keep until time" annotation from the state object.
// The "keep until time" specifies the earliest time at which the state object can be removed
// if the daemon's pod is not found on the node.
// Returns true if the value was removed, false otherwise.
func (s *SriovNetworkNodeState) ResetKeepUntilTime() bool {
annotations := s.GetAnnotations()
_, exist := annotations[consts.NodeStateKeepUntilAnnotation]
if !exist {
return false
}
delete(annotations, consts.NodeStateKeepUntilAnnotation)
s.SetAnnotations(annotations)
return true
}
66 changes: 61 additions & 5 deletions controllers/sriovnetworknodepolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ import (
"context"
"encoding/json"
"fmt"
"os"
"reflect"
"sort"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -338,10 +340,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
logger.Error(err, "Fail to remove device plugin label from node", "node", ns.Name)
return err
}
logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name)
err = r.Delete(ctx, &ns, &client.DeleteOptions{})
if err != nil {
logger.Error(err, "Fail to Delete", "SriovNetworkNodeState CR:", ns.GetName())
if err := r.handleStaleNodeState(ctx, &ns); err != nil {
return err
}
}
Expand All @@ -350,6 +349,56 @@ func (r *SriovNetworkNodePolicyReconciler) syncAllSriovNetworkNodeStates(ctx con
return nil
}

// handleStaleNodeState handles stale SriovNetworkNodeState CR (the CR which no longer have a corresponding node with the daemon).
// If the CR has the "keep until time" annotation, indicating the earliest time the state object can be removed,
// this function will compare it to the current time to determine if deletion is permissible and do deletion if allowed.
// If the annotation is absent, the function will create one with a timestamp in future, using either the default or a configured offset.
// If STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable is set to 0, removes the CR immediately
func (r *SriovNetworkNodePolicyReconciler) handleStaleNodeState(ctx context.Context, ns *sriovnetworkv1.SriovNetworkNodeState) error {
logger := log.Log.WithName("handleStaleNodeState")

var delayMinutes int
var err error

envValue, found := os.LookupEnv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES")
if found {
delayMinutes, err = strconv.Atoi(envValue)
if err != nil || delayMinutes < 0 {
delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes
logger.Error(err, "invalid value in STALE_NODE_STATE_CLEANUP_DELAY_MINUTES env variable, use default delay",
"delay", delayMinutes)
}
} else {
delayMinutes = constants.DefaultNodeStateCleanupDelayMinutes
}

if delayMinutes != 0 {
now := time.Now().UTC()
keepUntilTime := ns.GetKeepUntilTime()
if keepUntilTime.IsZero() {
keepUntilTime = now.Add(time.Minute * time.Duration(delayMinutes))
logger.V(2).Info("SriovNetworkNodeState has no matching node, configure cleanup delay for the state object",
"nodeStateName", ns.Name, "delay", delayMinutes, "keepUntilTime", keepUntilTime.String())
ns.SetKeepUntilTime(keepUntilTime)
if err := r.Update(ctx, ns); err != nil {
logger.Error(err, "Fail to update SriovNetworkNodeState CR", "name", ns.GetName())
return err
}
return nil
}
if now.Before(keepUntilTime) {
return nil
}
}
// remove the object if delayMinutes is 0 or if keepUntilTime is already passed
logger.Info("Deleting SriovNetworkNodeState as node with that name doesn't exist", "nodeStateName", ns.Name)
if err := r.Delete(ctx, ns, &client.DeleteOptions{}); err != nil {
logger.Error(err, "Fail to delete SriovNetworkNodeState CR", "name", ns.GetName())
return err
}
return nil
}

func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context.Context,
dc *sriovnetworkv1.SriovOperatorConfig,
npl *sriovnetworkv1.SriovNetworkNodePolicyList,
Expand All @@ -375,9 +424,16 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context
return fmt.Errorf("failed to get SriovNetworkNodeState: %v", err)
}
} else {
keepUntilAnnotationUpdated := found.ResetKeepUntilTime()

if len(found.Status.Interfaces) == 0 {
logger.Info("SriovNetworkNodeState Status Interfaces are empty. Skip update of policies in spec",
"namespace", ns.Namespace, "name", ns.Name)
if keepUntilAnnotationUpdated {
if err := r.Update(ctx, found); err != nil {
return fmt.Errorf("couldn't update SriovNetworkNodeState: %v", err)
}
}
return nil
}

Expand Down Expand Up @@ -420,7 +476,7 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(ctx context
// Note(adrianc): we check same ownerReferences since SriovNetworkNodeState
// was owned by a default SriovNetworkNodePolicy. if we encounter a descripancy
// we need to update.
if reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) &&
if !keepUntilAnnotationUpdated && reflect.DeepEqual(newVersion.OwnerReferences, found.OwnerReferences) &&
equality.Semantic.DeepEqual(newVersion.Spec, found.Spec) {
logger.V(1).Info("SriovNetworkNodeState did not change, not updating")
return nil
Expand Down
74 changes: 68 additions & 6 deletions controllers/sriovnetworknodepolicy_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package controllers
import (
"context"
"encoding/json"
"os"
"sync"
"testing"
"time"
Expand All @@ -11,18 +12,17 @@ import (
. "github.com/onsi/gomega"

"github.com/google/go-cmp/cmp"
dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

dptypes "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/types"

sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
Expand All @@ -48,7 +48,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) {
{
tname: "testVirtioVdpaVirtio",
policy: sriovnetworkv1.SriovNetworkNodePolicy{
Spec: v1.SriovNetworkNodePolicySpec{
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
ResourceName: "resourceName",
DeviceType: consts.DeviceTypeNetDevice,
VdpaType: consts.VdpaTypeVirtio,
Expand All @@ -67,7 +67,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) {
}, {
tname: "testVhostVdpaVirtio",
policy: sriovnetworkv1.SriovNetworkNodePolicy{
Spec: v1.SriovNetworkNodePolicySpec{
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
ResourceName: "resourceName",
DeviceType: consts.DeviceTypeNetDevice,
VdpaType: consts.VdpaTypeVhost,
Expand All @@ -87,7 +87,7 @@ func TestRenderDevicePluginConfigData(t *testing.T) {
{
tname: "testExcludeTopology",
policy: sriovnetworkv1.SriovNetworkNodePolicy{
Spec: v1.SriovNetworkNodePolicySpec{
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
ResourceName: "resourceName",
ExcludeTopology: true,
},
Expand Down Expand Up @@ -261,3 +261,65 @@ var _ = Describe("SriovnetworkNodePolicy controller", Ordered, func() {
})
})
})

var _ = Describe("SriovNetworkNodePolicyReconciler", Ordered, func() {
Context("handleStaleNodeState", func() {
var (
ctx context.Context
r *SriovNetworkNodePolicyReconciler
nodeState *sriovnetworkv1.SriovNetworkNodeState
)

BeforeEach(func() {
ctx = context.Background()
scheme := runtime.NewScheme()
utilruntime.Must(sriovnetworkv1.AddToScheme(scheme))
nodeState = &sriovnetworkv1.SriovNetworkNodeState{ObjectMeta: metav1.ObjectMeta{Name: "node1"}}
r = &SriovNetworkNodePolicyReconciler{Client: fake.NewClientBuilder().WithObjects(nodeState).Build()}
})
It("should set default delay", func() {
nodeState := nodeState.DeepCopy()
Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred())
Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred())
Expect(time.Now().UTC().Before(nodeState.GetKeepUntilTime())).To(BeTrue())
})
It("should remove CR if wait time expired", func() {
nodeState := nodeState.DeepCopy()
nodeState.SetKeepUntilTime(time.Now().UTC().Add(-time.Minute))
Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred())
Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue())
})
It("should keep existing wait time if already set", func() {
nodeState := nodeState.DeepCopy()
nodeState.SetKeepUntilTime(time.Now().UTC().Add(time.Minute))
testTime := nodeState.GetKeepUntilTime()
r.Update(ctx, nodeState)
Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred())
Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred())
Expect(nodeState.GetKeepUntilTime()).To(Equal(testTime))
})
It("non default dealy", func() {
DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES"))
os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "60")
nodeState := nodeState.DeepCopy()
Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred())
Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred())
Expect(time.Until(nodeState.GetKeepUntilTime()) > 30*time.Minute).To(BeTrue())
})
It("invalid non default delay - should use default", func() {
DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES"))
os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "-20")
nodeState := nodeState.DeepCopy()
Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred())
Expect(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState)).NotTo(HaveOccurred())
Expect(time.Until(nodeState.GetKeepUntilTime()) > 20*time.Minute).To(BeTrue())
})
It("should remove CR if delay is zero", func() {
DeferCleanup(os.Setenv, "STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", os.Getenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES"))
os.Setenv("STALE_NODE_STATE_CLEANUP_DELAY_MINUTES", "0")
nodeState := nodeState.DeepCopy()
Expect(r.handleStaleNodeState(ctx, nodeState)).NotTo(HaveOccurred())
Expect(errors.IsNotFound(r.Get(ctx, types.NamespacedName{Name: nodeState.Name}, nodeState))).To(BeTrue())
})
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ spec:
value: {{ .Values.operator.cniBinPath }}
- name: CLUSTER_TYPE
value: {{ .Values.operator.clusterType }}
- name: STALE_NODE_STATE_CLEANUP_DELAY_MINUTES
value: "{{ .Values.operator.staleNodeStateCleanupDelayMinutes }}"
{{- if .Values.operator.admissionControllers.enabled }}
- name: ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_SECRET_NAME
value: {{ .Values.operator.admissionControllers.certificates.secretNames.operator }}
Expand Down
4 changes: 4 additions & 0 deletions deployment/sriov-network-operator-chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ operator:
resourcePrefix: "openshift.io"
cniBinPath: "/opt/cni/bin"
clusterType: "kubernetes"
# minimal amount of time (in minutes) the operator will wait before removing
# stale SriovNetworkNodeState objects (objects that doesn't match node with the daemon)
# "0" means no extra delay, in this case the CR will be removed by the next reconcilation cycle (may take up to 5 minutes)
staleNodeStateCleanupDelayMinutes: "30"
metricsExporter:
port: "9110"
certificates:
Expand Down
8 changes: 8 additions & 0 deletions pkg/consts/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ const (
MCPPauseAnnotationState = "sriovnetwork.openshift.io/state"
MCPPauseAnnotationTime = "sriovnetwork.openshift.io/time"

// NodeStateKeepUntilAnnotation contains name of the "keep until time" annotation for SriovNetworkNodeState object.
// The "keep until time" specifies the earliest time at which the state object can be removed
// if the daemon's pod is not found on the node.
NodeStateKeepUntilAnnotation = "sriovnetwork.openshift.io/keep-state-until"
// DefaultNodeStateCleanupDelayMinutes contains default delay before removing stale SriovNetworkNodeState CRs
// (the CRs that no longer have a corresponding node with the daemon).
DefaultNodeStateCleanupDelayMinutes = 30

CheckpointFileName = "sno-initial-node-state.json"
Unknown = "Unknown"

Expand Down

0 comments on commit 4ea6ce0

Please sign in to comment.