✨ machine: Introduce Deletion status field and add timestamps for drain and volumeDetach instead of using the condition #11166

Merged · 7 commits · Sep 30, 2024

Changes from 4 commits

22 changes: 22 additions & 0 deletions api/v1beta1/machine_types.go
@@ -230,10 +230,32 @@ type MachineStatus struct {
// Conditions defines current service state of the Machine.
// +optional
Conditions Conditions `json:"conditions,omitempty"`

// Deletion contains information relating to removal of the Machine.
// Only present when the Machine has a deletionTimestamp and is being removed from the cluster.
// +optional
Deletion MachineStatusDeletion `json:"deletion,omitempty"`
}

// ANCHOR_END: MachineStatus

// MachineStatusDeletion is the deletion state of the Machine.
type MachineStatusDeletion struct {
Contributor:

Should we include information here related to the drain, such as the configuration for the timeout?

Since the drain fields are optional in the spec, it would perhaps be good to show the configured values here, so that you can correlate start time and expected end time just by looking at the status?

Member:

This is an interesting idea; I'm not really sure how we can represent in a clear way that we are waiting forever (other than showing timeout 0).

Contributor:

A value of -1 potentially? But you're right, timeout 0 is awkward 🤔

Member Author:

I think the fact that it is still waiting should then be part of the Deleting condition message? 🤔 (Or, as long as that one is not around, the DrainingSucceeded condition message.)

Member:

Yep, having this in the conditions seems the easiest way to address this.
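
As a rough sketch of that direction (not code from this PR; the helper name setDrainingConditionMessage and the message wording are made up for illustration), the configured timeout, or the fact that no timeout is set, could be folded into the existing drain condition message:

```go
package machine

import (
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// setDrainingConditionMessage is a hypothetical helper: instead of adding extra fields to
// status.deletion, it surfaces the configured NodeDrainTimeout (or the fact that the
// controller waits indefinitely) through the condition message.
func setDrainingConditionMessage(machine *clusterv1.Machine) {
	msg := "Draining the node, no NodeDrainTimeout configured, waiting indefinitely"
	if machine.Spec.NodeDrainTimeout != nil && machine.Spec.NodeDrainTimeout.Seconds() > 0 {
		msg = fmt.Sprintf("Draining the node, NodeDrainTimeout is %s", machine.Spec.NodeDrainTimeout.Duration)
	}
	conditions.MarkFalse(machine, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason,
		clusterv1.ConditionSeverityInfo, msg)
}
```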

// NodeDrainStartTime is the time when the drain of the node started.
Contributor:

I feel like we can probably add some more context to this. What does it mean when it's not present, for example? What does it mean if it has elapsed for some period and the Machine is still here? What hints can we give to end users?

Member Author:

Tried to add some more context on both keys.

// Only present when the Machine has a deletionTimestamp, is being removed from the cluster
// and draining the node had been started.
Contributor:

Why would drain not have been started when there's a deletion timestamp?

Member Author:

Because it's waiting for pre-drain hooks, for one thing.

Member Author:

Note: the best pointer for an overview of the deletion process may be this: https://main.cluster-api.sigs.k8s.io/tasks/automated-machine-management/machine_deletions

Contributor:

I was thinking it might be useful to point a user to that, or at least give them a hint as to why node draining might not have started. Can we include a line?

Member Author:

Another one may be that the node is excluded from the drain entirely (there is an annotation for it).

I shortened it to:

	// NodeDrainStartTime is the time when the drain of the node started and is used to determine
	// if the NodeDrainTimeout is exceeded.
	// Only present when the Machine has a deletionTimestamp and draining the node had been started.

Happy to get other suggestions though :-)

One way would be to say:

	// NodeDrainStartTime is the time when the drain of the node started and is used to determine
	// if the NodeDrainTimeout is exceeded.
	// Only present when the Machine has a deletionTimestamp and draining the node had been started.
    // NodeDrainStartTime may not be set because the deletion is blocked by a pre-drain hook or draining is skipped for the machine.

But doing the same for WaitForNodeVolumeDetachStartTime would get very verbose 🤔:

	// WaitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started
	// and is used to determine if the NodeVolumeDetachTimeout is exceeded.
	// Detaching volumes from nodes is usually done by CSI implementations and the current state
	// is observed from the node's `.Status.VolumesAttached` field.
	// Only present when the Machine has a deletionTimestamp and waiting for volume detachments had been started.
    // WaitForNodeVolumeDetachStartTime may not be set because the deletion is blocked by a pre-drain hook, stuck in drain or waiting for volume detachment is skipped for the machine.
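For reference, this is roughly how the full type reads with the shortened comments in place (a sketch assembled from the snippets in this thread; see the surrounding diff for the exact lines in this revision):

```go
package v1beta1

import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

// MachineStatusDeletion is the deletion state of the Machine.
type MachineStatusDeletion struct {
	// NodeDrainStartTime is the time when the drain of the node started and is used to determine
	// if the NodeDrainTimeout is exceeded.
	// Only present when the Machine has a deletionTimestamp and draining the node had been started.
	// +optional
	NodeDrainStartTime *metav1.Time `json:"nodeDrainStartTime,omitempty"`

	// WaitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started
	// and is used to determine if the NodeVolumeDetachTimeout is exceeded.
	// Detaching volumes from nodes is usually done by CSI implementations and the current state
	// is observed from the node's `.Status.VolumesAttached` field.
	// Only present when the Machine has a deletionTimestamp and waiting for volume detachments had been started.
	// +optional
	WaitForNodeVolumeDetachStartTime *metav1.Time `json:"nodeVolumeDetachStartTime,omitempty"`
}
```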

// +optional
NodeDrainStartTime *metav1.Time `json:"nodeDrainStartTime,omitempty"`
Contributor:

Do we also need a finish time? Same for the other field?

Member:

I personally don't think that the drain or detach finish time is important info to have in the API (users & SREs mostly care about what is going on now and eventually why it is stuck; they rarely care about what happened in the past, and for that the logs are more exhaustive).
But no strong opinion.

Contributor:

If I know that these have started, how do I know that they have finished if I don't have some field to tell me? 🤔 I realise eventually the machine is going away, but what if it gets stuck terminating the instance? Will that show up somewhere, and will I know that drain and volume detach are done?

Member Author:

> how do I know that they have finished if I don't have some field to tell me? 🤔

From the controller's perspective we (at least currently) do not care:

  • either the controller tries to drain again which should be a no-op (happy path)
  • or the drain is skipped because the timeout is reached.

From the user perspective: the information about where the deletion currently is should be part of the Deleting condition, I'd say.

Member:

I think in general the new Deleting condition should make clear at which phase of the deletion workflow we are (including making clear which parts are already completed).
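
As a purely illustrative sketch of that idea (not part of this PR; the helper name deletingPhaseMessage and the message wording are assumptions), such a condition message could derive the current phase from the new timestamps:

```go
package machine

import (
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// deletingPhaseMessage is a hypothetical helper showing how a Deleting condition message
// could report which phase of the deletion workflow the Machine is in, based on the
// status.deletion timestamps introduced in this PR.
func deletingPhaseMessage(m *clusterv1.Machine) string {
	switch {
	case m.Status.Deletion.NodeDrainStartTime == nil:
		// Drain has not started yet, e.g. blocked by a pre-drain hook or skipped entirely.
		return "Deletion started, Node drain not started yet"
	case m.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil:
		return fmt.Sprintf("Draining the Node (started at %s)", m.Status.Deletion.NodeDrainStartTime)
	default:
		return fmt.Sprintf("Node drain completed or timed out, waiting for volumes to be detached (since %s)",
			m.Status.Deletion.WaitForNodeVolumeDetachStartTime)
	}
}
```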


// WaitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started.
// Detaching volumes from nodes is usually done by CSI implementations and the current state
// is observed from the node's `.Status.VolumesAttached` field.
// Only present when the Machine has a deletionTimestamp, is being removed from the cluster
// and waiting for volume detachments had been started.
// +optional
WaitForNodeVolumeDetachStartTime *metav1.Time `json:"nodeVolumeDetachStartTime,omitempty"`
}

// SetTypedPhase sets the Phase field to the string representation of MachinePhase.
func (m *MachineStatus) SetTypedPhase(p MachinePhase) {
m.Phase = string(p)
24 changes: 24 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

37 changes: 36 additions & 1 deletion api/v1beta1/zz_generated.openapi.go

Some generated files are not rendered by default.

22 changes: 22 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machines.yaml

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/conversion.go
@@ -101,6 +101,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
dst.Status.NodeInfo = restored.Status.NodeInfo
dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
dst.Status.Deletion = restored.Status.Deletion
return nil
}

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/zz_generated.conversion.go

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/conversion.go
@@ -190,6 +190,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.NodeDeletionTimeout = restored.Spec.NodeDeletionTimeout
dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
dst.Status.Deletion = restored.Status.Deletion
return nil
}

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/zz_generated.conversion.go

Some generated files are not rendered by default.

27 changes: 17 additions & 10 deletions internal/controllers/machine/machine_controller.go
@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -415,6 +416,10 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
}

if m.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
m.Status.Deletion.WaitForNodeVolumeDetachStartTime = ptr.To(metav1.Now())
}

if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
if err != nil {
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
@@ -527,34 +532,32 @@ func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
return false
}

// if the draining succeeded condition does not exist
if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
// if the NodeDrainStartTime does not exist
if machine.Status.Deletion.NodeDrainStartTime == nil {
return false
}

now := time.Now()
firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
diff := now.Sub(firstTimeDrain.Time)
diff := now.Sub(machine.Status.Deletion.NodeDrainStartTime.Time)
return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
}

// nodeVolumeDetachTimeoutExceeded returns False if either NodeVolumeDetachTimeout is set to nil or <=0 OR
// VolumeDetachSucceededCondition is not set on the Machine. Otherwise returns true if the timeout is expired
// since the last transition time of VolumeDetachSucceededCondition.
// WaitForNodeVolumeDetachStartTime is not set on the Machine. Otherwise returns true if the timeout is expired
// since the WaitForNodeVolumeDetachStartTime.
func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
// if the NodeVolumeDetachTimeout type is not set by user
if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
return false
}

// if the volume detaching succeeded condition does not exist
if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
// if the NodeVolumeDetachStartTime does not exist
if machine.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
return false
}

now := time.Now()
firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
diff := now.Sub(firstTimeDetach.Time)
diff := now.Sub(machine.Status.Deletion.WaitForNodeVolumeDetachStartTime.Time)
return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
}

@@ -697,6 +700,10 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
log.V(3).Info("Node is unreachable, draining will use 1s GracePeriodSeconds and will ignore all Pods that have a deletionTimestamp > 1s old")
}

if machine.Status.Deletion.NodeDrainStartTime == nil {
machine.Status.Deletion.NodeDrainStartTime = ptr.To(metav1.Now())
}

if err := drainer.CordonNode(ctx, node); err != nil {
// Machine will be re-reconciled after a cordon failure.
return ctrl.Result{}, errors.Wrapf(err, "failed to cordon Node %s", node.Name)
48 changes: 12 additions & 36 deletions internal/controllers/machine/machine_controller_test.go
@@ -1386,12 +1386,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
},

Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
},
Deletion: clusterv1.MachineStatusDeletion{
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
},
},
},
@@ -1412,12 +1408,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
Deletion: clusterv1.MachineStatusDeletion{
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
},
},
@@ -1437,12 +1429,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
Deletion: clusterv1.MachineStatusDeletion{
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
},
},
@@ -1896,12 +1884,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
},

Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.VolumeDetachSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
},
Deletion: clusterv1.MachineStatusDeletion{
WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
},
},
},
@@ -1922,12 +1906,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
NodeVolumeDetachTimeout: &metav1.Duration{Duration: time.Second * 60},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.VolumeDetachSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
Deletion: clusterv1.MachineStatusDeletion{
WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
},
},
@@ -1947,12 +1927,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.VolumeDetachSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
Deletion: clusterv1.MachineStatusDeletion{
WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
},
},