
✨ machine: Introduce Deletion status field and add timestamps for drain and volumeDetach instead of using the condition #11166

Open · wants to merge 7 commits into base: main

Changes from 6 commits
22 changes: 22 additions & 0 deletions api/v1beta1/machine_types.go
@@ -230,10 +230,32 @@ type MachineStatus struct {
// Conditions defines current service state of the Machine.
// +optional
Conditions Conditions `json:"conditions,omitempty"`

// deletion contains information relating to removal of the Machine.
// Only present when the Machine has a deletionTimestamp and drain or wait for volume detach started.
// +optional
Deletion *MachineDeletionStatus `json:"deletion,omitempty"`
}

// ANCHOR_END: MachineStatus

// MachineDeletionStatus is the deletion state of the Machine.
type MachineDeletionStatus struct {
// nodeDrainStartTime is the time when the drain of the node started and is used to determine
// if the NodeDrainTimeout is exceeded.
// Only present when the Machine has a deletionTimestamp and draining the node had been started.
// +optional
NodeDrainStartTime *metav1.Time `json:"nodeDrainStartTime,omitempty"`
Contributor

Do we also need a finish time? Same for the other field?

Member

I personally don't think that the drain or detach finish time is important info to have in the API (users & SREs mostly care about what is going on now and eventually why it is stuck; they rarely care about what happened in the past, and for that the logs are more exhaustive). But no strong opinion.

Contributor

If I know that these have started, how do I know that they have finished if I don't have some field to tell me? 🤔 I realise the machine is eventually going away, but what if it gets stuck terminating the instance? Will that show up somewhere, and will I know that drain and volume detach are done?

Member Author

how do I know that they have finished if I don't have some field to tell me? 🤔

From the controller's perspective we (at least currently) do not care:

  • either the controller tries to drain again, which should be a no-op (happy path)
  • or the drain is skipped because the timeout is reached.

From the user's perspective: the information about where the deletion currently stands should be part of the Deleting condition, I'd say.

Member

I think in general the new Deleting condition should make clear which phase of the deletion workflow we are in (including making clear which parts are already completed).


// waitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started
Member

Suggested change
// waitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started
// WaitForNodeVolumeDetachStartTime is the time when waiting for volume detachment started

// and is used to determine if the NodeVolumeDetachTimeout is exceeded.
// Detaching volumes from nodes is usually done by CSI implementations and the current state
// is observed from the node's `.Status.VolumesAttached` field.
// Only present when the Machine has a deletionTimestamp and waiting for volume detachments had been started.
// +optional
WaitForNodeVolumeDetachStartTime *metav1.Time `json:"waitForNodeVolumeDetachStartTime,omitempty"`
}

// SetTypedPhase sets the Phase field to the string representation of MachinePhase.
func (m *MachineStatus) SetTypedPhase(p MachinePhase) {
m.Phase = string(p)
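For illustration, a minimal sketch (not part of this PR, and it only compiles against a cluster-api version that already contains the new field) of how a client could read status.deletion to report how long a node drain has been running. The drainDuration helper and the sample timestamp are assumptions made for the example.

package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// drainDuration returns how long the node drain has been running, and false
// when draining has not started (status.deletion or nodeDrainStartTime unset).
func drainDuration(m *clusterv1.Machine, now metav1.Time) (time.Duration, bool) {
	if m.Status.Deletion == nil || m.Status.Deletion.NodeDrainStartTime == nil {
		return 0, false
	}
	return now.Time.Sub(m.Status.Deletion.NodeDrainStartTime.Time), true
}

func main() {
	// A Machine whose drain started 90 seconds ago (illustrative value).
	m := &clusterv1.Machine{
		Status: clusterv1.MachineStatus{
			Deletion: &clusterv1.MachineDeletionStatus{
				NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-90 * time.Second)},
			},
		},
	}
	if d, ok := drainDuration(m, metav1.Now()); ok {
		fmt.Printf("node drain running for %s\n", d.Round(time.Second)) // ~1m30s
	}
}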
28 changes: 28 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go


36 changes: 35 additions & 1 deletion api/v1beta1/zz_generated.openapi.go


22 changes: 22 additions & 0 deletions config/crd/bases/cluster.x-k8s.io_machines.yaml


1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/conversion.go
@@ -101,6 +101,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
dst.Status.NodeInfo = restored.Status.NodeInfo
dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
dst.Status.Deletion = restored.Status.Deletion
return nil
}

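For context on the one-line change above: in cluster-api conversions, fields that only exist in the hub version (v1beta1) survive a down-conversion because the hub object is stashed in an annotation and unmarshalled again on the way back up, so every new hub-only field (here Status.Deletion) has to be copied from that restored object explicitly. Below is a simplified, self-contained sketch of that round-trip, not the actual cluster-api helper; the type names and annotation key are illustrative.

package main

import (
	"encoding/json"
	"fmt"
)

// Hub has a field the older (spoke) API version cannot represent.
type Hub struct {
	DrainStartTime string `json:"drainStartTime,omitempty"`
}

// Spoke carries the serialized hub object in an annotation so nothing is lost.
type Spoke struct {
	Annotations map[string]string
}

const dataAnnotation = "conversion-data" // illustrative key

// downConvert stores the hub JSON on the spoke.
func downConvert(h Hub) Spoke {
	raw, _ := json.Marshal(h)
	return Spoke{Annotations: map[string]string{dataAnnotation: string(raw)}}
}

// upConvert restores the hub JSON and copies hub-only fields back explicitly,
// analogous to dst.Status.Deletion = restored.Status.Deletion in the diff above.
func upConvert(s Spoke) Hub {
	var restored Hub
	_ = json.Unmarshal([]byte(s.Annotations[dataAnnotation]), &restored)
	dst := Hub{}
	dst.DrainStartTime = restored.DrainStartTime
	return dst
}

func main() {
	fmt.Println(upConvert(downConvert(Hub{DrainStartTime: "2024-09-12T10:00:00Z"})))
}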
1 change: 1 addition & 0 deletions internal/apis/core/v1alpha3/zz_generated.conversion.go


1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/conversion.go
@@ -190,6 +190,7 @@ func (src *Machine) ConvertTo(dstRaw conversion.Hub) error {
dst.Spec.NodeDeletionTimeout = restored.Spec.NodeDeletionTimeout
dst.Status.CertificatesExpiryDate = restored.Status.CertificatesExpiryDate
dst.Spec.NodeVolumeDetachTimeout = restored.Spec.NodeVolumeDetachTimeout
dst.Status.Deletion = restored.Status.Deletion
return nil
}

1 change: 1 addition & 0 deletions internal/apis/core/v1alpha4/zz_generated.conversion.go


43 changes: 27 additions & 16 deletions internal/controllers/machine/machine_controller.go
@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -378,13 +379,19 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu
return ctrl.Result{}, err
}

// The DrainingSucceededCondition never exists before the node is drained for the first time,
// so its transition time can be used to record the first time draining.
// The DrainingSucceededCondition never exists before the node is drained for the first time.
// This `if` condition prevents the transition time from being changed more than once.
if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
}

if m.Status.Deletion == nil {
m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
}
if m.Status.Deletion.NodeDrainStartTime == nil {
m.Status.Deletion.NodeDrainStartTime = ptr.To(metav1.Now())
}

if err := patchMachine(ctx, patchHelper, m); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
}
@@ -408,13 +415,19 @@
// volumes are detached before proceeding to delete the Node.
// In case the node is unreachable, the detachment is skipped.
if r.isNodeVolumeDetachingAllowed(m) {
// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
// so its transition time can be used to record the first time we wait for volume detachment.
// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time.
// This `if` condition prevents the transition time from being changed more than once.
if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
}

if m.Status.Deletion == nil {
m.Status.Deletion = &clusterv1.MachineDeletionStatus{}
}
if m.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
m.Status.Deletion.WaitForNodeVolumeDetachStartTime = ptr.To(metav1.Now())
}

if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
if err != nil {
r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
@@ -523,38 +536,36 @@ func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {

func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
// if the NodeDrainTimeout type is not set by user
if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
if machine.Status.Deletion == nil || machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
return false
}

// if the draining succeeded condition does not exist
if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
// if the NodeDrainStartTime does not exist
if machine.Status.Deletion.NodeDrainStartTime == nil {
return false
}

now := time.Now()
firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
diff := now.Sub(firstTimeDrain.Time)
diff := now.Sub(machine.Status.Deletion.NodeDrainStartTime.Time)
return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
}

// nodeVolumeDetachTimeoutExceeded returns False if either NodeVolumeDetachTimeout is set to nil or <=0 OR
// VolumeDetachSucceededCondition is not set on the Machine. Otherwise returns true if the timeout is expired
// since the last transition time of VolumeDetachSucceededCondition.
// WaitForNodeVolumeDetachStartTime is not set on the Machine. Otherwise returns true if the timeout is expired
// since the WaitForNodeVolumeDetachStartTime.
func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
// if the NodeVolumeDetachTimeout type is not set by user
if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
if machine.Status.Deletion == nil || machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
return false
}

// if the volume detaching succeeded condition does not exist
if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
// if the WaitForNodeVolumeDetachStartTime does not exist
if machine.Status.Deletion.WaitForNodeVolumeDetachStartTime == nil {
return false
}

now := time.Now()
firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
diff := now.Sub(firstTimeDetach.Time)
diff := now.Sub(machine.Status.Deletion.WaitForNodeVolumeDetachStartTime.Time)
return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
}

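To make the timeout arithmetic concrete: with a nodeDrainTimeout of 600s and a nodeDrainStartTime recorded 700s ago, the drain is considered timed out and is skipped. Below is a standalone sketch of the same check the controller performs; the function name and values are illustrative, and the real code compares against the timestamps in status.deletion as shown in the diff.

package main

import (
	"fmt"
	"time"
)

// timeoutExceeded mirrors the controller's guard clauses: an unset timeout,
// a non-positive timeout, or a missing start time means "not exceeded".
func timeoutExceeded(startTime *time.Time, timeout time.Duration) bool {
	if timeout <= 0 || startTime == nil {
		return false
	}
	return time.Since(*startTime) >= timeout
}

func main() {
	started := time.Now().Add(-700 * time.Second)
	fmt.Println(timeoutExceeded(&started, 600*time.Second)) // true: drain timed out and is skipped
	fmt.Println(timeoutExceeded(nil, 600*time.Second))      // false: drain has not started yet
}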
48 changes: 12 additions & 36 deletions internal/controllers/machine/machine_controller_test.go
@@ -1386,12 +1386,8 @@ func TestIsNodeDrainedAllowed(t *testing.T) {
},

Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
},
Deletion: &clusterv1.MachineDeletionStatus{
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 70)).UTC()},
},
},
},
@@ -1412,12 +1408,8 @@
NodeDrainTimeout: &metav1.Duration{Duration: time.Second * 60},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
Deletion: &clusterv1.MachineDeletionStatus{
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
},
},
@@ -1437,12 +1429,8 @@
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.DrainingSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
Deletion: &clusterv1.MachineDeletionStatus{
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
},
},
@@ -1896,12 +1884,8 @@ func TestIsNodeVolumeDetachingAllowed(t *testing.T) {
},

Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.VolumeDetachSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
},
Deletion: &clusterv1.MachineDeletionStatus{
WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 60)).UTC()},
},
},
},
@@ -1922,12 +1906,8 @@
NodeVolumeDetachTimeout: &metav1.Duration{Duration: time.Second * 60},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.VolumeDetachSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
Deletion: &clusterv1.MachineDeletionStatus{
WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 30)).UTC()},
},
},
},
@@ -1947,12 +1927,8 @@
Bootstrap: clusterv1.Bootstrap{DataSecretName: ptr.To("data")},
},
Status: clusterv1.MachineStatus{
Conditions: clusterv1.Conditions{
{
Type: clusterv1.VolumeDetachSucceededCondition,
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
Deletion: &clusterv1.MachineDeletionStatus{
WaitForNodeVolumeDetachStartTime: &metav1.Time{Time: time.Now().Add(-(time.Second * 1000)).UTC()},
},
},
},