diff --git a/controlplane/kubeadm/internal/controllers/remediation.go b/controlplane/kubeadm/internal/controllers/remediation.go index e2c24d93d8c7..0441df9576e7 100644 --- a/controlplane/kubeadm/internal/controllers/remediation.go +++ b/controlplane/kubeadm/internal/controllers/remediation.go @@ -336,10 +336,24 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C return ctrl.Result{Requeue: true}, nil } -// Gets the machine to be remediated, which is the oldest machine marked as unhealthy not yet provisioned (if any) -// or the oldest machine marked as unhealthy. +// Gets the machine which is marked as unhealthy and to be remediated, in the following order +// Oldest machine which is not yet provisioned and has RemediateMachineAnnotation annotation +// Oldest machine has RemediateMachineAnnotation annotation +// Oldest machine not yet provisioned +// Oldest machine marked as unhealthy func getMachineToBeRemediated(unhealthyMachines collections.Machines) *clusterv1.Machine { - machineToBeRemediated := unhealthyMachines.Filter(collections.Not(collections.HasNode())).Oldest() + machinesWithoutNode := unhealthyMachines.Filter(collections.Not(collections.HasNode())) + annotatedMachineWithoutNode := machinesWithoutNode.Filter(collections.HasAnnotationKey(clusterv1.RemediateMachineAnnotation)).Oldest() + if annotatedMachineWithoutNode != nil { + return annotatedMachineWithoutNode + } + + annotatedMachine := unhealthyMachines.Filter(collections.HasAnnotationKey(clusterv1.RemediateMachineAnnotation)).Oldest() + if annotatedMachine != nil { + return annotatedMachine + } + + machineToBeRemediated := machinesWithoutNode.Oldest() if machineToBeRemediated == nil { machineToBeRemediated = unhealthyMachines.Oldest() } diff --git a/controlplane/kubeadm/internal/controllers/remediation_test.go b/controlplane/kubeadm/internal/controllers/remediation_test.go index 48a2ebd1b12c..9cde923dafff 100644 --- a/controlplane/kubeadm/internal/controllers/remediation_test.go +++ b/controlplane/kubeadm/internal/controllers/remediation_test.go @@ -42,6 +42,50 @@ import ( ) func TestGetMachineToBeRemediated(t *testing.T) { + t.Run("returns the oldest of the provisioning machines with RemediateMachineAnnotation", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "ns1") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) + m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef()) + m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef()) + m4 := createMachine(ctx, g, ns.Name, "m4-unhealthy-", withMachineHealthCheckFailed(), withoutNodeRef()) + + m1.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""}) + m2.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""}) + m3.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""}) + + unhealthyMachines := collections.FromMachines(m1, m2, m3, m4) + + g.Expect(getMachineToBeRemediated(unhealthyMachines).Name).To(HavePrefix("m2-unhealthy-")) + }) + + t.Run("returns the oldest machine with RemediateMachineAnnotation if there are no provisioning machines", func(t *testing.T) { + g := NewWithT(t) + + ns, err := env.CreateNamespace(ctx, "ns1") + g.Expect(err).ToNot(HaveOccurred()) + defer func() { + g.Expect(env.Cleanup(ctx, ns)).To(Succeed()) + }() + + m1 := createMachine(ctx, g, ns.Name, "m1-unhealthy-", withMachineHealthCheckFailed()) + m2 := createMachine(ctx, g, ns.Name, "m2-unhealthy-", withMachineHealthCheckFailed()) + m3 := createMachine(ctx, g, ns.Name, "m3-unhealthy-", withMachineHealthCheckFailed()) + + m1.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""}) + m2.SetAnnotations(map[string]string{clusterv1.RemediateMachineAnnotation: ""}) + + unhealthyMachines := collections.FromMachines(m1, m2, m3) + + g.Expect(getMachineToBeRemediated(unhealthyMachines).Name).To(HavePrefix("m1-unhealthy-")) + }) + t.Run("returns the oldest machine if there are no provisioning machines", func(t *testing.T) { g := NewWithT(t)