Skip to content

Commit

Permalink
Implement KCP and MS Deleting conditions
Browse files — browse the repository at this point in the history
Signed-off-by: Stefan Büringer [email protected]
  • Loading branch information
sbueringer committed Nov 6, 2024
1 parent ec04dcb commit 9fdc5c9
Show file tree
Hide file tree
Showing 11 changed files with 389 additions and 31 deletions.
14 changes: 14 additions & 0 deletions api/v1beta1/machineset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,23 @@ const (
// MachineSet's Remediating condition and corresponding reasons that will be used in v1Beta2 API version.
const (
	// MachineSetRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any.
	MachineSetRemediatingV1Beta2Condition = RemediatingV1Beta2Condition
)

// MachineSet's Deleting condition and corresponding reasons that will be used in v1Beta2 API version.
// Note: the reason constants below alias the package-level generic reasons so that the
// MachineSet controller and cross-resource tooling report consistent reason strings.
const (
	// MachineSetDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
	MachineSetDeletingV1Beta2Condition = DeletingV1Beta2Condition

	// MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the MachineSet is not deleting because the
	// DeletionTimestamp is not set.
	MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason

	// MachineSetDeletingDeletionTimestampSetV1Beta2Reason surfaces when the MachineSet is deleting because the
	// DeletionTimestamp is set.
	MachineSetDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

	// MachineSetDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a MachineSet.
	MachineSetDeletingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
)

// ANCHOR_END: MachineSetSpec
Expand Down
25 changes: 20 additions & 5 deletions controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,27 @@ const (
const (
// KubeadmControlPlaneDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
KubeadmControlPlaneDeletingV1Beta2Condition = clusterv1.DeletingV1Beta2Condition
)

// KubeadmControlPlane's Paused condition and corresponding reasons that will be used in v1Beta2 API version.
const (
// KubeadmControlPlanePausedV1Beta2Condition is true if this resource or the Cluster it belongs to are paused.
KubeadmControlPlanePausedV1Beta2Condition = clusterv1.PausedV1Beta2Condition
// KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the KCP is not deleting because the
// DeletionTimestamp is not set.
KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason = clusterv1.DeletionTimestampNotSetV1Beta2Reason

// KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason surfaces when the KCP deletion
// waits for the workers to be deleted.
KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason = "WaitingForWorkersDeletion"

// KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason surfaces when the KCP deletion
// waits for the control plane Machines to be deleted.
KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason = "WaitingForMachineDeletion"

// KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason surfaces when the KCP deletion has been completed.
// This reason is set right after the `kubeadm.controlplane.cluster.x-k8s.io` finalizer is removed.
// This means that the object will go away (i.e. be removed from etcd), except if there are other
// finalizers on the KCP object.
KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason = clusterv1.DeletionCompletedV1Beta2Reason

// KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a KCP object.
KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
)

// APIServerPodHealthy, ControllerManagerPodHealthy, SchedulerPodHealthy and EtcdPodHealthy condition and corresponding
Expand Down
79 changes: 74 additions & 5 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"
"fmt"
"sort"
"strings"
"time"

Expand Down Expand Up @@ -52,6 +53,7 @@ import (
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
"sigs.k8s.io/cluster-api/util/finalizers"
clog "sigs.k8s.io/cluster-api/util/log"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/paused"
"sigs.k8s.io/cluster-api/util/predicates"
Expand Down Expand Up @@ -208,6 +210,8 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
return ctrl.Result{}, nil
}

s := &scope{}

defer func() {
// Always attempt to update status.
if err := r.updateStatus(ctx, controlPlane); err != nil {
Expand All @@ -220,7 +224,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
}
}

r.updateV1beta2Status(ctx, controlPlane)
r.updateV1Beta2Status(ctx, controlPlane, s)

// Always attempt to Patch the KubeadmControlPlane object and status after each reconciliation.
patchOpts := []patch.Option{}
Expand Down Expand Up @@ -254,7 +258,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.

if !kcp.ObjectMeta.DeletionTimestamp.IsZero() {
// Handle deletion reconciliation loop.
res, err = r.reconcileDelete(ctx, controlPlane)
res, err = r.reconcileDelete(ctx, controlPlane, s)
if errors.Is(err, clustercache.ErrClusterNotConnected) {
log.V(5).Info("Requeuing because connection to the workload cluster is down")
return ctrl.Result{RequeueAfter: time.Minute}, nil
Expand Down Expand Up @@ -587,15 +591,27 @@ func (r *KubeadmControlPlaneReconciler) reconcileClusterCertificates(ctx context
return nil
}

// scope holds state that is computed during a reconcile and consumed when
// setting conditions afterwards (e.g. by the v1Beta2 status update in the
// Reconcile defer). Note: despite the generic comment, this struct currently
// only carries the inputs for the Deleting condition.
type scope struct {
	// deletingReason is the reason that should be used when setting the Deleting condition.
	deletingReason string

	// deletingMessage is the message that should be used when setting the Deleting condition.
	deletingMessage string
}

// reconcileDelete handles KubeadmControlPlane deletion.
// The implementation does not take non-control plane workloads into consideration. This may or may not change in the future.
// Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064.
func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) {
func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, controlPlane *internal.ControlPlane, s *scope) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)
log.Info("Reconcile KubeadmControlPlane deletion")

// If no control plane machines remain, remove the finalizer
if len(controlPlane.Machines) == 0 {
s.deletingReason = controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason
s.deletingMessage = ""

controllerutil.RemoveFinalizer(controlPlane.KCP, controlplanev1.KubeadmControlPlaneFinalizer)
return ctrl.Result{}, nil
}
Expand All @@ -615,6 +631,8 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
// Gets all machines, not just control plane machines.
allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, controlPlane.Cluster)
if err != nil {
s.deletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
s.deletingMessage = "Please check controller logs for errors" //nolint:goconst // Not making this a constant for now
return ctrl.Result{}, err
}

Expand All @@ -623,20 +641,25 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
if feature.Gates.Enabled(feature.MachinePool) {
allMachinePools, err = r.managementCluster.GetMachinePoolsForCluster(ctx, controlPlane.Cluster)
if err != nil {
s.deletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
s.deletingMessage = "Please check controller logs for errors"
return ctrl.Result{}, err
}
}
// Verify that only control plane machines remain
if len(allMachines) != len(controlPlane.Machines) || len(allMachinePools.Items) != 0 {
log.Info("Waiting for worker nodes to be deleted first")
conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")

s.deletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason
s.deletingMessage = objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster)
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

// Delete control plane machines in parallel
machinesToDelete := controlPlane.Machines
machines := controlPlane.Machines
var errs []error
for _, machineToDelete := range machinesToDelete {
for _, machineToDelete := range machines {
log := log.WithValues("Machine", klog.KObj(machineToDelete))
ctx := ctrl.LoggerInto(ctx, log)

Expand Down Expand Up @@ -665,15 +688,61 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
err := kerrors.NewAggregate(errs)
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedDelete",
"Failed to delete control plane Machines for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err)

s.deletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
s.deletingMessage = "Please check controller logs for errors"
return ctrl.Result{}, err
}

log.Info("Waiting for control plane Machines to not exist anymore")

conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")

message := ""
if len(machines) > 0 {
if len(machines) == 1 {
message = fmt.Sprintf("Deleting %d Machine", len(machines))
} else {
message = fmt.Sprintf("Deleting %d Machines", len(machines))
}
staleMessage := aggregateStaleMachines(machines)
if staleMessage != "" {
message += fmt.Sprintf(" and %s", staleMessage)
}
}
s.deletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason
s.deletingMessage = message
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

// objectsPendingDeleteNames returns a human-readable summary of the worker
// Machines and MachinePools that still have to be deleted, e.g.
// "MachinePools: mp1; Worker Machines: w1, w2, ...". Control plane Machines
// are excluded; name lists may be truncated by clog.StringListToString.
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string {
	// Everything that is not a control plane Machine counts as a worker.
	workers := allMachines.Difference(allMachines.Filter(collections.ControlPlaneMachines(cluster.Name)))

	var pending []string

	// MachinePools are only relevant when the feature gate is enabled.
	if feature.Gates.Enabled(feature.MachinePool) {
		var poolNames []string
		for i := range allMachinePools.Items {
			poolNames = append(poolNames, allMachinePools.Items[i].Name)
		}
		if len(poolNames) > 0 {
			sort.Strings(poolNames)
			pending = append(pending, "MachinePools: "+clog.StringListToString(poolNames))
		}
	}

	var workerNames []string
	for _, w := range workers.UnsortedList() {
		workerNames = append(workerNames, w.Name)
	}
	if len(workerNames) > 0 {
		sort.Strings(workerNames)
		pending = append(pending, "Worker Machines: "+clog.StringListToString(workerNames))
	}

	return strings.Join(pending, "; ")
}

func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
// Nothing to do, the annotation is not set (anymore) on the Machine
Expand Down
72 changes: 65 additions & 7 deletions controlplane/kubeadm/internal/controllers/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3036,11 +3036,14 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Cluster: cluster,
Machines: machines,
}
s := &scope{}

result, err := r.reconcileDelete(ctx, controlPlane)
result, err := r.reconcileDelete(ctx, controlPlane, s)
g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(s.deletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason))
g.Expect(s.deletingMessage).To(Equal("Deleting 3 Machines"))

controlPlaneMachines := clusterv1.MachineList{}
g.Expect(fakeClient.List(ctx, &controlPlaneMachines)).To(Succeed())
Expand All @@ -3062,11 +3065,14 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
KCP: kcp,
Cluster: cluster,
}
s = &scope{}

result, err = r.reconcileDelete(ctx, controlPlane)
result, err = r.reconcileDelete(ctx, controlPlane, s)
g.Expect(result).To(BeComparableTo(ctrl.Result{}))
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(BeEmpty())
g.Expect(s.deletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason))
g.Expect(s.deletingMessage).To(BeEmpty())
})

t.Run("does not remove any control plane Machines if other Machines exist", func(t *testing.T) {
Expand Down Expand Up @@ -3111,12 +3117,14 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Cluster: cluster,
Machines: machines,
}
s := &scope{}

result, err := r.reconcileDelete(ctx, controlPlane)
result, err := r.reconcileDelete(ctx, controlPlane, s)
g.Expect(result).To(BeComparableTo(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
g.Expect(err).ToNot(HaveOccurred())

g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(s.deletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(s.deletingMessage).To(Equal("Worker Machines: worker"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
Expand Down Expand Up @@ -3169,12 +3177,14 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
Cluster: cluster,
Machines: machines,
}
s := &scope{}

result, err := r.reconcileDelete(ctx, controlPlane)
result, err := r.reconcileDelete(ctx, controlPlane, s)
g.Expect(result).To(BeComparableTo(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
g.Expect(err).ToNot(HaveOccurred())

g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(s.deletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(s.deletingMessage).To(Equal("MachinePools: worker"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
Expand Down Expand Up @@ -3206,12 +3216,60 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
KCP: kcp,
Cluster: cluster,
}
s := &scope{}

result, err := r.reconcileDelete(ctx, controlPlane)
result, err := r.reconcileDelete(ctx, controlPlane, s)
g.Expect(result).To(BeComparableTo(ctrl.Result{}))
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(BeEmpty())
g.Expect(s.deletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason))
g.Expect(s.deletingMessage).To(BeEmpty())
})
}

func TestObjectsPendingDelete(t *testing.T) {
c := &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
},
}

cpMachineLabels := map[string]string{
clusterv1.ClusterNameLabel: c.Name,
clusterv1.MachineControlPlaneLabel: "",
}
workerMachineLabels := map[string]string{
clusterv1.ClusterNameLabel: c.Name,
}

allMachines := collections.FromMachineList(&clusterv1.MachineList{
Items: []clusterv1.Machine{
*machine("cp1", withLabels(cpMachineLabels)),
*machine("cp2", withLabels(cpMachineLabels)),
*machine("cp3", withLabels(cpMachineLabels)),
*machine("w1", withLabels(workerMachineLabels)),
*machine("w2", withLabels(workerMachineLabels)),
*machine("w3", withLabels(workerMachineLabels)),
*machine("w4", withLabels(workerMachineLabels)),
*machine("w5", withLabels(workerMachineLabels)),
*machine("w6", withLabels(workerMachineLabels)),
*machine("w7", withLabels(workerMachineLabels)),
*machine("w8", withLabels(workerMachineLabels)),
},
})
machinePools := &expv1.MachinePoolList{
Items: []expv1.MachinePool{
{
ObjectMeta: metav1.ObjectMeta{
Name: "mp1",
},
},
},
}

g := NewWithT(t)

g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; Worker Machines: w1, w2, w3, w4, w5, ... (3 more)"))
}

// test utils.
Expand Down
6 changes: 6 additions & 0 deletions controlplane/kubeadm/internal/controllers/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,12 @@ func withAnnotation(annotation string) machineOpt {
}
}

// withLabels returns a machineOpt that replaces the Machine's labels with the
// given map (the map is not copied; callers share it with the Machine).
func withLabels(labels map[string]string) machineOpt {
	return func(m *clusterv1.Machine) {
		// Labels is promoted from the embedded ObjectMeta.
		m.Labels = labels
	}
}

func withTimestamp(t time.Time) machineOpt {
return func(m *clusterv1.Machine) {
m.CreationTimestamp = metav1.NewTime(t)
Expand Down
Loading

0 comments on commit 9fdc5c9

Please sign in to comment.