Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

🌱 Implement KCP and MS Deleting conditions #11381

Merged
merged 3 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions api/v1beta1/machineset_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,23 @@ const (
const (
// MachineSetRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any.
MachineSetRemediatingV1Beta2Condition = RemediatingV1Beta2Condition
)

// MachineSet's Deleting condition and corresponding reasons that will be used in v1Beta2 API version.
// Note: each MachineSet-scoped identifier below aliases the corresponding generic, cluster-level
// constant so that condition/reason strings stay consistent across resource kinds.
const (
	// MachineSetDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
	MachineSetDeletingV1Beta2Condition = DeletingV1Beta2Condition

	// MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the MachineSet is not deleting because the
	// DeletionTimestamp is not set.
	MachineSetDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason

	// MachineSetDeletingDeletionTimestampSetV1Beta2Reason surfaces when the MachineSet is deleting because the
	// DeletionTimestamp is set.
	MachineSetDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason

	// MachineSetDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a MachineSet.
	MachineSetDeletingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
)

// ANCHOR_END: MachineSetSpec
Expand Down
25 changes: 20 additions & 5 deletions controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,27 @@ const (
const (
// KubeadmControlPlaneDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
KubeadmControlPlaneDeletingV1Beta2Condition = clusterv1.DeletingV1Beta2Condition
)

// KubeadmControlPlane's Paused condition and corresponding reasons that will be used in v1Beta2 API version.
const (
// KubeadmControlPlanePausedV1Beta2Condition is true if this resource or the Cluster it belongs to are paused.
KubeadmControlPlanePausedV1Beta2Condition = clusterv1.PausedV1Beta2Condition
// KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the KCP is not deleting because the
// DeletionTimestamp is not set.
KubeadmControlPlaneDeletingDeletionTimestampNotSetV1Beta2Reason = clusterv1.DeletionTimestampNotSetV1Beta2Reason

// KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason surfaces when the KCP deletion
// waits for the workers to be deleted.
sbueringer marked this conversation as resolved.
Show resolved Hide resolved
KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason = "WaitingForWorkersDeletion"

// KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason surfaces when the KCP deletion
// waits for the control plane Machines to be deleted.
KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason = "WaitingForMachineDeletion"

// KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason surfaces when the KCP deletion has been completed.
// This reason is set right after the `kubeadm.controlplane.cluster.x-k8s.io` finalizer is removed.
// This means that the object will go away (i.e. be removed from etcd), except if there are other
// finalizers on the KCP object.
KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason = clusterv1.DeletionCompletedV1Beta2Reason

// KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a KCP object.
KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
)

// APIServerPodHealthy, ControllerManagerPodHealthy, SchedulerPodHealthy and EtcdPodHealthy condition and corresponding
Expand Down
6 changes: 6 additions & 0 deletions controlplane/kubeadm/internal/control_plane.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ type ControlPlane struct {

managementCluster ManagementCluster
workloadCluster WorkloadCluster

// deletingReason is the reason that should be used when setting the Deleting condition.
DeletingReason string

// deletingMessage is the message that should be used when setting the Deleting condition.
DeletingMessage string
}

// PreflightCheckResults contains description about pre flight check results blocking machines creation or deletion.
Expand Down
64 changes: 61 additions & 3 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"
"fmt"
"sort"
"strings"
"time"

Expand Down Expand Up @@ -52,6 +53,7 @@ import (
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
"sigs.k8s.io/cluster-api/util/finalizers"
clog "sigs.k8s.io/cluster-api/util/log"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/paused"
"sigs.k8s.io/cluster-api/util/predicates"
Expand Down Expand Up @@ -220,7 +222,7 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
}
}

r.updateV1beta2Status(ctx, controlPlane)
r.updateV1Beta2Status(ctx, controlPlane)

// Always attempt to Patch the KubeadmControlPlane object and status after each reconciliation.
patchOpts := []patch.Option{}
Expand Down Expand Up @@ -596,6 +598,9 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con

// If no control plane machines remain, remove the finalizer
if len(controlPlane.Machines) == 0 {
controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason
controlPlane.DeletingMessage = ""

controllerutil.RemoveFinalizer(controlPlane.KCP, controlplanev1.KubeadmControlPlaneFinalizer)
return ctrl.Result{}, nil
}
Expand All @@ -615,6 +620,8 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
// Gets all machines, not just control plane machines.
allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, controlPlane.Cluster)
if err != nil {
controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
controlPlane.DeletingMessage = "Please check controller logs for errors" //nolint:goconst // Not making this a constant for now
return ctrl.Result{}, err
}

Expand All @@ -623,20 +630,25 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
if feature.Gates.Enabled(feature.MachinePool) {
allMachinePools, err = r.managementCluster.GetMachinePoolsForCluster(ctx, controlPlane.Cluster)
if err != nil {
controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
controlPlane.DeletingMessage = "Please check controller logs for errors"
return ctrl.Result{}, err
}
}
// Verify that only control plane machines remain
if len(allMachines) != len(controlPlane.Machines) || len(allMachinePools.Items) != 0 {
log.Info("Waiting for worker nodes to be deleted first")
conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")

controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason
controlPlane.DeletingMessage = fmt.Sprintf("KCP deletion blocked because %s still exist", objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster))
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

// Delete control plane machines in parallel
machinesToDelete := controlPlane.Machines
machines := controlPlane.Machines
var errs []error
for _, machineToDelete := range machinesToDelete {
for _, machineToDelete := range machines {
log := log.WithValues("Machine", klog.KObj(machineToDelete))
ctx := ctrl.LoggerInto(ctx, log)

Expand Down Expand Up @@ -665,15 +677,61 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
err := kerrors.NewAggregate(errs)
r.recorder.Eventf(controlPlane.KCP, corev1.EventTypeWarning, "FailedDelete",
"Failed to delete control plane Machines for cluster %s control plane: %v", klog.KObj(controlPlane.Cluster), err)

controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingInternalErrorV1Beta2Reason
controlPlane.DeletingMessage = "Please check controller logs for errors"
return ctrl.Result{}, err
}

log.Info("Waiting for control plane Machines to not exist anymore")

conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")

message := ""
if len(machines) > 0 {
if len(machines) == 1 {
message = fmt.Sprintf("Deleting %d Machine", len(machines))
} else {
message = fmt.Sprintf("Deleting %d Machines", len(machines))
}
staleMessage := aggregateStaleMachines(machines)
if staleMessage != "" {
message += fmt.Sprintf(" and %s", staleMessage)
}
}
controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason
controlPlane.DeletingMessage = message
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

// objectsPendingDeleteNames returns a human-readable summary of the worker Machines and
// MachinePools still pending deletion, e.g. "MachinePools: mp1; worker Machines: w1, w2".
// Control plane Machines owned by the cluster are excluded. Names within each group are
// sorted alphabetically; MachinePools are only considered when the feature gate is enabled.
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string {
	// Everything that is not a control plane Machine is a worker Machine.
	workerMachines := allMachines.Difference(allMachines.Filter(collections.ControlPlaneMachines(cluster.Name)))

	var pendingObjects []string

	if feature.Gates.Enabled(feature.MachinePool) {
		machinePoolNames := make([]string, 0, len(allMachinePools.Items))
		for i := range allMachinePools.Items {
			machinePoolNames = append(machinePoolNames, allMachinePools.Items[i].Name)
		}
		if len(machinePoolNames) > 0 {
			sort.Strings(machinePoolNames)
			pendingObjects = append(pendingObjects, "MachinePools: "+clog.StringListToString(machinePoolNames))
		}
	}

	workerMachineNames := make([]string, 0, len(workerMachines))
	for _, m := range workerMachines.UnsortedList() {
		workerMachineNames = append(workerMachineNames, m.Name)
	}
	if len(workerMachineNames) > 0 {
		sort.Strings(workerMachineNames)
		pendingObjects = append(pendingObjects, "worker Machines: "+clog.StringListToString(workerMachineNames))
	}

	return strings.Join(pendingObjects, "; ")
}

func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
if _, exists := machine.Annotations[controlplanev1.PreTerminateHookCleanupAnnotation]; !exists {
// Nothing to do, the annotation is not set (anymore) on the Machine
Expand Down
97 changes: 77 additions & 20 deletions controlplane/kubeadm/internal/controllers/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3041,6 +3041,8 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForMachineDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("Deleting 3 Machines"))

controlPlaneMachines := clusterv1.MachineList{}
g.Expect(fakeClient.List(ctx, &controlPlaneMachines)).To(Succeed())
Expand All @@ -3067,6 +3069,8 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(result).To(BeComparableTo(ctrl.Result{}))
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(BeEmpty())
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(BeEmpty())
})

t.Run("does not remove any control plane Machines if other Machines exist", func(t *testing.T) {
Expand All @@ -3075,18 +3079,20 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
cluster, kcp, _ := createClusterWithControlPlane(metav1.NamespaceDefault)
controllerutil.AddFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer)

workerMachine := &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: "worker",
Namespace: cluster.Namespace,
Labels: map[string]string{
clusterv1.ClusterNameLabel: cluster.Name,
initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy()}

for i := range 10 {
initObjs = append(initObjs, &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("worker-%d", i),
Namespace: cluster.Namespace,
Labels: map[string]string{
clusterv1.ClusterNameLabel: cluster.Name,
},
},
},
})
}

initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), workerMachine.DeepCopy()}

machines := collections.New()
for i := range 3 {
m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true)
Expand Down Expand Up @@ -3115,8 +3121,9 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
result, err := r.reconcileDelete(ctx, controlPlane)
g.Expect(result).To(BeComparableTo(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
g.Expect(err).ToNot(HaveOccurred())

g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because worker Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more) still exist"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
Expand All @@ -3133,18 +3140,20 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
cluster, kcp, _ := createClusterWithControlPlane(metav1.NamespaceDefault)
controllerutil.AddFinalizer(kcp, controlplanev1.KubeadmControlPlaneFinalizer)

workerMachinePool := &expv1.MachinePool{
ObjectMeta: metav1.ObjectMeta{
Name: "worker",
Namespace: cluster.Namespace,
Labels: map[string]string{
clusterv1.ClusterNameLabel: cluster.Name,
initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy()}

for i := range 10 {
initObjs = append(initObjs, &expv1.MachinePool{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("mp-%d", i),
Namespace: cluster.Namespace,
Labels: map[string]string{
clusterv1.ClusterNameLabel: cluster.Name,
},
},
},
})
}

initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), workerMachinePool.DeepCopy()}

machines := collections.New()
for i := range 3 {
m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true)
Expand Down Expand Up @@ -3173,8 +3182,9 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
result, err := r.reconcileDelete(ctx, controlPlane)
g.Expect(result).To(BeComparableTo(ctrl.Result{RequeueAfter: deleteRequeueAfter}))
g.Expect(err).ToNot(HaveOccurred())

g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more) still exist"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
Expand Down Expand Up @@ -3211,9 +3221,56 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(result).To(BeComparableTo(ctrl.Result{}))
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(BeEmpty())
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingDeletionCompletedV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(BeEmpty())
})
}

func TestObjectsPendingDelete(t *testing.T) {
c := &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
},
}

cpMachineLabels := map[string]string{
clusterv1.ClusterNameLabel: c.Name,
clusterv1.MachineControlPlaneLabel: "",
}
workerMachineLabels := map[string]string{
clusterv1.ClusterNameLabel: c.Name,
}

allMachines := collections.FromMachineList(&clusterv1.MachineList{
Items: []clusterv1.Machine{
*machine("cp1", withLabels(cpMachineLabels)),
*machine("cp2", withLabels(cpMachineLabels)),
*machine("cp3", withLabels(cpMachineLabels)),
*machine("w1", withLabels(workerMachineLabels)),
*machine("w2", withLabels(workerMachineLabels)),
*machine("w3", withLabels(workerMachineLabels)),
*machine("w4", withLabels(workerMachineLabels)),
*machine("w5", withLabels(workerMachineLabels)),
*machine("w6", withLabels(workerMachineLabels)),
*machine("w7", withLabels(workerMachineLabels)),
*machine("w8", withLabels(workerMachineLabels)),
},
})
machinePools := &expv1.MachinePoolList{
Items: []expv1.MachinePool{
{
ObjectMeta: metav1.ObjectMeta{
Name: "mp1",
},
},
},
}

g := NewWithT(t)

g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; worker Machines: w1, w2, w3, w4, w5, ... (3 more)"))
}

// test utils.

func newFakeClient(initObjs ...client.Object) client.Client {
Expand Down
6 changes: 6 additions & 0 deletions controlplane/kubeadm/internal/controllers/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,12 @@ func withAnnotation(annotation string) machineOpt {
}
}

// withLabels returns a machineOpt that sets the Machine's labels.
// The input map is copied so that multiple fixtures built from the same
// labels map do not share (and accidentally mutate) a single underlying map.
// A nil input leaves the labels nil, matching the previous behavior.
func withLabels(labels map[string]string) machineOpt {
	return func(m *clusterv1.Machine) {
		if labels == nil {
			m.Labels = nil
			return
		}
		m.Labels = make(map[string]string, len(labels))
		for k, v := range labels {
			m.Labels[k] = v
		}
	}
}

func withTimestamp(t time.Time) machineOpt {
return func(m *clusterv1.Machine) {
m.CreationTimestamp = metav1.NewTime(t)
Expand Down
Loading