Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 Refine v1beta2 condition messages #11404

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions controlplane/kubeadm/internal/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
conditions.MarkFalse(controlPlane.KCP, controlplanev1.ResizedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "Waiting for worker nodes to be deleted first")

controlPlane.DeletingReason = controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason
controlPlane.DeletingMessage = fmt.Sprintf("KCP deletion blocked because %s still exist", objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster))
names := objectsPendingDeleteNames(allMachines, allMachinePools, controlPlane.Cluster)
for i := range names {
names[i] = "* " + names[i]
}
controlPlane.DeletingMessage = fmt.Sprintf("KubeadmControlPlane deletion blocked because following objects still exist:\n%s", strings.Join(names, "\n"))
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

Expand Down Expand Up @@ -703,7 +707,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, con
}

// objectsPendingDeleteNames returns the names of worker Machines and MachinePools pending delete.
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) string {
func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools *expv1.MachinePoolList, cluster *clusterv1.Cluster) []string {
controlPlaneMachines := allMachines.Filter(collections.ControlPlaneMachines(cluster.Name))
workerMachines := allMachines.Difference(controlPlaneMachines)

Expand All @@ -725,9 +729,9 @@ func objectsPendingDeleteNames(allMachines collections.Machines, allMachinePools
}
if len(workerMachineNames) > 0 {
sort.Strings(workerMachineNames)
descendants = append(descendants, "worker Machines: "+clog.StringListToString(workerMachineNames))
descendants = append(descendants, "Machines: "+clog.StringListToString(workerMachineNames))
}
return strings.Join(descendants, "; ")
return descendants
}

func (r *KubeadmControlPlaneReconciler) removePreTerminateHookAnnotationFromMachine(ctx context.Context, machine *clusterv1.Machine) error {
Expand Down
48 changes: 29 additions & 19 deletions controlplane/kubeadm/internal/controllers/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2008,16 +2008,21 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason,
},
{
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting etcd member unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * EtcdMemberHealthy: Node does not exist",
},
{
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting control plane unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * APIServerPodHealthy: Node does not exist\n" +
" * ControllerManagerPodHealthy: Node does not exist\n" +
" * SchedulerPodHealthy: Node does not exist\n" +
" * EtcdPodHealthy: Node does not exist",
},
},
expectMachineConditions: []metav1.Condition{
Expand Down Expand Up @@ -2083,16 +2088,21 @@ func TestKubeadmControlPlaneReconciler_reconcileControlPlaneAndMachinesCondition
Reason: controlplanev1.KubeadmControlPlaneInitializedV1Beta2Reason,
},
{
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting etcd member unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * EtcdMemberHealthy: Node does not exist",
},
{
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "Following Machines are reporting control plane unknown: machine1-test",
Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
Status: metav1.ConditionUnknown,
Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason,
Message: "* Machine machine1-test:\n" +
" * APIServerPodHealthy: Node does not exist\n" +
" * ControllerManagerPodHealthy: Node does not exist\n" +
" * SchedulerPodHealthy: Node does not exist\n" +
" * EtcdPodHealthy: Node does not exist",
},
},
expectMachineConditions: []metav1.Condition{
Expand Down Expand Up @@ -3344,7 +3354,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because worker Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more) still exist"))
g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* Machines: worker-0, worker-1, worker-2, worker-3, worker-4, ... (5 more)"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
Expand Down Expand Up @@ -3405,7 +3415,7 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
g.Expect(err).ToNot(HaveOccurred())
g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer))
g.Expect(controlPlane.DeletingReason).To(Equal(controlplanev1.KubeadmControlPlaneDeletingWaitingForWorkersDeletionV1Beta2Reason))
g.Expect(controlPlane.DeletingMessage).To(Equal("KCP deletion blocked because MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more) still exist"))
g.Expect(controlPlane.DeletingMessage).To(Equal("KubeadmControlPlane deletion blocked because following objects still exist:\n* MachinePools: mp-0, mp-1, mp-2, mp-3, mp-4, ... (5 more)"))

controlPlaneMachines := clusterv1.MachineList{}
labels := map[string]string{
Expand Down Expand Up @@ -3489,7 +3499,7 @@ func TestObjectsPendingDelete(t *testing.T) {

g := NewWithT(t)

g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal("MachinePools: mp1; worker Machines: w1, w2, w3, w4, w5, ... (3 more)"))
g.Expect(objectsPendingDeleteNames(allMachines, machinePools, c)).To(Equal([]string{"MachinePools: mp1", "Machines: w1, w2, w3, w4, w5, ... (3 more)"}))
}

// test utils.
Expand Down
6 changes: 3 additions & 3 deletions controlplane/kubeadm/internal/controllers/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,13 @@ func Test_setMachinesReadyAndMachinesUpToDateConditions(t *testing.T) {
Type: controlplanev1.KubeadmControlPlaneMachinesReadyV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: "SomeReason", // There is only one machine reporting issues, using the reason from that machine.
Message: "NotReady from Machine m3",
Message: "* Machine m3: NotReady",
},
expectMachinesUpToDateCondition: metav1.Condition{
Type: controlplanev1.KubeadmControlPlaneMachinesUpToDateV1Beta2Condition,
Status: metav1.ConditionFalse,
Reason: v1beta2conditions.MultipleIssuesReportedReason, // There are many machines reporting issues, using a generic reason.
Message: "NotUpToDate from Machines m2, m3",
Message: "* Machines m2, m3: NotUpToDate",
},
},
}
Expand Down Expand Up @@ -550,7 +550,7 @@ func Test_setRemediatingCondition(t *testing.T) {
Type: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition,
Status: metav1.ConditionTrue,
Reason: controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Reason,
Message: "Machine deletionTimestamp set from Machine m3",
Message: "* Machine m3: Machine deletionTimestamp set",
},
},
{
Expand Down
62 changes: 56 additions & 6 deletions controlplane/kubeadm/internal/workload_cluster_conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package internal
import (
"context"
"fmt"
"sort"
"strings"
"time"

Expand All @@ -38,6 +39,7 @@ import (
"sigs.k8s.io/cluster-api/util/collections"
"sigs.k8s.io/cluster-api/util/conditions"
v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
clog "sigs.k8s.io/cluster-api/util/log"
)

// UpdateEtcdConditions is responsible for updating machine conditions reflecting the status of all the etcd members.
Expand Down Expand Up @@ -934,33 +936,81 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition
kcpMachinesWithUnknown := sets.Set[string]{}
kcpMachinesWithInfo := sets.Set[string]{}

messageMap := map[string][]string{}
for i := range input.controlPlane.Machines {
machine := input.controlPlane.Machines[i]
machineMessages := []string{}
for _, condition := range input.machineConditions {
if machineCondition := v1beta2conditions.Get(machine, condition); machineCondition != nil {
switch machineCondition.Status {
case metav1.ConditionTrue:
kcpMachinesWithInfo.Insert(machine.Name)
case metav1.ConditionFalse:
kcpMachinesWithErrors.Insert(machine.Name)
m := machineCondition.Message
if m == "" {
m = fmt.Sprintf("condition is %s", machineCondition.Status)
}
machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m))
case metav1.ConditionUnknown:
kcpMachinesWithUnknown.Insert(machine.Name)
m := machineCondition.Message
if m == "" {
m = fmt.Sprintf("condition is %s", machineCondition.Status)
}
machineMessages = append(machineMessages, fmt.Sprintf(" * %s: %s", machineCondition.Type, m))
}
}
}

if len(machineMessages) > 0 {
message := strings.Join(machineMessages, "\n")
messageMap[message] = append(messageMap[message], machine.Name)
}
}

// compute the order of messages according to the number of machines reporting the same message.
// Note: The list of object names is used as a secondary criteria to sort messages with the same number of objects.
messageIndex := make([]string, 0, len(messageMap))
for m := range messageMap {
messageIndex = append(messageIndex, m)
}

sort.SliceStable(messageIndex, func(i, j int) bool {
return len(messageMap[messageIndex[i]]) > len(messageMap[messageIndex[j]]) ||
(len(messageMap[messageIndex[i]]) == len(messageMap[messageIndex[j]]) && strings.Join(messageMap[messageIndex[i]], ",") < strings.Join(messageMap[messageIndex[j]], ","))
})

// Build the message
messages := []string{}
for _, message := range messageIndex {
machines := messageMap[message]
machinesMessage := "Machine"
if len(messageMap[message]) > 1 {
machinesMessage += "s"
}

sort.Strings(machines)
machinesMessage += " " + clog.ListToString(machines, func(s string) string { return s }, 3)
sbueringer marked this conversation as resolved.
Show resolved Hide resolved

messages = append(messages, fmt.Sprintf("* %s:\n%s", machinesMessage, message))
}

// Append messages impacting KCP as a whole, if any
if len(input.kcpErrors) > 0 {
for _, message := range input.kcpErrors {
messages = append(messages, fmt.Sprintf("* %s", message))
}
}
message := strings.Join(messages, "\n")

// In case of at least one machine with errors or KCP level errors (nodes without machines), report false.
if len(input.kcpErrors) > 0 || len(kcpMachinesWithErrors) > 0 {
messages := input.kcpErrors
if len(kcpMachinesWithErrors) > 0 {
messages = append(messages, fmt.Sprintf("Following Machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", ")))
}
v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{
Type: input.condition,
Status: metav1.ConditionFalse,
Reason: input.falseReason,
Message: strings.Join(messages, ", "),
Message: message,
})
return
}
Expand All @@ -971,7 +1021,7 @@ func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2Condition
Type: input.condition,
Status: metav1.ConditionUnknown,
Reason: input.unknownReason,
Message: fmt.Sprintf("Following Machines are reporting %s unknown: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")),
Message: message,
})
return
}
Expand Down
Loading