Skip to content

Commit

Permalink
Improve logging and reasons (#71)
Browse files Browse the repository at this point in the history
Several changes:
- deadline exceeded: Feedback was that it was too general (looked like http error)
- disruption type message is clearer (display allowed types as well)
- log all rejections: before we logged only the "budget" rejections, now all rejections are logged.
  • Loading branch information
geobeau authored Oct 3, 2024
1 parent 0619e04 commit f1470c9
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
13 changes: 8 additions & 5 deletions internal/controller/nodedisruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ func (ndr *SingleNodeDisruptionReconciler) getRetryDate() metav1.Time {
// tryTransitionToGranted attempt to transition to the granted state but can result in either pending or rejected
func (ndr *SingleNodeDisruptionReconciler) tryTransitionToGranted(ctx context.Context) (err error) {
nextRetryDate := ndr.getRetryDate()
logger := log.FromContext(ctx)

var state nodedisruptionv1alpha1.NodeDisruptionState

Expand All @@ -245,6 +246,11 @@ func (ndr *SingleNodeDisruptionReconciler) tryTransitionToGranted(ctx context.Co
if !nextRetryDate.IsZero() {
state = nodedisruptionv1alpha1.Pending
} else {
for _, status := range statuses {
if !status.Ok {
logger.Info("Disruption rejected", "status", status)
}
}
state = nodedisruptionv1alpha1.Rejected
}
}
Expand Down Expand Up @@ -314,7 +320,7 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx c
if ndr.NodeDisruption.Spec.Retry.IsAfterDeadline() {
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: "Deadline exceeded",
Reason: fmt.Sprintf("Failed to grant maintenance before deadline (deadline: %s)", ndr.NodeDisruption.Spec.Retry.Deadline),
Ok: false,
}
// Conserve the statuses of the previous iteration to conserve the reason of rejection
Expand All @@ -338,7 +344,7 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx c
if len(ndr.Config.NodeDisruptionTypes) != 0 && !slices.Contains(ndr.Config.NodeDisruptionTypes, ndr.NodeDisruption.Spec.Type) {
status := nodedisruptionv1alpha1.DisruptedBudgetStatus{
Reference: ndr.getNodeDisruptionReference(),
Reason: "Type provided of node disruption is not managed",
Reason: fmt.Sprintf("Type of node disruption provided is not allowed (supported: %s) (see --node-disruption-types on the controller)", ndr.Config.NodeDisruptionTypes),
Ok: false,
}
return true, []nodedisruptionv1alpha1.DisruptedBudgetStatus{status}, nil
Expand All @@ -350,7 +356,6 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithInternalConstraints(ctx c
// ValidateBudgetConstraints check that the Node Disruption is valid against the budgets defined in the cluster
func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx context.Context, budgets []Budget) (anyFailed bool, statuses []nodedisruptionv1alpha1.DisruptedBudgetStatus) {
disruptedNodes := resolver.NewNodeSetFromStringList(ndr.NodeDisruption.Status.DisruptedNodes)
logger := log.FromContext(ctx)
anyFailed = false

impactedBudgets := []Budget{}
Expand All @@ -368,7 +373,6 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx con
Ok: false,
}
statuses = append(statuses, status)
logger.Info("Disruption rejected because: ", "status", status)
DisruptionBudgetRejectedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
break
}
Expand All @@ -390,7 +394,6 @@ func (ndr *SingleNodeDisruptionReconciler) ValidateWithBudgetConstraints(ctx con
Ok: false,
}
statuses = append(statuses, status)
logger.Info("Disruption rejected because: ", "status", status)
DisruptionBudgetRejectedTotal.WithLabelValues(ref.Namespace, ref.Name, ref.Kind).Inc()
break
}
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/nodedisruption_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ var _ = Describe("NodeDisruption controller", func() {
Name: "test-nodedisruption",
Kind: "NodeDisruption",
},
Reason: "Deadline exceeded",
Reason: fmt.Sprintf("Failed to grant maintenance before deadline (deadline: %s)", disruption.Spec.Retry.Deadline),
Ok: false,
}}
Expect(disruption.Status.DisruptedDisruptionBudgets).Should(Equal(expectedStatus))
Expand Down

0 comments on commit f1470c9

Please sign in to comment.