From 6f7df7fe561daa30bec71ae3d552c870075d5ca2 Mon Sep 17 00:00:00 2001
From: Adam Dyess
Date: Tue, 17 Dec 2024 12:24:55 -0600
Subject: [PATCH] Once kube-system is healthy, stop rescheduling update-status

---
 charms/worker/k8s/src/events/update_status.py | 36 +++++++++++++------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/charms/worker/k8s/src/events/update_status.py b/charms/worker/k8s/src/events/update_status.py
index 7472a483..0ed5263b 100644
--- a/charms/worker/k8s/src/events/update_status.py
+++ b/charms/worker/k8s/src/events/update_status.py
@@ -9,6 +9,7 @@
 """
 
 import logging
+from typing import Optional
 
 import charms.contextual_status as status
 import ops
@@ -91,6 +92,27 @@ def _on_update_status(self, event: ops.UpdateStatusEvent):
         except status.ReconcilerError:
             log.exception("Can't update_status")
 
+    def kube_system_pods_waiting(self) -> Optional[ops.WaitingStatus]:
+        """Check if kube-system pods are waiting.
+
+        Returns:
+            Optional[WaitingStatus]: set if pods are unready or inspection fails, else None.
+        """
+        if self.charm.is_worker:
+            # Worker nodes don't need to check the kube-system pods
+            return None
+
+        waiting, inspect = None, self.charm.cluster_inspector
+
+        try:
+            if failing_pods := inspect.verify_pods_running(["kube-system"]):
+                waiting = ops.WaitingStatus(f"Unready kube-system Pods: {failing_pods}")
+        except ClusterInspector.ClusterInspectorError as e:
+            log.exception("Failed to verify pods: %s", e)
+            waiting = ops.WaitingStatus("Waiting for API Server")
+
+        return waiting
+
     def run(self):
         """Check k8s snap status."""
         version, overridden = snap_version("k8s")
@@ -109,16 +131,8 @@ def run(self):
             trigger.create(reschedule.Period(seconds=30))
             return
 
-        if self.charm.is_control_plane:
-            inspect = self.charm.cluster_inspector
-            try:
-                if failing_pods := inspect.verify_pods_running(["kube-system"]):
-                    status.add(ops.WaitingStatus(f"Unready kube-system Pods: {failing_pods}"))
-            except ClusterInspector.ClusterInspectorError as e:
-                log.exception("Failed to verify pods: %s", e)
-                status.add(ops.WaitingStatus("Waiting for API Server"))
-            finally:
-                trigger.create(reschedule.Period(seconds=30))
+        if waiting := self.kube_system_pods_waiting():
+            status.add(waiting)
+            trigger.create(reschedule.Period(seconds=30))
             return
-
         trigger.cancel()