Skip to content

Commit

Permalink
Once kube-system is healthy, stop rescheduling update-status
Browse files (browse the repository at this point in the history)
  • Loading branch information
addyess committed Dec 17, 2024
1 parent 697b1e9 commit 6f7df7f
Showing 1 changed file with 25 additions and 11 deletions.
36 changes: 25 additions & 11 deletions charms/worker/k8s/src/events/update_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import logging
from typing import Optional

import charms.contextual_status as status
import ops
Expand Down Expand Up @@ -91,6 +92,27 @@ def _on_update_status(self, event: ops.UpdateStatusEvent):
except status.ReconcilerError:
log.exception("Can't update_status")

def kube_system_pods_waiting(self) -> Optional[ops.WaitingStatus]:
    """Report whether this unit should keep waiting on kube-system pods.

    Worker units skip the check entirely. Any other unit asks the charm's
    cluster inspector to verify the pods in the ``kube-system`` namespace.

    Returns:
        A WaitingStatus whose message includes the inspector's result when
        that result is truthy, a WaitingStatus of "Waiting for API Server"
        when the inspector raises ClusterInspectorError, or None when the
        unit is a worker or the inspector reports nothing failing.
    """
    if self.charm.is_worker:
        # Worker units do not inspect the kube-system pods.
        return None

    inspector = self.charm.cluster_inspector
    try:
        failing_pods = inspector.verify_pods_running(["kube-system"])
    except ClusterInspector.ClusterInspectorError as e:
        # The inspection itself failed; log it and report a waiting state.
        log.exception("Failed to verify pods: %s", e)
        return ops.WaitingStatus("Waiting for API Server")

    if failing_pods:
        return ops.WaitingStatus(f"Unready kube-system Pods: {failing_pods}")
    return None

def run(self):
"""Check k8s snap status."""
version, overridden = snap_version("k8s")
Expand All @@ -109,16 +131,8 @@ def run(self):
trigger.create(reschedule.Period(seconds=30))
return

if self.charm.is_control_plane:
inspect = self.charm.cluster_inspector
try:
if failing_pods := inspect.verify_pods_running(["kube-system"]):
status.add(ops.WaitingStatus(f"Unready kube-system Pods: {failing_pods}"))
except ClusterInspector.ClusterInspectorError as e:
log.exception("Failed to verify pods: %s", e)
status.add(ops.WaitingStatus("Waiting for API Server"))
finally:
trigger.create(reschedule.Period(seconds=30))
if waiting := self.kube_system_pods_waiting():
status.add(waiting)
trigger.create(reschedule.Period(seconds=30))
return

trigger.cancel()

0 comments on commit 6f7df7f

Please sign in to comment.