Adjustments to test charm upgrades #217

Merged (12 commits, Dec 18, 2024)
11 changes: 6 additions & 5 deletions .github/workflows/integration_test.yaml
@@ -39,24 +39,25 @@ jobs:
strategy:
matrix:
arch:
- {id: amd64, builder-label: ubuntu-22.04, tester-arch: AMD64} # built on azure
- {id: arm64, builder-label: ARM64, tester-arch: ARM64} # built on self-hosted
suite: [k8s, etcd, ceph]
- {id: amd64, builder-label: ubuntu-22.04, tester-arch: AMD64, tester-size: xlarge} # built on azure, test on self-hosted
- {id: arm64, builder-label: ARM64, tester-arch: ARM64, tester-size: large } # built and tested on self-hosted
suite: [k8s, etcd, ceph, upgrade]
exclude:
- {arch: {id: arm64}, suite: ceph}
- {arch: {id: arm64}, suite: upgrade}
with:
identifier: ${{ matrix.arch.id }}-${{ matrix.suite }}
builder-runner-label: ${{ matrix.arch.builder-label }}
charmcraft-channel: ${{ needs.charmcraft-channel.outputs.channel }}
extra-arguments: >-
${{needs.extra-args.outputs.args}} -k test_${{ matrix.suite }}
${{ matrix.arch.id == 'arm64' && ' --lxd-containers' || '' }}
${{ matrix.arch.id == 'arm64' && ' --lxd-containers --series=focal' || '' }}
Comment on lines -53 to +54 (Contributor Author):
arm64 -- stays testing on focal so we know if we break focal compatibility

juju-channel: 3/stable
load-test-enabled: false
provider: lxd
self-hosted-runner: true
self-hosted-runner-arch: ${{ matrix.arch.tester-arch }}
self-hosted-runner-label: large
self-hosted-runner-label: ${{ matrix.arch.tester-size }}
test-timeout: 120
test-tox-env: integration-${{ matrix.suite }}
trivy-fs-enabled: false
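The two exclude entries above mean arm64 only runs the k8s and etcd suites, while amd64 runs all four, including the new upgrade suite. A rough Python sketch of the resulting combinations (GitHub Actions performs the real matrix expansion; this is only an illustration):

```python
# Rough illustration of the matrix after the excludes above.
arches = ["amd64", "arm64"]
suites = ["k8s", "etcd", "ceph", "upgrade"]
excluded = {("arm64", "ceph"), ("arm64", "upgrade")}

jobs = [(arch, suite) for arch in arches for suite in suites if (arch, suite) not in excluded]
print(jobs)
# [('amd64', 'k8s'), ('amd64', 'etcd'), ('amd64', 'ceph'), ('amd64', 'upgrade'),
#  ('arm64', 'k8s'), ('arm64', 'etcd')]
```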
28 changes: 28 additions & 0 deletions charms/worker/k8s/src/events/update_status.py
@@ -9,10 +9,12 @@
"""

import logging
from typing import Optional

import charms.contextual_status as status
import ops
import reschedule
from inspector import ClusterInspector
from protocols import K8sCharmProtocol
from snap import version as snap_version
from upgrade import K8sUpgrade
@@ -90,6 +92,27 @@ def _on_update_status(self, event: ops.UpdateStatusEvent):
except status.ReconcilerError:
log.exception("Can't update_status")

def kube_system_pods_waiting(self) -> Optional[ops.WaitingStatus]:
"""Check if kube-system pods are waiting.

Returns:
WaitingStatus: waiting status if kube-system pods are not ready.
"""
if self.charm.is_worker:
# Worker nodes don't need to check the kube-system pods
return None

waiting, inspect = None, self.charm.cluster_inspector

try:
if failing_pods := inspect.verify_pods_running(["kube-system"]):
waiting = ops.WaitingStatus(f"Unready Pods: {failing_pods}")
except ClusterInspector.ClusterInspectorError as e:
log.exception("Failed to verify pods: %s", e)
waiting = ops.WaitingStatus("Waiting for API Server")

return waiting

def run(self):
"""Check k8s snap status."""
version, overridden = snap_version("k8s")
@@ -107,4 +130,9 @@ def run(self):
status.add(ops.WaitingStatus("Node not Ready"))
trigger.create(reschedule.Period(seconds=30))
return

if waiting := self.kube_system_pods_waiting():
status.add(waiting)
trigger.create(reschedule.Period(seconds=30))
return
trigger.cancel()
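A standalone sketch of the decision logic this hunk adds, driven by a mocked inspector. The ClusterInspectorError class below is a local stand-in for ClusterInspector.ClusterInspectorError and the pod name is hypothetical; the sketch only demonstrates the three outcomes: workers skip the check, unready kube-system pods produce a WaitingStatus, and an unreachable API server produces a different WaitingStatus.

```python
# Standalone mirror of the new kube-system check; not the charm code itself.
from typing import Optional
from unittest.mock import MagicMock

import ops


class ClusterInspectorError(Exception):
    """Local stand-in for ClusterInspector.ClusterInspectorError."""


def kube_system_pods_waiting(is_worker: bool, inspector) -> Optional[ops.WaitingStatus]:
    """Mirror of the logic added to update_status.py above."""
    if is_worker:
        return None  # worker units do not check kube-system pods
    try:
        if failing_pods := inspector.verify_pods_running(["kube-system"]):
            return ops.WaitingStatus(f"Unready Pods: {failing_pods}")
    except ClusterInspectorError:
        return ops.WaitingStatus("Waiting for API Server")
    return None


inspector = MagicMock()
inspector.verify_pods_running.return_value = "kube-system/coredns-0"  # hypothetical pod name
print(kube_system_pods_waiting(False, inspector))  # WaitingStatus: Unready Pods: kube-system/coredns-0
print(kube_system_pods_waiting(True, inspector))   # None, worker units skip the check
```

When run() sees a non-None result it adds the waiting status and reschedules the check in 30 seconds, exactly as it already does for a node that is not Ready.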
3 changes: 2 additions & 1 deletion charms/worker/k8s/src/inspector.py
@@ -44,7 +44,7 @@ def _get_client(self) -> Client:
self.client = Client(config=config.get())
return self.client

def get_nodes(self, labels: LabelSelector) -> Optional[List[Node]]:
def get_nodes(self, labels: Optional[LabelSelector] = None) -> Optional[List[Node]]:
"""Get nodes from the cluster.

Args:
@@ -56,6 +56,7 @@ def get_nodes(self, labels: LabelSelector) -> Optional[List[Node]]:
Raises:
ClusterInspectorError: If the nodes cannot be retrieved.
"""
labels = labels or {}
Comment (Contributor Author):
labels are now "OPTIONAL"

client = self._get_client()
try:

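As the review note above says, the label selector is now optional. A minimal usage sketch of the two call forms; the ClusterInspector constructor arguments shown here are hypothetical and not part of this diff:

```python
# Minimal usage sketch; constructor arguments are assumed for illustration only.
from inspector import ClusterInspector

inspector = ClusterInspector(kubeconfig_path="/etc/kubernetes/admin.conf")  # hypothetical args

# Still supported: filter nodes by a label selector.
control_plane = inspector.get_nodes(labels={"juju-charm": "k8s"})

# New with this change: no selector needed; an empty selector matches all nodes.
all_nodes = inspector.get_nodes()
```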
6 changes: 3 additions & 3 deletions charms/worker/k8s/src/literals.py
@@ -61,9 +61,9 @@
},
# NOTE: Update the dependencies for the k8s-service before releasing.
"k8s_service": {
"dependencies": {"k8s-worker": "^1.30, < 1.32"},
"dependencies": {"k8s-worker": "^1.31, < 1.33"},
"name": "k8s",
"upgrade_supported": "^1.30, < 1.32",
"version": "1.31.3",
"upgrade_supported": "^1.31, < 1.33",
"version": "1.32.0",
},
}
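Assuming the caret follows the usual poetry/npm convention (^1.31 means >= 1.31, < 2.0), the tightened range "^1.31, < 1.33" admits any 1.31.x or 1.32.x worker and rejects 1.30.x and 1.33+. A hand-rolled reading of that constraint; the charm evaluates it through its own dependency model, so treat this as an illustration only:

```python
# Hand-rolled reading of "^1.31, < 1.33" for (major, minor) versions.
def upgrade_supported(major: int, minor: int) -> bool:
    return (1, 31) <= (major, minor) < (1, 33)

assert upgrade_supported(1, 31)
assert upgrade_supported(1, 32)
assert not upgrade_supported(1, 30)
assert not upgrade_supported(1, 33)
```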
3 changes: 3 additions & 0 deletions charms/worker/k8s/src/protocols.py
@@ -9,6 +9,7 @@
from charms.interface_external_cloud_provider import ExternalCloudProvider
from charms.k8s.v0.k8sd_api_manager import K8sdAPIManager
from charms.reconciler import Reconciler
from inspector import ClusterInspector
from ops.interface_kube_control import KubeControlProvides


@@ -17,6 +18,7 @@ class K8sCharmProtocol(ops.CharmBase):

Attributes:
api_manager (K8sdAPIManager): The API manager for the charm.
cluster_inspector (ClusterInspector): The cluster inspector for the charm.
kube_control (KubeControlProvides): The kube-control interface.
xcp (ExternalCloudProvider): The external cloud provider interface.
reconciler (Reconciler): The reconciler for the charm
@@ -28,6 +30,7 @@
"""

api_manager: K8sdAPIManager
cluster_inspector: ClusterInspector
kube_control: KubeControlProvides
xcp: ExternalCloudProvider
reconciler: Reconciler
7 changes: 4 additions & 3 deletions charms/worker/k8s/src/upgrade.py
@@ -89,10 +89,11 @@ def pre_upgrade_check(self) -> None:
Raises:
ClusterNotReadyError: If the cluster is not ready for an upgrade.
"""
if self.charm.is_worker:
log.info("TODO: Find some pre-upgrade checks for worker application.")
return
try:
nodes = self.cluster_inspector.get_nodes(
labels={"juju-charm": "k8s-worker" if self.charm.is_worker else "k8s"}
)
nodes = self.cluster_inspector.get_nodes()
failing_pods = self.cluster_inspector.verify_pods_running(["kube-system"])
except ClusterInspector.ClusterInspectorError as e:
raise ClusterNotReadyError(
15 changes: 7 additions & 8 deletions charms/worker/k8s/tests/unit/test_upgrade.py
@@ -21,6 +21,7 @@ class TestK8sUpgrade(unittest.TestCase):
def setUp(self):
"""Set up common test fixtures."""
self.charm = MagicMock()
self.charm.is_worker = False
self.node_manager = MagicMock(spec=ClusterInspector)
self.upgrade = K8sUpgrade(
self.charm,
@@ -53,8 +54,8 @@ def test_pre_upgrade_check_worker_success(self):

self.upgrade.pre_upgrade_check()

self.node_manager.get_nodes.assert_called_once_with(labels={"juju-charm": "k8s-worker"})
self.node_manager.verify_pods_running.assert_called_once_with(["kube-system"])
self.node_manager.get_nodes.assert_not_called()
self.node_manager.verify_pods_running.assert_not_called()

def test_pre_upgrade_check_control_plane_success(self):
"""Test pre_upgrade_check succeeds for control plane nodes."""
@@ -64,15 +65,14 @@ def test_pre_upgrade_check_control_plane_success(self):

self.upgrade.pre_upgrade_check()

self.node_manager.get_nodes.assert_called_once_with(labels={"juju-charm": "k8s"})
self.node_manager.get_nodes.assert_called_once_with()

def test_pre_upgrade_check_unready_nodes(self):
"""Test pre_upgrade_check fails when nodes are not ready."""
self.charm.is_worker = True
self.node_manager.get_nodes.return_value = [
Node(metadata=ObjectMeta(name="worker-1")),
Node(metadata=ObjectMeta(name="worker-2")),
Node(metadata=ObjectMeta(name="worker-3")),
Node(metadata=ObjectMeta(name="k8s-1")),
Node(metadata=ObjectMeta(name="k8s-2")),
Node(metadata=ObjectMeta(name="k8s-3")),
]

with self.assertRaises(ClusterNotReadyError):
@@ -89,7 +89,6 @@ def test_pre_upgrade_check_cluster_inspector_error(self):

def test_pre_upgrade_check_pods_not_ready(self):
"""Test pre_upgrade_check fails when pods are not ready."""
self.charm.is_worker = True
self.node_manager.get_nodes.return_value = None
self.node_manager.verify_pods_running.return_value = "kube-system/pod-1"
