Adjustments to test charm upgrades (#217)
* Adjustments to test charm upgrades
* Ignore pre-upgrade-check on k8s-workers
* Wait for stable kube-system before upgrading
* Allow test modules to specify which series to deploy
* Test arm64 on focal, amd64 on jammy
addyess authored Dec 18, 2024
1 parent 1562054 commit 98c12d5
Showing 17 changed files with 586 additions and 378 deletions.
11 changes: 6 additions & 5 deletions .github/workflows/integration_test.yaml
@@ -39,24 +39,25 @@ jobs:
     strategy:
       matrix:
         arch:
-          - {id: amd64, builder-label: ubuntu-22.04, tester-arch: AMD64} # built on azure
-          - {id: arm64, builder-label: ARM64, tester-arch: ARM64} # built on self-hosted
-        suite: [k8s, etcd, ceph]
+          - {id: amd64, builder-label: ubuntu-22.04, tester-arch: AMD64, tester-size: xlarge} # built on azure, tested on self-hosted
+          - {id: arm64, builder-label: ARM64, tester-arch: ARM64, tester-size: large} # built and tested on self-hosted
+        suite: [k8s, etcd, ceph, upgrade]
         exclude:
           - {arch: {id: arm64}, suite: ceph}
+          - {arch: {id: arm64}, suite: upgrade}
     with:
       identifier: ${{ matrix.arch.id }}-${{ matrix.suite }}
       builder-runner-label: ${{ matrix.arch.builder-label }}
       charmcraft-channel: ${{ needs.charmcraft-channel.outputs.channel }}
       extra-arguments: >-
         ${{needs.extra-args.outputs.args}} -k test_${{ matrix.suite }}
-        ${{ matrix.arch.id == 'arm64' && ' --lxd-containers' || '' }}
+        ${{ matrix.arch.id == 'arm64' && ' --lxd-containers --series=focal' || '' }}
       juju-channel: 3/stable
       load-test-enabled: false
       provider: lxd
       self-hosted-runner: true
       self-hosted-runner-arch: ${{ matrix.arch.tester-arch }}
-      self-hosted-runner-label: large
+      self-hosted-runner-label: ${{ matrix.arch.tester-size }}
       test-timeout: 120
       test-tox-env: integration-${{ matrix.suite }}
       trivy-fs-enabled: false
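
The matrix/exclude interplay is easiest to see expanded. A quick sketch of the
job list the new matrix produces (computed by hand here, not by Actions):

    from itertools import product

    arches = ["amd64", "arm64"]
    suites = ["k8s", "etcd", "ceph", "upgrade"]
    excluded = {("arm64", "ceph"), ("arm64", "upgrade")}

    jobs = [(a, s) for a, s in product(arches, suites) if (a, s) not in excluded]
    print(jobs)  # amd64 runs all four suites; arm64 runs only k8s and etcd

Per the extra-arguments change, arm64 jobs also deploy on focal (and in LXD
containers) via --series=focal, while amd64 jobs keep the default series, jammy.
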
28 changes: 28 additions & 0 deletions charms/worker/k8s/src/events/update_status.py
@@ -9,10 +9,12 @@
 """
 
 import logging
+from typing import Optional
 
 import charms.contextual_status as status
 import ops
 import reschedule
+from inspector import ClusterInspector
 from protocols import K8sCharmProtocol
 from snap import version as snap_version
 from upgrade import K8sUpgrade
@@ -90,6 +92,27 @@ def _on_update_status(self, event: ops.UpdateStatusEvent):
         except status.ReconcilerError:
             log.exception("Can't update_status")
 
+    def kube_system_pods_waiting(self) -> Optional[ops.WaitingStatus]:
+        """Check if kube-system pods are waiting.
+
+        Returns:
+            WaitingStatus: waiting status if kube-system pods are not ready.
+        """
+        if self.charm.is_worker:
+            # Worker nodes don't need to check the kube-system pods
+            return None
+
+        waiting, inspect = None, self.charm.cluster_inspector
+
+        try:
+            if failing_pods := inspect.verify_pods_running(["kube-system"]):
+                waiting = ops.WaitingStatus(f"Unready Pods: {failing_pods}")
+        except ClusterInspector.ClusterInspectorError as e:
+            log.exception("Failed to verify pods: %s", e)
+            waiting = ops.WaitingStatus("Waiting for API Server")
+
+        return waiting
+
     def run(self):
         """Check k8s snap status."""
         version, overridden = snap_version("k8s")
@@ -107,4 +130,9 @@ def run(self):
             status.add(ops.WaitingStatus("Node not Ready"))
             trigger.create(reschedule.Period(seconds=30))
             return
+
+        if waiting := self.kube_system_pods_waiting():
+            status.add(waiting)
+            trigger.create(reschedule.Period(seconds=30))
+            return
         trigger.cancel()
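
The new check gates run() the same way the existing "Node not Ready" branch
does: report WaitingStatus and reschedule in 30 seconds until kube-system
settles; workers return None immediately and are never blocked. The core of
the gate, distilled with mocks (a sketch of the behaviour, not the module's
real wiring):

    from unittest.mock import MagicMock

    import ops

    charm = MagicMock()
    charm.is_worker = False
    charm.cluster_inspector.verify_pods_running.return_value = "kube-system/coredns-0"

    # mirrors kube_system_pods_waiting: any unready pod yields a WaitingStatus
    failing_pods = charm.cluster_inspector.verify_pods_running(["kube-system"])
    waiting = ops.WaitingStatus(f"Unready Pods: {failing_pods}") if failing_pods else None
    assert isinstance(waiting, ops.WaitingStatus)
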
3 changes: 2 additions & 1 deletion charms/worker/k8s/src/inspector.py
@@ -44,7 +44,7 @@ def _get_client(self) -> Client:
             self.client = Client(config=config.get())
         return self.client
 
-    def get_nodes(self, labels: LabelSelector) -> Optional[List[Node]]:
+    def get_nodes(self, labels: Optional[LabelSelector] = None) -> Optional[List[Node]]:
         """Get nodes from the cluster.
 
         Args:
@@ -56,6 +56,7 @@ def get_nodes(self, labels: LabelSelector) -> Optional[List[Node]]:
         Raises:
             ClusterInspectorError: If the nodes cannot be retrieved.
         """
+        labels = labels or {}
         client = self._get_client()
         try:
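
The None default (rather than a mutable {} default argument) is normalised
inside the function, where an empty selector matches every node. The pattern,
distilled into a toy stand-in:

    from typing import Dict, Optional

    LabelSelector = Dict[str, str]

    def get_nodes(labels: Optional[LabelSelector] = None) -> LabelSelector:
        """Toy version: return the selector the query would use."""
        labels = labels or {}
        return labels

    assert get_nodes() == {}                       # new default: select all nodes
    assert get_nodes({"juju-charm": "k8s"}) == {"juju-charm": "k8s"}

This is what lets upgrade.py (below) drop its role-specific label and inspect
every node before an upgrade.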

6 changes: 3 additions & 3 deletions charms/worker/k8s/src/literals.py
@@ -61,9 +61,9 @@
     },
     # NOTE: Update the dependencies for the k8s-service before releasing.
     "k8s_service": {
-        "dependencies": {"k8s-worker": "^1.30, < 1.32"},
+        "dependencies": {"k8s-worker": "^1.31, < 1.33"},
         "name": "k8s",
-        "upgrade_supported": "^1.30, < 1.32",
-        "version": "1.31.3",
+        "upgrade_supported": "^1.31, < 1.33",
+        "version": "1.32.0",
     },
 }
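
The pins move the supported window up one minor release. Read "^1.31, < 1.33"
as "1.31.x or 1.32.x"; sketched with plain tuple comparisons (the charm may use
a proper semver library; this is illustrative only):

    def in_supported_range(version: str) -> bool:
        """True when version satisfies ^1.31, < 1.33 (i.e. 1.31.x or 1.32.x)."""
        major, minor, *_ = (int(part) for part in version.split("."))
        return (1, 31) <= (major, minor) < (1, 33)

    assert in_supported_range("1.32.0")        # the new k8s_service version
    assert not in_supported_range("1.30.5")    # now outside the window
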
3 changes: 3 additions & 0 deletions charms/worker/k8s/src/protocols.py
@@ -9,6 +9,7 @@
 from charms.interface_external_cloud_provider import ExternalCloudProvider
 from charms.k8s.v0.k8sd_api_manager import K8sdAPIManager
 from charms.reconciler import Reconciler
+from inspector import ClusterInspector
 from ops.interface_kube_control import KubeControlProvides
 
 
@@ -17,6 +18,7 @@ class K8sCharmProtocol(ops.CharmBase):
     Attributes:
         api_manager (K8sdAPIManager): The API manager for the charm.
+        cluster_inspector (ClusterInspector): The cluster inspector for the charm.
         kube_control (KubeControlProvides): The kube-control interface.
         xcp (ExternalCloudProvider): The external cloud provider interface.
         reconciler (Reconciler): The reconciler for the charm
@@ -28,6 +30,7 @@ class K8sCharmProtocol(ops.CharmBase):
     """
 
     api_manager: K8sdAPIManager
+    cluster_inspector: ClusterInspector
     kube_control: KubeControlProvides
     xcp: ExternalCloudProvider
     reconciler: Reconciler
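
Declaring cluster_inspector on the protocol lets code typed against
K8sCharmProtocol (such as the new update_status check) reach the inspector
without casts. A minimal illustration (the helper function is hypothetical):

    from typing import Optional

    from protocols import K8sCharmProtocol  # as imported in update_status.py

    def unready_kube_system_pods(charm: K8sCharmProtocol) -> Optional[str]:
        # attribute access type-checks because the protocol declares it
        return charm.cluster_inspector.verify_pods_running(["kube-system"])
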
7 changes: 4 additions & 3 deletions charms/worker/k8s/src/upgrade.py
@@ -89,10 +89,11 @@ def pre_upgrade_check(self) -> None:
         Raises:
             ClusterNotReadyError: If the cluster is not ready for an upgrade.
         """
+        if self.charm.is_worker:
+            log.info("TODO: Find some pre-upgrade checks for worker application.")
+            return
         try:
-            nodes = self.cluster_inspector.get_nodes(
-                labels={"juju-charm": "k8s-worker" if self.charm.is_worker else "k8s"}
-            )
+            nodes = self.cluster_inspector.get_nodes()
             failing_pods = self.cluster_inspector.verify_pods_running(["kube-system"])
         except ClusterInspector.ClusterInspectorError as e:
             raise ClusterNotReadyError(
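
The resulting role split: workers return before any cluster API traffic, and
the control plane inspects every node rather than only those matching a
role-specific label. Distilled with mocks (a sketch of the new behaviour, not
the charm's actual wiring):

    from unittest.mock import MagicMock

    def pre_upgrade_check(charm, inspector) -> None:
        if charm.is_worker:
            return                          # worker: skip cluster-wide checks
        inspector.get_nodes()               # control plane: all nodes, no label filter
        inspector.verify_pods_running(["kube-system"])

    charm, inspector = MagicMock(is_worker=True), MagicMock()
    pre_upgrade_check(charm, inspector)
    inspector.get_nodes.assert_not_called()  # mirrors the updated unit test
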
15 changes: 7 additions & 8 deletions charms/worker/k8s/tests/unit/test_upgrade.py
@@ -21,6 +21,7 @@ class TestK8sUpgrade(unittest.TestCase):
     def setUp(self):
         """Set up common test fixtures."""
         self.charm = MagicMock()
+        self.charm.is_worker = False
         self.node_manager = MagicMock(spec=ClusterInspector)
         self.upgrade = K8sUpgrade(
             self.charm,
@@ -53,8 +54,8 @@ def test_pre_upgrade_check_worker_success(self):
 
         self.upgrade.pre_upgrade_check()
 
-        self.node_manager.get_nodes.assert_called_once_with(labels={"juju-charm": "k8s-worker"})
-        self.node_manager.verify_pods_running.assert_called_once_with(["kube-system"])
+        self.node_manager.get_nodes.assert_not_called()
+        self.node_manager.verify_pods_running.assert_not_called()
 
     def test_pre_upgrade_check_control_plane_success(self):
         """Test pre_upgrade_check succeeds for control plane nodes."""
@@ -64,15 +65,14 @@ def test_pre_upgrade_check_control_plane_success(self):
 
         self.upgrade.pre_upgrade_check()
 
-        self.node_manager.get_nodes.assert_called_once_with(labels={"juju-charm": "k8s"})
+        self.node_manager.get_nodes.assert_called_once_with()
 
     def test_pre_upgrade_check_unready_nodes(self):
         """Test pre_upgrade_check fails when nodes are not ready."""
-        self.charm.is_worker = True
         self.node_manager.get_nodes.return_value = [
-            Node(metadata=ObjectMeta(name="worker-1")),
-            Node(metadata=ObjectMeta(name="worker-2")),
-            Node(metadata=ObjectMeta(name="worker-3")),
+            Node(metadata=ObjectMeta(name="k8s-1")),
+            Node(metadata=ObjectMeta(name="k8s-2")),
+            Node(metadata=ObjectMeta(name="k8s-3")),
         ]
 
         with self.assertRaises(ClusterNotReadyError):
@@ -89,7 +89,6 @@ def test_pre_upgrade_check_cluster_inspector_error(self):
 
     def test_pre_upgrade_check_pods_not_ready(self):
         """Test pre_upgrade_check fails when pods are not ready."""
-        self.charm.is_worker = True
         self.node_manager.get_nodes.return_value = None
         self.node_manager.verify_pods_running.return_value = "kube-system/pod-1"
 
(Diffs for the remaining 10 changed files are not rendered here.)
