Skip to content

Commit

Permalink
Wait for stable kube-system before upgrading
Browse files Browse the repository at this point in the history
  • Loading branch information
addyess committed Dec 16, 2024
1 parent a954641 commit 8490141
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 15 deletions.
27 changes: 17 additions & 10 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,29 @@ async def get_unit_cidrs(model: Model, app_name: str, unit_num: int) -> List[str
return list(sorted(local_cidrs))


async def get_rsc(k8s, resource, namespace=None, labels=None):
    """Return a list of the named kubernetes resources via kubectl.

    Generalizes the old get_nodes helper: any resource type, optionally
    scoped to a namespace and filtered by label selectors.

    Args:
        k8s: any k8s unit (juju unit with a `run` action interface)
        resource: string resource type (e.g. "nodes", "pods")
        namespace: optional string namespace; omitted -> cluster/default scope
        labels: optional dict of label selectors, rendered as repeated -l k=v

    Returns:
        list of resource objects parsed from kubectl's JSON "List" output

    Raises:
        AssertionError: if kubectl fails or does not return a List document
    """
    namespaced = f"-n {namespace}" if namespace else ""
    labeled = " ".join(f"-l {k}={v}" for k, v in labels.items()) if labels else ""
    cmd = f"k8s kubectl get {resource} {labeled} {namespaced} -o json"

    action = await k8s.run(cmd)
    result = await action.wait()
    assert result.results["return-code"] == 0, f"Failed to get {resource} with kubectl"
    log.info("Parsing %s list...", resource)
    resource_list = json.loads(result.results["stdout"])
    assert resource_list["kind"] == "List", f"Should have found a list of {resource}"
    return resource_list["items"]


@retry(reraise=True, stop=stop_after_attempt(12), wait=wait_fixed(15))
Expand All @@ -114,7 +121,7 @@ async def ready_nodes(k8s, expected_count):
expected_count: number of expected nodes
"""
log.info("Finding all nodes...")
nodes = await get_nodes(k8s)
nodes = await get_rsc(k8s, "nodes")
ready_nodes = {
node["metadata"]["name"]: all(
condition["status"] == "False"
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from tenacity import retry, stop_after_attempt, wait_fixed

from .grafana import Grafana
from .helpers import get_leader, get_nodes, ready_nodes
from .helpers import get_leader, get_rsc, ready_nodes
from .prometheus import Prometheus

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -45,7 +45,7 @@ async def test_nodes_labelled(request, kubernetes_cluster: model.Model):
await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60)

try:
nodes = await get_nodes(k8s.units[0])
nodes = await get_rsc(k8s.units[0], "nodes")
labelled = [n for n in nodes if testname in n["metadata"]["labels"]]
juju_nodes = [n for n in nodes if "juju-charm" in n["metadata"]["labels"]]
assert len(k8s.units + worker.units) == len(
Expand All @@ -60,7 +60,7 @@ async def test_nodes_labelled(request, kubernetes_cluster: model.Model):
)

await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60)
nodes = await get_nodes(k8s.units[0])
nodes = await get_rsc(k8s.units[0], "nodes")
labelled = [n for n in nodes if testname in n["metadata"]["labels"]]
juju_nodes = [n for n in nodes if "juju-charm" in n["metadata"]["labels"]]
assert 0 == len(labelled), "Not all nodes labelled with custom-label"
Expand Down
19 changes: 17 additions & 2 deletions tests/integration/test_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
import pytest
import yaml
from pytest_operator.plugin import OpsTest
from tenacity import before_sleep_log, retry, stop_after_attempt, wait_fixed

from .helpers import Bundle, get_leader
from .helpers import Bundle, get_leader, get_rsc

# This pytest mark configures the test environment to use the Canonical Kubernetes
# deploying charms from the edge channels, then upgrading them to the built charm.
Expand Down Expand Up @@ -43,6 +44,19 @@ async def test_upgrade(kubernetes_cluster: juju.model.Model, ops_test: OpsTest):
}
bundle, _ = await Bundle.create(ops_test)
charms = await bundle.discover_charm_files(ops_test)
k8s: juju.application.Application = kubernetes_cluster.applications["k8s"]

@retry(
stop=stop_after_attempt(6),
wait=wait_fixed(10),
before_sleep=before_sleep_log(log, logging.WARNING),
)
async def _wait_for_idle():
"""Assert every pod in kube-system reports phase "Running".

Retried by tenacity: up to 6 attempts, 10s apart, logging a warning
before each sleep; reraises the final AssertionError if kube-system
never stabilizes.
"""
kube_system_pods = await get_rsc(k8s.units[0], "pods", namespace="kube-system")
# NOTE(review): a completed pod (phase "Succeeded") would fail this check —
# presumably kube-system runs no one-shot pods; confirm.
assert all(
p["status"]["phase"] == "Running" for p in kube_system_pods
), "Kube-system not yet ready"

async def _refresh(app_name: str):
"""Refresh the application.
Expand All @@ -58,7 +72,7 @@ async def _refresh(app_name: str):
leader: juju.unit.Unit = app.units[leader_idx]
action = await leader.run_action("pre-upgrade-check")
await action.wait()
with_fault = f"Pre-upgrade of {app_name} failed with {yaml.safe_dump(action.results)}"
with_fault = f"Pre-upgrade of '{app_name}' failed with {yaml.safe_dump(action.results)}"
if app_name == "k8s":
# The k8s charm has a pre-upgrade-check action that works, k8s-worker does not.
assert action.status == "completed", with_fault
Expand All @@ -70,5 +84,6 @@ async def _refresh(app_name: str):
timeout=30 * 60,
)

await _wait_for_idle()
for app in charms:
await _refresh(app)

0 comments on commit 8490141

Please sign in to comment.