diff --git a/tests/integration/upgrades/helpers.py b/tests/integration/upgrades/helpers.py index 77cbecbb1..c5f489c2f 100644 --- a/tests/integration/upgrades/helpers.py +++ b/tests/integration/upgrades/helpers.py @@ -6,6 +6,7 @@ import subprocess from typing import Optional +import pytest from pytest_operator.plugin import OpsTest from tenacity import Retrying, stop_after_attempt, wait_fixed @@ -17,12 +18,43 @@ from ..helpers import APP_NAME, IDLE_PERIOD, app_name, run_action from ..helpers_deployments import get_application_units, wait_until +OPENSEARCH_ORIGINAL_CHARM_NAME = "opensearch" OPENSEARCH_SERVICE_PATH = "/etc/systemd/system/snap.opensearch.daemon.service" ORIGINAL_RESTART_DELAY = 20 SECOND_APP_NAME = "second-opensearch" RESTART_DELAY = 360 +OPENSEARCH_CHANNEL = "2/edge" + + +STARTING_VERSION = "2.15.0" + + +VERSION_TO_REVISION = { + STARTING_VERSION: 144, + "2.16.0": 160, +} + + +CHANNELS = ["edge", "beta", "2/stable"] + + +FROM_VERSION_PREFIX = "from_v{}_to_local" + + +UPGRADE_INITIAL_VERSION = [ + ( + pytest.param( + version, + id=FROM_VERSION_PREFIX.format(version), + marks=pytest.mark.group(FROM_VERSION_PREFIX.format(version)), + ) + ) + for version in VERSION_TO_REVISION.keys() +] + + logger = logging.getLogger(__name__) diff --git a/tests/integration/upgrades/test_manual_large_deployment_upgrades.py b/tests/integration/upgrades/test_manual_large_deployment_upgrades.py index 5aa9e85a8..15892095b 100644 --- a/tests/integration/upgrades/test_manual_large_deployment_upgrades.py +++ b/tests/integration/upgrades/test_manual_large_deployment_upgrades.py @@ -10,51 +10,68 @@ from ..ha.continuous_writes import ContinuousWrites from ..ha.helpers import assert_continuous_writes_consistency -from ..helpers import APP_NAME, IDLE_PERIOD, MODEL_CONFIG, SERIES, run_action -from ..helpers_deployments import get_application_units, wait_until +from ..helpers import ( + APP_NAME, + IDLE_PERIOD, + MODEL_CONFIG, + SERIES, + get_leader_unit_id, + run_action, + set_watermark, +) +from ..helpers_deployments import wait_until from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME +from .helpers import ( + OPENSEARCH_CHANNEL, + OPENSEARCH_ORIGINAL_CHARM_NAME, + STARTING_VERSION, + UPGRADE_INITIAL_VERSION, + VERSION_TO_REVISION, + assert_upgrade_to_local, + refresh, +) logger = logging.getLogger(__name__) -OPENSEARCH_ORIGINAL_CHARM_NAME = "opensearch" -OPENSEARCH_INITIAL_CHANNEL = "2/edge" OPENSEARCH_MAIN_APP_NAME = "main" OPENSEARCH_FAILOVER_APP_NAME = "failover" +REL_ORCHESTRATOR = "peer-cluster-orchestrator" +REL_PEER = "peer-cluster" charm = None WORKLOAD = { - APP_NAME: 3, - OPENSEARCH_FAILOVER_APP_NAME: 2, - OPENSEARCH_MAIN_APP_NAME: 1, + APP_NAME: 2, + OPENSEARCH_FAILOVER_APP_NAME: 1, + OPENSEARCH_MAIN_APP_NAME: 3, } -@pytest.mark.skip(reason="Fix with DPE-4528") -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -@pytest.mark.skip_if_deployed -async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None: - """Build and deploy the charm for large deployment tests.""" +####################################################################### +# +# Auxiliary functions +# +####################################################################### +async def _build_env(ops_test: OpsTest, version: str) -> None: + """Deploy OpenSearch cluster from a given revision.""" await ops_test.model.set_config(MODEL_CONFIG) # Deploy TLS Certificates operator. tls_config = {"ca-common-name": "CN_CA"} main_orchestrator_conf = { - "cluster_name": "backup-test", + "cluster_name": "upgrade-test", "init_hold": False, - "roles": "cluster_manager", + "roles": "cluster_manager,data", } failover_orchestrator_conf = { - "cluster_name": "backup-test", + "cluster_name": "upgrade-test", "init_hold": True, - "roles": "cluster_manager", + "roles": "cluster_manager,data", } - data_hot_conf = {"cluster_name": "backup-test", "init_hold": True, "roles": "data.hot"} + data_conf = {"cluster_name": "upgrade-test", "init_hold": True, "roles": "data"} await asyncio.gather( ops_test.model.deploy(TLS_CERTIFICATES_APP_NAME, channel="stable", config=tls_config), @@ -63,7 +80,7 @@ async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None application_name=OPENSEARCH_MAIN_APP_NAME, num_units=WORKLOAD[OPENSEARCH_MAIN_APP_NAME], series=SERIES, - channel=OPENSEARCH_INITIAL_CHANNEL, + channel=OPENSEARCH_CHANNEL, config=main_orchestrator_conf, ), ops_test.model.deploy( @@ -71,7 +88,7 @@ async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None application_name=OPENSEARCH_FAILOVER_APP_NAME, num_units=WORKLOAD[OPENSEARCH_FAILOVER_APP_NAME], series=SERIES, - channel=OPENSEARCH_INITIAL_CHANNEL, + channel=OPENSEARCH_CHANNEL, config=failover_orchestrator_conf, ), ops_test.model.deploy( @@ -79,22 +96,26 @@ async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None application_name=APP_NAME, num_units=WORKLOAD[APP_NAME], series=SERIES, - channel=OPENSEARCH_INITIAL_CHANNEL, - config=data_hot_conf, + channel=OPENSEARCH_CHANNEL, + config=data_conf, ), ) - # Large deployment setup - await ops_test.model.integrate("main:peer-cluster-orchestrator", "failover:peer-cluster") - await ops_test.model.integrate("main:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster") + # integrate TLS to all applications + for app in [OPENSEARCH_MAIN_APP_NAME, OPENSEARCH_FAILOVER_APP_NAME, APP_NAME]: + await ops_test.model.integrate(app, TLS_CERTIFICATES_APP_NAME) + + # create the peer-cluster-relation await ops_test.model.integrate( - "failover:peer-cluster-orchestrator", f"{APP_NAME}:peer-cluster" + f"{APP_NAME}:{REL_PEER}", f"{OPENSEARCH_MAIN_APP_NAME}:{REL_ORCHESTRATOR}" + ) + await ops_test.model.integrate( + f"{OPENSEARCH_FAILOVER_APP_NAME}:{REL_PEER}", + f"{OPENSEARCH_MAIN_APP_NAME}:{REL_ORCHESTRATOR}", + ) + await ops_test.model.integrate( + f"{APP_NAME}:{REL_PEER}", f"{OPENSEARCH_FAILOVER_APP_NAME}:{REL_ORCHESTRATOR}" ) - - # TLS setup - await ops_test.model.integrate("main", TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate("failover", TLS_CERTIFICATES_APP_NAME) - await ops_test.model.integrate(APP_NAME, TLS_CERTIFICATES_APP_NAME) # Charms except s3-integrator should be active await wait_until( @@ -117,18 +138,12 @@ async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None timeout=3600, ) + await set_watermark(ops_test, APP_NAME) -@pytest.mark.skip(reason="Fix with DPE-4528") -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_manually_upgrade_to_local( - ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner -) -> None: - """Test upgrade from usptream to currently locally built version.""" - units = await get_application_units(ops_test, OPENSEARCH_MAIN_APP_NAME) - leader_id = [u.id for u in units if u.is_leader][0] +async def _upgrade(ops_test: OpsTest, local_build: bool = False, revision: str = None) -> None: + app = OPENSEARCH_MAIN_APP_NAME + leader_id = await get_leader_unit_id(ops_test, app) action = await run_action( ops_test, leader_id, @@ -144,13 +159,14 @@ async def test_manually_upgrade_to_local( async with ops_test.fast_forward(): for app, unit_count in WORKLOAD.items(): - application = ops_test.model.applications[app] - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] + leader_id = get_leader_unit_id(ops_test, app) logger.info(f"Refresh app {app}, leader {leader_id}") - await application.refresh(path=charm) + if local_build: + await refresh(ops_test, app, path=charm) + else: + await refresh(ops_test, app, revision=revision) logger.info("Refresh is over, waiting for the charm to settle") if unit_count == 1: @@ -166,10 +182,13 @@ async def test_manually_upgrade_to_local( logger.info(f"Upgrade of app {app} finished") continue + # Wait until we are set in an idle state and can rollback the revision. + # app status blocked: that will happen if we are jumping N-2 versions in our test + # app status active: that will happen if we are jumping N-1 in our test await wait_until( ops_test, apps=[app], - apps_statuses=["blocked"], + apps_statuses=["active", "blocked"], units_statuses=["active"], wait_for_exact_units={ app: unit_count, @@ -197,9 +216,123 @@ async def test_manually_upgrade_to_local( ) logger.info(f"Upgrade of app {app} finished") + +####################################################################### +# +# Tests +# +####################################################################### +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) +@pytest.mark.group("happy_path_upgrade") +@pytest.mark.abort_on_fail +@pytest.mark.skip_if_deployed +async def test_large_deployment_deploy_original_charm(ops_test: OpsTest) -> None: + """Deploy OpenSearch.""" + await _build_env(ops_test, STARTING_VERSION) + + +@pytest.mark.group("happy_path_upgrade") +@pytest.mark.abort_on_fail +async def test_upgrade_between_versions( + ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner +) -> None: + """Test upgrade from upstream to currently locally built version.""" + for version, rev in VERSION_TO_REVISION.items(): + if version == STARTING_VERSION: + # We're starting in this version + continue + + logger.info(f"Upgrading to version {version}") + await _upgrade(ops_test, revision=rev) + + await _upgrade(ops_test, local_build=True) + # continuous writes checks + await assert_continuous_writes_consistency( + ops_test, + c_writes, + [APP_NAME, OPENSEARCH_MAIN_APP_NAME], + ) + + +################################################################################## +# +# test scenarios from each version: +# Start with each version, moving to local and then rolling back mid-upgrade +# Once this test passes, the 2nd test will rerun the upgrade, this time to +# its end. +# +################################################################################## +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"]) +@pytest.mark.parametrize("version", UPGRADE_INITIAL_VERSION) +@pytest.mark.abort_on_fail +@pytest.mark.skip_if_deployed +async def test_deploy_from_version(ops_test: OpsTest, version) -> None: + """Deploy OpenSearch.""" + await _build_env(ops_test, version) + + +@pytest.mark.parametrize("version", UPGRADE_INITIAL_VERSION) +@pytest.mark.abort_on_fail +async def test_upgrade_rollback_from_local( + ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner, version +) -> None: + """Test upgrade and rollback to each version available.""" + app = OPENSEARCH_MAIN_APP_NAME + leader_id = await get_leader_unit_id(ops_test, app) + action = await run_action( + ops_test, + leader_id, + "pre-upgrade-check", + app=OPENSEARCH_MAIN_APP_NAME, + ) + assert action.status == "completed" + + logger.info("Build charm locally") + global charm + if not charm: + charm = await ops_test.build_charm(".") + + async with ops_test.fast_forward(): + logger.info(f"Refresh app {app}, leader {leader_id}") + + async with ops_test.fast_forward(): + for app, unit_count in WORKLOAD.items(): + leader_id = get_leader_unit_id(ops_test, app) + + await refresh(ops_test, app, path=charm) + logger.info("Refresh is over, waiting for the charm to settle") + + # Wait until we are set in an idle state and can rollback the revision. + # app status blocked: that will happen if we are jumping N-2 versions in our test + # app status active: that will happen if we are jumping N-1 in our test + await wait_until( + ops_test, + apps=[app], + apps_statuses=["active", "blocked"], + units_statuses=["active"], + wait_for_exact_units={ + app: unit_count, + }, + idle_period=120, + timeout=3600, + ) + # continuous writes checks await assert_continuous_writes_consistency( ops_test, c_writes, [APP_NAME, OPENSEARCH_MAIN_APP_NAME], ) + + +@pytest.mark.parametrize("version", UPGRADE_INITIAL_VERSION) +@pytest.mark.abort_on_fail +async def test_upgrade_from_version_to_local( + ops_test: OpsTest, c_writes: ContinuousWrites, c_writes_runner, version +) -> None: + """Test upgrade from usptream to currently locally built version.""" + logger.info("Build charm locally") + global charm + if not charm: + charm = await ops_test.build_charm(".") + await assert_upgrade_to_local(ops_test, c_writes, charm) diff --git a/tests/integration/upgrades/test_small_deployment_upgrades.py b/tests/integration/upgrades/test_small_deployment_upgrades.py index cee4e6853..69a92d556 100644 --- a/tests/integration/upgrades/test_small_deployment_upgrades.py +++ b/tests/integration/upgrades/test_small_deployment_upgrades.py @@ -14,44 +14,25 @@ IDLE_PERIOD, MODEL_CONFIG, SERIES, + get_leader_unit_id, run_action, set_watermark, ) -from ..helpers_deployments import get_application_units, wait_until +from ..helpers_deployments import wait_until from ..tls.test_tls import TLS_CERTIFICATES_APP_NAME -from .helpers import assert_upgrade_to_local, refresh +from .helpers import ( + OPENSEARCH_CHANNEL, + OPENSEARCH_ORIGINAL_CHARM_NAME, + STARTING_VERSION, + UPGRADE_INITIAL_VERSION, + VERSION_TO_REVISION, + assert_upgrade_to_local, + refresh, +) logger = logging.getLogger(__name__) -OPENSEARCH_ORIGINAL_CHARM_NAME = "opensearch" -OPENSEARCH_CHANNEL = "2/edge" - - -STARTING_VERSION = "2.15.0" - - -VERSION_TO_REVISION = { - STARTING_VERSION: 144, - "2.16.0": 160, -} - - -FROM_VERSION_PREFIX = "from_v{}_to_local" - - -UPGRADE_INITIAL_VERSION = [ - ( - pytest.param( - version, - id=FROM_VERSION_PREFIX.format(version), - marks=pytest.mark.group(FROM_VERSION_PREFIX.format(version)), - ) - ) - for version in VERSION_TO_REVISION.keys() -] - - charm = None @@ -114,8 +95,7 @@ async def test_upgrade_between_versions( ) -> None: """Test upgrade from upstream to currently locally built version.""" app = (await app_name(ops_test)) or APP_NAME - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] + leader_id = get_leader_unit_id(ops_test, app) for version, rev in VERSION_TO_REVISION.items(): if version == STARTING_VERSION: @@ -212,8 +192,7 @@ async def test_upgrade_rollback_from_local( ) -> None: """Test upgrade and rollback to each version available.""" app = (await app_name(ops_test)) or APP_NAME - units = await get_application_units(ops_test, app) - leader_id = [u.id for u in units if u.is_leader][0] + leader_id = await get_leader_unit_id(ops_test, app) action = await run_action( ops_test,