Skip to content

Commit

Permalink
[DPE-5558] Break CA rotation into integration test groups (#458)
Browse files Browse the repository at this point in the history
Currently, we are seeing a lot of timeouts in CA rotation testing.
Splitting the tests between small and large deployments and running them
on parallel runners will help reduce the overall duration.
  • Loading branch information
phvalguima authored Sep 27, 2024
1 parent a62f180 commit c9edade
Showing 1 changed file with 65 additions and 82 deletions.
147 changes: 65 additions & 82 deletions tests/integration/tls/test_ca_rotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,32 @@

APP_UNITS = {MAIN_APP: 3, FAILOVER_APP: 1, DATA_APP: 1}

# Names of the two deployment flavours. Each name is reused below as the
# parametrize value, the test id and the pytest group.
SMALL_DEPLOYMENT = "small"
LARGE_DEPLOYMENT = "large"

# Maps each deployment name to a ready-made ``pytest.param`` carrying the
# group mark and the runner mark for that deployment. The large deployment
# is sent to an "xlarge" runner; the small one to a "large" runner.
ALL_GROUPS = {
    kind: pytest.param(
        kind,
        id=kind,
        marks=[
            pytest.mark.group(kind),
            pytest.mark.runner(
                ["self-hosted", "linux", "X64", "jammy"]
                + ["xlarge" if kind == LARGE_DEPLOYMENT else "large"]
            ),
        ],
    )
    for kind in (LARGE_DEPLOYMENT, SMALL_DEPLOYMENT)
}

# Flat list of the params above, in insertion order (large first, then small),
# suitable for passing straight to ``pytest.mark.parametrize``.
ALL_DEPLOYMENTS = [*ALL_GROUPS.values()]

@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
@pytest.mark.group(1)

@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
@pytest.mark.group(SMALL_DEPLOYMENT)
@pytest.mark.abort_on_fail
@pytest.mark.skip_if_deployed
async def test_build_and_deploy_active(ops_test: OpsTest) -> None:
Expand Down Expand Up @@ -71,69 +94,10 @@ async def test_build_and_deploy_active(ops_test: OpsTest) -> None:


@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_rollout_new_ca(ops_test: OpsTest) -> None:
"""Test that the cluster restarted and functional after processing a new CA certificate"""
c_writes = ContinuousWrites(ops_test, APP_NAME)
await c_writes.start()

# trigger a rollout of the new CA by changing the config on TLS Provider side
new_config = {"ca-common-name": "NEW_CA"}
await ops_test.model.applications[TLS_CERTIFICATES_APP_NAME].set_config(new_config)

start_count = await c_writes.count()

await wait_until(
ops_test,
apps=[APP_NAME],
apps_statuses=["active"],
units_statuses=["active"],
timeout=1800,
idle_period=60,
wait_for_exact_units=len(UNIT_IDS),
)

# Check if the continuous-writes client works with the new certs as well
with open(ContinuousWrites.CERT_PATH, "r") as f:
orig_cert = f.read()
await c_writes.stop()

await c_writes.start() # Forces the Cont. Writes to pick the new cert

with open(ContinuousWrites.CERT_PATH, "r") as f:
new_cert = f.read()

assert orig_cert != new_cert, "New cert was not picked up"
await asyncio.sleep(30)
final_count = await c_writes.count()
await c_writes.stop()
assert final_count > start_count, "Writes have not continued during CA rotation"

# using the SSL API requires authentication with app-admin cert and key
leader_unit_ip = await get_leader_unit_ip(ops_test)
url = f"https://{leader_unit_ip}:9200/_plugins/_security/api/ssl/certs"
admin_secret = await get_secret_by_label(ops_test, "opensearch:app:app-admin")

with open("admin.cert", "w") as cert:
cert.write(admin_secret["cert"])

with open("admin.key", "w") as key:
key.write(admin_secret["key"])

response = requests.get(url, cert=("admin.cert", "admin.key"), verify=False)
data = response.json()
assert new_config["ca-common-name"] in data["http_certificates_list"][0]["issuer_dn"]


@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
@pytest.mark.group(1)
@pytest.mark.group(LARGE_DEPLOYMENT)
@pytest.mark.abort_on_fail
async def test_build_large_deployment(ops_test: OpsTest) -> None:
"""Setup a large deployments cluster."""
# remove the existing application
await ops_test.model.remove_application(APP_NAME, block_until_done=True)

# deploy new cluster
my_charm = await ops_test.build_charm(".")
await asyncio.gather(
Expand Down Expand Up @@ -162,6 +126,11 @@ async def test_build_large_deployment(ops_test: OpsTest) -> None:
series=SERIES,
config={"cluster_name": CLUSTER_NAME, "init_hold": True, "roles": "data"},
),
ops_test.model.deploy(
TLS_CERTIFICATES_APP_NAME,
channel="stable",
config={"ca-common-name": "CN_CA"},
),
)

# integrate TLS to all applications
Expand All @@ -188,33 +157,47 @@ async def test_build_large_deployment(ops_test: OpsTest) -> None:
)


@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
@pytest.mark.group(1)
@pytest.mark.parametrize("deploy_type", ALL_DEPLOYMENTS)
@pytest.mark.abort_on_fail
async def test_rollout_new_ca_large_deployment(ops_test: OpsTest) -> None:
async def test_rollout_new_ca(ops_test: OpsTest, deploy_type) -> None:
"""Repeat the CA rotation test for the large deployment."""
c_writes = ContinuousWrites(ops_test, DATA_APP)
if deploy_type == SMALL_DEPLOYMENT:
app = APP_NAME
else:
app = DATA_APP
c_writes = ContinuousWrites(ops_test, app)
await c_writes.start()

# trigger a rollout of the new CA by changing the config on TLS Provider side
new_config = {"ca-common-name": "EVEN_NEWER_CA"}
new_config = {"ca-common-name": "NEW_CA"}
await ops_test.model.applications[TLS_CERTIFICATES_APP_NAME].set_config(new_config)

start_count = await c_writes.count()

await wait_until(
ops_test,
apps=[MAIN_APP, DATA_APP, FAILOVER_APP],
apps_full_statuses={
MAIN_APP: {"active": []},
DATA_APP: {"active": []},
FAILOVER_APP: {"active": []},
},
units_statuses=["active"],
wait_for_exact_units={app: units for app, units in APP_UNITS.items()},
timeout=2400,
idle_period=IDLE_PERIOD,
)
if deploy_type == SMALL_DEPLOYMENT:
await wait_until(
ops_test,
apps=[APP_NAME],
apps_statuses=["active"],
units_statuses=["active"],
wait_for_exact_units=len(UNIT_IDS),
timeout=2400,
idle_period=IDLE_PERIOD,
)
else:
await wait_until(
ops_test,
apps=[MAIN_APP, DATA_APP, FAILOVER_APP],
apps_full_statuses={
MAIN_APP: {"active": []},
DATA_APP: {"active": []},
FAILOVER_APP: {"active": []},
},
units_statuses=["active"],
wait_for_exact_units={app: units for app, units in APP_UNITS.items()},
timeout=2400,
idle_period=IDLE_PERIOD,
)

# Check if the continuous-writes client works with the new certs as well
with open(ContinuousWrites.CERT_PATH, "r") as f:
Expand All @@ -233,9 +216,9 @@ async def test_rollout_new_ca_large_deployment(ops_test: OpsTest) -> None:
assert final_count > start_count, "Writes have not continued during CA rotation"

# using the SSL API requires authentication with app-admin cert and key
leader_unit_ip = await get_leader_unit_ip(ops_test, DATA_APP)
leader_unit_ip = await get_leader_unit_ip(ops_test, app)
url = f"https://{leader_unit_ip}:9200/_plugins/_security/api/ssl/certs"
admin_secret = await get_secret_by_label(ops_test, "opensearch-data:app:app-admin")
admin_secret = await get_secret_by_label(ops_test, f"{app}:app:app-admin")

with open("admin.cert", "w") as cert:
cert.write(admin_secret["cert"])
Expand Down

0 comments on commit c9edade

Please sign in to comment.