Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DPE-2710 - test: add storage scale down/up storage re-use test #52

Merged
merged 3 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ maintainers:
containers:
zookeeper:
resource: zookeeper-image
mounts:
- storage: zookeeper
location: /var/lib/zookeeper

resources:
zookeeper-image:
Expand Down Expand Up @@ -44,7 +47,7 @@ requires:
optional: true

storage:
data:
zookeeper:
type: filesystem
description: Directories where snapshot and transaction data is stored
minimum-size: 10G
Expand Down
31 changes: 31 additions & 0 deletions tests/integration/ha/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,3 +524,34 @@ async def delete_pod(ops_test, unit_name: str) -> None:
)

await wait_idle(ops_test)


def get_transaction_logs_and_snapshots(
    ops_test, unit_name: str, container_name: str = CONTAINER
) -> dict[str, list[str]]:
    """Lists the transaction log and snapshot files present on a unit.

    Runs `ls -1` inside the unit's pod via `kubectl exec`, once for the
    transaction-log directory and once for the snapshot directory.

    Args:
        ops_test: OpsTest
        unit_name: the Juju unit to list files from, e.g. `app/0`
        container_name: the container to run command on
            Defaults to the module-level `CONTAINER` constant

    Returns:
        Dict of keys "transactions", "snapshots" and value of list of filenames

    Raises:
        subprocess.CalledProcessError: if a `kubectl exec` call exits non-zero
    """
    # the pod name is the unit name with '/' swapped for '-'
    pod_name = unit_name.replace("/", "-")
    namespace = ops_test.model.info.name

    def _list_dir(path: str) -> list[str]:
        """Returns the filenames found at `path` inside the unit's container."""
        # an argument list (no shell) keeps names from being re-parsed by a shell
        return subprocess.check_output(
            [
                "kubectl",
                "exec",
                pod_name,
                "-c",
                container_name,
                "-n",
                namespace,
                "--",
                "ls",
                "-1",
                path,
            ],
            stderr=subprocess.PIPE,
            universal_newlines=True,
        ).splitlines()

    return {
        "transactions": _list_dir("/var/lib/zookeeper/data-log/version-2"),
        "snapshots": _list_dir("/var/lib/zookeeper/data/version-2"),
    }
83 changes: 83 additions & 0 deletions tests/integration/ha/test_ha.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,89 @@ async def test_deploy_active(ops_test: OpsTest):
await helpers.wait_idle(ops_test)


@pytest.mark.abort_on_fail
async def test_scale_down_up_data(ops_test: OpsTest, request):
    """Tests unit scale-down + up returns with data.

    Flow:
        1. start continuous writes and confirm znodes are being created
        2. record the transaction log + snapshot filenames on one unit
        3. scale down by one unit and confirm writes keep increasing
        4. scale back up, then stop continuous writes
        5. confirm the re-added host reports the same last write and count
        6. confirm every file recorded in step 2 is still present on that unit

    Args:
        ops_test: OpsTest
        request: pytest request fixture, its node name is used as the parent
            passed to the continuous-writes helpers
    """
    hosts = helpers.get_hosts(ops_test)
    password = helpers.get_super_password(ops_test)
    parent = request.node.name
    current_scale = len(hosts.split(","))
    # NOTE(review): presumably the highest-sorting unit is the one removed on scale-down.
    # The sort is lexicographic on the unit name - confirm this holds for 10+ units.
    scaling_unit_name = sorted(
        [unit.name for unit in ops_test.model.applications[helpers.APP_NAME].units]
    )[-1]

    logger.info("Starting continuous_writes...")
    cw.start_continuous_writes(
        parent=parent, hosts=hosts, username=helpers.USERNAME, password=password
    )

    # the finally guarantees the writer is stopped even if a check or scale operation fails
    try:
        await asyncio.sleep(CLIENT_TIMEOUT * 3)  # letting client set up and start writing

        logger.info("Checking writes are running at all...")
        assert cw.count_znodes(
            parent=parent, hosts=hosts, username=helpers.USERNAME, password=password
        )

        logger.info("Getting transaction and snapshot files...")
        current_files = helpers.get_transaction_logs_and_snapshots(
            ops_test, unit_name=scaling_unit_name
        )

        logger.info(f"Scaling down to {current_scale - 1} units...")
        await ops_test.model.applications[helpers.APP_NAME].scale(current_scale - 1)
        await helpers.wait_idle(ops_test, units=current_scale - 1)

        surviving_hosts = helpers.get_hosts(ops_test)

        logger.info("Checking writes are increasing...")
        writes = cw.count_znodes(
            parent=parent, hosts=surviving_hosts, username=helpers.USERNAME, password=password
        )
        await asyncio.sleep(CLIENT_TIMEOUT * 3)  # increasing writes
        new_writes = cw.count_znodes(
            parent=parent, hosts=surviving_hosts, username=helpers.USERNAME, password=password
        )
        assert new_writes > writes, "writes not continuing to ZK"

        logger.info(f"Scaling back up to {current_scale} units...")
        await ops_test.model.applications[helpers.APP_NAME].scale(current_scale)
        await helpers.wait_idle(ops_test, units=current_scale)
    finally:
        logger.info("Stopping continuous_writes...")
        cw.stop_continuous_writes()

    logger.info("Counting writes on surviving units...")
    last_write = cw.get_last_znode(
        parent=parent, hosts=surviving_hosts, username=helpers.USERNAME, password=password
    )
    total_writes = cw.count_znodes(
        parent=parent, hosts=surviving_hosts, username=helpers.USERNAME, password=password
    )
    assert last_write == total_writes

    logger.info("Checking new unit caught up...")
    # the new host is whichever host is listed now but was absent after the scale-down
    new_host = max(set(helpers.get_hosts(ops_test).split(",")) - set(surviving_hosts.split(",")))
    last_write_new = cw.get_last_znode(
        parent=parent, hosts=new_host, username=helpers.USERNAME, password=password
    )
    total_writes_new = cw.count_znodes(
        parent=parent, hosts=new_host, username=helpers.USERNAME, password=password
    )
    assert last_write == last_write_new
    assert total_writes == total_writes_new

    logger.info("Getting new transaction and snapshot files...")
    new_files = helpers.get_transaction_logs_and_snapshots(ops_test, unit_name=scaling_unit_name)

    # zookeeper rolls snapshots + txn logs when a unit re-joins, meaning we can't check log timestamps
    # checking file existence ensures re-use, as new files will have a different file suffix
    # if storage wasn't re-used, there would be no files with the original suffix
    for txn_log in current_files["transactions"]:
        assert txn_log in new_files["transactions"], "storage not re-used, missing txn logs"

    for snapshot in current_files["snapshots"]:
        assert snapshot in new_files["snapshots"], "storage not re-used, missing snapshots"


@pytest.mark.abort_on_fail
async def test_pod_reschedule(ops_test: OpsTest, request):
"""Forcefully reschedules ZooKeeper pod."""
Expand Down