From 341cf1371fac4663744136e6a7ebf5ba9d8fb78f Mon Sep 17 00:00:00 2001
From: Elena Bondarenko
Date: Mon, 6 Jan 2025 16:40:15 +0100
Subject: [PATCH] Add quota alerting

Signed-off-by: Elena Bondarenko
---
Reviewer notes (this section is not part of the commit message):
- framework: consumers whose ENV_DATA has no "quota" key, or whose quota is
  'unrestricted' / 'unlimited', are no longer reported as restricted;
  logger.DEBUG(...) replaced by logger.debug(...).
- storageconsumer: the quota value is converted to int before the
  percentage is computed; a malformed quota raises ValueError.
- conftest: the fixture enters the restricted-quota config context instead
  of using the returned index as a context manager, deletes the PVC in
  teardown and stores its results in its own measurement file.
- storage_clients.py: the added `config` import is not used by anything in
  this patch - please confirm it is needed.
- Hunk bodies were edited by hand; the `index` lines still carry the
  original blob ids.

 .../client_bm_upi_1az_rhcos_nvme_2w.yaml      |  1 +
 .../client_bm_upi_1az_rhcos_nvme_3w.yaml      |  1 +
 .../hypershift_client_bm_2w.yaml              |  1 +
 .../hypershift_client_bm_3w.yaml              |  1 +
 ocs_ci/framework/__init__.py                  | 37 ++++++++++
 ocs_ci/ocs/resources/storageconsumer.py       | 44 ++++++++++++
 ocs_ci/ocs/ui/page_objects/storage_clients.py |  2 +-
 tests/functional/monitoring/conftest.py       | 67 +++++++++++++++++++
 8 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_2w.yaml b/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_2w.yaml
index 9867702a59f..fe536705ecf 100644
--- a/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_2w.yaml
+++ b/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_2w.yaml
@@ -7,5 +7,6 @@ ENV_DATA:
   worker_replicas: 2
   mon_type: 'hostpath'
   osd_type: 'nvme'
+  quota: 'unrestricted'
 REPORTING:
   ocs_must_gather_image: "quay.io/rhceph-dev/ocs-must-gather"
diff --git a/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_3w.yaml b/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_3w.yaml
index 5ec19d69af8..53de90f7503 100644
--- a/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_3w.yaml
+++ b/conf/deployment/fusion_hci_pc/client_bm_upi_1az_rhcos_nvme_3w.yaml
@@ -7,5 +7,6 @@ ENV_DATA:
   worker_replicas: 3
   mon_type: 'hostpath'
   osd_type: 'nvme'
+  quota: 'unrestricted'
 REPORTING:
   ocs_must_gather_image: "quay.io/rhceph-dev/ocs-must-gather"
diff --git a/conf/deployment/fusion_hci_pc/hypershift_client_bm_2w.yaml b/conf/deployment/fusion_hci_pc/hypershift_client_bm_2w.yaml
index 73e942d82b8..f389510a9e0 100644
--- a/conf/deployment/fusion_hci_pc/hypershift_client_bm_2w.yaml
+++ b/conf/deployment/fusion_hci_pc/hypershift_client_bm_2w.yaml
@@ -5,6 +5,7 @@ ENV_DATA:
   worker_replicas: 2
   mon_type: 'hostpath'
   osd_type: 'ssd'
+  quota: 'unrestricted'
 REPORTING:
   ocs_must_gather_image: "quay.io/ocs-dev/ocs-must-gather"
   ocs_must_gather_latest_tag: 'latest'
diff --git a/conf/deployment/fusion_hci_pc/hypershift_client_bm_3w.yaml b/conf/deployment/fusion_hci_pc/hypershift_client_bm_3w.yaml
index 4b41d399e51..604c640c9d1 100644
--- a/conf/deployment/fusion_hci_pc/hypershift_client_bm_3w.yaml
+++ b/conf/deployment/fusion_hci_pc/hypershift_client_bm_3w.yaml
@@ -5,6 +5,7 @@ ENV_DATA:
   worker_replicas: 3
   mon_type: 'hostpath'
   osd_type: 'ssd'
+  quota: 'unrestricted'
 REPORTING:
   ocs_must_gather_image: "quay.io/ocs-dev/ocs-must-gather"
   ocs_must_gather_latest_tag: 'latest'
diff --git a/ocs_ci/framework/__init__.py b/ocs_ci/framework/__init__.py
index be332687753..4ed023303bc 100644
--- a/ocs_ci/framework/__init__.py
+++ b/ocs_ci/framework/__init__.py
@@ -290,6 +290,28 @@ def get_consumer_indexes_list(self):
 
         return consumer_indexes_list
 
+    def get_consumer_with_resticted_quota_index(self):
+        """
+        Get the consumer cluster index
+        of the first consumer which has quota restrictions
+
+        A consumer is treated as unrestricted when its ENV_DATA has no
+        "quota" key or the value is "unrestricted" or "unlimited".
+
+        Returns:
+            int: index of the first consumer cluster with restricted quota
+
+        Raises:
+            ClusterNotFoundException: if no consumer has restricted quota
+
+        """
+        for index in self.get_consumer_indexes_list():
+            # "quota" may be missing from a cluster's ENV_DATA; avoid a KeyError
+            quota = self.clusters[index].ENV_DATA.get("quota")
+            if quota not in (None, "unrestricted", "unlimited"):
+                return index
+        raise ClusterNotFoundException("Didn't find any consumer with resticted quota")
+
     def get_cluster_index_by_name(self, cluster_name):
         """
         Get the cluster index by the cluster name
@@ -497,6 +519,21 @@ def __init__(self):
                 switch_index = config.cur_index
             super().__init__(switch_index)
 
+    class RunWithRestrictedQuotaConsumerConfigContextIfAvailable(RunWithConfigContext):
+        """
+        Context manager that makes sure that a given code block is executed
+        on a Consumer with restricted quota.
+        If such config is not available, then run with current config context.
+        """
+
+        def __init__(self):
+            try:
+                switch_index = config.get_consumer_with_resticted_quota_index()
+            except ClusterNotFoundException:
+                logger.debug("No consumer with restricted quota found")
+                switch_index = config.cur_index
+            super().__init__(switch_index)
+
     class RunWithFirstConsumerConfigContextIfAvailable(RunWithConfigContext):
         """
         Context manager that makes sure that a given code block is executed on First consumer.
diff --git a/ocs_ci/ocs/resources/storageconsumer.py b/ocs_ci/ocs/resources/storageconsumer.py
index a3b3853d9b4..9fc4ab98fed 100644
--- a/ocs_ci/ocs/resources/storageconsumer.py
+++ b/ocs_ci/ocs/resources/storageconsumer.py
@@ -6,6 +6,7 @@
 
 from ocs_ci.framework import config
 from ocs_ci.ocs import constants, ocp
+from ocs_ci.helpers import helpers
 from ocs_ci.ocs.resources.ocs import OCS
 from ocs_ci.utility.utils import exec_cmd
 
@@ -121,6 +122,49 @@ def get_heartbeat_cronjob(self):
         ][0]
         return cronjob
 
+    def fill_up_quota_percentage(self, percentage, quota=None):
+        """
+        Create a PVC of such size that the correct percentage of quota is used
+
+        Args:
+            percentage (int): percentage of the quota the PVC should use
+            quota (str): quota in "<integer> <units>" form; when not set,
+                config.ENV_DATA["quota"] is used
+
+        Returns:
+            PVC object
+
+        Raises:
+            ValueError: if quota is not in "<integer> <units>" form
+
+        """
+        pvc_name = f"pvc-quota-{percentage}"
+        if not quota:
+            quota = config.ENV_DATA["quota"]
+        try:
+            quota_value, quota_units = quota.split()
+            # the value is parsed from text; convert it before the arithmetic
+            quota_value = int(quota_value)
+        except ValueError as err:
+            raise ValueError(
+                f"Quota '{quota}' is not in '<integer> <units>' format"
+            ) from err
+        pvc_size_int = quota_value * percentage // 100
+        pvc_size = f"{pvc_size_int}{quota_units}"
+        rbd_storageclass = helpers.default_storage_class(constants.CEPHBLOCKPOOL)
+        pvc_obj = helpers.create_pvc(
+            pvc_name=pvc_name,
+            # NOTE(review): create_pvc presumably expects a storage class name;
+            # use the object's name when it has one, else pass the value as is
+            sc_name=getattr(rbd_storageclass, "name", rbd_storageclass),
+            namespace="default",
+            size=pvc_size,
+            do_reload=False,
+            access_mode=constants.ACCESS_MODE_RWO,
+            volume_mode=constants.VOLUME_MODE_BLOCK,
+        )
+        return pvc_obj
+
 
 def get_all_client_clusters():
     """
diff --git a/ocs_ci/ocs/ui/page_objects/storage_clients.py b/ocs_ci/ocs/ui/page_objects/storage_clients.py
index 9c5af6f72e8..7eedd83802b 100644
--- a/ocs_ci/ocs/ui/page_objects/storage_clients.py
+++ b/ocs_ci/ocs/ui/page_objects/storage_clients.py
@@ -1,6 +1,6 @@
 import logging
 from selenium.common.exceptions import WebDriverException
-
+from ocs_ci.framework import config
 from ocs_ci.ocs.ui.base_ui import take_screenshot, copy_dom, BaseUI
 
 logger = logging.getLogger(__name__)
diff --git a/tests/functional/monitoring/conftest.py b/tests/functional/monitoring/conftest.py
index 33aacc2ab03..4f3405ef607 100644
--- a/tests/functional/monitoring/conftest.py
+++ b/tests/functional/monitoring/conftest.py
@@ -1209,3 +1209,70 @@ def teardown():
     teardown()
 
     return measured_op
+
+
+@pytest.fixture
+def measure_fill_up_client_quota(
+    request,
+    measurement_dir,
+    threading_lock,
+):
+    """
+    Create a PVC on the client cluster where quota is restricted
+    to reach 80% of the quota, measure the time when it was created and
+    alerts that were triggered during this event.
+
+    Returns:
+        dict: Contains information about `start` and `stop` time
+        for creating and then deleting the PVC
+    """
+    logger.info("Switch to client cluster with restricted quota")
+    with config.RunWithRestrictedQuotaConsumerConfigContextIfAvailable():
+        client_cluster = config.cluster_ctx.MULTICLUSTER["multicluster_index"]
+        logger.info(f"Client cluster key: {client_cluster}")
+        cluster_id = exec_cmd(
+            "oc get clusterversion version -o jsonpath='{.spec.clusterID}'"
+        ).stdout.decode("utf-8")
+        client_name = f"storageconsumer-{cluster_id}"
+        client = storageconsumer.StorageConsumer(
+            client_name, consumer_context=client_cluster
+        )
+        # read the quota while the client cluster config is the current one
+        quota = config.ENV_DATA["quota"]
+    pvc = None
+
+    def use_up_quota_80_percent():
+        nonlocal pvc
+        # enter the client cluster context here as well: this function is
+        # called outside the setup block above
+        with config.RunWithRestrictedQuotaConsumerConfigContextIfAvailable():
+            pvc = client.fill_up_quota_percentage(percentage=80, quota=quota)
+        # run_time of operation
+        run_time = 60 * 3
+        logger.info(f"Waiting for {run_time} seconds")
+        time.sleep(run_time)
+
+    def teardown():
+        nonlocal pvc
+        # called explicitly below and again as a finalizer; nothing to do if
+        # the PVC was never created or has already been removed
+        if pvc is None:
+            return
+        with config.RunWithRestrictedQuotaConsumerConfigContextIfAvailable():
+            pvc.delete()
+            pvc.ocp.wait_for_delete(resource_name=pvc.name, timeout=180)
+        pvc = None
+
+    request.addfinalizer(teardown)
+
+    test_file = os.path.join(measurement_dir, "measure_fill_up_client_quota.json")
+    measured_op = measure_operation(
+        use_up_quota_80_percent,
+        test_file,
+        threading_lock=threading_lock,
+        metadata={"client_name": client_name},
+    )
+
+    teardown()
+
+    return measured_op