From fa1fdd6e3ff2018c8dea88dba7719c7bb981e764 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Fri, 5 Apr 2024 17:28:15 +0530 Subject: [PATCH 01/16] Adding tests to automate RHSTOR-5149 Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 38 +++++++++++++++++ ...t_set_recovery_profile_to_favour_new_io.py | 42 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 493d13ee132..83f2fb03298 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4783,3 +4783,41 @@ def is_rbd_default_storage_class(custom_sc=None): logger.error("Storageclass {default_rbd_sc} is not a default RBD StorageClass.") return False + + +def odf_cli_set_recover_profile(recovery_profile): + """ + Set the recovery profile for a Ceph service. + + Args: + recovery_profile (str): The recovery profile name (balanced or high_client_ops or high_recovery_ops) + """ + from pathlib import Path + + if not Path(constants.CLI_TOOL_LOCAL_PATH).exists(): + retrieve_cli_binary(cli_type="odf") + + logger.info(f"Setting ceph recovery profile {recovery_profile} using odf-cli tool.") + cmd = ( + f"{constants.CLI_TOOL_LOCAL_PATH} --kubeconfig {os.getenv('KUBECONFIG')} " + f" set recovery-profile {recovery_profile}" + ) + + logger.info(cmd) + return exec_cmd(cmd, use_shell=True) + + +def get_ceph_recovery_profile(): + """ + Return CEPH recover profile + + """ + + #Fetchhing recovery profile from ceph config + toolbox = pod.get_ceph_tools_pod() + ceph_cmd = "ceph config get osd osd_mclock_profile" + + ceph_output = toolbox.exec_ceph_cmd(ceph_cmd) + + return ceph_output + diff --git a/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py b/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py new file mode 100644 index 00000000000..9a9828cdaaf --- /dev/null +++ b/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py @@ -0,0 +1,42 @@ +import pytest +import logging + +from ocs_ci.helpers.helpers import ( + odf_cli_set_recover_profile, + get_ceph_recovery_profile, +) +from ocs_ci.framework.pytest_customization.marks import brown_squad +from ocs_ci.framework.testlib import tier1 + +log = logging.getLogger(__name__) + + +@brown_squad +@tier1 +class TestRecoveryProfileInCeph: + @pytest.mark.polarion_id("OCS-XXXX") + @pytest.mark.parametrize( + argnames=["recovery_profile"], + argvalues=[ + pytest.param("balanced"), + pytest.param("high_client_ops"), + pytest.param("high_recovery_ops"), + ], + ) + def test_set_recovery_profile_odfcli(self, recovery_profile): + """ + Test setting the recovery profile by ODF CLI. + Steps: + 1. Set recovery-profile using ODF cli tool + 2. 
Verify recovery profile from the ceph toolbox pod + """ + + # Setting up and verifying the recovery profile value with the odf CLI tool + + assert odf_cli_set_recover_profile(recovery_profile) + log.info("Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool.") + a = get_ceph_recovery_profile() + log.info (f"Applied recovery profile on ceph cluster is {a}") + assert ( + recovery_profile == get_ceph_recovery_profile() + ), f"Recovery profile set by ODF CLI ({recovery_profile}) does not match with the value reported by Ceph" From 22b81a1f4c37899ce5450e25b96a5094e4373d33 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Fri, 5 Apr 2024 21:00:36 +0530 Subject: [PATCH 02/16] Fixing tox issues Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 2 +- .../odf-cli/test_set_recovery_profile_to_favour_new_io.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 83f2fb03298..404f705c531 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4813,7 +4813,7 @@ def get_ceph_recovery_profile(): """ - #Fetchhing recovery profile from ceph config + # Fetchhing recovery profile from ceph config toolbox = pod.get_ceph_tools_pod() ceph_cmd = "ceph config get osd osd_mclock_profile" diff --git a/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py b/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py index 9a9828cdaaf..6e5fc52af4b 100644 --- a/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py +++ b/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py @@ -34,9 +34,11 @@ def test_set_recovery_profile_odfcli(self, recovery_profile): # Setting up and verifying the recovery profile value with the odf CLI tool assert odf_cli_set_recover_profile(recovery_profile) - log.info("Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool.") + log.info( + "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." + ) a = get_ceph_recovery_profile() - log.info (f"Applied recovery profile on ceph cluster is {a}") + log.info(f"Applied recovery profile on ceph cluster is {a}") assert ( recovery_profile == get_ceph_recovery_profile() ), f"Recovery profile set by ODF CLI ({recovery_profile}) does not match with the value reported by Ceph" From 04cb9b89b1d88c31baa07f3b2049350d10518525 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Tue, 4 Jun 2024 14:08:38 +0530 Subject: [PATCH 03/16] Fixing merge conflict Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 2 +- tests/functional/odf-cli/test_state.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tests/functional/odf-cli/test_state.py diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 404f705c531..b8d8d333f8e 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4790,7 +4790,7 @@ def odf_cli_set_recover_profile(recovery_profile): Set the recovery profile for a Ceph service. 
Args: - recovery_profile (str): The recovery profile name (balanced or high_client_ops or high_recovery_ops) + recovery_profile (str): The recovery profile name (balanced or high_client_ops or high_recovery_ops ) """ from pathlib import Path diff --git a/tests/functional/odf-cli/test_state.py b/tests/functional/odf-cli/test_state.py new file mode 100644 index 00000000000..e69de29bb2d From 41b53fe2d7f19dfbe7c9004ae7a6e085a7dd65e1 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Fri, 7 Jun 2024 16:19:01 +0530 Subject: [PATCH 04/16] Fixing merge conflict Signed-off-by: pintojoy --- .../cluster_expansion/test_add_capacity.py | 71 ++++++++- .../nodes/test_node_replacement_proactive.py | 146 +++++++++++++++++- 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index 4d7eb6580bd..0b832c975dd 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -2,6 +2,7 @@ import logging from ocs_ci.framework import config +import concurrent.futures from ocs_ci.framework.pytest_customization.marks import ( polarion_id, pre_upgrade, @@ -26,6 +27,12 @@ cloud_platform_required, ) from ocs_ci.ocs import constants +from ocs_ci.utility.utils import TimeoutSampler, ceph_health_check_base +from ocs_ci.helpers.managed_services import ( + get_used_capacity, + verify_osd_used_capacity_greater_than_expected, +) + from ocs_ci.ocs.ocp import OCP from ocs_ci.ocs.resources.pod import ( get_osd_pods, @@ -35,6 +42,7 @@ from ocs_ci.ocs.cluster import ( check_ceph_health_after_add_capacity, is_flexible_scaling_enabled, + CephCluster, ) from ocs_ci.ocs.resources.storage_cluster import ( get_device_class, @@ -50,7 +58,16 @@ logger = logging.getLogger(__name__) -def add_capacity_test(ui_flag=False): +@pytest.mark.polarion_id("OCS-XXXX") +@pytest.mark.parametrize( + argnames=["recovery_profile"], + argvalues=[ + pytest.param("balanced"), + pytest.param("high_client_ops"), + pytest.param("high_recovery_ops"), + ], +) +def add_capacity_test(recovery_profile, multi_pvc_factory, ui_flag=False): """ Add capacity on non-lso cluster @@ -58,6 +75,58 @@ def add_capacity_test(ui_flag=False): ui_flag(bool): add capacity via ui [true] or via cli [false] """ + ceph_cluster = CephCluster() + pvc_count = 20 + ceph_capacity = int(ceph_cluster.get_ceph_capacity()) + size = int((ceph_capacity * 0.4) / pvc_count) + filesize = int(size * 0.8) + # Change the file size to MB for the FIO function + file_size = f"{filesize * constants.GB2MB}M" + + pvc_objs = multi_pvc_factory( + interface=constants.CEPHFILESYSTEM, + size=size, + num_of_pvc=pvc_count, + ) + pod_objs = list() + + log.info(f"filee{size}") + + for pvc_obj in pvc_objs: + pod_objs.append(pod_factory(pvc=pvc_obj)) + + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + futures_fio = [] + for pod in pod_objs: + futures_fio.append( + executor.submit( + pod.run_io, + storage_type="fs", + size=file_size, + invalidate=0, + bs="512K", + runtime=2100, + timeout=3300, + jobs=1, + readwrite="readwrite", + ) + ) + for _ in concurrent.futures.as_completed(futures_fio): + log.info("Some pod submitted FIO") + concurrent.futures.wait(futures_fio) + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + + get_used_capacity("After filling up the cluster") + sample = TimeoutSampler( + timeout=3600, + sleep=300, + 
func=verify_osd_used_capacity_greater_than_expected, + expected_used_capacity=30.0, + ) + if not sample.wait_for_func_status(result=True): + log.error("After 60 seconds the used capacity smaller than 30%") + raise TimeoutExpiredError + osd_size = storage_cluster.get_osd_size() existing_osd_pods = get_osd_pods() existing_osd_pod_names = [pod.name for pod in existing_osd_pods] diff --git a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py index a5145d01105..975e8c1864f 100644 --- a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py +++ b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py @@ -2,8 +2,11 @@ import pytest import random +import time from ocs_ci.framework import config +import concurrent.futures +from ocs_ci.ocs.cluster import CephCluster from ocs_ci.ocs.resources import pod from ocs_ci.framework.testlib import ( tier4a, @@ -14,6 +17,7 @@ from ocs_ci.ocs import constants, node from ocs_ci.ocs.cluster import CephCluster, is_lso_cluster, is_ms_provider_cluster from ocs_ci.ocs.resources.storage_cluster import osd_encryption_verification +from ocs_ci.utility.utils import TimeoutSampler, ceph_health_check_base from ocs_ci.framework.pytest_customization.marks import ( skipif_managed_service, skipif_hci_provider_and_client, @@ -26,6 +30,10 @@ ) from ocs_ci.helpers.helpers import verify_storagecluster_nodetopology from ocs_ci.helpers.sanity_helpers import Sanity +from ocs_ci.helpers.managed_services import ( + get_used_capacity, + verify_osd_used_capacity_greater_than_expected, +) log = logging.getLogger(__name__) @@ -197,20 +205,156 @@ def init_sanity(self): """ self.sanity_helpers = Sanity() + @pytest.mark.polarion_id("OCS-XXXX") + @pytest.mark.parametrize( + argnames=["recovery_profile"], + argvalues=[ + pytest.param("balanced"), + pytest.param("high_client_ops"), + pytest.param("high_recovery_ops"), + ], + ) def test_nodereplacement_proactive_with_io_running( self, + recovery_profile, pvc_factory, pod_factory, dc_pod_factory, bucket_factory, rgw_bucket_factory, + multi_pvc_factory, ): """ Knip-894 Node Replacement proactive when IO running in the background """ + ceph_cluster = CephCluster() + pvc_count = 20 + ceph_capacity = int(ceph_cluster.get_ceph_capacity()) + size = int((ceph_capacity * 0.4) / pvc_count) + filesize = int(size * 0.8) + # Change the file size to MB for the FIO function + file_size = f"{filesize * constants.GB2MB}M" + + pvc_objs = multi_pvc_factory( + interface=constants.CEPHFILESYSTEM, + size=size, + num_of_pvc=pvc_count, + ) + pod_objs = list() + + log.info(f"filee{size}") + + for pvc_obj in pvc_objs: + pod_objs.append(pod_factory(pvc=pvc_obj)) + + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + futures_fio = [] + for pod in pod_objs: + futures_fio.append( + executor.submit( + pod.run_io, + storage_type="fs", + size=file_size, + invalidate=0, + bs="512K", + runtime=2100, + timeout=3300, + jobs=1, + readwrite="readwrite", + ) + ) + for _ in concurrent.futures.as_completed(futures_fio): + log.info("Some pod submitted FIO") + concurrent.futures.wait(futures_fio) + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + """ + futures_results = [] + for pod in pod_objs: + futures_results.append(executor.submit(pod.get_fio_results(timeout=3600))) + for _ in concurrent.futures.as_completed(futures_results): + log.info("Just waiting for fio jobs results") + concurrent.futures.wait(futures_results) + for 
pod_obj in pod_objs: + file_name = f"{pod_obj.name}-node_replacement" + pod_obj.fillup_fs( + size=file_size, fio_filename=file_name, performance_pod=True + ) + """ + get_used_capacity("After filling up the cluster") + sample = TimeoutSampler( + timeout=3600, + sleep=300, + func=verify_osd_used_capacity_greater_than_expected, + expected_used_capacity=30.0, + ) + if not sample.wait_for_func_status(result=True): + log.error("After 60 seconds the used capacity smaller than 30%") + raise TimeoutExpiredError + """ # Get worker nodes + pvc_list = [] + pod_list = [] + for i in range( + int(self.num_of_pvcs / 2) + ): # on each loop cycle 1 pvc and 1 clone + index = i + 1 + + log.info("Start creating PVC") + pvc_obj = helpers.create_pvc( + sc_name=self.sc_obj.name, + size=self.pvc_size_str, + namespace=self.namespace, + access_mode=constants.ACCESS_MODE_RWX, + ) + helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND) + + log.info( + f"PVC {pvc_obj.name} was successfully created in namespace {self.namespace}." + ) + # Create a pod on one node + log.info(f"Creating Pod with pvc {pvc_obj.name} on node") + + pvc_obj.reload() + + try: + pod_obj = helpers.create_pod( + interface_type=self.interface, + pvc_name=pvc_obj.name, + namespace=pvc_obj.namespace, + node_name=node_one, + pod_dict_path=constants.PERF_POD_YAML, + ) + except Exception as e: + log.error( + f"Pod on PVC {pvc_obj.name} was not created, exception {str(e)}" + ) + raise PodNotCreated("Pod on PVC was not created.") + + # Confirm that pod is running on the selected_nodes + helpers.wait_for_resource_state( + resource=pod_obj, state=constants.STATUS_RUNNING, timeout=600 + ) + pvc_list.append(pvc_obj) + pod_list.append(pod_obj) + + file_name = f"{pod_obj.name}-ceph_capacity_recovery" + log.info(f"Starting IO on the POD {pod_obj.name}") + + filesize = int(float(self.pvc_size_str[:-2]) * 0.95) + # Change the file size to MB for the FIO function + file_size = f"{filesize * constants.GB2MB}M" + + log.info(f"Going to write file of size {file_size}") + pod_obj.fillup_fs( + size=file_size, fio_filename=file_name, performance_pod=True + ) + # Wait for fio to finish + pod_obj.get_fio_results(timeout=3600) + + get_used_capacity(f"After creation of pvc {index}") + """ worker_node_list = node.get_worker_nodes() log.info(f"Current available worker nodes are {worker_node_list}") @@ -220,7 +364,7 @@ def test_nodereplacement_proactive_with_io_running( for worker_node in worker_node_list: if worker_node != osd_node_name: rbd_dc_pod = dc_pod_factory( - interface=constants.CEPHBLOCKPOOL, node_name=worker_node, size=20 + interface=constants.CEPHBLOCKPOOL, node_name=worker_node, size=80 ) pod.run_io_in_bg(rbd_dc_pod, expect_to_fail=False, fedora_dc=True) From 57185a26e25c17aaca7185716c2b727320b9c828 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Fri, 7 Jun 2024 16:29:51 +0530 Subject: [PATCH 05/16] Fixing merge conflict Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index b8d8d333f8e..c664ca8b05f 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4755,36 +4755,6 @@ def flatten_multilevel_dict(d): return leaves_list -def is_rbd_default_storage_class(custom_sc=None): - """ - Check if RDB is a default storageclass for the cluster - - Args: - custom_sc: custom storageclass name. - - Returns: - bool : True if RBD is set as the Default storage class for the cluster, False otherwise. 
- """ - default_rbd_sc = ( - constants.DEFAULT_STORAGECLASS_RBD if custom_sc is None else custom_sc - ) - cmd = ( - f"oc get storageclass {default_rbd_sc} -o=jsonpath='{{.metadata.annotations}}' " - ) - try: - check_annotations = json.loads(run_cmd(cmd)) - except json.decoder.JSONDecodeError: - logger.error("Error to get annotation value from storageclass.") - return False - - if check_annotations.get("storageclass.kubernetes.io/is-default-class") == "true": - logger.info(f"Storageclass {default_rbd_sc} is a default RBD StorageClass.") - return True - - logger.error("Storageclass {default_rbd_sc} is not a default RBD StorageClass.") - return False - - def odf_cli_set_recover_profile(recovery_profile): """ Set the recovery profile for a Ceph service. From b8f5340347a0ea04235d413245905f25b2e9bafb Mon Sep 17 00:00:00 2001 From: pintojoy Date: Fri, 7 Jun 2024 16:34:04 +0530 Subject: [PATCH 06/16] Fixing merge conflict Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index c664ca8b05f..4e6b5d49140 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4791,3 +4791,34 @@ def get_ceph_recovery_profile(): return ceph_output + + +def is_rbd_default_storage_class(custom_sc=None): + """ + Check if RDB is a default storageclass for the cluster + + Args: + custom_sc: custom storageclass name. + + Returns: + bool : True if RBD is set as the Default storage class for the cluster, False otherwise. + """ + default_rbd_sc = ( + constants.DEFAULT_STORAGECLASS_RBD if custom_sc is None else custom_sc + ) + cmd = ( + f"oc get storageclass {default_rbd_sc} -o=jsonpath='{{.metadata.annotations}}' " + ) + try: + check_annotations = json.loads(run_cmd(cmd)) + except json.decoder.JSONDecodeError: + logger.error("Error to get annotation value from storageclass.") + return False + + if check_annotations.get("storageclass.kubernetes.io/is-default-class") == "true": + logger.info(f"Storageclass {default_rbd_sc} is a default RBD StorageClass.") + return True + + logger.error("Storageclass {default_rbd_sc} is not a default RBD StorageClass.") + return False + From 6edf174ac3f9722451625a217b270610e67dfd2d Mon Sep 17 00:00:00 2001 From: pintojoy Date: Mon, 10 Jun 2024 18:42:13 +0530 Subject: [PATCH 07/16] Deleting irrelevent tests Signed-off-by: pintojoy --- ...t_set_recovery_profile_to_favour_new_io.py | 44 ------------------- tests/functional/odf-cli/test_state.py | 0 2 files changed, 44 deletions(-) delete mode 100644 tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py delete mode 100644 tests/functional/odf-cli/test_state.py diff --git a/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py b/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py deleted file mode 100644 index 6e5fc52af4b..00000000000 --- a/tests/functional/odf-cli/test_set_recovery_profile_to_favour_new_io.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest -import logging - -from ocs_ci.helpers.helpers import ( - odf_cli_set_recover_profile, - get_ceph_recovery_profile, -) -from ocs_ci.framework.pytest_customization.marks import brown_squad -from ocs_ci.framework.testlib import tier1 - -log = logging.getLogger(__name__) - - -@brown_squad -@tier1 -class TestRecoveryProfileInCeph: - @pytest.mark.polarion_id("OCS-XXXX") - @pytest.mark.parametrize( - argnames=["recovery_profile"], - argvalues=[ - pytest.param("balanced"), - 
pytest.param("high_client_ops"), - pytest.param("high_recovery_ops"), - ], - ) - def test_set_recovery_profile_odfcli(self, recovery_profile): - """ - Test setting the recovery profile by ODF CLI. - Steps: - 1. Set recovery-profile using ODF cli tool - 2. Verify recovery profile from the ceph toolbox pod - """ - - # Setting up and verifying the recovery profile value with the odf CLI tool - - assert odf_cli_set_recover_profile(recovery_profile) - log.info( - "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." - ) - a = get_ceph_recovery_profile() - log.info(f"Applied recovery profile on ceph cluster is {a}") - assert ( - recovery_profile == get_ceph_recovery_profile() - ), f"Recovery profile set by ODF CLI ({recovery_profile}) does not match with the value reported by Ceph" diff --git a/tests/functional/odf-cli/test_state.py b/tests/functional/odf-cli/test_state.py deleted file mode 100644 index e69de29bb2d..00000000000 From d498ea31f6949aca4fb6575dd45de678a6be8145 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Tue, 11 Jun 2024 13:42:54 +0530 Subject: [PATCH 08/16] Redrafting the test Signed-off-by: pintojoy --- .../cluster_expansion/test_add_capacity.py | 218 ++++++++++++------ 1 file changed, 148 insertions(+), 70 deletions(-) diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index 0b832c975dd..0661c7594fc 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -1,8 +1,8 @@ import pytest import logging +import concurrent.futures from ocs_ci.framework import config -import concurrent.futures from ocs_ci.framework.pytest_customization.marks import ( polarion_id, pre_upgrade, @@ -10,6 +10,7 @@ skipif_bm, skipif_external_mode, skipif_bmpsi, + tier4b, skipif_ibm_power, skipif_no_lso, skipif_lso, @@ -27,13 +28,13 @@ cloud_platform_required, ) from ocs_ci.ocs import constants -from ocs_ci.utility.utils import TimeoutSampler, ceph_health_check_base -from ocs_ci.helpers.managed_services import ( - get_used_capacity, - verify_osd_used_capacity_greater_than_expected, -) - +from ocs_ci.ocs.exceptions import TimeoutExpiredError from ocs_ci.ocs.ocp import OCP +from ocs_ci.helpers.helpers import ( + odf_cli_set_recover_profile, + get_ceph_recovery_profile, +) +from ocs_ci.utility.utils import TimeoutSampler from ocs_ci.ocs.resources.pod import ( get_osd_pods, get_ceph_tools_pod, @@ -53,80 +54,23 @@ from ocs_ci.ocs.ui.helpers_ui import ui_add_capacity_conditions, ui_add_capacity from ocs_ci.utility.utils import is_cluster_y_version_upgraded from ocs_ci.utility import version - +from ocs_ci.helpers.managed_services import ( + get_used_capacity, + verify_osd_used_capacity_greater_than_expected, +) logger = logging.getLogger(__name__) -@pytest.mark.polarion_id("OCS-XXXX") -@pytest.mark.parametrize( - argnames=["recovery_profile"], - argvalues=[ - pytest.param("balanced"), - pytest.param("high_client_ops"), - pytest.param("high_recovery_ops"), - ], -) -def add_capacity_test(recovery_profile, multi_pvc_factory, ui_flag=False): +def add_capacity_test(ui_flag=False): """ Add capacity on non-lso cluster + Args: ui_flag(bool): add capacity via ui [true] or via cli [false] """ - ceph_cluster = CephCluster() - pvc_count = 20 - ceph_capacity = int(ceph_cluster.get_ceph_capacity()) - size = int((ceph_capacity * 0.4) / pvc_count) - filesize = int(size * 0.8) - # Change the file size to MB for the FIO 
function - file_size = f"{filesize * constants.GB2MB}M" - - pvc_objs = multi_pvc_factory( - interface=constants.CEPHFILESYSTEM, - size=size, - num_of_pvc=pvc_count, - ) - pod_objs = list() - - log.info(f"filee{size}") - - for pvc_obj in pvc_objs: - pod_objs.append(pod_factory(pvc=pvc_obj)) - - executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) - futures_fio = [] - for pod in pod_objs: - futures_fio.append( - executor.submit( - pod.run_io, - storage_type="fs", - size=file_size, - invalidate=0, - bs="512K", - runtime=2100, - timeout=3300, - jobs=1, - readwrite="readwrite", - ) - ) - for _ in concurrent.futures.as_completed(futures_fio): - log.info("Some pod submitted FIO") - concurrent.futures.wait(futures_fio) - executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) - - get_used_capacity("After filling up the cluster") - sample = TimeoutSampler( - timeout=3600, - sleep=300, - func=verify_osd_used_capacity_greater_than_expected, - expected_used_capacity=30.0, - ) - if not sample.wait_for_func_status(result=True): - log.error("After 60 seconds the used capacity smaller than 30%") - raise TimeoutExpiredError - osd_size = storage_cluster.get_osd_size() existing_osd_pods = get_osd_pods() existing_osd_pod_names = [pod.name for pod in existing_osd_pods] @@ -271,3 +215,137 @@ def test_add_capacity_pre_upgrade(self, reduce_and_resume_cluster_load): Test to add variable capacity to the OSD cluster while IOs running """ add_capacity_test() + + +@brown_squad +@ignore_leftovers +@pytest.mark.second_to_last +@skipif_managed_service +@skipif_aws_i3 +@skipif_bm +@skipif_bmpsi +@skipif_lso +@skipif_external_mode +@skipif_ibm_power +@skipif_managed_service +@skipif_hci_provider_and_client +@tier4b +@pytest.mark.polarion_id("OCS-XXXX") +@pytest.mark.parametrize( + argnames=["recovery_profile"], + argvalues=[ + pytest.param("balanced"), + pytest.param("high_client_ops"), + pytest.param("high_recovery_ops"), + ], +) +class TestAddCapacityRecoveryProfile(ManageTest): + """ + Add capacity on non-lso cluster + + """ + + @pytest.fixture(autouse=True) + def setup(self, recovery_profile, multi_pvc_factory, pod_factory): + """ + Setting up test environment + """ + assert odf_cli_set_recover_profile(recovery_profile) + logger.info( + "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." 
+ ) + a = get_ceph_recovery_profile() + logger.info(f"Applied recovery profile on ceph cluster is {a}") + assert ( + recovery_profile == get_ceph_recovery_profile() + ), f"Recovery profile set by ODF CLI ({recovery_profile}) does not match with the value reported by Ceph" + + ceph_cluster = CephCluster() + pvc_count = 20 + + # Get file size to fill up the cluster + ceph_capacity = int(ceph_cluster.get_ceph_capacity()) + size = int((ceph_capacity * 0.4) / pvc_count) + filesize = int(size * 0.8) + # Change the file size to MB for the FIO function + file_size = f"{filesize * constants.GB2MB}M" + + # Creating PVCs for filling up the cluster + pvc_objs = multi_pvc_factory( + interface=constants.CEPHFILESYSTEM, + size=size, + num_of_pvc=pvc_count, + ) + pod_objs = list() + + for pvc_obj in pvc_objs: + pod_objs.append(pod_factory(pvc=pvc_obj)) + + # Run FIO concurrently on created pods + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + futures_fio = [] + for pod in pod_objs: + futures_fio.append( + executor.submit( + pod.run_io, + storage_type="fs", + size=file_size, + invalidate=0, + bs="512K", + runtime=2100, + timeout=3300, + jobs=1, + readwrite="readwrite", + ) + ) + for _ in concurrent.futures.as_completed(futures_fio): + logger.info("Some pod submitted FIO") + concurrent.futures.wait(futures_fio) + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + + # Wait for cluster to be filled up to 30% + get_used_capacity("After filling up the cluster") + sample = TimeoutSampler( + timeout=3600, + sleep=300, + func=verify_osd_used_capacity_greater_than_expected, + expected_used_capacity=30.0, + ) + if not sample.wait_for_func_status(result=True): + logger.error("After 3600 seconds the used capacity smaller than 30%") + raise TimeoutExpiredError + + @pytest.fixture(autouse=True) + def teardown(self): + """ + teardown function, Setting recovery-profile back to balanced. + """ + assert odf_cli_set_recover_profile("balanced") + logger.info( + "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." + ) + a = get_ceph_recovery_profile() + logger.info(f"Applied recovery profile on ceph cluster is {a}") + + def test_add_capacity_recovery_profile_cli(self, reduce_and_resume_cluster_load): + + """ + Test setting the recovery profile by ODF CLI and run add capacity test. + Steps: + 1. Set recovery-profile using ODF cli tool + 2. Verify recovery profile from the ceph toolbox pod + 3. Add capacity test via CLI + + """ + # Setting up and verifying the recovery profile value with the odf CLI tool + add_capacity_test(ui_flag=False) + + def test_add_capacity_recovery_profile_ui(self, reduce_and_resume_cluster_load): + """ + Test setting the recovery profile by ODF CLI and run add capacity test. + Steps: + 1. Set recovery-profile using ODF cli tool + 2. Verify recovery profile from the ceph toolbox pod + 3. 
Add capacity test via UI + """ + add_capacity_test(ui_flag=True) \ No newline at end of file From d4c5f4032a8604b5e881076f68a319c0be08ddb9 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Tue, 25 Jun 2024 13:02:04 +0530 Subject: [PATCH 09/16] Fixing tox issues Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 2 - .../cluster_expansion/test_add_capacity.py | 2 +- .../nodes/test_node_replacement_proactive.py | 323 ++++++++++-------- 3 files changed, 183 insertions(+), 144 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 4e6b5d49140..a834ba26ddc 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4792,7 +4792,6 @@ def get_ceph_recovery_profile(): return ceph_output - def is_rbd_default_storage_class(custom_sc=None): """ Check if RDB is a default storageclass for the cluster @@ -4821,4 +4820,3 @@ def is_rbd_default_storage_class(custom_sc=None): logger.error("Storageclass {default_rbd_sc} is not a default RBD StorageClass.") return False - diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index 0661c7594fc..3971c2c840c 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -348,4 +348,4 @@ def test_add_capacity_recovery_profile_ui(self, reduce_and_resume_cluster_load): 2. Verify recovery profile from the ceph toolbox pod 3. Add capacity test via UI """ - add_capacity_test(ui_flag=True) \ No newline at end of file + add_capacity_test(ui_flag=True) diff --git a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py index 975e8c1864f..0875278668c 100644 --- a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py +++ b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py @@ -2,11 +2,9 @@ import pytest import random -import time +import concurrent.futures from ocs_ci.framework import config -import concurrent.futures -from ocs_ci.ocs.cluster import CephCluster from ocs_ci.ocs.resources import pod from ocs_ci.framework.testlib import ( tier4a, @@ -15,14 +13,20 @@ ipi_deployment_required, ) from ocs_ci.ocs import constants, node +from ocs_ci.helpers.helpers import ( + odf_cli_set_recover_profile, + get_ceph_recovery_profile, +) +from ocs_ci.utility.utils import TimeoutSampler from ocs_ci.ocs.cluster import CephCluster, is_lso_cluster, is_ms_provider_cluster from ocs_ci.ocs.resources.storage_cluster import osd_encryption_verification -from ocs_ci.utility.utils import TimeoutSampler, ceph_health_check_base +from ocs_ci.ocs.exceptions import TimeoutExpiredError from ocs_ci.framework.pytest_customization.marks import ( skipif_managed_service, skipif_hci_provider_and_client, skipif_bmpsi, bugzilla, + tier4b, skipif_external_mode, skipif_ms_consumer, skipif_hci_client, @@ -35,6 +39,7 @@ verify_osd_used_capacity_greater_than_expected, ) + log = logging.getLogger(__name__) @@ -205,156 +210,20 @@ def init_sanity(self): """ self.sanity_helpers = Sanity() - @pytest.mark.polarion_id("OCS-XXXX") - @pytest.mark.parametrize( - argnames=["recovery_profile"], - argvalues=[ - pytest.param("balanced"), - pytest.param("high_client_ops"), - pytest.param("high_recovery_ops"), - ], - ) def test_nodereplacement_proactive_with_io_running( self, - recovery_profile, pvc_factory, pod_factory, dc_pod_factory, bucket_factory, rgw_bucket_factory, - multi_pvc_factory, 
): """ Knip-894 Node Replacement proactive when IO running in the background """ - ceph_cluster = CephCluster() - pvc_count = 20 - ceph_capacity = int(ceph_cluster.get_ceph_capacity()) - size = int((ceph_capacity * 0.4) / pvc_count) - filesize = int(size * 0.8) - # Change the file size to MB for the FIO function - file_size = f"{filesize * constants.GB2MB}M" - - pvc_objs = multi_pvc_factory( - interface=constants.CEPHFILESYSTEM, - size=size, - num_of_pvc=pvc_count, - ) - pod_objs = list() - - log.info(f"filee{size}") - for pvc_obj in pvc_objs: - pod_objs.append(pod_factory(pvc=pvc_obj)) - - executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) - futures_fio = [] - for pod in pod_objs: - futures_fio.append( - executor.submit( - pod.run_io, - storage_type="fs", - size=file_size, - invalidate=0, - bs="512K", - runtime=2100, - timeout=3300, - jobs=1, - readwrite="readwrite", - ) - ) - for _ in concurrent.futures.as_completed(futures_fio): - log.info("Some pod submitted FIO") - concurrent.futures.wait(futures_fio) - executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) - """ - futures_results = [] - for pod in pod_objs: - futures_results.append(executor.submit(pod.get_fio_results(timeout=3600))) - for _ in concurrent.futures.as_completed(futures_results): - log.info("Just waiting for fio jobs results") - concurrent.futures.wait(futures_results) - for pod_obj in pod_objs: - file_name = f"{pod_obj.name}-node_replacement" - pod_obj.fillup_fs( - size=file_size, fio_filename=file_name, performance_pod=True - ) - """ - get_used_capacity("After filling up the cluster") - sample = TimeoutSampler( - timeout=3600, - sleep=300, - func=verify_osd_used_capacity_greater_than_expected, - expected_used_capacity=30.0, - ) - if not sample.wait_for_func_status(result=True): - log.error("After 60 seconds the used capacity smaller than 30%") - raise TimeoutExpiredError - - """ # Get worker nodes - pvc_list = [] - pod_list = [] - for i in range( - int(self.num_of_pvcs / 2) - ): # on each loop cycle 1 pvc and 1 clone - index = i + 1 - - log.info("Start creating PVC") - pvc_obj = helpers.create_pvc( - sc_name=self.sc_obj.name, - size=self.pvc_size_str, - namespace=self.namespace, - access_mode=constants.ACCESS_MODE_RWX, - ) - helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND) - - log.info( - f"PVC {pvc_obj.name} was successfully created in namespace {self.namespace}." 
- ) - # Create a pod on one node - log.info(f"Creating Pod with pvc {pvc_obj.name} on node") - - pvc_obj.reload() - - try: - pod_obj = helpers.create_pod( - interface_type=self.interface, - pvc_name=pvc_obj.name, - namespace=pvc_obj.namespace, - node_name=node_one, - pod_dict_path=constants.PERF_POD_YAML, - ) - except Exception as e: - log.error( - f"Pod on PVC {pvc_obj.name} was not created, exception {str(e)}" - ) - raise PodNotCreated("Pod on PVC was not created.") - - # Confirm that pod is running on the selected_nodes - helpers.wait_for_resource_state( - resource=pod_obj, state=constants.STATUS_RUNNING, timeout=600 - ) - pvc_list.append(pvc_obj) - pod_list.append(pod_obj) - - file_name = f"{pod_obj.name}-ceph_capacity_recovery" - log.info(f"Starting IO on the POD {pod_obj.name}") - - filesize = int(float(self.pvc_size_str[:-2]) * 0.95) - # Change the file size to MB for the FIO function - file_size = f"{filesize * constants.GB2MB}M" - - log.info(f"Going to write file of size {file_size}") - pod_obj.fillup_fs( - size=file_size, fio_filename=file_name, performance_pod=True - ) - # Wait for fio to finish - pod_obj.get_fio_results(timeout=3600) - - get_used_capacity(f"After creation of pvc {index}") - """ worker_node_list = node.get_worker_nodes() log.info(f"Current available worker nodes are {worker_node_list}") @@ -364,7 +233,7 @@ def test_nodereplacement_proactive_with_io_running( for worker_node in worker_node_list: if worker_node != osd_node_name: rbd_dc_pod = dc_pod_factory( - interface=constants.CEPHBLOCKPOOL, node_name=worker_node, size=80 + interface=constants.CEPHBLOCKPOOL, node_name=worker_node, size=20 ) pod.run_io_in_bg(rbd_dc_pod, expect_to_fail=False, fedora_dc=True) @@ -485,3 +354,175 @@ def test_nodereplacement_twice(self): assert ( verify_storagecluster_nodetopology ), "Storagecluster node topology is having an entry of non ocs node(s) - Not expected" + + +@brown_squad +@tier4b +@ignore_leftovers +@ipi_deployment_required +@skipif_managed_service +@skipif_hci_provider_and_client +@skipif_bmpsi +@skipif_external_mode +class TestNodeReplacementWithRecoveryProfile(ManageTest): + """ + Knip-894 Node replacement proactive with IO + + """ + + @pytest.fixture(autouse=True) + def init_sanity(self): + """ + Initialize Sanity instance + + """ + self.sanity_helpers = Sanity() + + @pytest.fixture(autouse=True) + def teardown(self): + """ + teardown function, Setting recovery-profile back to balanced. + """ + assert odf_cli_set_recover_profile("balanced") + log.info( + "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." + ) + a = get_ceph_recovery_profile() + log.info(f"Applied recovery profile on ceph cluster is {a}") + + @pytest.mark.polarion_id("OCS-XXXX") + @pytest.mark.parametrize( + argnames=["recovery_profile"], + argvalues=[ + pytest.param("balanced"), + pytest.param("high_client_ops"), + pytest.param("high_recovery_ops"), + ], + ) + def test_nodereplacement_proactive_with_recovery_profile_and_io_running( + self, + recovery_profile, + pvc_factory, + pod_factory, + dc_pod_factory, + bucket_factory, + rgw_bucket_factory, + multi_pvc_factory, + ): + """ + Test setting the recovery profile by ODF CLI. + Steps: + 1. Set recovery-profile using ODF cli tool + 2. Verify recovery profile from the ceph toolbox pod + 3. 
Node Replacement proactive when IO running in the background + + """ + # Setting up and verifying the recovery profile value with the odf CLI tool + + assert odf_cli_set_recover_profile(recovery_profile) + log.info( + "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." + ) + a = get_ceph_recovery_profile() + log.info(f"Applied recovery profile on ceph cluster is {a}") + assert ( + recovery_profile == get_ceph_recovery_profile() + ), f"Recovery profile set by ODF CLI ({recovery_profile}) does not match with the value reported by Ceph" + + ceph_cluster = CephCluster() + pvc_count = 20 + + # Get file size to fill up the cluster + ceph_capacity = int(ceph_cluster.get_ceph_capacity()) + size = int((ceph_capacity * 0.4) / pvc_count) + filesize = int(size * 0.8) + # Change the file size to MB for the FIO function + file_size = f"{filesize * constants.GB2MB}M" + + # Creating PVCs for filling up the cluster + pvc_objs = multi_pvc_factory( + interface=constants.CEPHFILESYSTEM, + size=size, + num_of_pvc=pvc_count, + ) + pod_objs = list() + + for pvc_obj in pvc_objs: + pod_objs.append(pod_factory(pvc=pvc_obj)) + + # Run FIO concurrently on created pods + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + futures_fio = [] + for pod1 in pod_objs: + futures_fio.append( + executor.submit( + pod1.run_io, + storage_type="fs", + size=file_size, + invalidate=0, + bs="512K", + runtime=2100, + timeout=3300, + jobs=1, + readwrite="readwrite", + ) + ) + for _ in concurrent.futures.as_completed(futures_fio): + log.info("Some pod submitted FIO") + concurrent.futures.wait(futures_fio) + executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) + + # Wait for cluster to be filled up to 30% + get_used_capacity("After filling up the cluster") + sample = TimeoutSampler( + timeout=3600, + sleep=300, + func=verify_osd_used_capacity_greater_than_expected, + expected_used_capacity=30.0, + ) + if not sample.wait_for_func_status(result=True): + log.error("After 3600 seconds the used capacity smaller than 30%") + raise TimeoutExpiredError + + worker_node_list = node.get_worker_nodes() + log.info(f"Current available worker nodes are {worker_node_list}") + + osd_node_name = select_osd_node_name() + + log.info("Creating dc pod backed with rbd pvc and running io in bg") + for worker_node in worker_node_list: + if worker_node != osd_node_name: + rbd_dc_pod = dc_pod_factory( + interface=constants.CEPHBLOCKPOOL, node_name=worker_node, size=80 + ) + pod.run_io_in_bg(rbd_dc_pod, expect_to_fail=False, fedora_dc=True) + + log.info("Creating dc pod backed with cephfs pvc and running io in bg") + for worker_node in worker_node_list: + if worker_node != osd_node_name: + cephfs_dc_pod = dc_pod_factory( + interface=constants.CEPHFILESYSTEM, node_name=worker_node, size=20 + ) + pod.run_io_in_bg(cephfs_dc_pod, expect_to_fail=False, fedora_dc=True) + + delete_and_create_osd_node(osd_node_name) + + # Creating Resources + log.info("Creating Resources using sanity helpers") + self.sanity_helpers.create_resources( + pvc_factory, pod_factory, bucket_factory, rgw_bucket_factory + ) + # Deleting Resources + self.sanity_helpers.delete_resources() + + # Verify everything running fine + log.info("Verifying All resources are Running and matches expected result") + self.sanity_helpers.health_check(tries=120) + + # Verify OSD is encrypted + if config.ENV_DATA.get("encryption_at_rest"): + osd_encryption_verification() + + assert ( + verify_storagecluster_nodetopology + ), "Storagecluster node topology 
is having an entry of non ocs node(s) - Not expected" From 8ae2fbaa777169f4b90e278a24024e3d7f068d83 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Tue, 2 Jul 2024 21:21:35 +0530 Subject: [PATCH 10/16] Fixing odf-cli tool command Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index a834ba26ddc..6a64974409b 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4768,13 +4768,9 @@ def odf_cli_set_recover_profile(recovery_profile): retrieve_cli_binary(cli_type="odf") logger.info(f"Setting ceph recovery profile {recovery_profile} using odf-cli tool.") - cmd = ( - f"{constants.CLI_TOOL_LOCAL_PATH} --kubeconfig {os.getenv('KUBECONFIG')} " - f" set recovery-profile {recovery_profile}" - ) - - logger.info(cmd) - return exec_cmd(cmd, use_shell=True) + cmd = f"odf-cli set recovery-profile {recovery_profile}" + output = run_cmd(cmd) + return output def get_ceph_recovery_profile(): From 35b12ab57e6e50ef8a77bd33bf4bba632101bc8e Mon Sep 17 00:00:00 2001 From: pintojoy Date: Tue, 9 Jul 2024 14:50:46 +0530 Subject: [PATCH 11/16] Fixinng odf cli command issues Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 6a64974409b..8bc8f91695a 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4768,8 +4768,9 @@ def odf_cli_set_recover_profile(recovery_profile): retrieve_cli_binary(cli_type="odf") logger.info(f"Setting ceph recovery profile {recovery_profile} using odf-cli tool.") - cmd = f"odf-cli set recovery-profile {recovery_profile}" + cmd = f"odf-cli set recovery-profile {recovery_profile}" output = run_cmd(cmd) + logger.info(output) return output From c0e2c6c1d13a5753bf36e453cc64eff704dea032 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Tue, 9 Jul 2024 14:57:39 +0530 Subject: [PATCH 12/16] Fixinng odf cli assert command Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 4 +--- .../z_cluster/cluster_expansion/test_add_capacity.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 8bc8f91695a..72dbdb224cb 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4769,9 +4769,7 @@ def odf_cli_set_recover_profile(recovery_profile): logger.info(f"Setting ceph recovery profile {recovery_profile} using odf-cli tool.") cmd = f"odf-cli set recovery-profile {recovery_profile}" - output = run_cmd(cmd) - logger.info(output) - return output + run_cmd(cmd) def get_ceph_recovery_profile(): diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index 3971c2c840c..963cdbd2f51 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -250,7 +250,7 @@ def setup(self, recovery_profile, multi_pvc_factory, pod_factory): """ Setting up test environment """ - assert odf_cli_set_recover_profile(recovery_profile) + odf_cli_set_recover_profile(recovery_profile) logger.info( "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." 
) From e4283e8dcce8d274ca8a05b8876a437efc367758 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Thu, 11 Jul 2024 11:18:37 +0530 Subject: [PATCH 13/16] Filling 20% of data to avaoid timeouts Signed-off-by: pintojoy --- .../z_cluster/cluster_expansion/test_add_capacity.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index 963cdbd2f51..c265d851641 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -303,16 +303,16 @@ def setup(self, recovery_profile, multi_pvc_factory, pod_factory): concurrent.futures.wait(futures_fio) executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) - # Wait for cluster to be filled up to 30% + # Wait for cluster to be filled up to 20% get_used_capacity("After filling up the cluster") sample = TimeoutSampler( timeout=3600, sleep=300, func=verify_osd_used_capacity_greater_than_expected, - expected_used_capacity=30.0, + expected_used_capacity=20.0, ) if not sample.wait_for_func_status(result=True): - logger.error("After 3600 seconds the used capacity smaller than 30%") + logger.error("After 3600 seconds the used capacity smaller than 20%") raise TimeoutExpiredError @pytest.fixture(autouse=True) From 03eb5c028d0b7b9d4924cc1200d4443c5064b9ea Mon Sep 17 00:00:00 2001 From: pintojoy Date: Thu, 11 Jul 2024 11:55:49 +0530 Subject: [PATCH 14/16] Fixing script issue Signed-off-by: pintojoy --- .../functional/z_cluster/cluster_expansion/test_add_capacity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index c265d851641..91e0b4fc4e7 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -320,7 +320,7 @@ def teardown(self): """ teardown function, Setting recovery-profile back to balanced. """ - assert odf_cli_set_recover_profile("balanced") + odf_cli_set_recover_profile("balanced") logger.info( "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." 
) From 0bcf61f096cf188fae726886c0e67ea697092eee Mon Sep 17 00:00:00 2001 From: pintojoy Date: Thu, 11 Jul 2024 16:30:40 +0530 Subject: [PATCH 15/16] Fixing script issue Signed-off-by: pintojoy --- .../z_cluster/cluster_expansion/test_add_capacity.py | 3 --- .../z_cluster/nodes/test_node_replacement_proactive.py | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py index 91e0b4fc4e7..0609e2fc068 100644 --- a/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py +++ b/tests/functional/z_cluster/cluster_expansion/test_add_capacity.py @@ -221,12 +221,9 @@ def test_add_capacity_pre_upgrade(self, reduce_and_resume_cluster_load): @ignore_leftovers @pytest.mark.second_to_last @skipif_managed_service -@skipif_aws_i3 -@skipif_bm @skipif_bmpsi @skipif_lso @skipif_external_mode -@skipif_ibm_power @skipif_managed_service @skipif_hci_provider_and_client @tier4b diff --git a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py index 0875278668c..b6241010629 100644 --- a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py +++ b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py @@ -383,7 +383,7 @@ def teardown(self): """ teardown function, Setting recovery-profile back to balanced. """ - assert odf_cli_set_recover_profile("balanced") + odf_cli_set_recover_profile("balanced") log.info( "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." ) @@ -419,7 +419,7 @@ def test_nodereplacement_proactive_with_recovery_profile_and_io_running( """ # Setting up and verifying the recovery profile value with the odf CLI tool - assert odf_cli_set_recover_profile(recovery_profile) + odf_cli_set_recover_profile(recovery_profile) log.info( "Fetching ceph osd_mclock_profile/recovery profile using odf-cli tool." ) From e7b65032d64f2b06bf08cac11781875588cd73e1 Mon Sep 17 00:00:00 2001 From: pintojoy Date: Thu, 11 Jul 2024 19:46:15 +0530 Subject: [PATCH 16/16] Fixing script issue Signed-off-by: pintojoy --- .../z_cluster/nodes/test_node_replacement_proactive.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py index b6241010629..31926fe859a 100644 --- a/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py +++ b/tests/functional/z_cluster/nodes/test_node_replacement_proactive.py @@ -472,16 +472,16 @@ def test_nodereplacement_proactive_with_recovery_profile_and_io_running( concurrent.futures.wait(futures_fio) executor = concurrent.futures.ThreadPoolExecutor(max_workers=pvc_count) - # Wait for cluster to be filled up to 30% + # Wait for cluster to be filled up to 20% get_used_capacity("After filling up the cluster") sample = TimeoutSampler( timeout=3600, sleep=300, func=verify_osd_used_capacity_greater_than_expected, - expected_used_capacity=30.0, + expected_used_capacity=20.0, ) if not sample.wait_for_func_status(result=True): - log.error("After 3600 seconds the used capacity smaller than 30%") + log.error("After 3600 seconds the used capacity smaller than 20%") raise TimeoutExpiredError worker_node_list = node.get_worker_nodes()
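
For reference, the set-and-verify flow these patches automate can be reproduced outside ocs-ci roughly as follows. This is a minimal sketch, not part of the patch series: it assumes odf-cli and oc are on PATH, KUBECONFIG points at the cluster, and the toolbox pod carries the usual app=rook-ceph-tools label in the openshift-storage namespace (the label and namespace are assumptions, not taken from the patches). The odf-cli subcommand, the ceph config query, and the three profile names are the ones used by the helpers added in ocs_ci/helpers/helpers.py.

# Standalone sketch of the recovery-profile set-and-verify loop (assumptions noted above).
import subprocess

VALID_PROFILES = ("balanced", "high_client_ops", "high_recovery_ops")


def set_recovery_profile(profile: str) -> None:
    """Set the Ceph OSD recovery profile via the odf-cli tool."""
    if profile not in VALID_PROFILES:
        raise ValueError(f"unknown recovery profile: {profile}")
    subprocess.run(["odf-cli", "set", "recovery-profile", profile], check=True)


def get_recovery_profile(namespace: str = "openshift-storage") -> str:
    """Read osd_mclock_profile from the Ceph toolbox pod (pod found by label, an assumption)."""
    toolbox = subprocess.run(
        ["oc", "-n", namespace, "get", "pod",
         "-l", "app=rook-ceph-tools", "-o", "jsonpath={.items[0].metadata.name}"],
        check=True, capture_output=True, text=True,
    ).stdout.strip()
    result = subprocess.run(
        ["oc", "-n", namespace, "rsh", toolbox,
         "ceph", "config", "get", "osd", "osd_mclock_profile"],
        check=True, capture_output=True, text=True,
    )
    return result.stdout.strip()


if __name__ == "__main__":
    # Mirror the parametrized tests: apply each profile, then confirm Ceph reports it.
    for profile in VALID_PROFILES:
        set_recovery_profile(profile)
        applied = get_recovery_profile()
        assert applied == profile, f"odf-cli set {profile}, but Ceph reports {applied}"
        print(f"recovery profile {profile} verified")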