From 735f85057dc516d268612771ac08eea829f5962f Mon Sep 17 00:00:00 2001 From: Joy John Pinto <86055382+pintojoy@users.noreply.github.com> Date: Mon, 21 Oct 2024 17:52:51 +0530 Subject: [PATCH] Fixing test_performance_profile_validation (#10085) * Fixing test_performance_profile_validation * Adding more timeout for the profiles to be reflected * Fixing mon values from constants.py Signed-off-by: pintojoy --- ocs_ci/helpers/helpers.py | 26 +++++ ocs_ci/ocs/constants.py | 8 +- ocs_ci/ocs/resources/storage_cluster.py | 2 +- .../test_performance_profile_validation.py | 101 ++++++++++++++---- 4 files changed, 113 insertions(+), 24 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index febcee04061..99a8f7c8a9a 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -5366,3 +5366,29 @@ def update_volsync_channel(): logger.error( f"Pod volsync-controller-manager not in {constants.STATUS_RUNNING} after 300 seconds" ) + + +def verify_performance_profile_change(perf_profile): + """ + Verify that newly applied performance profile got updated in storage cluster + + Args: + perf_profile (str): Applied performance profile + + Returns: + bool: True in case performance profile is updated, False otherwise + """ + from ocs_ci.ocs.resources.storage_cluster import StorageCluster + + # Importing storage cluster object here to avoid circular dependency + + storage_cluster = StorageCluster( + resource_name=config.ENV_DATA["storage_cluster_name"], + namespace=config.ENV_DATA["cluster_namespace"], + ) + + assert ( + perf_profile == storage_cluster.data["spec"]["resourceProfile"] + ), f"Performance profile is not updated successfully to {perf_profile}" + logger.info(f"Performance profile successfully got updated to {perf_profile} mode") + return True diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index c9a80db0a9d..9f839286098 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -1397,15 +1397,15 @@ } 
PERFORMANCE_PROFILE_REQUEST_CPU_VALUES = { - "mgr": "1500m'", - "mon": "1500", + "mgr": "1500m", + "mon": "1500m", "osd": "4", "mds": "3", "rgw": "2", } PERFORMANCE_PROFILE_REQUEST_MEMORY_VALUES = { "mgr": "2Gi", - "mon": "2Gi", + "mon": "3Gi", "osd": "8Gi", "mds": "8Gi", "rgw": "4Gi", @@ -1452,7 +1452,7 @@ } PERFORMANCE_PROFILE_MEMORY_LIMIT_VALUES = { "mgr": "4Gi", - "mon": "2Gi", + "mon": "3Gi", "osd": "8Gi", "mds": "8Gi", "rgw": "4Gi", diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index 2989ea74f2b..d3f80d67ec5 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -1006,7 +1006,7 @@ def verify_storage_cluster(): elif storage_cluster.data["spec"].get("resourceProfile") != storage_cluster.data[ "status" ].get("lastAppliedResourceProfile"): - timeout = 1200 + timeout = 1800 else: timeout = 600 storage_cluster.wait_for_phase(phase="Ready", timeout=timeout) diff --git a/tests/functional/z_cluster/test_performance_profile_validation.py b/tests/functional/z_cluster/test_performance_profile_validation.py index 1a841c2941d..6cd6718d6be 100644 --- a/tests/functional/z_cluster/test_performance_profile_validation.py +++ b/tests/functional/z_cluster/test_performance_profile_validation.py @@ -4,6 +4,7 @@ from ocs_ci.framework.pytest_customization.marks import ( tier4a, skipif_ocs_version, + ignore_leftovers, brown_squad, ) from ocs_ci.framework.testlib import ( @@ -14,12 +15,13 @@ from ocs_ci.framework import config from ocs_ci.ocs.ocp import OCP from ocs_ci.ocs.resources.storage_cluster import verify_storage_cluster +from ocs_ci.helpers.helpers import verify_performance_profile_change from ocs_ci.ocs import constants from ocs_ci.ocs.resources.pod import ( get_pods_having_label, Pod, ) -from ocs_ci.utility.utils import run_cmd +from ocs_ci.utility.utils import run_cmd, TimeoutSampler from ocs_ci.ocs.resources.storage_cluster import StorageCluster @@ -36,28 +38,73 @@ 
@pytest.mark.polarion_id("OCS-5656") @pytest.mark.polarion_id("OCS-5657") class TestProfileDefaultValuesCheck(ManageTest): - def test_validate_cluster_resource_profile(self): + @pytest.mark.parametrize( + argnames=["perf_profile"], + argvalues=[ + pytest.param(*["performance"]), + pytest.param(*["lean"]), + pytest.param(*["balanced"]), + ], + ) + @ignore_leftovers + def test_validate_cluster_resource_profile(self, perf_profile): """ Testcase to validate osd, mgr, mon, mds and rgw pod memory and cpu values are matching with the predefined set of values post profile updation """ pv_pod_obj = [] + namespace = config.ENV_DATA["cluster_namespace"] log.info("Obtaining the performance profile values from the cluster") storage_cluster_name = config.ENV_DATA["storage_cluster_name"] storage_cluster = StorageCluster( resource_name=storage_cluster_name, namespace=config.ENV_DATA["cluster_namespace"], ) - performance_profile = storage_cluster.data["spec"]["resourceProfile"] - if performance_profile == constants.PERFORMANCE_PROFILE_LEAN: + self.perf_profile = perf_profile + try: + exist_performance_profile = storage_cluster.data["spec"]["resourceProfile"] + curr_prof = storage_cluster.data["spec"]["resourceProfile"] + log.info(f"Current performance profile is {curr_prof}") + except KeyError: + # On some occasions, a cluster will be deployed without performance profile, In that case, set it to None. 
+        log.info(
+            "If a cluster is deployed without performance profile, set existing_profile value as None"
+        )
+        exist_performance_profile = None
+        pass
+        if exist_performance_profile == self.perf_profile:
+            log.info("Performance profile is same as profile that is already present")
+        else:
+            ptch = f'{{"spec": {{"resourceProfile":"{self.perf_profile}"}}}}'
+            ptch_cmd = (
+                f"oc patch storagecluster {storage_cluster.data.get('metadata').get('name')} "
+                f"-n {namespace} --type merge --patch '{ptch}'"
+            )
+            run_cmd(ptch_cmd)
+            log.info("Verify storage cluster is in Ready state")
+            verify_storage_cluster()
+
+        # Wait up to 600 seconds for performance changes to reflect
+        sample = TimeoutSampler(
+            timeout=600,
+            sleep=30,
+            func=verify_performance_profile_change,
+            perf_profile=self.perf_profile,
+        )
+        if not sample.wait_for_func_status(True):
+            raise Exception(
+                f"Performance profile is not updated successfully to {self.perf_profile}"
+            )
+
+        if self.perf_profile == constants.PERFORMANCE_PROFILE_LEAN:
             expected_cpu_request_values = constants.LEAN_PROFILE_REQUEST_CPU_VALUES
             expected_memory_request_values = (
                 constants.LEAN_PROFILE_REQUEST_MEMORY_VALUES
             )
             expected_cpu_limit_values = constants.LEAN_PROFILE_CPU_LIMIT_VALUES
             expected_memory_limit_values = constants.LEAN_PROFILE_MEMORY_LIMIT_VALUES
-        elif performance_profile == constants.PERFORMANCE_PROFILE_BALANCED:
+        elif self.perf_profile == constants.PERFORMANCE_PROFILE_BALANCED:
             expected_cpu_request_values = constants.BALANCED_PROFILE_REQUEST_CPU_VALUES
             expected_memory_request_values = (
                 constants.BALANCED_PROFILE_REQUEST_MEMORY_VALUES
             )
@@ -66,12 +113,12 @@ def test_validate_cluster_resource_profile(self):
             expected_memory_limit_values = (
                 constants.BALANCED_PROFILE_MEMORY_LIMIT_VALUES
             )
-        elif performance_profile == constants.PERFORMANCE_PROFILE_PERFORMANCE:
+        elif self.perf_profile == constants.PERFORMANCE_PROFILE_PERFORMANCE:
             expected_cpu_request_values = (
                 constants.PERFORMANCE_PROFILE_REQUEST_CPU_VALUES
             )
expected_memory_request_values = ( - constants.PERFORMANCE_PROFILE_REQUESt_LIMIT_VALUES + constants.PERFORMANCE_PROFILE_REQUEST_MEMORY_VALUES ) expected_cpu_limit_values = constants.PERFORMANCE_PROFILE_CPU_LIMIT_VALUES expected_memory_limit_values = ( @@ -80,8 +127,6 @@ def test_validate_cluster_resource_profile(self): else: log.error("Does not match any performance profiles") - log.info(performance_profile) - label_selector = list(expected_cpu_limit_values.keys()) for label in label_selector: @@ -90,11 +135,17 @@ def test_validate_cluster_resource_profile(self): ): pv_pod_obj.append(Pod(**pod)) podd = Pod(**pod) - log.info(podd.name) + log.info(f"Verifying memory and cpu values for pod {podd.name}") + log.info(f"RequestCPU{expected_cpu_request_values}") + log.info(f"LimitCPU{expected_cpu_limit_values}") + log.info(f"RequestMEM{expected_memory_request_values}") + log.info(f"LimitMEM{expected_memory_limit_values}") resource_dict = OCP( namespace=config.ENV_DATA["cluster_namespace"], kind="pod" ).get(resource_name=podd.name)["spec"]["containers"][0]["resources"] - + log.info( + f"CPU request and limit values for pod {podd.name} are {resource_dict}" + ) assert ( resource_dict["limits"]["cpu"] == expected_cpu_limit_values[label] and resource_dict["limits"]["memory"] @@ -114,6 +165,7 @@ def test_validate_cluster_resource_profile(self): pytest.param(*["balanced"], marks=pytest.mark.polarion_id("OCS-5644")), ], ) + @ignore_leftovers def test_change_cluster_resource_profile(self, perf_profile): """ Testcase to validate osd, mgr, mon, mds and rgw pod memory and cpu values @@ -128,7 +180,15 @@ def test_change_cluster_resource_profile(self, perf_profile): resource_name=storage_cluster_name, namespace=config.ENV_DATA["cluster_namespace"], ) - exist_performance_profile = storage_cluster.data["spec"]["resourceProfile"] + try: + exist_performance_profile = storage_cluster.data["spec"]["resourceProfile"] + except KeyError: + # On some occasions, a cluster will be deployed without 
performance profile, In that case, set it to None. + log.info( + "If a cluster is deployed without performance profile, set existing_profile value as None" + ) + exist_performance_profile = None + pass if exist_performance_profile == self.perf_profile: log.info("Performance profile is same as profile that is already present") else: @@ -142,14 +202,17 @@ def test_change_cluster_resource_profile(self, perf_profile): verify_storage_cluster() - # Verify that storage cluster is updated with new profile - - assert ( - self.perf_profile == storage_cluster.data["spec"]["resourceProfile"] - ), f"Performance profile is not updated successfully to {self.perf_profile}" - log.info( - f"Performance profile successfully got updated to {self.perf_profile} mode" + sample = TimeoutSampler( + timeout=600, + sleep=30, + func=verify_performance_profile_change, + perf_profile=self.perf_profile, ) + if not sample.wait_for_func_status(True): + raise Exception( + f"Performance profile is not updated successfully to {self.perf_profile}" + ) + log.info("Reverting profile changes") ptch = f'{{"spec": {{"resourceProfile":"{exist_performance_profile}"}}}}'