diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py
index b0fad161afd..2069893afe0 100644
--- a/ocs_ci/helpers/helpers.py
+++ b/ocs_ci/helpers/helpers.py
@@ -5619,3 +5619,153 @@ def apply_custom_taint_and_toleration(taint_label="xyz"):
     )
     for pod_obj in pod_list:
         pod_obj.delete(wait=False)
+
+
+def create_ceph_block_pool_for_deviceclass(
+    device_class,
+    pool_name=None,
+    namespace=None,
+    replica=3,
+    failure_domain=None,
+):
+    """
+    Create a Ceph block pool for a device class
+
+    Args:
+        device_class (str): The device class name
+        pool_name (str): The pool name to create. If not provided, a unique name is generated
+        namespace (str): The pool namespace. If not provided, the cluster namespace is used
+        replica (int): The replica size for a pool
+        failure_domain (str): Failure domain name. If not provided, the value returned by
+            get_failure_domin() is used
+
+    Returns:
+        OCS: The OCS instance for the Ceph block pool
+
+    """
+    cbp_data = templating.load_yaml(constants.DEVICECLASS_CEPHBLOCKPOOL_YAML)
+    cbp_data["metadata"]["name"] = (
+        pool_name if pool_name else create_unique_resource_name("test", "cbp")
+    )
+    cbp_data["metadata"]["namespace"] = (
+        namespace or config.ENV_DATA["cluster_namespace"]
+    )
+    cbp_data["spec"]["deviceClass"] = device_class
+    cbp_data["spec"]["replicated"]["size"] = replica
+    cbp_data["spec"]["failureDomain"] = failure_domain or get_failure_domin()
+
+    cbp_obj = create_resource(**cbp_data)
+    cbp_obj.reload()
+    return cbp_obj
+
+
+def create_lvs_resource(storageclass, worker_nodes=None, min_size=None, max_size=None):
+    """
+    Create the LocalVolumeSet resource.
+
+    Args:
+        storageclass (string): storageClassName value to be used in
+            LocalVolumeSet CR based on LOCAL_VOLUME_YAML
+        worker_nodes (list): The worker node names to be used in the LocalVolumeSet resource
+        min_size (str): The min size to be used in the LocalVolumeSet resource
+        max_size (str): The max size to be used in the LocalVolumeSet resource
+
+    Returns:
+        OCS: The OCS instance for the LocalVolumeSet resource
+
+    """
+    worker_nodes = worker_nodes or node.get_worker_nodes()
+
+    # Pull local volume set yaml data
+    logger.info("Pulling LocalVolumeSet CR data from yaml")
+    lvs_data = templating.load_yaml(constants.LOCAL_VOLUME_SET_YAML)
+
+    # Since we don't have datastore with SSD on our current VMware machines, localvolumeset doesn't detect
+    # NonRotational disk. As a workaround we are setting Rotational to device MechanicalProperties to detect
+    # HDD disk
+    if config.ENV_DATA.get(
+        "local_storage_allow_rotational_disks"
+    ) or config.ENV_DATA.get("odf_provider_mode_deployment"):
+        logger.info(
+            "Adding Rotational for deviceMechanicalProperties spec"
+            " to detect HDD disk"
+        )
+        lvs_data["spec"]["deviceInclusionSpec"]["deviceMechanicalProperties"].append(
+            "Rotational"
+        )
+
+    if min_size:
+        lvs_data["spec"]["deviceInclusionSpec"]["minSize"] = min_size
+    if max_size:
+        lvs_data["spec"]["deviceInclusionSpec"]["maxSize"] = max_size
+    # Update local volume set data with Worker node Names
+    logger.info(
+        "Updating LocalVolumeSet CR data with worker nodes Name: %s", worker_nodes
+    )
+    lvs_data["spec"]["nodeSelector"]["nodeSelectorTerms"][0]["matchExpressions"][0][
+        "values"
+    ] = worker_nodes
+
+    # Set storage class
+    logger.info(
+        "Updating LocalVolumeSet CR data with LSO storageclass: %s", storageclass
+    )
+    lvs_data["spec"]["storageClassName"] = storageclass
+
+    # set volumeMode to Filesystem for MCG only deployment
+    if config.ENV_DATA.get("mcg_only_deployment"):
+        lvs_data["spec"]["volumeMode"] = constants.VOLUME_MODE_FILESYSTEM
+
+    lvs_obj = create_resource(**lvs_data)
+    lvs_obj.reload()
+    return lvs_obj
+
+
+def create_deviceclass_storageclass(
+    pool_name,
+    sc_name=None,
+    cluster_id="openshift-storage",
+    reclaim_policy="Delete",
+    volume_binding_mode="WaitForFirstConsumer",
+    image_features=None,
+    encrypted="false",
+    allow_volume_expansion=True,
+):
+    """
+    Create a StorageClass resource for device class from provided parameters.
+
+    Args:
+        pool_name (str): Name of the pool.
+        sc_name (str): Name of the StorageClass. If not provided, it will set a random name.
+        cluster_id (str): Cluster ID.
+        reclaim_policy (str): Reclaim policy (e.g., "Delete" or "Retain").
+        volume_binding_mode (str): Volume binding mode (e.g., "Immediate", "WaitForFirstConsumer").
+        image_features (str): Image features for the pool.
+        encrypted (str): Encryption flag ("true" or "false").
+        allow_volume_expansion (bool): Allow volume expansion (True/False).
+
+    Returns:
+        OCS: The OCS instance for the StorageClass resource
+
+    """
+    suffix = "".join(random.choices("0123456789", k=5))
+    sc_name = sc_name or f"ssd{suffix}"
+    image_features = (
+        image_features or "layering,deep-flatten,exclusive-lock,object-map,fast-diff"
+    )
+
+    sc_data = templating.load_yaml(constants.DEVICECLASS_STORAGECLASS_YAML)
+
+    # Update the YAML with the provided parameters
+    sc_data["metadata"]["name"] = sc_name
+    sc_data["parameters"]["pool"] = pool_name
+    sc_data["allowVolumeExpansion"] = allow_volume_expansion
+    sc_data["reclaimPolicy"] = reclaim_policy
+    sc_data["volumeBindingMode"] = volume_binding_mode
+    sc_data["parameters"]["imageFeatures"] = image_features
+    sc_data["parameters"]["clusterID"] = cluster_id
+    sc_data["parameters"]["encrypted"] = encrypted
+
+    sc_obj = create_resource(**sc_data)
+    sc_obj.reload()
+    return sc_obj
diff --git a/ocs_ci/helpers/multiple_device_classes.py b/ocs_ci/helpers/multiple_device_classes.py
new file mode 100644
index 00000000000..63c7937ccd9
--- /dev/null
+++ b/ocs_ci/helpers/multiple_device_classes.py
@@ -0,0 +1,108 @@
+import logging
+import random
+
+from ocs_ci.helpers.helpers import create_lvs_resource
+from ocs_ci.ocs.cluster import check_ceph_osd_tree, check_ceph_osd_df_tree
+from ocs_ci.ocs.exceptions import CephHealthException
+from ocs_ci.ocs.node import add_disk_to_node
+from ocs_ci.ocs.resources.pod import get_ceph_tools_pod
+from ocs_ci.ocs.resources.storage_cluster import (
+    get_storage_size,
+    get_device_class,
+    verify_storage_device_class,
+    verify_device_class_in_osd_tree,
+)
+from ocs_ci.utility.utils import sum_of_two_storage_sizes
+
+from ocs_ci.ocs import constants, defaults
+from ocs_ci.ocs.ocp import OCP
+
+
+log = logging.getLogger(__name__)
+
+
+def create_new_lvs_for_new_deviceclass(
+    worker_nodes, create_disks_for_lvs=True, ssd=True
+):
+    """
+    Create new LocalVolumeSet resource for a new device class
+
+    Args:
+        worker_nodes (list): The worker node names to be used in the LocalVolumeSet resource.
+        create_disks_for_lvs (bool): If True, create a new disk on every worker node for the new LocalVolumeSet.
+        ssd (bool): if True, mark disk as SSD
+
+    Returns:
+        OCS: The OCS instance for the LocalVolumeSet resource
+
+    """
+    osd_size = get_storage_size()
+    log.info(f"the osd size is {osd_size}")
+    old_lvs_max_size = sum_of_two_storage_sizes(osd_size, "30Gi")
+    ocp_lvs_obj = OCP(
+        kind=constants.LOCAL_VOLUME_SET,
+        namespace=defaults.LOCAL_STORAGE_NAMESPACE,
+        resource_name=constants.LOCAL_BLOCK_RESOURCE,
+    )
+    log.info(
+        f"Update the old LocalVolumeSet {ocp_lvs_obj.resource_name} with the maxSize "
+        f"{old_lvs_max_size} so it will not consume the new PVs"
+    )
+    params = (
+        f'{{"spec": {{"deviceInclusionSpec": {{"maxSize": "{old_lvs_max_size}"}}}}}}'
+    )
+    lvs_result = ocp_lvs_obj.patch(params=params, format_type="merge")
+    assert (
+        lvs_result
+    ), f"Failed to update the LocalVolumeSet {ocp_lvs_obj.resource_name}"
+
+    log.info(
+        "Create a new minSize that will be be higher than the maxSize of the old LVS, so that the new LVS "
+        "will consume the disks with the new size"
+    )
+    min_size = sum_of_two_storage_sizes(old_lvs_max_size, "10Gi")
+    log.info(
+        "Limit the max size of the new LVS, so it will consume only the new added disks"
+    )
+    max_size = sum_of_two_storage_sizes(min_size, "60Gi")
+    suffix = "".join(random.choices("0123456789", k=5))
+    sc_name = f"localvolume{suffix}"
+    lvs_obj = create_lvs_resource(sc_name, worker_nodes, min_size, max_size)
+
+    if create_disks_for_lvs:
+        disk_size_in_gb = sum_of_two_storage_sizes(min_size, "10Gi")
+        disk_size = int(disk_size_in_gb[:-2])
+        for n in worker_nodes:
+            add_disk_to_node(n, disk_size=disk_size, ssd=ssd)
+
+    return lvs_obj
+
+
+def check_ceph_state_post_add_deviceclass():
+    """
+    Check the Ceph state post add a new deviceclass.
+    The function checks the Ceph device classes and osd tree.
+
+    Raises:
+        CephHealthException: In case the Ceph device classes and osd tree checks
+            didn't finish successfully
+
+    """
+    log.info("Check the Ceph device classes and osd tree")
+    device_class = get_device_class()
+    ct_pod = get_ceph_tools_pod()
+    try:
+        verify_storage_device_class(device_class)
+        verify_device_class_in_osd_tree(ct_pod, device_class)
+    except AssertionError as ex:
+        raise CephHealthException(ex) from ex
+    if not check_ceph_osd_tree():
+        raise CephHealthException("The ceph osd tree checks didn't finish successfully")
+    if not check_ceph_osd_df_tree():
+        raise CephHealthException(
+            "The ceph osd df tree output is not formatted correctly"
+        )
+
+
+def verification_steps_after_adding_new_deviceclass():
+    """Placeholder for the verification steps after adding a deviceclass (not implemented yet)."""
diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py
index 97c62d4db6e..aa96c426a8c 100644
--- a/ocs_ci/ocs/constants.py
+++ b/ocs_ci/ocs/constants.py
@@ -105,6 +105,8 @@ AI_NETWORK_CONFIG_TEMPLATE = os.path.join(
     "ocp-deployment", "ai-host-network-config.yaml.j2"
 )
 
+MULTIPLE_DEVICECLASSES_DIR = os.path.join(TEMPLATE_DIR, "multiple-deviceclasses")
+
 # Statuses
 STATUS_READY = "Ready"
 PEER_READY = "Peer ready"
@@ -3117,3 +3119,14 @@ MACHINE_POOL_ACTIONS = [CREATE, EDIT, DELETE]
 
 # MDR multicluster roles
 MDR_ROLES = ["ActiveACM", "PassiveACM", "PrimaryODF", "SecondaryODF"]
+
+# Multiple device classes Yaml files
+STORAGE_DEVICESET_YAML = os.path.join(
+    MULTIPLE_DEVICECLASSES_DIR, "storage_device_set.yaml"
+)
+DEVICECLASS_CEPHBLOCKPOOL_YAML = os.path.join(
+    MULTIPLE_DEVICECLASSES_DIR, "deviceclass-cephblockpool.yaml"
+)
+DEVICECLASS_STORAGECLASS_YAML = os.path.join(
+    MULTIPLE_DEVICECLASSES_DIR, "deviceclass_storageclass.yaml"
+)
diff --git a/ocs_ci/ocs/resources/pv.py b/ocs_ci/ocs/resources/pv.py
index a5fb8a051f3..be32a240468 100644
--- a/ocs_ci/ocs/resources/pv.py
+++ b/ocs_ci/ocs/resources/pv.py
@@ -245,3 +245,36 @@ def get_node_pv_objs(sc_name, node_name):
         for pv_obj in pv_objs
         if pv_obj["metadata"]["labels"]["kubernetes.io/hostname"] == node_name
     ]
+
+
+def wait_for_pvs_in_lvs_to_reach_status(
+    lvs_obj, pv_count, expected_status, timeout=180, sleep=10
+):
+    """
+    Wait for the Persistent Volumes (PVs) associated with a specific LocalVolumeSet (LVS)
+    to reach the expected status within a given timeout.
+
+    Args:
+        lvs_obj (OCS): The LocalVolumeSet object whose PVs are being monitored.
+        pv_count (int): The number of PVs expected to reach the desired status.
+        expected_status (str): The expected status of the PVs (e.g., "Bound", "Available").
+        timeout (int): Maximum time to wait for the PVs to reach the expected status, in seconds.
+        sleep (int): Interval between successive checks, in seconds.
+
+    Returns:
+        bool: True if all PVs reach the expected status within the timeout, False otherwise.
+
+    Raises:
+        TimeoutExpiredError: If the PVs do not reach the expected status within the specified timeout.
+        ResourceWrongStatusException: If any PV enters an unexpected or error status.
+
+    """
+    selector = f"storage.openshift.com/owner-name={lvs_obj.name}"
+    pv_obj = ocp.OCP(kind=constants.PV)
+    return pv_obj.wait_for_resource(
+        condition=expected_status,
+        resource_count=pv_count,
+        selector=selector,
+        timeout=timeout,
+        sleep=sleep,
+    )
diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py
index c18da0b946a..b55ef2df2d5 100644
--- a/ocs_ci/ocs/resources/storage_cluster.py
+++ b/ocs_ci/ocs/resources/storage_cluster.py
@@ -1110,19 +1110,24 @@ def verify_storage_device_class(device_class):
     ), f"deviceClass is set to {device_class_name} but it should be set to {device_class}"
 
 
-def verify_device_class_in_osd_tree(ct_pod, device_class):
+def verify_device_class_in_osd_tree(ct_pod, device_class, osd_id_per_deviceclass=None):
     """
     Verifies device class in ceph osd tree output
 
     Args:
         ct_pod (:obj:`OCP`): Object of the Ceph tools pod
         device_class (str): Name of the device class
+        osd_id_per_deviceclass (dict): The expected deviceclass per osd id (overrides 'device_class')
 
     """
+    osd_id_per_deviceclass = osd_id_per_deviceclass or {}
+    default_device_class = device_class  # keep the fallback, 'device_class' is rebound per osd
     log.info("Verifying DeviceClass in ceph osd tree")
     osd_tree = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree")
     for each in osd_tree["nodes"]:
         if each["type"] == "osd":
+            osd_id = each.get("id")
+            device_class = osd_id_per_deviceclass.get(osd_id, default_device_class)
             osd_name = each["name"]
             device_class_in_osd_tree = each["device_class"]
             log.debug(f"DeviceClass for {osd_name} is {device_class_in_osd_tree}")
@@ -2944,3 +2949,64 @@ def get_csi_images_for_client_ocp_version(ocp_version=None):
     csi_ocp_version_images = csi_images.split(first_str)[1].split(last_str)[0]
     csi_ocp_version_images_urls = extract_image_urls(csi_ocp_version_images)
     return csi_ocp_version_images_urls
+
+
+def add_new_deviceset_in_storagecluster(
+    device_class, name, count=3, replica=1, access_modes=None
+):
+    """
+    Add a new DeviceSet to the StorageCluster.
+
+    Args:
+        device_class (str): Device class for the DeviceSet.
+        name (str): Name of the DeviceSet.
+        count (int): Number of devices in the DeviceSet.
+        replica (int): Number of replicas.
+        access_modes (list): List of access modes.
+
+    Returns:
+        bool: True if the patch was applied successfully, False otherwise.
+
+    """
+    access_modes = access_modes or ["ReadWriteOnce"]
+
+    template_data = templating.load_yaml(constants.STORAGE_DEVICESET_YAML)
+    # Update the deviceset template with the relevant parameters
+    device_set = template_data["spec"]["storageDeviceSets"][0]
+    device_set["count"] = count
+    device_set["dataPVCTemplate"]["spec"]["accessModes"] = access_modes
+    device_set["dataPVCTemplate"]["spec"]["storageClassName"] = device_class
+    device_set["deviceClass"] = device_class
+    device_set["name"] = name
+    device_set["replica"] = replica
+
+    # Append the deviceset with a JSON patch "add" operation. A merge patch would
+    # replace the whole "storageDeviceSets" list and drop the existing devicesets
+    patch_ops = [
+        {"op": "add", "path": "/spec/storageDeviceSets/-", "value": device_set}
+    ]
+    sc = get_storage_cluster()
+    result = sc.patch(
+        resource_name=sc.get()["items"][0]["metadata"]["name"],
+        params=json.dumps(patch_ops),
+        format_type="json",
+    )
+
+    return result
+
+
+def get_all_device_classes():
+    """
+    Get all the device classes in the storagecluster
+
+    Returns:
+        list: The device class names of the storagecluster devicesets that define one
+
+    """
+    storage_cluster_name = config.ENV_DATA["storage_cluster_name"]
+    storage_cluster = StorageCluster(
+        resource_name=storage_cluster_name,
+        namespace=config.ENV_DATA["cluster_namespace"],
+    )
+    storage_device_sets = storage_cluster.data["spec"]["storageDeviceSets"]
+    return [ds["deviceClass"] for ds in storage_device_sets if ds.get("deviceClass")]
diff --git a/ocs_ci/templates/multiple-deviceclasses/deviceclass-cephblockpool.yaml b/ocs_ci/templates/multiple-deviceclasses/deviceclass-cephblockpool.yaml
new file mode 100644
index 00000000000..0a405bce525
--- /dev/null
+++ b/ocs_ci/templates/multiple-deviceclasses/deviceclass-cephblockpool.yaml
@@ -0,0 +1,21 @@
+apiVersion: ceph.rook.io/v1
+kind: CephBlockPool
+metadata:
+  name: deviceclass-ssd-pool
+  namespace: openshift-storage
+spec:
+  deviceClass: local-ssd
+  enableCrushUpdates: true
+  enableRBDStats: true
+  erasureCoded:
+    codingChunks: 0
+    dataChunks: 0
+  failureDomain: host
+  parameters:
+    compression_mode: none
+  replicated:
+    replicasPerFailureDomain: 1
+    requireSafeReplicaSize: true
+    size: 3
+  statusCheck:
+    mirror: {}
diff --git a/ocs_ci/templates/multiple-deviceclasses/deviceclass_storageclass.yaml b/ocs_ci/templates/multiple-deviceclasses/deviceclass_storageclass.yaml
new file mode 100644
index 00000000000..cdca47de3e2
--- /dev/null
+++ b/ocs_ci/templates/multiple-deviceclasses/deviceclass_storageclass.yaml
@@ -0,0 +1,24 @@
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: deviceclass-ssd
+  annotations:
+    description: Provides RWO Filesystem volumes, and RWO and RWX Block volumes
+    reclaimspace.csiaddons.openshift.io/schedule: '@weekly'
+allowVolumeExpansion: true
+parameters:
+  clusterID: openshift-storage
+  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
+  csi.storage.k8s.io/controller-expand-secret-namespace: openshift-storage
+  csi.storage.k8s.io/fstype: ext4
+  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
+  csi.storage.k8s.io/node-stage-secret-namespace: openshift-storage
+  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
+  csi.storage.k8s.io/provisioner-secret-namespace: openshift-storage
+  encrypted: "false"
+  imageFeatures: layering,deep-flatten,exclusive-lock,object-map,fast-diff
+  imageFormat: "2"
+  pool: deviceclass-ssd-pool130
+provisioner: openshift-storage.rbd.csi.ceph.com
+reclaimPolicy: Delete
+volumeBindingMode: WaitForFirstConsumer
diff --git a/ocs_ci/templates/multiple-deviceclasses/storage_device_set.yaml b/ocs_ci/templates/multiple-deviceclasses/storage_device_set.yaml
new file mode 100644
index 00000000000..09dab4ccf2f
--- /dev/null
+++ b/ocs_ci/templates/multiple-deviceclasses/storage_device_set.yaml
@@ -0,0 +1,22 @@
+spec:
+  storageDeviceSets:
+  - config: {}
+    count: 3  # Default value
+    dataPVCTemplate:
+      metadata: {}
+      spec:
+        accessModes:
+        - ReadWriteOnce  # Default value
+        resources:
+          requests:
+            storage: "1"
+        storageClassName: ssd2  # Default value
+        volumeMode: Block
+      status: {}
+    deviceClass: ssd2  # Default value
+    encrypted: false
+    name: ssd2  # Default value
+    placement: {}
+    preparePlacement: {}
+    replica: 1  # Default value
+    resources: {}
diff --git a/tests/functional/z_cluster/cluster_expansion/test_create_multiple_device_classes.py b/tests/functional/z_cluster/cluster_expansion/test_create_multiple_device_classes.py
new file mode 100644
index 00000000000..e3ac9ba4032
--- /dev/null
+++ b/tests/functional/z_cluster/cluster_expansion/test_create_multiple_device_classes.py
@@ -0,0 +1,73 @@
+import logging
+
+from ocs_ci.framework.testlib import (
+    ManageTest,
+    ignore_leftovers,
+    tier1,
+    brown_squad,
+)
+from ocs_ci.ocs import constants
+from ocs_ci.ocs.node import get_osd_running_nodes
+from ocs_ci.helpers.multiple_device_classes import (
+    create_new_lvs_for_new_deviceclass,
+)
+from ocs_ci.ocs.resources.pv import wait_for_pvs_in_lvs_to_reach_status
+from ocs_ci.ocs.resources.storage_cluster import (
+    add_new_deviceset_in_storagecluster,
+    get_storage_cluster,
+)
+from ocs_ci.helpers.helpers import (
+    create_ceph_block_pool_for_deviceclass,
+    create_deviceclass_storageclass,
+)
+
+log = logging.getLogger(__name__)
+
+
+@brown_squad
+@tier1
+@ignore_leftovers
+class TestMultipleDeviceClasses(ManageTest):
+    def test_add_new_ssd_device_class(self):
+        """
+        Test adding a new device class: new LocalVolumeSet, deviceset, pool and storageclass
+        """
+        osd_node_names = get_osd_running_nodes()
+        log.info(f"osd node names = {osd_node_names}")
+        lvs_obj = create_new_lvs_for_new_deviceclass(osd_node_names)
+        log.info(
+            f"Wait for the PVs in the LocalVolumeSet {lvs_obj.name} to be available"
+        )
+        wait_for_pvs_in_lvs_to_reach_status(
+            lvs_obj, len(osd_node_names), constants.STATUS_AVAILABLE
+        )
+
+        log.info(
+            f"Add a new deviceset in the storagecluster for the new LocalVolumeSet {lvs_obj.name} "
+            "which will also create a new deviceclass"
+        )
+        res = add_new_deviceset_in_storagecluster(lvs_obj.name, lvs_obj.name)
+        assert res, "Failed to patch the storagecluster with the new deviceset"
+        storagecluster_obj = get_storage_cluster()
+        storagecluster_obj.wait_for_resource(
+            condition=constants.STATUS_READY,
+            resource_name=constants.DEFAULT_CLUSTERNAME,
+            timeout=180,
+            sleep=10,
+        )
+
+        log.info(f"Add a new CephBlockPool for the device class {lvs_obj.name}")
+        cbp_obj = create_ceph_block_pool_for_deviceclass(lvs_obj.name)
+        assert (
+            cbp_obj
+        ), f"Failed to create the CephBlockPool for the device class {lvs_obj.name}"
+        cbp_obj.ocp.wait_for_resource(
+            condition=constants.STATUS_READY,
+            resource_name=cbp_obj.name,
+            timeout=120,
+            sleep=10,
+        )
+
+        log.info(f"Add a new StorageClass for the pool {cbp_obj.name}")
+        sc_obj = create_deviceclass_storageclass(pool_name=cbp_obj.name)
+        assert sc_obj, f"Failed to create the StorageClass for the pool {cbp_obj.name}"