From 058e53395d35c8cfb3e53a7ac27ec9b1e83418b9 Mon Sep 17 00:00:00 2001
From: Oded Viner <61982127+OdedViner@users.noreply.github.com>
Date: Wed, 10 Jul 2024 12:24:44 +0300
Subject: [PATCH] Multus Upgrade 4.15->4.16 - dropping holder design (#9924)

Signed-off-by: oviner
---
 conf/ocsci/multus_delete_csi_holder_pods.yaml |   4 +
 ocs_ci/deployment/deployment.py               |  57 +---
 ocs_ci/framework/conf/default_config.yaml     |   2 +
 ocs_ci/helpers/helpers.py                     | 269 ++++++++++++++++++
 ocs_ci/ocs/constants.py                       |   1 +
 ocs_ci/ocs/ocs_upgrade.py                     |   8 +
 6 files changed, 298 insertions(+), 43 deletions(-)
 create mode 100644 conf/ocsci/multus_delete_csi_holder_pods.yaml

diff --git a/conf/ocsci/multus_delete_csi_holder_pods.yaml b/conf/ocsci/multus_delete_csi_holder_pods.yaml
new file mode 100644
index 00000000000..c34b0192007
--- /dev/null
+++ b/conf/ocsci/multus_delete_csi_holder_pods.yaml
@@ -0,0 +1,4 @@
+---
+# This configuration file is used to delete the csi holder pods on an ODF cluster with multus, post upgrade
+ENV_DATA:
+  multus_delete_csi_holder_pods: True
diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py
index d24e34b1fe9..6a09584e95d 100644
--- a/ocs_ci/deployment/deployment.py
+++ b/ocs_ci/deployment/deployment.py
@@ -1042,50 +1042,21 @@ def deploy_ocs_via_operator(self, image=None):
 
         # Create Multus Networks
         if config.ENV_DATA.get("is_multus_enabled"):
-            from ocs_ci.deployment.nmstate import NMStateInstaller
-
-            logger.info("Install NMState operator and create an instance")
-            nmstate_obj = NMStateInstaller()
-            nmstate_obj.running_nmstate()
-            logger.info("Configure NodeNetworkConfigurationPolicy on all worker nodes")
-            worker_node_names = get_worker_nodes()
-            for worker_node_name in worker_node_names:
-                worker_network_configuration = config.ENV_DATA["baremetal"]["servers"][
-                    worker_node_name
-                ]
-                node_network_configuration_policy = templating.load_yaml(
-                    constants.NODE_NETWORK_CONFIGURATION_POLICY
-                )
-                node_network_configuration_policy["spec"]["nodeSelector"][
-                    "kubernetes.io/hostname"
-                ] = worker_node_name
-                node_network_configuration_policy["metadata"][
-                    "name"
-                ] = worker_network_configuration[
-                    "node_network_configuration_policy_name"
-                ]
-                node_network_configuration_policy["spec"]["desiredState"]["interfaces"][
-                    0
-                ]["ipv4"]["address"][0]["ip"] = worker_network_configuration[
-                    "node_network_configuration_policy_ip"
-                ]
-                node_network_configuration_policy["spec"]["desiredState"]["interfaces"][
-                    0
-                ]["ipv4"]["address"][0]["prefix-length"] = worker_network_configuration[
-                    "node_network_configuration_policy_prefix_length"
-                ]
-                node_network_configuration_policy["spec"]["desiredState"]["routes"][
-                    "config"
-                ][0]["destination"] = worker_network_configuration[
-                    "node_network_configuration_policy_destination_route"
-                ]
-                public_net_yaml = tempfile.NamedTemporaryFile(
-                    mode="w+", prefix="multus_public", delete=False
-                )
-                templating.dump_data_to_temp_yaml(
-                    node_network_configuration_policy, public_net_yaml.name
+            ocs_version = version.get_semantic_ocs_version_from_config()
+            if (
+                config.ENV_DATA.get("multus_create_public_net")
+                and ocs_version >= version.VERSION_4_16
+            ):
+                from ocs_ci.deployment.nmstate import NMStateInstaller
+
+                logger.info("Install NMState operator and create an instance")
+                nmstate_obj = NMStateInstaller()
+                nmstate_obj.running_nmstate()
+                from ocs_ci.helpers.helpers import (
+                    configure_node_network_configuration_policy_on_all_worker_nodes,
                 )
-            run_cmd(f"oc create -f {public_net_yaml.name}")
+
+                configure_node_network_configuration_policy_on_all_worker_nodes()
 
         create_public_net = config.ENV_DATA["multus_create_public_net"]
         create_cluster_net = config.ENV_DATA["multus_create_cluster_net"]
diff --git a/ocs_ci/framework/conf/default_config.yaml b/ocs_ci/framework/conf/default_config.yaml
index ac91575c07c..d5d65b62426 100644
--- a/ocs_ci/framework/conf/default_config.yaml
+++ b/ocs_ci/framework/conf/default_config.yaml
@@ -268,6 +268,8 @@ ENV_DATA:
   multus_cluster_net_range: "192.168.30.0/24"
   multus_cluster_net_type: "macvlan"
   multus_cluster_net_mode: "bridge"
+  multus_destination_route: "192.168.252.0/24"
+  multus_delete_csi_holder_pods: False
 
   #RDR Green field
   rdr_osd_deployment_mode: "greenfield"
diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py
index 7918d8d7837..fc211d84a66 100644
--- a/ocs_ci/helpers/helpers.py
+++ b/ocs_ci/helpers/helpers.py
@@ -4783,3 +4783,272 @@ def is_rbd_default_storage_class(custom_sc=None):
     logger.error("Storageclass {default_rbd_sc} is not a default RBD StorageClass.")
 
     return False
+
+
+def get_network_attachment_definitions(
+    nad_name, namespace=config.ENV_DATA["cluster_namespace"]
+):
+    """
+    Get NetworkAttachmentDefinition obj
+
+    Args:
+        nad_name (str): network_attachment_definition name
+        namespace (str): Namespace of the resource
+
+    Returns:
+        network_attachment_definitions (obj): network_attachment_definitions object
+
+    """
+    return OCP(
+        kind=constants.NETWORK_ATTACHEMENT_DEFINITION,
+        namespace=namespace,
+        resource_name=nad_name,
+    )
+
+
+def add_route_public_nad():
+    """
+    Add a route section to the NetworkAttachmentDefinition object
+
+    """
+    nad_obj = get_network_attachment_definitions(
+        nad_name=config.ENV_DATA.get("multus_public_net_name"),
+        namespace=config.ENV_DATA.get("multus_public_net_namespace"),
+    )
+    nad_config_str = nad_obj.data["spec"]["config"]
+    nad_config_dict = json.loads(nad_config_str)
+    nad_config_dict["ipam"]["routes"] = [
+        {"dst": config.ENV_DATA["multus_destination_route"]}
+    ]
+    nad_config_dict_string = json.dumps(nad_config_dict)
+    logger.info("Creating Multus public network")
+    public_net_data = templating.load_yaml(constants.MULTUS_PUBLIC_NET_YAML)
+    public_net_data["metadata"]["name"] = config.ENV_DATA.get("multus_public_net_name")
+    public_net_data["metadata"]["namespace"] = config.ENV_DATA.get(
+        "multus_public_net_namespace"
+    )
+    public_net_data["spec"]["config"] = nad_config_dict_string
+    public_net_yaml = tempfile.NamedTemporaryFile(
+        mode="w+", prefix="multus_public", delete=False
+    )
+    templating.dump_data_to_temp_yaml(public_net_data, public_net_yaml.name)
+    run_cmd(f"oc apply -f {public_net_yaml.name}")
+
+
+def reset_all_osd_pods():
+    """
+    Reset all osd pods by deleting them so they are recreated
+
+    """
+    from ocs_ci.ocs.resources.pod import get_osd_pods
+
+    osd_pod_objs = get_osd_pods()
+    for osd_pod_obj in osd_pod_objs:
+        osd_pod_obj.delete()
+
+
+def enable_csi_disable_holder_pods():
+    """
+    Set CSI_DISABLE_HOLDER_PODS to "true" in the rook-ceph-operator-config configmap
+
+    """
+    configmap_obj = OCP(
+        kind=constants.CONFIGMAP,
+        namespace=config.ENV_DATA["cluster_namespace"],
+        resource_name=constants.ROOK_OPERATOR_CONFIGMAP,
+    )
+    value = "true"
+    params = f'{{"data": {{"CSI_DISABLE_HOLDER_PODS": "{value}"}}}}'
+    configmap_obj.patch(params=params, format_type="merge")
+
+
+def delete_csi_holder_pods():
+    """
+    Drain/schedule worker nodes and reset the csi holder pods
+
+    Procedure:
+    1. Cordon worker node-X
+    2. Drain worker node-X
+    3. Reset csi-cephfsplugin-holder and csi-rbdplugin-holder pods on node-X
+    4. Schedule node-X
+    5. Verify node-X is in Ready state
+
+    """
+    from ocs_ci.ocs.utils import get_pod_name_by_pattern
+    from ocs_ci.ocs.node import drain_nodes, schedule_nodes
+
+    pods_csi_cephfsplugin_holder = get_pod_name_by_pattern("csi-cephfsplugin-holder")
+    pods_csi_rbdplugin_holder = get_pod_name_by_pattern("csi-rbdplugin-holder")
+    pods_csi_holder = pods_csi_cephfsplugin_holder + pods_csi_rbdplugin_holder
+    worker_pods_dict = dict()
+    from ocs_ci.ocs.resources.pod import get_pod_obj
+
+    for pod_name in pods_csi_holder:
+        pod_obj = get_pod_obj(
+            name=pod_name, namespace=config.ENV_DATA["cluster_namespace"]
+        )
+        if pod_obj.pod_data["spec"]["nodeName"] in worker_pods_dict:
+            worker_pods_dict[pod_obj.pod_data["spec"]["nodeName"]].append(pod_obj)
+        else:
+            worker_pods_dict[pod_obj.pod_data["spec"]["nodeName"]] = [pod_obj]
+
+    for worker_node_name, csi_pod_objs in worker_pods_dict.items():
+        run_cmd(f"oc adm cordon {worker_node_name}")
+        drain_nodes([worker_node_name])
+        for csi_pod_obj in csi_pod_objs:
+            csi_pod_obj.delete()
+        schedule_nodes([worker_node_name])
+
+
+def configure_node_network_configuration_policy_on_all_worker_nodes():
+    """
+    Configure a NodeNetworkConfigurationPolicy CR on each worker node in the cluster
+
+    """
+    from ocs_ci.ocs.node import get_worker_nodes
+
+    # This function requires changes for compact mode
+    logger.info("Configure NodeNetworkConfigurationPolicy on all worker nodes")
+    worker_node_names = get_worker_nodes()
+    for worker_node_name in worker_node_names:
+        worker_network_configuration = config.ENV_DATA["baremetal"]["servers"][
+            worker_node_name
+        ]
+        node_network_configuration_policy = templating.load_yaml(
+            constants.NODE_NETWORK_CONFIGURATION_POLICY
+        )
+        node_network_configuration_policy["spec"]["nodeSelector"][
+            "kubernetes.io/hostname"
+        ] = worker_node_name
+        node_network_configuration_policy["metadata"][
+            "name"
+        ] = worker_network_configuration["node_network_configuration_policy_name"]
+        node_network_configuration_policy["spec"]["desiredState"]["interfaces"][0][
+            "ipv4"
+        ]["address"][0]["ip"] = worker_network_configuration[
+            "node_network_configuration_policy_ip"
+        ]
+        node_network_configuration_policy["spec"]["desiredState"]["interfaces"][0][
+            "ipv4"
+        ]["address"][0]["prefix-length"] = worker_network_configuration[
+            "node_network_configuration_policy_prefix_length"
+        ]
+        node_network_configuration_policy["spec"]["desiredState"]["routes"]["config"][
+            0
+        ]["destination"] = worker_network_configuration[
+            "node_network_configuration_policy_destination_route"
+        ]
+        public_net_yaml = tempfile.NamedTemporaryFile(
+            mode="w+", prefix="multus_public", delete=False
+        )
+        templating.dump_data_to_temp_yaml(
+            node_network_configuration_policy, public_net_yaml.name
+        )
+        run_cmd(f"oc create -f {public_net_yaml.name}")
+
+
+def get_daemonsets_names(namespace=config.ENV_DATA["cluster_namespace"]):
+    """
+    Get the names of all daemonsets in the namespace
+
+    Args:
+        namespace (str): namespace
+
+    Returns:
+        list: all daemonset names in the namespace
+
+    """
+    daemonset_names = list()
+    daemonset_objs = OCP(
+        kind=constants.DAEMONSET,
+        namespace=namespace,
+    )
+    for daemonset_obj in daemonset_objs.data.get("items"):
+        daemonset_names.append(daemonset_obj["metadata"]["name"])
+    return daemonset_names
+
+
+def get_daemonsets_obj(name, namespace=config.ENV_DATA["cluster_namespace"]):
+    """
+    Get daemonset obj
+
+    Args:
+        name (str): the name of the daemonset
+        namespace (str): the namespace of the daemonset
+
+    Returns:
+        ocp_obj: daemonset ocp obj
+
+    """
+    return OCP(kind=constants.DAEMONSET, namespace=namespace, resource_name=name)
+
+
+def delete_csi_holder_daemonsets():
+    """
+    Delete the csi holder daemonsets
+
+    """
+    daemonset_names = get_daemonsets_names()
+    for daemonset_name in daemonset_names:
+        if "holder" in daemonset_name:
+            daemonsets_obj = get_daemonsets_obj(daemonset_name)
+            daemonsets_obj.delete(resource_name=daemonset_name)
+
+
+def verify_pod_pattern_does_not_exist(pattern, namespace):
+    """
+    Verify that no pod matching the pattern exists
+
+    Args:
+        pattern (str): the pod name pattern
+        namespace (str): the namespace of the pod
+
+    Returns:
+        bool: True if no pod matching the pattern exists, False otherwise
+
+    """
+    from ocs_ci.ocs.utils import get_pod_name_by_pattern
+
+    return len(get_pod_name_by_pattern(pattern=pattern, namespace=namespace)) == 0
+
+
+def verify_csi_holder_pods_do_not_exist():
+    """
+    Verify the csi holder pods do not exist
+
+    Raises:
+        TimeoutExpiredError: if a csi holder pod still exists after the timeout
+
+    """
+    sample = TimeoutSampler(
+        timeout=300,
+        sleep=10,
+        func=verify_pod_pattern_does_not_exist,
+        pattern="holder",
+        namespace=config.ENV_DATA["cluster_namespace"],
+    )
+    if not sample.wait_for_func_status(result=True):
+        raise TimeoutExpiredError(
+            "csi holder pods still exist 300 seconds after the daemonsets were deleted"
+        )
+
+
+def upgrade_multus_holder_design():
+    """
+    Upgrade the multus holder design from ODF 4.15 to ODF 4.16
+
+    """
+    if not config.ENV_DATA.get("multus_delete_csi_holder_pods"):
+        return
+    if config.ENV_DATA.get("multus_create_public_net"):
+        add_route_public_nad()
+    from ocs_ci.deployment.nmstate import NMStateInstaller
+
+    logger.info("Install NMState operator and create an instance")
+    nmstate_obj = NMStateInstaller()
+    nmstate_obj.running_nmstate()
+    configure_node_network_configuration_policy_on_all_worker_nodes()
+    reset_all_osd_pods()
+    enable_csi_disable_holder_pods()
+    delete_csi_holder_pods()
+    delete_csi_holder_daemonsets()
+    verify_csi_holder_pods_do_not_exist()
diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py
index d7e4a38d6b1..23e1f79a593 100644
--- a/ocs_ci/ocs/constants.py
+++ b/ocs_ci/ocs/constants.py
@@ -158,6 +158,7 @@
 POD = "Pod"
 ROUTE = "Route"
 SERVICE = "Service"
+DAEMONSET = "DaemonSet"
 CEPHOBJECTSTORE = "cephobjectstore"
 NODE = "Node"
 DEPLOYMENTCONFIG = "deploymentconfig"
diff --git a/ocs_ci/ocs/ocs_upgrade.py b/ocs_ci/ocs/ocs_upgrade.py
index 741ff74aa40..71e587e0c91 100644
--- a/ocs_ci/ocs/ocs_upgrade.py
+++ b/ocs_ci/ocs/ocs_upgrade.py
@@ -786,6 +786,14 @@ def run_ocs_upgrade(
         # in pending state
         is_all_csvs_succeeded = check_all_csvs_are_succeeded(namespace=namespace)
         assert is_all_csvs_succeeded, "Not all CSV's are in succeeded state"
+        upgrade_version = version.get_semantic_version(upgrade_version, True)
+        if (
+            config.ENV_DATA.get("is_multus_enabled")
+            and upgrade_version == version.VERSION_4_16
+        ):
+            from ocs_ci.helpers.helpers import upgrade_multus_holder_design
+
+            upgrade_multus_holder_design()
 
         ocs_install_verification(
             timeout=600,
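
For reference, the rook-ceph-operator-config change applied by enable_csi_disable_holder_pods() corresponds to the oc invocation sketched below; a minimal sketch, assuming the default "openshift-storage" cluster namespace. The flow as a whole is opted into by loading the new conf file into an upgrade run on a multus-enabled cluster (for example via --ocsci-conf conf/ocsci/multus_delete_csi_holder_pods.yaml), which sets ENV_DATA["multus_delete_csi_holder_pods"] to True.

    # Equivalent of enable_csi_disable_holder_pods(), assuming the default
    # openshift-storage namespace:
    oc patch configmap rook-ceph-operator-config -n openshift-storage \
      --type merge -p '{"data": {"CSI_DISABLE_HOLDER_PODS": "true"}}'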