From f455165f5cd6c8d32b374fbf96dc3f485c5ca464 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Fri, 2 Feb 2024 14:00:24 +0200 Subject: [PATCH] ANA interface cleanup Fixes: #397, sub-issue 3 Remove `ana_reporting` & `auto_ha_state`. Use exclusively the subsystem enable HA flag - pass the enable HA subsystem flag to spdk as ana_reporting - listeners checks the subsystem enable HA flag Signed-off-by: Alexander Indenbaum --- README.md | 2 +- control/cli.py | 9 ----- control/grpc.py | 72 +++++++------------------------------ control/proto/gateway.proto | 10 +----- tests/test_cli.py | 31 ++-------------- tests/test_grpc.py | 4 +-- tests/test_omap_lock.py | 9 ++--- 7 files changed, 21 insertions(+), 116 deletions(-) diff --git a/README.md b/README.md index 319977c3..11fc7c53 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ The following command executes all the steps required to set up the NVMe-oF envi $ make demo docker-compose exec ceph bash -c "rbd -p rbd info demo_image || rbd -p rbd create demo_image --size 10M" rbd: error opening image demo_image: (2) No such file or directory -docker-compose run --rm nvmeof-cli --server-address 192.168.13.3 --server-port 5500 subsystem add --subsystem "nqn.2016-06.io.spdk:cnode1" --ana-reporting --enable-ha +docker-compose run --rm nvmeof-cli --server-address 192.168.13.3 --server-port 5500 subsystem add --subsystem "nqn.2016-06.io.spdk:cnode1" --enable-ha Adding subsystem nqn.2016-06.io.spdk:cnode1: Successful docker-compose run --rm nvmeof-cli --server-address 192.168.13.3 --server-port 5500 namespace add --subsystem "nqn.2016-06.io.spdk:cnode1" --rbd-pool rbd --rbd-image demo_image Adding namespace 1 to nqn.2016-06.io.spdk:cnode1, load balancing group 1: Successful diff --git a/control/cli.py b/control/cli.py index bf4a434b..72c20b7a 100644 --- a/control/cli.py +++ b/control/cli.py @@ -555,15 +555,12 @@ def subsystem_add(self, args): self.cli.parser.error("--subsystem argument is mandatory for add command") if args.force: self.cli.parser.error("--force argument is not allowed for add command") - if args.enable_ha and not args.ana_reporting: - self.cli.parser.error("ANA reporting must be enabled when HA is active") if args.subsystem == GatewayUtils.DISCOVERY_NQN: self.cli.parser.error("Can't add a discovery subsystem") req = pb2.create_subsystem_req(subsystem_nqn=args.subsystem, serial_number=args.serial_number, max_namespaces=args.max_namespaces, - ana_reporting=args.ana_reporting, enable_ha=args.enable_ha) try: ret = self.stub.create_subsystem(req) @@ -603,8 +600,6 @@ def subsystem_del(self, args): self.cli.parser.error("--serial-number argument is not allowed for del command") if args.max_namespaces != None: self.cli.parser.error("--max-namespaces argument is not allowed for del command") - if args.ana_reporting: - self.cli.parser.error("--ana-reporting argument is not allowed for del command") if args.enable_ha: self.cli.parser.error("--enable-ha argument is not allowed for del command") if args.subsystem == GatewayUtils.DISCOVERY_NQN: @@ -645,8 +640,6 @@ def subsystem_list(self, args): out_func, err_func = self.get_output_functions(args) if args.max_namespaces != None: self.cli.parser.error("--max-namespaces argument is not allowed for list command") - if args.ana_reporting: - self.cli.parser.error("--ana-reporting argument is not allowed for list command") if args.enable_ha: self.cli.parser.error("--enable-ha argument is not allowed for list command") if args.force: @@ -719,7 +712,6 @@ def subsystem_list(self, args): argument("--subsystem", "-n", help="Subsystem NQN", required=False), argument("--serial-number", "-s", help="Serial number", required=False), argument("--max-namespaces", "-m", help="Maximum number of namespaces", type=int, required=False), - argument("--ana-reporting", "-a", help="Enable ANA reporting", action='store_true', required=False), argument("--enable-ha", "-t", help="Enable automatic HA", action='store_true', required=False), argument("--force", help="Delete subsytem's namespaces if any, then delete subsystem. If not set a subsystem deletion would fail in case it contains namespaces", action='store_true', required=False), ]) @@ -762,7 +754,6 @@ def listener_add(self, args): adrfam=adrfam, traddr=traddr, trsvcid=args.trsvcid, - auto_ha_state="AUTO_HA_UNSET", ) try: diff --git a/control/grpc.py b/control/grpc.py index 72b82f28..30e5d230 100644 --- a/control/grpc.py +++ b/control/grpc.py @@ -141,6 +141,7 @@ def __init__(self, config, gateway_state, omap_lock, spdk_rpc_client, ceph_utils self.gateway_group = self.config.get("gateway", "group") self.verify_nqns = self.config.getboolean_with_default("gateway", "verify_nqns", True) self._init_cluster_context() + self.subsys_ha = {} def is_valid_host_nqn(nqn): if nqn == "*": @@ -377,7 +378,7 @@ def create_subsystem_safe(self, request, context): create_subsystem_error_prefix = f"Failure creating subsystem {request.subsystem_nqn}" self.logger.info( - f"Received request to create subsystem {request.subsystem_nqn}, enable_ha: {request.enable_ha}, ana reporting: {request.ana_reporting}, context: {context}") + f"Received request to create subsystem {request.subsystem_nqn}, enable_ha: {request.enable_ha}, context: {context}") errmsg = "" if self.verify_nqns: @@ -390,10 +391,6 @@ def create_subsystem_safe(self, request, context): errmsg = f"{create_subsystem_error_prefix}: Can't create a discovery subsystem" self.logger.error(f"{errmsg}") return pb2.req_status(status = errno.EINVAL, error_message = errmsg) - if request.enable_ha and not request.ana_reporting: - errmsg = f"{create_subsystem_error_prefix}: HA is enabled but ANA reporting is disabled" - self.logger.error(f"{errmsg}") - return pb2.req_status(status = errno.EINVAL, error_message = errmsg) min_cntlid = self.config.getint_with_default("gateway", "min_controller_id", 1) max_cntlid = self.config.getint_with_default("gateway", "max_controller_id", 65519) @@ -424,6 +421,7 @@ def create_subsystem_safe(self, request, context): errmsg = f"{create_subsystem_error_prefix}: {errmsg}" self.logger.error(f"{errmsg}") return pb2.req_status(status=errno.EEXIST, error_message=errmsg) + enable_ha = (request.enable_ha == True) ret = rpc_nvmf.nvmf_create_subsystem( self.spdk_rpc_client, nqn=request.subsystem_nqn, @@ -431,8 +429,9 @@ def create_subsystem_safe(self, request, context): max_namespaces=request.max_namespaces, min_cntlid=min_cntlid, max_cntlid=max_cntlid, - ana_reporting = request.ana_reporting, + ana_reporting = enable_ha, ) + self.subsys_ha[request.subsystem_nqn] = enable_ha self.logger.info(f"create_subsystem {request.subsystem_nqn}: {ret}") except Exception as ex: errmsg = f"{create_subsystem_error_prefix}:\n{ex}" @@ -522,6 +521,7 @@ def delete_subsystem_safe(self, request, context): self.spdk_rpc_client, nqn=request.subsystem_nqn, ) + self.subsys_ha.pop(request.subsystem_nqn) self.logger.info(f"delete_subsystem {request.subsystem_nqn}: {ret}") except Exception as ex: errmsg = f"{delete_subsystem_error_prefix}:\n{ex}" @@ -1649,24 +1649,12 @@ def list_connections(self, request, context=None): return self.execute_grpc_function(self.list_connections_safe, request, context) def get_subsystem_ha_status(self, nqn) -> bool: - enable_ha = False - state = self.gateway_state.local.get_state() - subsys_str = state.get(GatewayState.build_subsystem_key(nqn)) - if subsys_str: - self.logger.debug(f"value of sub-system: {subsys_str}") - try: - subsys_dict = json.loads(subsys_str) - try: - enable_ha = subsys_dict["enable_ha"] - except KeyError: - enable_ha = False - self.logger.info(f"Subsystem {nqn} enable_ha: {enable_ha}") - except Exception as ex: - self.logger.error(f"Got exception trying to parse subsystem {nqn}:\n{ex}") - enable_ha = False - pass - else: + if nqn not in self.subsys_ha: self.logger.warning(f"Subsystem {nqn} not found") + return False + + enable_ha = self.subsys_ha[nqn] + self.logger.info(f"Subsystem {nqn} enable_ha: {enable_ha}") return enable_ha def matching_listener_exists(self, context, nqn, gw_name, traddr, trsvcid) -> bool: @@ -1692,15 +1680,9 @@ def create_listener_safe(self, request, context): self.logger.error(f"{errmsg}") return pb2.req_status(status=errno.ENOKEY, error_message=errmsg) - auto_ha_state = GatewayEnumUtils.get_key_from_value(pb2.AutoHAState, request.auto_ha_state) - if auto_ha_state == None: - errmsg=f"{create_listener_error_prefix}: Unknown auto HA state {request.auto_ha_state}" - self.logger.error(f"{errmsg}") - return pb2.req_status(status=errno.ENOKEY, error_message=errmsg) - self.logger.info(f"Received request to create {request.gateway_name}" f" TCP {adrfam} listener for {request.nqn} at" - f" {traddr}:{request.trsvcid}, auto HA state: {auto_ha_state}, context: {context}") + f" {traddr}:{request.trsvcid}, context: {context}") if GatewayUtils.is_discovery_nqn(request.nqn): errmsg=f"{create_listener_error_prefix}: Can't create a listener for a discovery subsystem" @@ -1749,35 +1731,7 @@ def create_listener_safe(self, request, context): self.logger.error(create_listener_error_prefix) return pb2.req_status(status=errno.EINVAL, error_message=create_listener_error_prefix) - enable_ha = False - if auto_ha_state == "AUTO_HA_UNSET": - if context == None: - self.logger.error(f"auto_ha_state is not set but we are in an update()") - state = self.gateway_state.local.get_state() - subsys_str = state.get(GatewayState.build_subsystem_key(request.nqn)) - if subsys_str: - self.logger.debug(f"value of sub-system: {subsys_str}") - try: - subsys_dict = json.loads(subsys_str) - try: - enable_ha = subsys_dict["enable_ha"] - auto_ha_state_key = "AUTO_HA_ON" if enable_ha else "AUTO_HA_OFF" - request.auto_ha_state = GatewayEnumUtils.get_value_from_key(pb2.AutoHAState, auto_ha_state_key) - except KeyError: - enable_ha = False - self.logger.info(f"enable_ha: {enable_ha}") - except Exception as ex: - self.logger.error(f"Got exception trying to parse subsystem {request.nqn}:\n{ex}") - pass - else: - self.logger.warning(f"No subsystem for {request.nqn}") - else: - if context != None: - self.logger.error(f"auto_ha_state is set to {auto_ha_state} but we are not in an update()") - if auto_ha_state == "AUTO_HA_OFF": - enable_ha = False - elif auto_ha_state == "AUTO_HA_ON": - enable_ha = True + enable_ha = self.get_subsystem_ha_status(request.nqn) if enable_ha: for x in range (MAX_ANA_GROUPS): diff --git a/control/proto/gateway.proto b/control/proto/gateway.proto index 210b8c1b..b41901de 100644 --- a/control/proto/gateway.proto +++ b/control/proto/gateway.proto @@ -23,12 +23,6 @@ enum LogLevel { DEBUG = 4; } -enum AutoHAState { - AUTO_HA_UNSET = 0; - AUTO_HA_OFF = 1; - AUTO_HA_ON = 2; -} - service Gateway { // Creates a namespace from an RBD image rpc namespace_add(namespace_add_req) returns (nsid_status) {} @@ -148,8 +142,7 @@ message create_subsystem_req { string subsystem_nqn = 1; string serial_number = 2; optional uint32 max_namespaces = 3; - bool ana_reporting = 4; - bool enable_ha = 5; + bool enable_ha = 4; } message delete_subsystem_req { @@ -187,7 +180,6 @@ message create_listener_req { string traddr = 3; optional AddressFamily adrfam = 5; optional uint32 trsvcid = 6; - optional AutoHAState auto_ha_state = 7; } message delete_listener_req { diff --git a/tests/test_cli.py b/tests/test_cli.py index f4a43956..c813ca4f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -156,14 +156,12 @@ def test_create_subsystem(self, caplog, gateway): caplog.clear() cli(["subsystem", "add", "--subsystem", subsystem]) assert f"create_subsystem {subsystem}: True" in caplog.text - assert "ana reporting: False" in caplog.text cli(["--format", "json", "subsystem", "list"]) assert f'"serial_number": "{serial}"' not in caplog.text assert f'"nqn": "{subsystem}"' in caplog.text caplog.clear() cli(["subsystem", "add", "--subsystem", subsystem2, "--serial-number", serial]) assert f"create_subsystem {subsystem2}: True" in caplog.text - assert "ana reporting: False" in caplog.text caplog.clear() cli(["--format", "json", "subsystem", "list"]) assert f'"serial_number": "{serial}"' in caplog.text @@ -521,7 +519,7 @@ def test_create_listener(self, caplog, listener, gateway): assert "enable_ha: False" in caplog.text assert "ipv4" in caplog.text.lower() assert f"Adding {subsystem} listener at {listener[3]}:{listener[5]}: Successful" in caplog.text - assert f"auto HA state: AUTO_HA_UNSET" in caplog.text + @pytest.mark.parametrize("listener_ipv6", listener_list_ipv6) def test_create_listener_ipv6(self, caplog, listener_ipv6, gateway): @@ -530,7 +528,6 @@ def test_create_listener_ipv6(self, caplog, listener_ipv6, gateway): assert "enable_ha: False" in caplog.text assert "ipv6" in caplog.text.lower() assert f"Adding {subsystem} listener at [{listener_ipv6[3]}]:{listener_ipv6[5]}: Successful" in caplog.text - assert f"auto HA state: AUTO_HA_UNSET" in caplog.text @pytest.mark.parametrize("listener", listener_list_no_port) def test_create_listener_no_port(self, caplog, listener, gateway): @@ -539,7 +536,6 @@ def test_create_listener_no_port(self, caplog, listener, gateway): assert "enable_ha: False" in caplog.text assert "ipv4" in caplog.text.lower() assert f"Adding {subsystem} listener at {listener[3]}:4420: Successful" in caplog.text - assert f"auto HA state: AUTO_HA_UNSET" in caplog.text @pytest.mark.parametrize("listener", listener_list_negative_port) def test_create_listener_negative_port(self, caplog, listener, gateway): @@ -571,16 +567,6 @@ def test_create_listener_wrong_gateway(self, caplog, listener, gateway): cli(["listener", "add", "--subsystem", subsystem] + listener) assert f"Gateway name must match current gateway ({gateway_name})" in caplog.text - def test_create_listener_wrong_ha_state(self, caplog, gateway): - gw, stub = gateway - caplog.clear() - listener_add_req = pb2.create_listener_req(nqn=subsystem, gateway_name=gateway_name, - adrfam="ipv4", traddr=addr, trsvcid=5021, auto_ha_state="AUTO_HA_ON") - ret = stub.create_listener(listener_add_req) - assert "ipv4" in caplog.text.lower() - assert f"auto HA state: AUTO_HA_ON" in caplog.text - assert f"auto_ha_state is set to AUTO_HA_ON but we are not in an update()" in caplog.text - @pytest.mark.parametrize("listener", listener_list_invalid_adrfam) def test_create_listener_invalid_adrfam(self, caplog, listener, gateway): caplog.clear() @@ -711,24 +697,12 @@ def test_delete_subsystem_with_discovery_nqn(self, caplog, gateway): class TestCreateWithAna: def test_create_subsystem_ana(self, caplog, gateway): - caplog.clear() - cli(["subsystem", "list"]) - assert "No subsystems" in caplog.text - rc = 0 - try: - cli(["subsystem", "add", "--subsystem", subsystem, "--enable-ha"]) - except SystemExit as sysex: - # should fail with non-zero return code - rc = int(str(sysex)) - assert "ANA reporting must be enabled when HA is active" in caplog.text - assert rc != 0 caplog.clear() cli(["subsystem", "list"]) assert "No subsystems" in caplog.text caplog.clear() - cli(["subsystem", "add", "--subsystem", subsystem, "--ana-reporting", "--enable-ha"]) + cli(["subsystem", "add", "--subsystem", subsystem, "--enable-ha"]) assert f"Adding subsystem {subsystem}: Successful" in caplog.text - assert "ana reporting: True" in caplog.text caplog.clear() cli(["subsystem", "list"]) assert serial not in caplog.text @@ -761,7 +735,6 @@ def test_create_listener_ana(self, caplog, listener, gateway): assert "enable_ha: True" in caplog.text assert "ipv4" in caplog.text.lower() assert f"Adding {subsystem} listener at {listener[3]}:{listener[5]}: Successful" in caplog.text - assert f"auto HA state: AUTO_HA_UNSET" in caplog.text class TestDeleteAna: diff --git a/tests/test_grpc.py b/tests/test_grpc.py index 8c7eb590..e6947cc9 100644 --- a/tests/test_grpc.py +++ b/tests/test_grpc.py @@ -13,7 +13,7 @@ def create_resource_by_index(i): subsystem = f"{subsystem_prefix}{i}" - cli(["subsystem", "add", "--subsystem", subsystem, "--ana-reporting", "--enable-ha" ]) + cli(["subsystem", "add", "--subsystem", subsystem, "--enable-ha" ]) cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image, "--size", "16MiB", "--rbd-create-image"]) def check_resource_by_index(i, caplog): @@ -44,7 +44,6 @@ def test_create_get_subsys(caplog, config): # add a listener cli(["listener", "add", "--subsystem", f"{subsystem_prefix}0", "--gateway-name", gateway.name, "--traddr", "127.0.0.1", "--trsvcid", "5001"]) - assert f"auto HA state: AUTO_HA_UNSET" in caplog.text assert f"Adding {subsystem_prefix}0 listener at 127.0.0.1:5001: Successful" in caplog.text # Change ANA group id for the first namesapce @@ -76,7 +75,6 @@ def test_create_get_subsys(caplog, config): time.sleep(0.1) time.sleep(20) # Make sure update() is over - assert f"auto HA state: AUTO_HA_ON" in caplog.text assert f"{subsystem_prefix}0 with ANA group id 4" in caplog.text assert f"Received request to set QOS limits for namespace using NSID 1 on {subsystem_prefix}0, R/W IOs per second: 2000 Read megabytes per second: 5" in caplog.text caplog.clear() diff --git a/tests/test_omap_lock.py b/tests/test_omap_lock.py index a6aaab32..936de842 100644 --- a/tests/test_omap_lock.py +++ b/tests/test_omap_lock.py @@ -273,8 +273,7 @@ def test_multi_gateway_concurrent_changes(config, image, conn_concurrent, caplog gateway_name="GatewayAAA", adrfam="ipv4", traddr="127.0.0.1", - trsvcid=5001, - auto_ha_state="AUTO_HA_UNSET") + trsvcid=5001) listener_ret = stubA.create_listener(listener_req) assert listener_ret.status == 0 assert f"Received request to create GatewayAAA TCP ipv4 listener for {subsystem_prefix}0 at 127.0.0.1:5001" in caplog.text @@ -320,8 +319,7 @@ def test_multi_gateway_listener_update(config, image, conn_concurrent, caplog): gateway_name="GatewayAAA", adrfam="ipv4", traddr="127.0.0.1", - trsvcid=5101, - auto_ha_state="AUTO_HA_UNSET") + trsvcid=5101) listener_ret = stubA.create_listener(listenerA_req) assert listener_ret.status == 0 assert f"Received request to create GatewayAAA TCP ipv4 listener for {subsystem} at 127.0.0.1:5101" in caplog.text @@ -331,8 +329,7 @@ def test_multi_gateway_listener_update(config, image, conn_concurrent, caplog): gateway_name="GatewayBBB", adrfam="ipv4", traddr="127.0.0.1", - trsvcid=5102, - auto_ha_state="AUTO_HA_UNSET") + trsvcid=5102) listener_ret = stubB.create_listener(listenerB_req) assert listener_ret.status == 0 assert f"Received request to create GatewayBBB TCP ipv4 listener for {subsystem} at 127.0.0.1:5102" in caplog.text