Skip to content

Commit

Permalink
ANA interface cleanup
Browse files Browse the repository at this point in the history
Fixes: ceph#397, sub-issue 3
Remove `ana_reporting` & `auto_ha_state`. Use exclusively the subsystem enable HA flag

- pass the enable HA subsystem flag to spdk as ana_reporting
- listeners checks the subsystem enable HA flag

Signed-off-by: Alexander Indenbaum <[email protected]>
  • Loading branch information
Alexander Indenbaum authored and baum committed Feb 5, 2024
1 parent 649f2dc commit f455165
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 116 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ The following command executes all the steps required to set up the NVMe-oF envi
$ make demo
docker-compose exec ceph bash -c "rbd -p rbd info demo_image || rbd -p rbd create demo_image --size 10M"
rbd: error opening image demo_image: (2) No such file or directory
docker-compose run --rm nvmeof-cli --server-address 192.168.13.3 --server-port 5500 subsystem add --subsystem "nqn.2016-06.io.spdk:cnode1" --ana-reporting --enable-ha
docker-compose run --rm nvmeof-cli --server-address 192.168.13.3 --server-port 5500 subsystem add --subsystem "nqn.2016-06.io.spdk:cnode1" --enable-ha
Adding subsystem nqn.2016-06.io.spdk:cnode1: Successful
docker-compose run --rm nvmeof-cli --server-address 192.168.13.3 --server-port 5500 namespace add --subsystem "nqn.2016-06.io.spdk:cnode1" --rbd-pool rbd --rbd-image demo_image
Adding namespace 1 to nqn.2016-06.io.spdk:cnode1, load balancing group 1: Successful
Expand Down
9 changes: 0 additions & 9 deletions control/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,15 +555,12 @@ def subsystem_add(self, args):
self.cli.parser.error("--subsystem argument is mandatory for add command")
if args.force:
self.cli.parser.error("--force argument is not allowed for add command")
if args.enable_ha and not args.ana_reporting:
self.cli.parser.error("ANA reporting must be enabled when HA is active")
if args.subsystem == GatewayUtils.DISCOVERY_NQN:
self.cli.parser.error("Can't add a discovery subsystem")

req = pb2.create_subsystem_req(subsystem_nqn=args.subsystem,
serial_number=args.serial_number,
max_namespaces=args.max_namespaces,
ana_reporting=args.ana_reporting,
enable_ha=args.enable_ha)
try:
ret = self.stub.create_subsystem(req)
Expand Down Expand Up @@ -603,8 +600,6 @@ def subsystem_del(self, args):
self.cli.parser.error("--serial-number argument is not allowed for del command")
if args.max_namespaces != None:
self.cli.parser.error("--max-namespaces argument is not allowed for del command")
if args.ana_reporting:
self.cli.parser.error("--ana-reporting argument is not allowed for del command")
if args.enable_ha:
self.cli.parser.error("--enable-ha argument is not allowed for del command")
if args.subsystem == GatewayUtils.DISCOVERY_NQN:
Expand Down Expand Up @@ -645,8 +640,6 @@ def subsystem_list(self, args):
out_func, err_func = self.get_output_functions(args)
if args.max_namespaces != None:
self.cli.parser.error("--max-namespaces argument is not allowed for list command")
if args.ana_reporting:
self.cli.parser.error("--ana-reporting argument is not allowed for list command")
if args.enable_ha:
self.cli.parser.error("--enable-ha argument is not allowed for list command")
if args.force:
Expand Down Expand Up @@ -719,7 +712,6 @@ def subsystem_list(self, args):
argument("--subsystem", "-n", help="Subsystem NQN", required=False),
argument("--serial-number", "-s", help="Serial number", required=False),
argument("--max-namespaces", "-m", help="Maximum number of namespaces", type=int, required=False),
argument("--ana-reporting", "-a", help="Enable ANA reporting", action='store_true', required=False),
argument("--enable-ha", "-t", help="Enable automatic HA", action='store_true', required=False),
argument("--force", help="Delete subsytem's namespaces if any, then delete subsystem. If not set a subsystem deletion would fail in case it contains namespaces", action='store_true', required=False),
])
Expand Down Expand Up @@ -762,7 +754,6 @@ def listener_add(self, args):
adrfam=adrfam,
traddr=traddr,
trsvcid=args.trsvcid,
auto_ha_state="AUTO_HA_UNSET",
)

try:
Expand Down
72 changes: 13 additions & 59 deletions control/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def __init__(self, config, gateway_state, omap_lock, spdk_rpc_client, ceph_utils
self.gateway_group = self.config.get("gateway", "group")
self.verify_nqns = self.config.getboolean_with_default("gateway", "verify_nqns", True)
self._init_cluster_context()
self.subsys_ha = {}

def is_valid_host_nqn(nqn):
if nqn == "*":
Expand Down Expand Up @@ -377,7 +378,7 @@ def create_subsystem_safe(self, request, context):
create_subsystem_error_prefix = f"Failure creating subsystem {request.subsystem_nqn}"

self.logger.info(
f"Received request to create subsystem {request.subsystem_nqn}, enable_ha: {request.enable_ha}, ana reporting: {request.ana_reporting}, context: {context}")
f"Received request to create subsystem {request.subsystem_nqn}, enable_ha: {request.enable_ha}, context: {context}")

errmsg = ""
if self.verify_nqns:
Expand All @@ -390,10 +391,6 @@ def create_subsystem_safe(self, request, context):
errmsg = f"{create_subsystem_error_prefix}: Can't create a discovery subsystem"
self.logger.error(f"{errmsg}")
return pb2.req_status(status = errno.EINVAL, error_message = errmsg)
if request.enable_ha and not request.ana_reporting:
errmsg = f"{create_subsystem_error_prefix}: HA is enabled but ANA reporting is disabled"
self.logger.error(f"{errmsg}")
return pb2.req_status(status = errno.EINVAL, error_message = errmsg)

min_cntlid = self.config.getint_with_default("gateway", "min_controller_id", 1)
max_cntlid = self.config.getint_with_default("gateway", "max_controller_id", 65519)
Expand Down Expand Up @@ -424,15 +421,17 @@ def create_subsystem_safe(self, request, context):
errmsg = f"{create_subsystem_error_prefix}: {errmsg}"
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.EEXIST, error_message=errmsg)
enable_ha = (request.enable_ha == True)
ret = rpc_nvmf.nvmf_create_subsystem(
self.spdk_rpc_client,
nqn=request.subsystem_nqn,
serial_number=request.serial_number,
max_namespaces=request.max_namespaces,
min_cntlid=min_cntlid,
max_cntlid=max_cntlid,
ana_reporting = request.ana_reporting,
ana_reporting = enable_ha,
)
self.subsys_ha[request.subsystem_nqn] = enable_ha
self.logger.info(f"create_subsystem {request.subsystem_nqn}: {ret}")
except Exception as ex:
errmsg = f"{create_subsystem_error_prefix}:\n{ex}"
Expand Down Expand Up @@ -522,6 +521,7 @@ def delete_subsystem_safe(self, request, context):
self.spdk_rpc_client,
nqn=request.subsystem_nqn,
)
self.subsys_ha.pop(request.subsystem_nqn)
self.logger.info(f"delete_subsystem {request.subsystem_nqn}: {ret}")
except Exception as ex:
errmsg = f"{delete_subsystem_error_prefix}:\n{ex}"
Expand Down Expand Up @@ -1649,24 +1649,12 @@ def list_connections(self, request, context=None):
return self.execute_grpc_function(self.list_connections_safe, request, context)

def get_subsystem_ha_status(self, nqn) -> bool:
enable_ha = False
state = self.gateway_state.local.get_state()
subsys_str = state.get(GatewayState.build_subsystem_key(nqn))
if subsys_str:
self.logger.debug(f"value of sub-system: {subsys_str}")
try:
subsys_dict = json.loads(subsys_str)
try:
enable_ha = subsys_dict["enable_ha"]
except KeyError:
enable_ha = False
self.logger.info(f"Subsystem {nqn} enable_ha: {enable_ha}")
except Exception as ex:
self.logger.error(f"Got exception trying to parse subsystem {nqn}:\n{ex}")
enable_ha = False
pass
else:
if nqn not in self.subsys_ha:
self.logger.warning(f"Subsystem {nqn} not found")
return False

enable_ha = self.subsys_ha[nqn]
self.logger.info(f"Subsystem {nqn} enable_ha: {enable_ha}")
return enable_ha

def matching_listener_exists(self, context, nqn, gw_name, traddr, trsvcid) -> bool:
Expand All @@ -1692,15 +1680,9 @@ def create_listener_safe(self, request, context):
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.ENOKEY, error_message=errmsg)

auto_ha_state = GatewayEnumUtils.get_key_from_value(pb2.AutoHAState, request.auto_ha_state)
if auto_ha_state == None:
errmsg=f"{create_listener_error_prefix}: Unknown auto HA state {request.auto_ha_state}"
self.logger.error(f"{errmsg}")
return pb2.req_status(status=errno.ENOKEY, error_message=errmsg)

self.logger.info(f"Received request to create {request.gateway_name}"
f" TCP {adrfam} listener for {request.nqn} at"
f" {traddr}:{request.trsvcid}, auto HA state: {auto_ha_state}, context: {context}")
f" {traddr}:{request.trsvcid}, context: {context}")

if GatewayUtils.is_discovery_nqn(request.nqn):
errmsg=f"{create_listener_error_prefix}: Can't create a listener for a discovery subsystem"
Expand Down Expand Up @@ -1749,35 +1731,7 @@ def create_listener_safe(self, request, context):
self.logger.error(create_listener_error_prefix)
return pb2.req_status(status=errno.EINVAL, error_message=create_listener_error_prefix)

enable_ha = False
if auto_ha_state == "AUTO_HA_UNSET":
if context == None:
self.logger.error(f"auto_ha_state is not set but we are in an update()")
state = self.gateway_state.local.get_state()
subsys_str = state.get(GatewayState.build_subsystem_key(request.nqn))
if subsys_str:
self.logger.debug(f"value of sub-system: {subsys_str}")
try:
subsys_dict = json.loads(subsys_str)
try:
enable_ha = subsys_dict["enable_ha"]
auto_ha_state_key = "AUTO_HA_ON" if enable_ha else "AUTO_HA_OFF"
request.auto_ha_state = GatewayEnumUtils.get_value_from_key(pb2.AutoHAState, auto_ha_state_key)
except KeyError:
enable_ha = False
self.logger.info(f"enable_ha: {enable_ha}")
except Exception as ex:
self.logger.error(f"Got exception trying to parse subsystem {request.nqn}:\n{ex}")
pass
else:
self.logger.warning(f"No subsystem for {request.nqn}")
else:
if context != None:
self.logger.error(f"auto_ha_state is set to {auto_ha_state} but we are not in an update()")
if auto_ha_state == "AUTO_HA_OFF":
enable_ha = False
elif auto_ha_state == "AUTO_HA_ON":
enable_ha = True
enable_ha = self.get_subsystem_ha_status(request.nqn)

if enable_ha:
for x in range (MAX_ANA_GROUPS):
Expand Down
10 changes: 1 addition & 9 deletions control/proto/gateway.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ enum LogLevel {
DEBUG = 4;
}

enum AutoHAState {
AUTO_HA_UNSET = 0;
AUTO_HA_OFF = 1;
AUTO_HA_ON = 2;
}

service Gateway {
// Creates a namespace from an RBD image
rpc namespace_add(namespace_add_req) returns (nsid_status) {}
Expand Down Expand Up @@ -148,8 +142,7 @@ message create_subsystem_req {
string subsystem_nqn = 1;
string serial_number = 2;
optional uint32 max_namespaces = 3;
bool ana_reporting = 4;
bool enable_ha = 5;
bool enable_ha = 4;
}

message delete_subsystem_req {
Expand Down Expand Up @@ -187,7 +180,6 @@ message create_listener_req {
string traddr = 3;
optional AddressFamily adrfam = 5;
optional uint32 trsvcid = 6;
optional AutoHAState auto_ha_state = 7;
}

message delete_listener_req {
Expand Down
31 changes: 2 additions & 29 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,12 @@ def test_create_subsystem(self, caplog, gateway):
caplog.clear()
cli(["subsystem", "add", "--subsystem", subsystem])
assert f"create_subsystem {subsystem}: True" in caplog.text
assert "ana reporting: False" in caplog.text
cli(["--format", "json", "subsystem", "list"])
assert f'"serial_number": "{serial}"' not in caplog.text
assert f'"nqn": "{subsystem}"' in caplog.text
caplog.clear()
cli(["subsystem", "add", "--subsystem", subsystem2, "--serial-number", serial])
assert f"create_subsystem {subsystem2}: True" in caplog.text
assert "ana reporting: False" in caplog.text
caplog.clear()
cli(["--format", "json", "subsystem", "list"])
assert f'"serial_number": "{serial}"' in caplog.text
Expand Down Expand Up @@ -521,7 +519,7 @@ def test_create_listener(self, caplog, listener, gateway):
assert "enable_ha: False" in caplog.text
assert "ipv4" in caplog.text.lower()
assert f"Adding {subsystem} listener at {listener[3]}:{listener[5]}: Successful" in caplog.text
assert f"auto HA state: AUTO_HA_UNSET" in caplog.text


@pytest.mark.parametrize("listener_ipv6", listener_list_ipv6)
def test_create_listener_ipv6(self, caplog, listener_ipv6, gateway):
Expand All @@ -530,7 +528,6 @@ def test_create_listener_ipv6(self, caplog, listener_ipv6, gateway):
assert "enable_ha: False" in caplog.text
assert "ipv6" in caplog.text.lower()
assert f"Adding {subsystem} listener at [{listener_ipv6[3]}]:{listener_ipv6[5]}: Successful" in caplog.text
assert f"auto HA state: AUTO_HA_UNSET" in caplog.text

@pytest.mark.parametrize("listener", listener_list_no_port)
def test_create_listener_no_port(self, caplog, listener, gateway):
Expand All @@ -539,7 +536,6 @@ def test_create_listener_no_port(self, caplog, listener, gateway):
assert "enable_ha: False" in caplog.text
assert "ipv4" in caplog.text.lower()
assert f"Adding {subsystem} listener at {listener[3]}:4420: Successful" in caplog.text
assert f"auto HA state: AUTO_HA_UNSET" in caplog.text

@pytest.mark.parametrize("listener", listener_list_negative_port)
def test_create_listener_negative_port(self, caplog, listener, gateway):
Expand Down Expand Up @@ -571,16 +567,6 @@ def test_create_listener_wrong_gateway(self, caplog, listener, gateway):
cli(["listener", "add", "--subsystem", subsystem] + listener)
assert f"Gateway name must match current gateway ({gateway_name})" in caplog.text

def test_create_listener_wrong_ha_state(self, caplog, gateway):
gw, stub = gateway
caplog.clear()
listener_add_req = pb2.create_listener_req(nqn=subsystem, gateway_name=gateway_name,
adrfam="ipv4", traddr=addr, trsvcid=5021, auto_ha_state="AUTO_HA_ON")
ret = stub.create_listener(listener_add_req)
assert "ipv4" in caplog.text.lower()
assert f"auto HA state: AUTO_HA_ON" in caplog.text
assert f"auto_ha_state is set to AUTO_HA_ON but we are not in an update()" in caplog.text

@pytest.mark.parametrize("listener", listener_list_invalid_adrfam)
def test_create_listener_invalid_adrfam(self, caplog, listener, gateway):
caplog.clear()
Expand Down Expand Up @@ -711,24 +697,12 @@ def test_delete_subsystem_with_discovery_nqn(self, caplog, gateway):

class TestCreateWithAna:
def test_create_subsystem_ana(self, caplog, gateway):
caplog.clear()
cli(["subsystem", "list"])
assert "No subsystems" in caplog.text
rc = 0
try:
cli(["subsystem", "add", "--subsystem", subsystem, "--enable-ha"])
except SystemExit as sysex:
# should fail with non-zero return code
rc = int(str(sysex))
assert "ANA reporting must be enabled when HA is active" in caplog.text
assert rc != 0
caplog.clear()
cli(["subsystem", "list"])
assert "No subsystems" in caplog.text
caplog.clear()
cli(["subsystem", "add", "--subsystem", subsystem, "--ana-reporting", "--enable-ha"])
cli(["subsystem", "add", "--subsystem", subsystem, "--enable-ha"])
assert f"Adding subsystem {subsystem}: Successful" in caplog.text
assert "ana reporting: True" in caplog.text
caplog.clear()
cli(["subsystem", "list"])
assert serial not in caplog.text
Expand Down Expand Up @@ -761,7 +735,6 @@ def test_create_listener_ana(self, caplog, listener, gateway):
assert "enable_ha: True" in caplog.text
assert "ipv4" in caplog.text.lower()
assert f"Adding {subsystem} listener at {listener[3]}:{listener[5]}: Successful" in caplog.text
assert f"auto HA state: AUTO_HA_UNSET" in caplog.text

class TestDeleteAna:

Expand Down
4 changes: 1 addition & 3 deletions tests/test_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

def create_resource_by_index(i):
subsystem = f"{subsystem_prefix}{i}"
cli(["subsystem", "add", "--subsystem", subsystem, "--ana-reporting", "--enable-ha" ])
cli(["subsystem", "add", "--subsystem", subsystem, "--enable-ha" ])
cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image, "--size", "16MiB", "--rbd-create-image"])

def check_resource_by_index(i, caplog):
Expand Down Expand Up @@ -44,7 +44,6 @@ def test_create_get_subsys(caplog, config):
# add a listener
cli(["listener", "add", "--subsystem", f"{subsystem_prefix}0", "--gateway-name",
gateway.name, "--traddr", "127.0.0.1", "--trsvcid", "5001"])
assert f"auto HA state: AUTO_HA_UNSET" in caplog.text
assert f"Adding {subsystem_prefix}0 listener at 127.0.0.1:5001: Successful" in caplog.text

# Change ANA group id for the first namesapce
Expand Down Expand Up @@ -76,7 +75,6 @@ def test_create_get_subsys(caplog, config):
time.sleep(0.1)

time.sleep(20) # Make sure update() is over
assert f"auto HA state: AUTO_HA_ON" in caplog.text
assert f"{subsystem_prefix}0 with ANA group id 4" in caplog.text
assert f"Received request to set QOS limits for namespace using NSID 1 on {subsystem_prefix}0, R/W IOs per second: 2000 Read megabytes per second: 5" in caplog.text
caplog.clear()
Expand Down
Loading

0 comments on commit f455165

Please sign in to comment.