diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py
index 654f44ae2f..23340695f5 100644
--- a/crmsh/ui_cluster.py
+++ b/crmsh/ui_cluster.py
@@ -172,40 +172,73 @@ def do_start(self, context, *args):
for node in node_list:
logger.info("Cluster services started on {}".format(node))
+ @staticmethod
+ def _node_ready_to_stop_cluster_service(node):
+ """
+ Check if the specific node is ready to stop cluster service
+
+    If both corosync.service and pacemaker.service are active, return True.
+    If only some of the services are started, stop them first and return False.
+ """
+ corosync_active = utils.service_is_active("corosync.service", remote_addr=node)
+ sbd_active = utils.service_is_active("sbd.service", remote_addr=node)
+ pacemaker_active = utils.service_is_active("pacemaker.service", remote_addr=node)
+
+ if not corosync_active:
+ if sbd_active:
+ utils.stop_service("corosync", remote_addr=node)
+ logger.info(f"The cluster stack stopped on {node}")
+ else:
+ logger.info(f"The cluster stack already stopped on {node}")
+ return False
+
+ elif not pacemaker_active:
+ utils.stop_service("corosync", remote_addr=node)
+ logger.info("The cluster stack stopped on {}".format(node))
+ return False
+
+ return True
+
+ @staticmethod
+ def _wait_for_dc(node=None):
+ """
+ Wait for the cluster's DC to become available
+ """
+ if not utils.service_is_active("pacemaker.service", remote_addr=node):
+ return
+
+ dc_deadtime = utils.get_property("dc-deadtime", peer=node) or str(constants.DC_DEADTIME_DEFAULT)
+ dc_timeout = int(dc_deadtime.strip('s')) + 5
+ try:
+ utils.check_function_with_timeout(utils.get_dc, wait_timeout=dc_timeout, peer=node)
+ except TimeoutError:
+ logger.error("No DC found currently, please wait if the cluster is still starting")
+ raise utils.TerminateSubCommand
+
+ @staticmethod
+ def _set_dlm(node=None):
+ """
+    When dlm is running and quorum is lost, set enable_quorum_fencing=0 and
+    enable_quorum_lockspace=0 in the dlm config options before stopping the cluster service
+ """
+ if utils.is_dlm_running(node) and not utils.is_quorate(node):
+ logger.debug("Quorum is lost; Set enable_quorum_fencing=0 and enable_quorum_lockspace=0 for dlm")
+ utils.set_dlm_option(peer=node, enable_quorum_fencing=0, enable_quorum_lockspace=0)
+
@command.skill_level('administrator')
def do_stop(self, context, *args):
'''
Stops the cluster services on all nodes or specific node(s)
'''
node_list = parse_option_for_nodes(context, *args)
- for node in node_list[:]:
- if not utils.service_is_active("corosync.service", remote_addr=node):
- if utils.service_is_active("sbd.service", remote_addr=node):
- utils.stop_service("corosync", remote_addr=node)
- logger.info("Cluster services stopped on {}".format(node))
- else:
- logger.info("Cluster services already stopped on {}".format(node))
- node_list.remove(node)
- elif not utils.service_is_active("pacemaker.service", remote_addr=node):
- utils.stop_service("corosync", remote_addr=node)
- logger.info("Cluster services stopped on {}".format(node))
- node_list.remove(node)
+ node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
if not node_list:
return
+ logger.debug(f"stop node list: {node_list}")
- dc_deadtime = utils.get_property("dc-deadtime") or constants.DC_DEADTIME_DEFAULT
- dc_timeout = int(dc_deadtime.strip('s')) + 5
- try:
- utils.check_function_with_timeout(utils.get_dc, wait_timeout=dc_timeout)
- except TimeoutError:
- logger.error("No DC found currently, please wait if the cluster is still starting")
- return False
+ self._wait_for_dc(node_list[0])
- # When dlm running and quorum is lost, before stop cluster service, should set
- # enable_quorum_fencing=0, enable_quorum_lockspace=0 for dlm config option
- if utils.is_dlm_running() and not utils.is_quorate():
- logger.debug("Quorum is lost; Set enable_quorum_fencing=0 and enable_quorum_lockspace=0 for dlm")
- utils.set_dlm_option(enable_quorum_fencing=0, enable_quorum_lockspace=0)
+ self._set_dlm(node_list[0])
# Stop pacemaker since it can make sure cluster has quorum until stop corosync
utils.stop_service("pacemaker", node_list=node_list)
@@ -216,7 +249,7 @@ def do_stop(self, context, *args):
utils.stop_service("corosync", node_list=node_list)
for node in node_list:
- logger.info("Cluster services stopped on {}".format(node))
+ logger.info("The cluster stack stopped on {}".format(node))
@command.skill_level('administrator')
def do_restart(self, context, *args):
diff --git a/crmsh/utils.py b/crmsh/utils.py
index f99782c006..d8dd56089d 100644
--- a/crmsh/utils.py
+++ b/crmsh/utils.py
@@ -853,14 +853,14 @@ def append_file(dest, src):
return False
-def get_dc():
+def get_dc(peer=None):
cmd = "crmadmin -D -t 1"
- rc, s, _ = get_stdout_stderr(add_sudo(cmd))
- if rc != 0:
+ out = get_stdout_or_raise_error(add_sudo(cmd), remote=peer, no_raise=True)
+ if not out:
return None
- if not s.startswith("Designated"):
+ if not out.startswith("Designated"):
return None
- return s.split()[-1]
+ return out.split()[-1]
def wait4dc(what="", show_progress=True):
@@ -2945,47 +2945,62 @@ def is_standby(node):
return re.search(r'Node\s+{}:\s+standby'.format(node), out) is not None
-def get_dlm_option_dict():
+def get_dlm_option_dict(peer=None):
"""
Get dlm config option dictionary
"""
- out = get_stdout_or_raise_error("dlm_tool dump_config")
+ out = get_stdout_or_raise_error("dlm_tool dump_config", remote=peer)
return dict(re.findall("(\w+)=(\w+)", out))
-def set_dlm_option(**kargs):
+def set_dlm_option(peer=None, **kargs):
"""
Set dlm option
"""
- dlm_option_dict = get_dlm_option_dict()
+ dlm_option_dict = get_dlm_option_dict(peer=peer)
for option, value in kargs.items():
if option not in dlm_option_dict:
- raise ValueError('"{}" is not dlm config option'.format(option))
+ raise ValueError(f'"{option}" is not dlm config option')
if dlm_option_dict[option] != value:
- get_stdout_or_raise_error('dlm_tool set_config "{}={}"'.format(option, value))
+ get_stdout_or_raise_error(f'dlm_tool set_config "{option}={value}"', remote=peer)
-def is_dlm_running():
+def is_dlm_running(peer=None):
"""
Check if dlm ra controld is running
"""
- from . import xmlutil
- return xmlutil.CrmMonXmlParser.is_resource_started(constants.DLM_CONTROLD_RA)
+ return is_resource_running(constants.DLM_CONTROLD_RA, peer=peer)
-def is_dlm_configured():
+def has_resource_configured(ra_type, peer=None):
+ """
+    Check if the given RA type is configured in the cluster
+ """
+ out = get_stdout_or_raise_error("crm_mon -1rR", remote=peer)
+ return re.search(ra_type, out) is not None
+
+
+def is_resource_running(ra_type, peer=None):
+ """
+    Check if the given RA type is running
+ """
+ out = get_stdout_or_raise_error("crm_mon -1rR", remote=peer)
+ patt = f"\({ra_type}\):\s*Started"
+ return re.search(patt, out) is not None
+
+
+def is_dlm_configured(peer=None):
"""
Check if dlm configured
"""
- from . import xmlutil
- return xmlutil.CrmMonXmlParser.is_resource_configured(constants.DLM_CONTROLD_RA)
+ return has_resource_configured(constants.DLM_CONTROLD_RA, peer=peer)
-def is_quorate():
+def is_quorate(peer=None):
"""
Check if cluster is quorated
"""
- out = get_stdout_or_raise_error("corosync-quorumtool -s", success_val_list=[0, 2])
+ out = get_stdout_or_raise_error("corosync-quorumtool -s", remote=peer, success_val_list=[0, 2])
res = re.search(r'Quorate:\s+(.*)', out)
if res:
return res.group(1) == "Yes"
@@ -3012,14 +3027,16 @@ def get_pcmk_delay_max(two_node_without_qdevice=False):
return 0
-def get_property(name):
+def get_property(name, property_type="crm_config", peer=None):
"""
Get cluster properties
"""
- cib_path = os.getenv('CIB_file', constants.CIB_RAW_FILE)
- cmd = "CIB_file={} crm configure get_property {}".format(cib_path, name)
- rc, stdout, _ = get_stdout_stderr(cmd)
- return stdout if rc == 0 else None
+ if property_type == "crm_config":
+ cib_path = os.getenv('CIB_file', constants.CIB_RAW_FILE)
+ cmd = "CIB_file={} sudo --preserve-env=CIB_file crm configure get_property {}".format(cib_path, name)
+ else:
+ cmd = "sudo crm_attribute -t {} -n {} -Gq".format(property_type, name)
+ return get_stdout_or_raise_error(cmd, remote=peer, no_raise=True)
def set_property(**kwargs):
@@ -3145,7 +3162,7 @@ def read_from_file(infile):
return to_ascii(data)
-def check_function_with_timeout(check_function, wait_timeout=30, interval=1):
+def check_function_with_timeout(check_function, wait_timeout=30, interval=1, *args, **kwargs):
"""
Run check_function in a loop
Return when check_function is true
@@ -3154,7 +3171,7 @@ def check_function_with_timeout(check_function, wait_timeout=30, interval=1):
current_time = int(time.time())
timeout = current_time + wait_timeout
while current_time <= timeout:
- if check_function():
+ if check_function(*args, **kwargs):
return
time.sleep(interval)
current_time = int(time.time())
diff --git a/test/features/bootstrap_bugs.feature b/test/features/bootstrap_bugs.feature
index 8775ce32d2..97b1085ab9 100644
--- a/test/features/bootstrap_bugs.feature
+++ b/test/features/bootstrap_bugs.feature
@@ -131,3 +131,25 @@ Feature: Regression test for bootstrap bugs
Then Service "corosync" is "started" on "hanode1"
When Run "crm cluster stop" on "hanode1"
Then Service "corosync" is "stopped" on "hanode1"
+
+ @clean
+  Scenario: Can't stop all nodes' cluster service when local node's service is down (bsc#1213889)
+ Given Cluster service is "stopped" on "hanode1"
+ And Cluster service is "stopped" on "hanode2"
+ When Run "crm cluster init -y" on "hanode1"
+ Then Cluster service is "started" on "hanode1"
+ When Run "crm cluster join -c hanode1 -y" on "hanode2"
+ Then Cluster service is "started" on "hanode2"
+ When Wait for DC
+ When Wait "10" seconds
+ Then Online nodes are "hanode1 hanode2"
+ # Add more operations
+ When Run "crm node standby hanode1" on "hanode1"
+ When Run "crm node online hanode1" on "hanode1"
+ When Run "crm node standby hanode2" on "hanode1"
+ When Run "crm node online hanode2" on "hanode1"
+ When Wait "10" seconds
+ When Run "crm cluster stop" on "hanode1"
+ And Run "crm cluster stop --all" on "hanode1"
+ Then Cluster service is "stopped" on "hanode1"
+ And Cluster service is "stopped" on "hanode2"
diff --git a/test/testcases/confbasic-xml.exp b/test/testcases/confbasic-xml.exp
index 20892dcdb6..b934d5bb3b 100644
--- a/test/testcases/confbasic-xml.exp
+++ b/test/testcases/confbasic-xml.exp
@@ -67,7 +67,7 @@
-
+
diff --git a/test/testcases/confbasic.exp b/test/testcases/confbasic.exp
index 5fc2dff519..ad15b963ad 100644
--- a/test/testcases/confbasic.exp
+++ b/test/testcases/confbasic.exp
@@ -89,7 +89,7 @@ primitive d2 Delay \
params mondelay=45 \
op start timeout=60s interval=0s \
op stop timeout=60s interval=0s \
- op monitor timeout=30s interval=10s \
+ op monitor timeout=40s interval=10s \
op monitor role=Started interval=60s timeout=30s
primitive d3 ocf:pacemaker:Dummy \
op monitor timeout=20s interval=10s \
diff --git a/test/testcases/file.exp b/test/testcases/file.exp
index dce48de52e..e78b5b8655 100644
--- a/test/testcases/file.exp
+++ b/test/testcases/file.exp
@@ -11,9 +11,9 @@ primitive p1 ocf:pacemaker:Dummy \
op stop timeout=20s interval=0s
primitive p2 Delay \
params startdelay=2 mondelay=2 stopdelay=2 \
- op monitor timeout=30s interval=10s \
+ op monitor timeout=40s interval=10s \
op start timeout=30s interval=0s \
- op stop timeout=30s interval=0s
+ op stop timeout=40s interval=0s
primitive p3 ocf:pacemaker:Dummy \
op monitor timeout=20s interval=10s \
op start timeout=20s interval=0s \
@@ -56,9 +56,9 @@ primitive p0 ocf:pacemaker:Dummy \
op stop timeout=20s interval=0s
primitive p2 Delay \
params startdelay=2 mondelay=2 stopdelay=2 \
- op monitor timeout=30s interval=10s \
+ op monitor timeout=40s interval=10s \
op start timeout=30s interval=0s \
- op stop timeout=30s interval=0s
+ op stop timeout=40s interval=0s
primitive p3 ocf:pacemaker:Dummy \
op monitor timeout=20s interval=10s \
op start timeout=20s interval=0s \
diff --git a/test/testcases/resource.exp b/test/testcases/resource.exp
index 977b9b5c2f..114939413d 100644
--- a/test/testcases/resource.exp
+++ b/test/testcases/resource.exp
@@ -133,9 +133,9 @@ resource p0 is NOT running
-
+
-
+
@@ -167,9 +167,9 @@ resource p0 is NOT running
-
+
-
+
diff --git a/test/unittests/test_ui_cluster.py b/test/unittests/test_ui_cluster.py
index 66463777be..17da24771a 100644
--- a/test/unittests/test_ui_cluster.py
+++ b/test/unittests/test_ui_cluster.py
@@ -79,51 +79,91 @@ def test_do_start(self, mock_parse_nodes, mock_active, mock_start, mock_qdevice_
mock_qdevice_configured.assert_called_once_with()
mock_info.assert_called_once_with("Cluster services started on node1")
- @mock.patch('logging.Logger.info')
- @mock.patch('crmsh.utils.service_is_active')
+ @mock.patch('crmsh.ui_cluster.Cluster._wait_for_dc')
+ @mock.patch('crmsh.ui_cluster.Cluster._node_ready_to_stop_cluster_service')
@mock.patch('crmsh.ui_cluster.parse_option_for_nodes')
- def test_do_stop_already_stopped(self, mock_parse_nodes, mock_active, mock_info):
+ def test_do_stop_return(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc):
+ mock_parse_nodes.return_value = ["node1", "node2"]
+ mock_node_ready_to_stop_cluster_service.side_effect = [False, False]
+
context_inst = mock.Mock()
- mock_parse_nodes.return_value = ["node1"]
- mock_active.side_effect = [False, False]
- self.ui_cluster_inst.do_stop(context_inst, "node1")
- mock_active.assert_has_calls([
- mock.call("corosync.service", remote_addr="node1"),
- mock.call("sbd.service", remote_addr="node1")
- ])
- mock_info.assert_called_once_with("Cluster services already stopped on node1")
+ self.ui_cluster_inst.do_stop(context_inst, "node1", "node2")
+
+ mock_parse_nodes.assert_called_once_with(context_inst, "node1", "node2")
+ mock_node_ready_to_stop_cluster_service.assert_has_calls([mock.call("node1"), mock.call("node2")])
+ mock_dc.assert_not_called()
@mock.patch('logging.Logger.debug')
@mock.patch('logging.Logger.info')
- @mock.patch('crmsh.utils.stop_service')
- @mock.patch('crmsh.utils.set_dlm_option')
- @mock.patch('crmsh.utils.is_quorate')
- @mock.patch('crmsh.utils.is_dlm_running')
- @mock.patch('crmsh.utils.get_dc')
- @mock.patch('crmsh.utils.check_function_with_timeout')
- @mock.patch('crmsh.utils.get_property')
@mock.patch('crmsh.utils.service_is_active')
+ @mock.patch('crmsh.utils.stop_service')
+ @mock.patch('crmsh.ui_cluster.Cluster._set_dlm')
+ @mock.patch('crmsh.ui_cluster.Cluster._wait_for_dc')
+ @mock.patch('crmsh.ui_cluster.Cluster._node_ready_to_stop_cluster_service')
@mock.patch('crmsh.ui_cluster.parse_option_for_nodes')
- def test_do_stop(self, mock_parse_nodes, mock_active, mock_get_property, mock_check, mock_get_dc, mock_dlm_running, mock_is_quorate, mock_set_dlm, mock_stop, mock_info, mock_debug):
+ def test_do_stop(self, mock_parse_nodes, mock_node_ready_to_stop_cluster_service, mock_dc,
+ mock_set_dlm, mock_stop, mock_is_active, mock_info, mock_debug):
+ mock_parse_nodes.return_value = ["node1", "node2"]
+ mock_node_ready_to_stop_cluster_service.side_effect = [True, False]
+ mock_stop.side_effect = [["node1"], ["node1"], ["node1"]]
+ mock_is_active.return_value = True
+
context_inst = mock.Mock()
- mock_parse_nodes.return_value = ["node1"]
- mock_active.side_effect = [True, True, True]
- mock_dlm_running.return_value = True
- mock_is_quorate.return_value = False
- mock_get_property.return_value = "20s"
+ self.ui_cluster_inst.do_stop(context_inst, "node1", "node2")
- self.ui_cluster_inst.do_stop(context_inst, "node1")
+ mock_parse_nodes.assert_called_once_with(context_inst, "node1", "node2")
+ mock_node_ready_to_stop_cluster_service.assert_has_calls([mock.call("node1"), mock.call("node2")])
+ mock_debug.assert_called_once_with("stop node list: ['node1']")
+ mock_dc.assert_called_once_with("node1")
+ mock_set_dlm.assert_called_once_with("node1")
+ mock_stop.assert_has_calls([
+ mock.call("pacemaker", node_list=["node1"]),
+ mock.call("corosync-qdevice.service", node_list=["node1"]),
+ mock.call("corosync", node_list=["node1"]),
+ ])
+ mock_info.assert_called_once_with("The cluster stack stopped on node1")
- mock_active.assert_has_calls([
+ @mock.patch('logging.Logger.info')
+ @mock.patch('crmsh.utils.stop_service')
+ @mock.patch('crmsh.utils.service_is_active')
+ def test_node_ready_to_stop_cluster_service_corosync(self, mock_is_active, mock_stop, mock_info):
+ mock_is_active.side_effect = [False, True, False]
+ res = self.ui_cluster_inst._node_ready_to_stop_cluster_service("node1")
+ assert res is False
+ mock_is_active.assert_has_calls([
mock.call("corosync.service", remote_addr="node1"),
+ mock.call("sbd.service", remote_addr="node1"),
mock.call("pacemaker.service", remote_addr="node1"),
- mock.call("corosync-qdevice.service")
])
- mock_stop.assert_has_calls([
- mock.call("pacemaker", node_list=["node1"]),
- mock.call("corosync-qdevice.service", node_list=["node1"]),
- mock.call("corosync", node_list=["node1"])
+ mock_stop.assert_called_once_with("corosync", remote_addr="node1")
+ mock_info.assert_called_once_with("The cluster stack stopped on node1")
+
+ @mock.patch('logging.Logger.info')
+ @mock.patch('crmsh.utils.stop_service')
+ @mock.patch('crmsh.utils.service_is_active')
+ def test_node_ready_to_stop_cluster_service_pacemaker(self, mock_is_active, mock_stop, mock_info):
+ mock_is_active.side_effect = [True, True, False]
+ res = self.ui_cluster_inst._node_ready_to_stop_cluster_service("node1")
+ assert res is False
+ mock_is_active.assert_has_calls([
+ mock.call("corosync.service", remote_addr="node1"),
+ mock.call("sbd.service", remote_addr="node1"),
+ mock.call("pacemaker.service", remote_addr="node1"),
+ ])
+ mock_stop.assert_called_once_with("corosync", remote_addr="node1")
+ mock_info.assert_called_once_with("The cluster stack stopped on node1")
+
+ @mock.patch('logging.Logger.info')
+ @mock.patch('crmsh.utils.stop_service')
+ @mock.patch('crmsh.utils.service_is_active')
+ def test_node_ready_to_stop_cluster_service(self, mock_is_active, mock_stop, mock_info):
+ mock_is_active.side_effect = [True, True, True]
+ res = self.ui_cluster_inst._node_ready_to_stop_cluster_service("node1")
+ assert res is True
+ mock_is_active.assert_has_calls([
+ mock.call("corosync.service", remote_addr="node1"),
+ mock.call("sbd.service", remote_addr="node1"),
+ mock.call("pacemaker.service", remote_addr="node1"),
])
- mock_info.assert_called_once_with("Cluster services stopped on node1")
- mock_debug.assert_called_once_with("Quorum is lost; Set enable_quorum_fencing=0 and enable_quorum_lockspace=0 for dlm")
- mock_check.assert_called_once_with(mock_get_dc, wait_timeout=25)
+ mock_info.assert_not_called()
+ mock_stop.assert_not_called()
diff --git a/test/unittests/test_utils.py b/test/unittests/test_utils.py
index 43065d3ad3..f30ac976c2 100644
--- a/test/unittests/test_utils.py
+++ b/test/unittests/test_utils.py
@@ -1500,7 +1500,7 @@ def test_get_dlm_option_dict(mock_run):
"key1": "value1",
"key2": "value2"
}
- mock_run.assert_called_once_with("dlm_tool dump_config")
+ mock_run.assert_called_once_with("dlm_tool dump_config", remote=None)
@mock.patch('crmsh.utils.get_dlm_option_dict')
@@ -1522,14 +1522,14 @@ def test_set_dlm_option(mock_get_dict, mock_run):
"key2": "value2"
}
utils.set_dlm_option(key2="test")
- mock_run.assert_called_once_with('dlm_tool set_config "key2=test"')
+ mock_run.assert_called_once_with('dlm_tool set_config "key2=test"', remote=None)
-@mock.patch('crmsh.xmlutil.CrmMonXmlParser.is_resource_configured')
+@mock.patch('crmsh.utils.has_resource_configured')
def test_is_dlm_configured(mock_configured):
mock_configured.return_value = True
assert utils.is_dlm_configured() is True
- mock_configured.assert_called_once_with(constants.DLM_CONTROLD_RA)
+ mock_configured.assert_called_once_with(constants.DLM_CONTROLD_RA, peer=None)
@mock.patch('crmsh.utils.get_stdout_or_raise_error')
@@ -1538,7 +1538,7 @@ def test_is_quorate_exception(mock_run):
with pytest.raises(ValueError) as err:
utils.is_quorate()
assert str(err.value) == "Failed to get quorate status from corosync-quorumtool"
- mock_run.assert_called_once_with("corosync-quorumtool -s", success_val_list=[0, 2])
+ mock_run.assert_called_once_with("corosync-quorumtool -s", remote=None, success_val_list=[0, 2])
@mock.patch('crmsh.utils.get_stdout_or_raise_error')
@@ -1548,7 +1548,7 @@ def test_is_quorate(mock_run):
Quorate: Yes
"""
assert utils.is_quorate() is True
- mock_run.assert_called_once_with("corosync-quorumtool -s", success_val_list=[0, 2])
+ mock_run.assert_called_once_with("corosync-quorumtool -s", remote=None, success_val_list=[0, 2])
@mock.patch('crmsh.utils.etree.fromstring')
@@ -1617,12 +1617,12 @@ def test_list_cluster_nodes(mock_run, mock_env, mock_isfile, mock_file2elem):
@mock.patch('os.getenv')
-@mock.patch('crmsh.utils.get_stdout_stderr')
+@mock.patch('crmsh.utils.get_stdout_or_raise_error')
def test_get_property(mock_run, mock_env):
- mock_run.return_value = (0, "data", None)
+ mock_run.return_value = "data"
mock_env.return_value = "cib.xml"
assert utils.get_property("no-quorum-policy") == "data"
- mock_run.assert_called_once_with("CIB_file=cib.xml crm configure get_property no-quorum-policy")
+ mock_run.assert_called_once_with("CIB_file=cib.xml sudo --preserve-env=CIB_file crm configure get_property no-quorum-policy", remote=None, no_raise=True)
@mock.patch('crmsh.utils.get_stdout_or_raise_error')