Skip to content

Commit

Permalink
Dev: ui_corosync: reject to remove a link if removing it breaks the c…
Browse files Browse the repository at this point in the history
…luster (jsc#PED-8083)
  • Loading branch information
nicholasyang2022 committed Sep 6, 2024
1 parent b1061b4 commit 87ce212
Show file tree
Hide file tree
Showing 2 changed files with 244 additions and 1 deletion.
32 changes: 31 additions & 1 deletion crmsh/ui_corosync.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import dataclasses
import ipaddress
import json
import re
import subprocess
import sys
import typing
Expand All @@ -13,6 +14,7 @@
from . import corosync
from . import log
from . import constants
from .prun import prun
from .service_manager import ServiceManager

logger = log.setup_logger(__name__)
Expand Down Expand Up @@ -142,7 +144,6 @@ def do_show(self, context):
continue
print(f' {name}:\t{value}')
print('')
# TODO: show link status

def do_update(self, context, *argv):
lm = self.__load_and_validate_links()
Expand Down Expand Up @@ -221,12 +222,41 @@ def do_remove(self, context, linknumber: str):
raise ValueError(f'Invalid linknumber: {linknumber}')
linknumber = int(linknumber)
lm = self.__load_and_validate_links()
self._check_link_removable(lm, linknumber)
self.__save_changed_config(
lm,
lm.remove_link(linknumber),
reload=True,
)

@staticmethod
def _check_link_removable(lm: corosync.LinkManager, linknumber):
if not ServiceManager().service_is_active("corosync.service"):
return
nodes = utils.list_cluster_nodes()
for host, result in prun.prun({node: 'corosync-cmapctl -m stats' for node in nodes}).items():
# for each node <host> in the cluster
match result:
case prun.SSHError() as e:
raise ValueError(str(e))
case prun.ProcessResult() as x:
if x.returncode != 0:
raise ValueError(f'{host}: {x.returncode}, {sh.Utils.decode_str(x.stderr)}')
stdout = sh.Utils.decode_str(x.stdout)
connected_nodes = {
nodeid
for nodeid, ln, connected in re.findall(
'^stats\\.knet\\.node([0-9]+)\\.link([0-9]+)\\.connected\\W+.*=\\s*([0-9]+)$',
stdout, re.ASCII | re.MULTILINE,
) if connected == '1' # only connected node
and ln != str(linknumber) # filter out the link to be removed
}
# the number of nodes connected to <host> should be equal to the total number of nodes in the cluster
if len(connected_nodes) < len(next(link for link in lm.links() if link is not None).nodes):
# or <host> will lose quorum
raise ValueError(f'Cannot remove link {linknumber}. Removing this link makes the cluster to lose quorum.')


@staticmethod
def _validate_node_addresses(node_addrs: typing.Mapping[str, str]):
node_interfaces = {
Expand Down
213 changes: 213 additions & 0 deletions test/unittests/test_ui_corosync.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import unittest
from unittest import mock

from crmsh import ui_corosync
from crmsh import corosync
from crmsh.ui_corosync import LinkArgumentParser
from crmsh.corosync import LinkManager
from crmsh.prun import prun


class TestLinkArgumentParser(unittest.TestCase):
Expand Down Expand Up @@ -59,3 +64,211 @@ def test_garbage_inputs(self):
LinkArgumentParser().parse(True, True, ['0', 'node1=192.0.2.100', 'options', 'foo'])
with self.assertRaises(LinkArgumentParser.SyntaxException):
LinkArgumentParser().parse(True, True, ['0', 'node1=192.0.2.100', 'options', 'foo=bar', 'foo'])


@mock.patch('crmsh.prun.prun.prun')
@mock.patch('crmsh.utils.list_cluster_nodes')
@mock.patch('crmsh.service_manager.ServiceManager.service_is_active')
class TestCheckLinkRemovable(unittest.TestCase):
def setUp(self):
self.mock_lm = mock.Mock(LinkManager)

def test_corosync_not_running(self, mock_sia, mock_lcn, mock_prun):
mock_sia.return_value = False
ui_corosync.Link._check_link_removable(self.mock_lm, 0)
mock_sia.assert_called_once()
mock_lcn.assert_not_called()
mock_prun.assert_not_called()
self.mock_lm.links.assert_not_called()

def test_prun_failure(self, mock_sia, mock_lcn, mock_prun):
mock_sia.return_value = True
mock_lcn.return_value = ['node1', 'node2', 'node3']
mock_prun.return_value = {
'node2': prun.SSHError('root', 'node2', 'msg'),
}
with self.assertRaises(ValueError):
ui_corosync.Link._check_link_removable(self.mock_lm, 0)
mock_prun.assert_called_once()
self.mock_lm.links.assert_not_called()

def test_cmapctl_failure(self, mock_sia, mock_lcn, mock_prun):
mock_sia.return_value = True
mock_lcn.return_value = ['node1', 'node2', 'node3']
mock_prun.return_value = {
'node2': prun.ProcessResult(1, b'', b'msg'),
}
with self.assertRaises(ValueError):
ui_corosync.Link._check_link_removable(self.mock_lm, 0)
mock_prun.assert_called_once()
self.mock_lm.links.assert_not_called()

def test_all_links_connected(self, mock_sia, mock_lcn, mock_prun):
mock_sia.return_value = True
mock_lcn.return_value = ['node1', 'node2', 'node3']
mock_prun.return_value = {
'node1': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 1\n'
),
'node2': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 1\n'
),
'node3': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 1\n'
),
}
self.mock_lm.links.return_value = [
corosync.Link(0, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
corosync.Link(1, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
0, 0, 0, 0, 0, 0,
]
ui_corosync.Link._check_link_removable(self.mock_lm, 0)

def test_one_node_pair_disconnected(self, mock_sia, mock_lcn, mock_prun):
mock_sia.return_value = True
mock_lcn.return_value = ['node1', 'node2', 'node3']
mock_prun.return_value = {
'node1': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 1\n'
),
'node2': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 0\n'
),
'node3': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 0\n'
b'stats.knet.node3.link1.connected (u8) = 1\n'
),
}
self.mock_lm.links.return_value = [
corosync.Link(0, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
corosync.Link(1, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
0, 0, 0, 0, 0, 0,
]
with self.assertRaises(ValueError):
ui_corosync.Link._check_link_removable(self.mock_lm, 0)

def test_pair_connected(self, mock_sia, mock_lcn, mock_prun):
mock_sia.return_value = True
mock_lcn.return_value = ['node1', 'node2', 'node3']
mock_prun.return_value = {
'node1': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 0\n'
b'stats.knet.node1.link2.connected (u8) = 1\n'
b'stats.knet.node2.link2.connected (u8) = 0\n'
b'stats.knet.node3.link2.connected (u8) = 0\n'
b'stats.knet.node1.link3.connected (u8) = 1\n'
b'stats.knet.node2.link3.connected (u8) = 0\n'
b'stats.knet.node3.link3.connected (u8) = 1\n'
),
'node2': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 1\n'
b'stats.knet.node2.link1.connected (u8) = 1\n'
b'stats.knet.node3.link1.connected (u8) = 0\n'
b'stats.knet.node1.link2.connected (u8) = 0\n'
b'stats.knet.node2.link2.connected (u8) = 1\n'
b'stats.knet.node3.link2.connected (u8) = 1\n'
b'stats.knet.node1.link3.connected (u8) = 0\n'
b'stats.knet.node2.link3.connected (u8) = 1\n'
b'stats.knet.node3.link3.connected (u8) = 0\n'
),
'node3': prun.ProcessResult(
returncode=0, stderr=b'',
stdout=b'stats.knet.node1.link0.connected (u8) = 1\n'
b'stats.knet.node2.link0.connected (u8) = 1\n'
b'stats.knet.node3.link0.connected (u8) = 1\n'
b'stats.knet.node1.link1.connected (u8) = 0\n'
b'stats.knet.node2.link1.connected (u8) = 0\n'
b'stats.knet.node3.link1.connected (u8) = 1\n'
b'stats.knet.node1.link2.connected (u8) = 0\n'
b'stats.knet.node2.link2.connected (u8) = 1\n'
b'stats.knet.node3.link2.connected (u8) = 1\n'
b'stats.knet.node1.link3.connected (u8) = 1\n'
b'stats.knet.node2.link3.connected (u8) = 0\n'
b'stats.knet.node3.link3.connected (u8) = 1\n'
),
}
self.mock_lm.links.return_value = [
corosync.Link(0, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
corosync.Link(1, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
corosync.Link(2, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
corosync.Link(3, [
corosync.LinkNode(1, 'node1', '192.0.2.100'),
corosync.LinkNode(2, 'node2', '192.0.2.101'),
corosync.LinkNode(3, 'node3', '192.0.2.102'),
]),
0, 0, 0, 0,
]
ui_corosync.Link._check_link_removable(self.mock_lm, 0)

0 comments on commit 87ce212

Please sign in to comment.