From 882a059eaa808df118740b61fa7352480997b78e Mon Sep 17 00:00:00 2001 From: Shivaprasad Ashok Metimath Date: Wed, 13 Mar 2024 06:30:20 +0000 Subject: [PATCH] Addressing minor changes identified Prereq, xname_errs removal removing the unused func: do_nodes_power_off and its testcases --- sat/apiclient/pcs.py | 42 ++------- sat/cli/bootsys/power.py | 49 ---------- tests/cli/bootsys/test_power.py | 153 +------------------------------- 3 files changed, 6 insertions(+), 238 deletions(-) diff --git a/sat/apiclient/pcs.py b/sat/apiclient/pcs.py index 59edcc67..03c6c392 100644 --- a/sat/apiclient/pcs.py +++ b/sat/apiclient/pcs.py @@ -33,40 +33,6 @@ class PCSError(APIError): """An error occurred in PCS.""" - def __init__(self, message, xname_errs=None): - """Create a new PCSError with the given message and info about the failing xnames. - - Args: - message (str): the error message - xname_errs (list): a list of dictionaries representing the failures for - the individual components that failed. Each dict should have the - following keys: - e: the error code - err_msg: the error message - xname: the actual xname which failed - """ - self.message = message - self.xname_errs = xname_errs if xname_errs is not None else [] - self.xnames = [xname_err['xname'] for xname_err in self.xname_errs - if 'xname' in xname_err] - - def __str__(self): - """Convert to str.""" - if not self.xname_errs: - return self.message - else: - # A mapping from a tuple of (err_code, err_msg) to a list of xnames - # with that combination of err_code and err_msg. - xnames_by_err = defaultdict(list) - for xname_err in self.xname_errs: - xnames_by_err[(xname_err.get('e'), xname_err.get('err_msg'))].append(xname_err.get('xname')) - - xname_err_summary = '\n'.join([f'xname(s) ({", ".join(xnames)}) failed with ' - f'e={err_info[0]} and err_msg="{err_info[1]}"' - for err_info, xnames in xnames_by_err.items()]) - - return f'{self.message}\n{xname_err_summary}' - class PCSClient(APIGatewayClient): """Client for the Power Control Service.""" @@ -81,7 +47,6 @@ def set_xnames_power_state(self, xnames, power_state, force=False, recursive=Fal power_state (str): the desired power state. Either "on" or "off". force (bool): if True, disable checks and force the power operation. recursive (bool): if True, power on component and its descendants. - prereq (bool): if True, power on component and its ancestors. Returns: None @@ -111,6 +76,9 @@ def set_xnames_power_state(self, xnames, power_state, force=False, recursive=Fal try: for xname in xnames: xname_instance = XName(xname) + # Current if block handles to fetch children of chassis & computemodule only. + # TO DO: It will have to be updated to handle any other component type i/p + # INFO: current sat swap usually is executed for computemodule only if xname_instance.get_type() == 'Chassis': target_xnames.update( hsm_client.query_components( @@ -134,8 +102,8 @@ def set_xnames_power_state(self, xnames, power_state, force=False, recursive=Fal try: self.post('transitions', json=params).json() except APIError as err: - raise PCSError(f'Power {power_state} operation failed for xname(s).', - xname_errs=xnames) from err + raise PCSError(f'Power {power_state} operation failed for xname(s): ' + f'{", ".join(target_xnames)}.') from err def get_xnames_power_state(self, xnames): """Get the power state of the given xnames from PCS. diff --git a/sat/cli/bootsys/power.py b/sat/cli/bootsys/power.py index 461a08f3..a2aaf423 100644 --- a/sat/cli/bootsys/power.py +++ b/sat/cli/bootsys/power.py @@ -113,52 +113,3 @@ def member_has_completed(self, member): return False return current_state == self.power_state - - -def do_nodes_power_off(timeout): - """Ensure the compute and application nodes (UANs) are powered off. - - Args: - timeout (int): the timeout for waiting for nodes to reach powered off - state according to PCS after turning off their power. - - Returns: - A tuple of: - timed_out_nodes: a set of nodes that timed out waiting to reach - power state 'off' according to pcs. - failed_nodes: a set of nodes that failed to power off with pcs - """ - inf = engine() - on_nodes = set(get_nodes_by_role_and_state('compute', 'on') + - get_nodes_by_role_and_state('application', 'on')) - num_on_nodes = len(on_nodes) - - if not on_nodes: - # All nodes are already off - return set(), set() - - LOGGER.info(f'Forcing power off of {num_on_nodes} compute or application ' - f'{inf.plural("node")} still powered on: {", ".join(on_nodes)}') - - wait_nodes = on_nodes - failed_nodes = set() - pcs_client = PCSClient(SATSession()) - try: - pcs_client.set_xnames_power_state(list(on_nodes), 'off', force=True) - except PCSError as err: - LOGGER.warning(err) - if err.xnames: - failed_nodes = set(err.xnames) - # Only wait on the nodes that did not have an error powering off - wait_nodes = set(on_nodes) - failed_nodes - else: - # This probably indicates all nodes failed to be powered off - return set(), on_nodes - - num_wait_nodes = len(wait_nodes) - LOGGER.info(f'Waiting {timeout} seconds until {num_wait_nodes} {inf.plural("node", num_wait_nodes)} ' - f'reach powered off state according to PCS.') - - waiter = PCSPowerWaiter(wait_nodes, 'off', timeout) - timed_out_nodes = waiter.wait_for_completion() - return timed_out_nodes, failed_nodes diff --git a/tests/cli/bootsys/test_power.py b/tests/cli/bootsys/test_power.py index 1f54b713..abbd750d 100644 --- a/tests/cli/bootsys/test_power.py +++ b/tests/cli/bootsys/test_power.py @@ -29,8 +29,7 @@ from unittest.mock import patch from sat.apiclient import APIError -from sat.cli.bootsys.power import (PCSError, PCSPowerWaiter, - do_nodes_power_off, +from sat.cli.bootsys.power import (PCSPowerWaiter, get_nodes_by_role_and_state) from tests.common import ExtendedTestCase @@ -96,156 +95,6 @@ def test_member_has_completed_api_error(self): self.assert_in_element(f'Failed to query power state: {api_err_msg}', cm.output) -class TestDoNodesPowerOff(ExtendedTestCase): - """Test the do_nodes_power_off function.""" - - def setUp(self): - """Set up some mocks.""" - self.mock_pcs_client = patch('sat.cli.bootsys.power.PCSClient').start().return_value - self.mock_sat_session = patch('sat.cli.bootsys.power.SATSession').start() - - self.timeout = 10 # does not actually affect duration; just for asserts - self.compute_nodes = ['x1000c0s0b0n0', 'x1000c0s0b0n1'] - self.application_nodes = ['x3000c0s24b1n0', 'x3000c0s24b2n0'] - self.all_nodes = set(self.compute_nodes + self.application_nodes) - self.timed_out_nodes = set() - - def mock_get_nodes(role, _): - if role == 'compute': - return self.compute_nodes - elif role == 'application': - return self.application_nodes - else: - return [] - - self.mock_get_nodes = patch('sat.cli.bootsys.power.get_nodes_by_role_and_state', - mock_get_nodes).start() - - self.mock_pcs_waiter_cls = patch('sat.cli.bootsys.power.PCSPowerWaiter').start() - self.mock_pcs_waiter = self.mock_pcs_waiter_cls.return_value - self.mock_pcs_waiter.wait_for_completion.return_value = self.timed_out_nodes - - self.mock_print = patch('builtins.print').start() - - def tearDown(self): - """Stop all patches.""" - patch.stopall() - - def assert_log_calls(self, logs, num_wait=None, include_wait_call=True): - """Assert that the expected logging calls are made. - - Args: - logs (object): the recording object returned from the assertLogs() - context manager. has two attributes: `output` and `records` - num_wait (int): the number of nodes waited on. defaults to - len(self.all_nodes) - include_wait_call (bool): whether to include a print statement - for a wait. - """ - num_wait = num_wait if num_wait is not None else len(self.all_nodes) - calls = [f'Forcing power off of {len(self.all_nodes)} compute or application ' - f'nodes still powered on: {", ".join(self.all_nodes)}'] - if include_wait_call: - calls.append(f'Waiting {self.timeout} seconds until {num_wait} nodes ' - f'reach powered off state according to PCS.') - for c in calls: - self.assert_in_element(c, logs.output) - - def assert_pcs_client_call(self): - """Assert the call is made to the PCSClient to power off nodes.""" - self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( - list(self.all_nodes), 'off', force=True - ) - - def test_do_nodes_power_off_already_off(self): - """Test do_nodes_power_off when all nodes are already off.""" - self.compute_nodes = [] - self.application_nodes = [] - timed_out, failed = do_nodes_power_off(self.timeout) - - self.assertEqual(set(), timed_out) - self.assertEqual(set(), failed) - - self.mock_pcs_client.set_xnames_power_state.assert_not_called() - self.mock_pcs_waiter_cls.assert_not_called() - - def test_do_nodes_power_off_success(self): - """Test do_nodes_power_off in the successful case.""" - with self.assertLogs(level=logging.INFO) as cm: - timed_out, failed = do_nodes_power_off(self.timeout) - - self.assert_pcs_client_call() - self.assertEqual(set(), timed_out) - self.assertEqual(set(), failed) - self.mock_pcs_waiter_cls.assert_called_once_with( - self.all_nodes, 'off', self.timeout - ) - self.mock_pcs_waiter.wait_for_completion.assert_called_once_with() - self.assert_log_calls(cm) - - def test_do_nodes_power_off_one_failed(self): - """Test do_nodes_power_off when one fails to power off and the rest succeed.""" - expected_failed = {self.compute_nodes[0]} - failed_xname_errs = [ - { - 'e': 1, - 'err_msg': 'NodeBMC unreachable', - 'xname': self.compute_nodes[0] - } - ] - pcs_err_msg = 'Power off operation failed.' - pcs_err = PCSError(pcs_err_msg, xname_errs=failed_xname_errs) - self.mock_pcs_client.set_xnames_power_state.side_effect = pcs_err - - with self.assertLogs(level=logging.INFO) as cm: - timed_out, failed = do_nodes_power_off(self.timeout) - - self.assert_in_element(f'{pcs_err_msg}\n' - f'xname(s) ({self.compute_nodes[0]}) failed with ' - f'e={failed_xname_errs[0]["e"]} and ' - f'err_msg="{failed_xname_errs[0]["err_msg"]}"', - cm.output) - self.assert_pcs_client_call() - self.assertEqual(set(), timed_out) - self.assertEqual(expected_failed, failed) - self.mock_pcs_waiter_cls.assert_called_once_with( - self.all_nodes - expected_failed, 'off', self.timeout - ) - self.mock_pcs_waiter.wait_for_completion.assert_called_once_with() - self.assert_log_calls(cm, num_wait=len(self.all_nodes - expected_failed)) - - def test_do_nodes_power_off_pcs_failed(self): - """Test do_nodes_power_off when the PCS power off request fails.""" - pcs_err_msg = 'PCS did not respond' - pcs_err = PCSError(pcs_err_msg) - expected_failed = self.all_nodes - self.mock_pcs_client.set_xnames_power_state.side_effect = pcs_err - - with self.assertLogs(level=logging.INFO) as cm: - timed_out, failed = do_nodes_power_off(self.timeout) - - self.assert_in_element(pcs_err_msg, cm.output) - self.assert_pcs_client_call() - self.assertEqual(set(), timed_out) - self.assertEqual(expected_failed, failed) - self.mock_pcs_waiter_cls.assert_not_called() - self.assert_log_calls(cm, include_wait_call=False) - - def test_do_nodes_power_off_one_timed_out(self): - """Test do_node_power_off when one node times out.""" - expected_timed_out = {self.compute_nodes[0]} - self.mock_pcs_waiter.wait_for_completion.return_value = expected_timed_out - - with self.assertLogs(level=logging.INFO) as cm: - timed_out, failed = do_nodes_power_off(self.timeout) - - self.assert_pcs_client_call() - self.assertEqual(expected_timed_out, timed_out) - self.assertEqual(set(), failed) - self.mock_pcs_waiter_cls.assert_called_once_with(self.all_nodes, 'off', self.timeout) - self.assert_log_calls(cm) - - class TestGetNodesByRoleAndState(unittest.TestCase): """Test the get_nodes_by_role_and_state function."""