diff --git a/CHANGELOG.md b/CHANGELOG.md index 611a65bd..bbe419dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.25.5] - 2023-09-26 + +### Added +- Added support for the Power Control Service (PCS). Functionality using CAPMC + was changed to use PCS instead. + ## [3.25.4] - 2023-09-01 ### Fixed diff --git a/sat/apiclient/pcs.py b/sat/apiclient/pcs.py new file mode 100644 index 00000000..d1d9c69c --- /dev/null +++ b/sat/apiclient/pcs.py @@ -0,0 +1,192 @@ +# +# MIT License +# +# (C) Copyright 2021-2023 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +""" +Basic client library for PCS. +""" +from collections import defaultdict + +from csm_api_client.service.gateway import APIError, APIGatewayClient +from csm_api_client.service.hsm import HSMClient + + +class PCSError(APIError): + """An error occurred in PCS.""" + + def __init__(self, message, xname_errs=None): + """Create a new PCSError with the given message and info about the failing xnames. + + Args: + message (str): the error message + xname_errs (list): a list of dictionaries representing the failures for + the individual components that failed. Each dict should have the + following keys: + e: the error code + err_msg: the error message + xname: the actual xname which failed + """ + self.message = message + self.xname_errs = xname_errs if xname_errs is not None else [] + self.xnames = [xname_err['xname'] for xname_err in self.xname_errs + if 'xname' in xname_err] + + def __str__(self): + """Convert to str.""" + if not self.xname_errs: + return self.message + else: + # A mapping from a tuple of (err_code, err_msg) to a list of xnames + # with that combination of err_code and err_msg. + xnames_by_err = defaultdict(list) + for xname_err in self.xname_errs: + xnames_by_err[(xname_err.get('e'), xname_err.get('err_msg'))].append(xname_err.get('xname')) + + xname_err_summary = '\n'.join([f'xname(s) ({", ".join(xnames)}) failed with ' + f'e={err_info[0]} and err_msg="{err_info[1]}"' + for err_info, xnames in xnames_by_err.items()]) + + return f'{self.message}\n{xname_err_summary}' + + +class PCSClient(APIGatewayClient): + """Client for the Power Control Service.""" + base_resource_path = 'power-control/v1/' + + def set_xnames_power_state(self, xnames, power_state, force=False, recursive=False, prereq=False): + """Set the power state of the given xnames. + + Args: + xnames (list): the xnames (str) to perform the power operation + against. + power_state (str): the desired power state. Either "on" or "off". + force (bool): if True, disable checks and force the power operation. + recursive (bool): if True, power on component and its descendants. + prereq (bool): if True, power on component and its ancestors. + + Returns: + None + + Raises: + ValueError: if the given `power_state` is not one of 'on' or 'off' + PCSError: if the attempt to power on/off the given xnames with PCS + fails. This exception contains more specific information about + the failure, which will be included in its __str__. + """ + allowed_states = {'on', 'off', 'soft-off', 'soft-restart', 'hard-restart', 'init', 'force-off'} + power_state = power_state.lower() + if power_state not in allowed_states: + allowed_states_str = ", ".join("\"" + state + "\"" for state in allowed_states) + raise ValueError(f'Invalid power state {power_state} given. Must be {allowed_states_str}') + + if force: + if power_state in {'off', 'soft-off'}: + power_state = 'force-off' + elif power_state == 'soft-restart': + power_state = 'hard-restart' + + target_xnames = set() + if recursive: + hsm_client = HSMClient(self.session) + + try: + target_xnames |= set( + component['XName'] + for xname in xnames + for component in hsm_client.get_node_components(ancestor=xname) + ) + except APIError as err: + raise PCSError(f'Could not retrieve descendent components for xnames: {err}') from err + if prereq: + hsm_client = HSMClient(self.session) + try: + target_xnames |= set( + component['XName'] + for component in hsm_client.get_all_components() + if any(xname.startswith(component['XName']) for xname in xnames) + ) + except APIError as err: + raise PCSError(f'Could not query ancestor components for xnames: {err}') from err + + params = { + 'operation': power_state, + 'taskDeadlineMinutes': -1, + 'location': [ + {'xname': xname} + for xname in target_xnames + ] + } + try: + self.post('transitions', json=params).json() + except APIError as err: + raise PCSError(f'Power {power_state} operation failed for xname(s).', + xname_errs=xnames) from err + + def get_xnames_power_state(self, xnames): + """Get the power state of the given xnames from PCS. + + Args: + xnames (list): the xnames (str) to get power state for. + + Returns: + dict: a dictionary whose keys are the power states and whose values + are lists of xnames in those power states. + + Raises: + PCSError: if the request to get power state fails. + """ + xnames = set(xnames) + try: + resp = self.get('power-status', params={'xname': xnames}).json().get('status') + nodes_by_power_state = defaultdict(list) + for node in resp: + nodes_by_power_state[node['powerState']].append(node['xname']) + return nodes_by_power_state + except APIError as err: + raise PCSError(f'Failed to get power state of xname(s): {", ".join(xnames)}') from err + + def get_xname_power_state(self, xname): + """Get the power state of a single xname from PCS. + + Args: + xname (str): the xname to get power state of + + Returns: + str: the power state of the node + + Raises: + PCSError: if the request to PCS fails or the expected information + is not returned by the PCS API. + """ + try: + resp = self.get('power-status', params={'xname': xname}).json().get('status') + except APIError as err: + raise PCSError(f'Failed to get power state for xname {xname}: {err}') from err + + matching_states = [node['powerState'] for node in resp + if node['xname'] == xname] + if not matching_states: + raise PCSError(f'Unable to determine power state of {xname}. Not ' + f'present in response from PCS: {resp}') + elif len(matching_states) > 1: + raise PCSError(f'Unable to determine power state of {xname}. PCS ' + f'reported multiple power states: {", ".join(matching_states)}') + return matching_states.pop() diff --git a/sat/cli/bootsys/cabinet_power.py b/sat/cli/bootsys/cabinet_power.py index d3e369ec..523a97fa 100644 --- a/sat/cli/bootsys/cabinet_power.py +++ b/sat/cli/bootsys/cabinet_power.py @@ -26,14 +26,10 @@ """ import logging -from csm_api_client.k8s import load_kube_api -from kubernetes.client import BatchV1Api -from kubernetes.config import ConfigException - -from sat.apiclient import APIError, CAPMCClient, HSMClient -from sat.cli.bootsys.power import CAPMCPowerWaiter +from sat.apiclient import APIError, HSMClient +from sat.apiclient.pcs import PCSClient +from sat.cli.bootsys.power import PCSPowerWaiter from sat.config import get_config_value -from sat.cronjob import recreate_namespaced_stuck_cronjobs from sat.hms_discovery import (HMSDiscoveryCronJob, HMSDiscoveryError, HMSDiscoveryScheduledWaiter) from sat.session import SATSession @@ -74,32 +70,32 @@ def do_air_cooled_cabinets_power_off(args): return LOGGER.info(f'Powering off {len(node_xnames)} non-management nodes in air-cooled cabinets.') - capmc_client = CAPMCClient(SATSession()) + pcs_client = PCSClient(SATSession()) try: - capmc_client.set_xnames_power_state(node_xnames, 'off', force=True) + pcs_client.set_xnames_power_state(node_xnames, 'off', force=True) except APIError as err: LOGGER.warning(f'Failed to power off all air-cooled non-management nodes: {err}') LOGGER.info(f'Waiting for {len(node_xnames)} non-management nodes in air-cooled cabinets ' f'to reach powered off state.') - capmc_waiter = CAPMCPowerWaiter(node_xnames, 'off', - get_config_value('bootsys.capmc_timeout')) - timed_out_xnames = capmc_waiter.wait_for_completion() + pcs_waiter = PCSPowerWaiter(node_xnames, 'off', + get_config_value('bootsys.pcs_timeout')) + timed_out_xnames = pcs_waiter.wait_for_completion() if timed_out_xnames: LOGGER.error(f'The following non-management nodes failed to reach the powered off ' - f'state after powering off with CAPMC: {timed_out_xnames}') + f'state after powering off with PCS: {timed_out_xnames}') raise SystemExit(1) LOGGER.info(f'All {len(node_xnames)} non-management nodes in air-cooled cabinets ' - f'reached powered off state according to CAPMC.') + f'reached powered off state according to PCS.') def get_xnames_for_power_action(hsm_client): """Get xnames of RouterModules, ComputeModules, and Chassis. This helper function gets all the xnames used in a power action (turn on or - turn off) individually since CAPMC does not support recursively powering off + turn off) individually since PCS does not support recursively powering off disabled components in Shasta v1.5. See CRAYSAT-920. Returns: @@ -131,9 +127,9 @@ def do_liquid_cooled_cabinets_power_off(args): LOGGER.info(f'Powering off all liquid-cooled chassis, compute modules, and router modules. ' f'({len(xnames_to_power_off)} components total)') - capmc_client = CAPMCClient(SATSession()) + pcs_client = PCSClient(SATSession()) try: - capmc_client.set_xnames_power_state(xnames_to_power_off, 'off') + pcs_client.set_xnames_power_state(xnames_to_power_off, 'off') except APIError as err: LOGGER.warning(f'Failed to power off all cabinets: {err}') if hasattr(err, '__cause__'): @@ -141,17 +137,17 @@ def do_liquid_cooled_cabinets_power_off(args): LOGGER.info(f'Waiting for {len(xnames_to_power_off)} components to reach ' f'powered off state.') - capmc_waiter = CAPMCPowerWaiter(xnames_to_power_off, 'off', - get_config_value('bootsys.capmc_timeout')) - timed_out_xnames = capmc_waiter.wait_for_completion() + pcs_waiter = PCSPowerWaiter(xnames_to_power_off, 'off', + get_config_value('bootsys.pcs_timeout')) + timed_out_xnames = pcs_waiter.wait_for_completion() if timed_out_xnames: LOGGER.error(f'The following components failed to reach the powered off ' - f'state after powering off with CAPMC: {timed_out_xnames}') + f'state after powering off with PCS: {timed_out_xnames}') raise SystemExit(1) LOGGER.info(f'All {len(xnames_to_power_off)} liquid-cooled chassis components reached powered off ' - f'state according to CAPMC.') + f'state according to PCS.') def do_cabinets_power_off(args): @@ -181,9 +177,9 @@ def do_cabinets_power_off(args): def do_cabinets_power_on(args): """Power on the liquid-cooled compute cabinets in the system. - Do not do this with a manual call to CAPMC. Instead, restart the + Do not do this with a manual call to PCS. Instead, restart the hms-discovery cronjob in k8s, and let it do the power on for us. Then wait - for all the compute modules of type "Mountain" to be powered on in CAPMC. + for all the compute modules of type "Mountain" to be powered on in PCS. Args: args (argparse.Namespace): The parsed bootsys arguments. @@ -221,11 +217,11 @@ def do_cabinets_power_on(args): raise SystemExit(1) # Once ComputeModules are powered on, it is possible to boot nodes with BOS. - # Suppress warnings about CAPMC state query errors because we expect the + # Suppress warnings about PCS state query errors because we expect the # compute modules to be unreachable until they are powered on. - module_waiter = CAPMCPowerWaiter(xnames_to_power_on, 'on', - get_config_value('bootsys.discovery_timeout'), - suppress_warnings=True) + module_waiter = PCSPowerWaiter(xnames_to_power_on, 'on', + get_config_value('bootsys.discovery_timeout'), + suppress_warnings=True) modules_timed_out = module_waiter.wait_for_completion() if modules_timed_out: diff --git a/sat/cli/bootsys/parser.py b/sat/cli/bootsys/parser.py index ae55994d..91ef2e68 100644 --- a/sat/cli/bootsys/parser.py +++ b/sat/cli/bootsys/parser.py @@ -33,8 +33,8 @@ TIMEOUT_SPECS = [ - TimeoutSpec('capmc', ['shutdown'], 120, - 'components reach powered off state after they are shutdown with CAPMC.'), + TimeoutSpec('pcs', ['shutdown'], 120, + 'components reach powered off state after they are shutdown with PCS.'), TimeoutSpec('discovery', ['boot'], 600, 'compute modules reach the powered on state ' 'after the HMS Discovery cronjob is resumed.'), diff --git a/sat/cli/bootsys/power.py b/sat/cli/bootsys/power.py index 8fdb14b6..461a08f3 100644 --- a/sat/cli/bootsys/power.py +++ b/sat/cli/bootsys/power.py @@ -1,7 +1,7 @@ # # MIT License # -# (C) Copyright 2020 Hewlett Packard Enterprise Development LP +# (C) Copyright 2020, 2023 Hewlett Packard Enterprise Development LP # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -22,13 +22,14 @@ # OTHER DEALINGS IN THE SOFTWARE. # """ -Support for powering off computes/UANs with CAPMC. +Support for powering off computes/UANs with PCS. """ import logging from inflect import engine -from sat.apiclient import APIError, CAPMCClient, CAPMCError, HSMClient +from sat.apiclient import APIError, HSMClient +from sat.apiclient.pcs import PCSClient, PCSError from sat.session import SATSession from sat.waiting import GroupWaiter @@ -36,7 +37,7 @@ def get_nodes_by_role_and_state(role, power_state): - """Get all the nodes matching the given role in HSM and power state in CAPMC. + """Get all the nodes matching the given role in HSM and power state in PCS. Args: role (str): the role to search for. @@ -48,17 +49,17 @@ def get_nodes_by_role_and_state(role, power_state): Raises: APIError: if there is a failure to get the needed information from HSM - or CAPMC. + or PCS. """ hsm_client = HSMClient(SATSession()) - capmc_client = CAPMCClient(SATSession()) + pcs_client = PCSClient(SATSession()) role_nodes = hsm_client.get_component_xnames({'type': 'Node', 'role': role}) LOGGER.debug('Found %s node(s) with role %s: %s', len(role_nodes), role, role_nodes) if not role_nodes: return role_nodes - nodes_by_power_state = capmc_client.get_xnames_power_state(role_nodes) + nodes_by_power_state = pcs_client.get_xnames_power_state(role_nodes) matching_nodes = nodes_by_power_state.get(power_state, []) LOGGER.debug('Found %s node(s) with role %s and power state %s: %s', len(matching_nodes), role, power_state, matching_nodes) @@ -66,11 +67,11 @@ def get_nodes_by_role_and_state(role, power_state): return matching_nodes -class CAPMCPowerWaiter(GroupWaiter): - """Waits for all members to reach the given power state in CAPMC.""" +class PCSPowerWaiter(GroupWaiter): + """Waits for all members to reach the given power state in PCS.""" def __init__(self, members, power_state, timeout, poll_interval=5, suppress_warnings=False): - """Create a new CAPMCPowerStateWaiter. + """Create a new PCSPowerStateWaiter. Args: members (list or set): the xnames to wait for. @@ -81,15 +82,15 @@ def __init__(self, members, power_state, timeout, poll_interval=5, suppress_warn suppress_warnings (bool): if True, suppress warnings when a query to get_xname_status results in an error and node(s) in undefined state. As an example, this is useful when waiting for a BMC or - node controller to be powered on since CAPMC will fail to query + node controller to be powered on since PCS will fail to query the power status until it is powered on. """ super().__init__(members, timeout, poll_interval) self.power_state = power_state - self.capmc_client = CAPMCClient(SATSession(), suppress_warnings=suppress_warnings) + self.pcs_client = PCSClient(SATSession()) def condition_name(self): - return 'CAPMC power ' + self.power_state + return 'PCS power ' + self.power_state def member_has_completed(self, member): """Return whether the member xname has reached the desired power state. @@ -99,12 +100,12 @@ def member_has_completed(self, member): Returns: bool: True if the xname has reached the desired power state - according to CAPMC. + according to PCS. """ LOGGER.debug('Checking whether xname %s has reached desired power state %s', member, self.power_state) try: - current_state = self.capmc_client.get_xname_power_state(member) + current_state = self.pcs_client.get_xname_power_state(member) except APIError as err: # When cabinets are powered off, the query will respond with 400 bad request # until components are reachable. @@ -119,13 +120,13 @@ def do_nodes_power_off(timeout): Args: timeout (int): the timeout for waiting for nodes to reach powered off - state according to CAPMC after turning off their power. + state according to PCS after turning off their power. Returns: A tuple of: timed_out_nodes: a set of nodes that timed out waiting to reach - power state 'off' according to capmc. - failed_nodes: a set of nodes that failed to power off with capmc + power state 'off' according to pcs. + failed_nodes: a set of nodes that failed to power off with pcs """ inf = engine() on_nodes = set(get_nodes_by_role_and_state('compute', 'on') + @@ -141,10 +142,10 @@ def do_nodes_power_off(timeout): wait_nodes = on_nodes failed_nodes = set() - capmc_client = CAPMCClient(SATSession()) + pcs_client = PCSClient(SATSession()) try: - capmc_client.set_xnames_power_state(list(on_nodes), 'off', force=True) - except CAPMCError as err: + pcs_client.set_xnames_power_state(list(on_nodes), 'off', force=True) + except PCSError as err: LOGGER.warning(err) if err.xnames: failed_nodes = set(err.xnames) @@ -156,8 +157,8 @@ def do_nodes_power_off(timeout): num_wait_nodes = len(wait_nodes) LOGGER.info(f'Waiting {timeout} seconds until {num_wait_nodes} {inf.plural("node", num_wait_nodes)} ' - f'reach powered off state according to CAPMC.') + f'reach powered off state according to PCS.') - waiter = CAPMCPowerWaiter(wait_nodes, 'off', timeout) + waiter = PCSPowerWaiter(wait_nodes, 'off', timeout) timed_out_nodes = waiter.wait_for_completion() return timed_out_nodes, failed_nodes diff --git a/sat/cli/swap/blade.py b/sat/cli/swap/blade.py index 199068b3..d9e5917c 100644 --- a/sat/cli/swap/blade.py +++ b/sat/cli/swap/blade.py @@ -38,7 +38,7 @@ import inflect from kubernetes.client.exceptions import ApiException -from sat.apiclient.capmc import CAPMCClient +from sat.apiclient.pcs import PCSClient from sat.cached_property import cached_property from sat.hms_discovery import ( HMSDiscoveryCronJob, @@ -159,7 +159,7 @@ def __init__(self, args): session = SATSession() self.hsm_client = HSMClient(session) - self.capmc_client = CAPMCClient(session) + self.pcs_client = PCSClient(session) @cached_property def blade_nodes(self): @@ -309,18 +309,18 @@ def disable_slot(self): @blade_swap_stage('Power off slot') def power_off_slot(self): - """Powers off a slot using CAPMC + """Powers off a slot using PCS Raises: - BladeSwapError: if there is a problem powering off the slot with CAPMC + BladeSwapError: if there is a problem powering off the slot with PCS """ if self.blade_class == 'river': # Power off nodes on the blade individually on River blades - xnames_on = self.capmc_client.get_xnames_power_state( + xnames_on = self.pcs_client.get_xnames_power_state( [node['ID'] for node in self.blade_nodes] ).get('on') if xnames_on: - self.capmc_client.set_xnames_power_state( + self.pcs_client.set_xnames_power_state( xnames_on, 'off', recursive=True, force=True, @@ -330,7 +330,7 @@ def power_off_slot(self): LOGGER.info('All nodes on River blade %s are already powered off, continuing', self.xname) else: # Power off the whole slot on Mountain - self.capmc_client.set_xnames_power_state( + self.pcs_client.set_xnames_power_state( [self.xname], 'off', recursive=True, force=True, @@ -565,7 +565,7 @@ def enable_slot(self): @blade_swap_stage('Power on slot') def power_on_slot(self): - """Power on the slot using CAPMC. + """Power on the slot using PCS. Raises: BladeSwapError: if the slot cannot be powered on @@ -573,7 +573,7 @@ def power_on_slot(self): params = {'recursive': True} if self.blade_class == 'river': params['force'] = True - self.capmc_client.set_xnames_power_state([self.xname], 'on', **params) + self.pcs_client.set_xnames_power_state([self.xname], 'on', **params) @blade_swap_stage('Enable nodes') def enable_nodes(self): diff --git a/tests/apiclient/test_pcs.py b/tests/apiclient/test_pcs.py new file mode 100644 index 00000000..5eacad70 --- /dev/null +++ b/tests/apiclient/test_pcs.py @@ -0,0 +1,99 @@ +# +# MIT License +# +# (C) Copyright 2021-2023 Hewlett Packard Enterprise Development LP +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +""" +Tests for the PCSClient class. +""" + +import unittest +from unittest.mock import MagicMock + +from csm_api_client.service.gateway import APIError + +from sat.apiclient.pcs import PCSClient, PCSError +from sat.session import SATSession + + +class TestPCSClient(unittest.TestCase): + """Tests for the PCSClient class""" + + def setUp(self): + self.power_status_retval = { + 'status': [ + { + 'xname': 'x3000c0s0b0n0', + 'powerState': 'on', + 'managementState': 'available', + 'error': None, + 'supportedPowerTransitions': [ + 'off', + 'soft-restart', + 'hard-restart', + 'force-off', + 'soft-off', + ], + 'lastUpdated': '2022-08-24T16:45:53.953811137Z', + }, + { + 'xname': 'x3000c0s0b0n1', + 'powerState': 'off', + 'managementState': 'available', + 'error': None, + 'supportedPowerTransitions': [ + 'off', + 'soft-restart', + 'hard-restart', + 'force-off', + 'soft-off', + ], + 'lastUpdated': '2022-08-24T16:45:53.953811137Z', + }, + ], + } + self.mock_session = MagicMock(autospec=SATSession) + self.mock_session.session.get.return_value.json.return_value = self.power_status_retval + self.mock_session.host = 'api-service-gw-nmn.local' + self.pcs_client = PCSClient(self.mock_session) + self.xnames = [f'x3000c0s0b0n{n}' for n in [0, 1]] + + def test_get_xname_power_state(self): + """Test getting power state for an xname""" + status = self.pcs_client.get_xname_power_state('x3000c0s0b0n0') + self.assertEqual(status, 'on') + + def test_get_xname_power_state_fails(self): + """Test error handling when PCS can't be queried""" + self.mock_session.session.get.side_effect = APIError + with self.assertRaises(PCSError): + self.pcs_client.get_xname_power_state('x3000c0s0b0n0') + + def test_get_xnames_power_state(self): + """Test getting power state for multiple xnames""" + status = self.pcs_client.get_xnames_power_state(self.xnames) + self.assertEqual(['x3000c0s0b0n0'], status['on']) + self.assertEqual(['x3000c0s0b0n1'], status['off']) + + def test_get_xnames_power_state_fails(self): + """Test error handling when getting power state for multiple xnames""" + self.mock_session.session.get.side_effect = APIError + with self.assertRaises(PCSError): + self.pcs_client.get_xnames_power_state(self.xnames) diff --git a/tests/cli/bootsys/test_cabinet_power.py b/tests/cli/bootsys/test_cabinet_power.py index 796e6f79..2da9d468 100644 --- a/tests/cli/bootsys/test_cabinet_power.py +++ b/tests/cli/bootsys/test_cabinet_power.py @@ -42,8 +42,8 @@ def setUp(self): patch_prefix = 'sat.cli.bootsys.cabinet_power' self.mock_sat_session = patch(f'{patch_prefix}.SATSession').start().return_value self.mock_hsm_client = patch(f'{patch_prefix}.HSMClient').start().return_value - self.mock_capmc_client = patch(f'{patch_prefix}.CAPMCClient').start().return_value - self.mock_capmc_waiter = patch(f'{patch_prefix}.CAPMCPowerWaiter').start().return_value + self.mock_pcs_client = patch(f'{patch_prefix}.PCSClient').start().return_value + self.mock_pcs_waiter = patch(f'{patch_prefix}.PCSPowerWaiter').start().return_value # Mock as if we have nodes in slots 1-15 as Management nodes, and the node in slot 17 not self.mock_river_nodes = [f'x3000c0s{slot}b0n0' for slot in [1, 3, 5, 7, 9, 11, 13, 15, 17]] @@ -62,7 +62,7 @@ def mock_get_component_xnames(params): return [] self.mock_hsm_client.get_component_xnames.side_effect = mock_get_component_xnames - self.mock_capmc_waiter.wait_for_completion.return_value = self.timed_out_xnames + self.mock_pcs_waiter.wait_for_completion.return_value = self.timed_out_xnames def tearDown(self): patch.stopall() @@ -83,10 +83,10 @@ def test_do_ac_cab_off_non_empty_success(self): do_air_cooled_cabinets_power_off(self.args) self.assert_hsm_client_calls() - self.mock_capmc_client.set_xnames_power_state.assert_called_once_with( + self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( self.mock_river_non_mgmt_nodes, 'off', force=True ) - self.mock_capmc_waiter.wait_for_completion.assert_called_once_with() + self.mock_pcs_waiter.wait_for_completion.assert_called_once_with() self.assertEqual(3, len(logs_cm.records)) self.assertRegex(logs_cm.records[0].message, f'Powering off {self.num_non_mgmt_river_nodes}') @@ -104,10 +104,10 @@ def test_do_ac_cab_off_non_empty_failure(self): do_air_cooled_cabinets_power_off(self.args) self.assert_hsm_client_calls() - self.mock_capmc_client.set_xnames_power_state.assert_called_once_with( + self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( self.mock_river_non_mgmt_nodes, 'off', force=True ) - self.mock_capmc_waiter.wait_for_completion.assert_called_once_with() + self.mock_pcs_waiter.wait_for_completion.assert_called_once_with() self.assertEqual(1, len(logs_cm.records)) self.assertRegex(logs_cm.records[0].message, 'non-management nodes failed to reach the powered off state.*' @@ -122,8 +122,8 @@ def test_do_ac_cab_off_empty(self): do_air_cooled_cabinets_power_off(self.args) self.assert_hsm_client_calls() - self.mock_capmc_client.set_xnames_power_state.assert_not_called() - self.mock_capmc_waiter.wait_for_completion.assert_not_called() + self.mock_pcs_client.set_xnames_power_state.assert_not_called() + self.mock_pcs_waiter.wait_for_completion.assert_not_called() self.assertEqual(logs_cm.records[0].message, 'No non-management nodes in air-cooled cabinets to power off.') diff --git a/tests/cli/bootsys/test_power.py b/tests/cli/bootsys/test_power.py index ec01c9bb..b05136ce 100644 --- a/tests/cli/bootsys/test_power.py +++ b/tests/cli/bootsys/test_power.py @@ -1,7 +1,7 @@ # # MIT License # -# (C) Copyright 2020 Hewlett Packard Enterprise Development LP +# (C) Copyright 2020, 2023 Hewlett Packard Enterprise Development LP # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -29,21 +29,19 @@ from unittest.mock import patch from sat.apiclient import APIError -from sat.cli.bootsys.power import ( - CAPMCError, - CAPMCPowerWaiter, - do_nodes_power_off, - get_nodes_by_role_and_state) +from sat.cli.bootsys.power import (PCSError, PCSPowerWaiter, + do_nodes_power_off, + get_nodes_by_role_and_state) from tests.common import ExtendedTestCase -class TestCAPMCPowerWaiter(ExtendedTestCase): - """Tests for the CAPMCPowerWaiter.""" +class TestPCSPowerWaiter(ExtendedTestCase): + """Tests for the PCSPowerWaiter.""" def setUp(self): """Set up some patches and shared objects.""" - self.mock_capmc_client_cls = patch('sat.cli.bootsys.power.CAPMCClient').start() - self.mock_capmc_client = self.mock_capmc_client_cls.return_value + self.mock_pcs_client_cls = patch('sat.cli.bootsys.power.PCSClient').start() + self.mock_pcs_client = self.mock_pcs_client_cls.return_value self.mock_sat_session = patch('sat.cli.bootsys.power.SATSession').start() self.members = {'x5000c0s0b0n0', 'x5000c0s1b0n0'} @@ -51,49 +49,49 @@ def setUp(self): self.timeout = 60 self.poll_interval = 5 self.suppress_warnings = True - self.waiter = CAPMCPowerWaiter(self.members, self.power_state, - self.timeout, self.poll_interval, self.suppress_warnings) + self.waiter = PCSPowerWaiter(self.members, self.power_state, + self.timeout, self.poll_interval, self.suppress_warnings) def tearDown(self): """Stop all patches.""" patch.stopall() def test_init(self): - """Test creation of a CAPMCPowerWaiter""" + """Test creation of a PCSPowerWaiter""" self.assertEqual(self.members, self.waiter.members) self.assertEqual(self.power_state, self.waiter.power_state) self.assertEqual(self.timeout, self.waiter.timeout) self.assertEqual(self.poll_interval, self.waiter.poll_interval) - self.mock_capmc_client_cls.assert_called_once_with(self.mock_sat_session.return_value, - suppress_warnings=self.suppress_warnings) - self.assertEqual(self.mock_capmc_client, self.waiter.capmc_client) + self.mock_pcs_client_cls.assert_called_once_with(self.mock_sat_session.return_value, + suppress_warnings=self.suppress_warnings) + self.assertEqual(self.mock_pcs_client, self.waiter.pcs_client) def test_condition_name(self): - """Test the condition_name of the CAPMCPowerWaiter""" - self.assertEqual(f'CAPMC power {self.power_state}', self.waiter.condition_name()) + """Test the condition_name of the PCSPowerWaiter""" + self.assertEqual(f'PCS power {self.power_state}', self.waiter.condition_name()) def test_member_has_completed_complete(self): """Test member_has_completed when it has reached desired power state.""" member = 'x5000c0s0b0n0' - self.mock_capmc_client.get_xname_power_state.return_value = self.power_state + self.mock_pcs_client.get_xname_power_state.return_value = self.power_state self.assertTrue(self.waiter.member_has_completed(member)) - self.mock_capmc_client.get_xname_power_state.assert_called_once_with(member) + self.mock_pcs_client.get_xname_power_state.assert_called_once_with(member) def test_member_has_completed_incomplete(self): """Test member_has_completed when it has not reach desired power state.""" member = 'x5000c0s0b0n0' power_state = 'on' - self.mock_capmc_client.get_xname_power_state.return_value = power_state + self.mock_pcs_client.get_xname_power_state.return_value = power_state # This assertion ensures that later edits to these tests don't invalidate this test case self.assertNotEqual(power_state, self.power_state) self.assertFalse(self.waiter.member_has_completed(member)) - self.mock_capmc_client.get_xname_power_state.assert_called_once_with(member) + self.mock_pcs_client.get_xname_power_state.assert_called_once_with(member) def test_member_has_completed_api_error(self): - """Test member_has_completed when the CAPMCClient raises and APIError.""" + """Test member_has_completed when the PCSClient raises and APIError.""" member = 'x5000c0s0b0n0' - api_err_msg = 'CAPMC failure' - self.mock_capmc_client.get_xname_power_state.side_effect = APIError(api_err_msg) + api_err_msg = 'PCS failure' + self.mock_pcs_client.get_xname_power_state.side_effect = APIError(api_err_msg) with self.assertLogs(level=logging.DEBUG) as cm: self.assertFalse(self.waiter.member_has_completed(member)) self.assert_in_element(f'Failed to query power state: {api_err_msg}', cm.output) @@ -104,7 +102,7 @@ class TestDoNodesPowerOff(ExtendedTestCase): def setUp(self): """Set up some mocks.""" - self.mock_capmc_client = patch('sat.cli.bootsys.power.CAPMCClient').start().return_value + self.mock_pcs_client = patch('sat.cli.bootsys.power.PCSClient').start().return_value self.mock_sat_session = patch('sat.cli.bootsys.power.SATSession').start() self.timeout = 10 # does not actually affect duration; just for asserts @@ -124,9 +122,9 @@ def mock_get_nodes(role, _): self.mock_get_nodes = patch('sat.cli.bootsys.power.get_nodes_by_role_and_state', mock_get_nodes).start() - self.mock_capmc_waiter_cls = patch('sat.cli.bootsys.power.CAPMCPowerWaiter').start() - self.mock_capmc_waiter = self.mock_capmc_waiter_cls.return_value - self.mock_capmc_waiter.wait_for_completion.return_value = self.timed_out_nodes + self.mock_pcs_waiter_cls = patch('sat.cli.bootsys.power.PCSPowerWaiter').start() + self.mock_pcs_waiter = self.mock_pcs_waiter_cls.return_value + self.mock_pcs_waiter.wait_for_completion.return_value = self.timed_out_nodes self.mock_print = patch('builtins.print').start() @@ -150,13 +148,13 @@ def assert_log_calls(self, logs, num_wait=None, include_wait_call=True): f'nodes still powered on: {", ".join(self.all_nodes)}'] if include_wait_call: calls.append(f'Waiting {self.timeout} seconds until {num_wait} nodes ' - f'reach powered off state according to CAPMC.') + f'reach powered off state according to PCS.') for c in calls: self.assert_in_element(c, logs.output) - def assert_capmc_client_call(self): - """Assert the call is made to the CAPMCClient to power off nodes.""" - self.mock_capmc_client.set_xnames_power_state.assert_called_once_with( + def assert_pcs_client_call(self): + """Assert the call is made to the PCSClient to power off nodes.""" + self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( list(self.all_nodes), 'off', force=True ) @@ -169,21 +167,21 @@ def test_do_nodes_power_off_already_off(self): self.assertEqual(set(), timed_out) self.assertEqual(set(), failed) - self.mock_capmc_client.set_xnames_power_state.assert_not_called() - self.mock_capmc_waiter_cls.assert_not_called() + self.mock_pcs_client.set_xnames_power_state.assert_not_called() + self.mock_pcs_waiter_cls.assert_not_called() def test_do_nodes_power_off_success(self): """Test do_nodes_power_off in the successful case.""" with self.assertLogs(level=logging.INFO) as cm: timed_out, failed = do_nodes_power_off(self.timeout) - self.assert_capmc_client_call() + self.assert_pcs_client_call() self.assertEqual(set(), timed_out) self.assertEqual(set(), failed) - self.mock_capmc_waiter_cls.assert_called_once_with( + self.mock_pcs_waiter_cls.assert_called_once_with( self.all_nodes, 'off', self.timeout ) - self.mock_capmc_waiter.wait_for_completion.assert_called_once_with() + self.mock_pcs_waiter.wait_for_completion.assert_called_once_with() self.assert_log_calls(cm) def test_do_nodes_power_off_one_failed(self): @@ -196,56 +194,56 @@ def test_do_nodes_power_off_one_failed(self): 'xname': self.compute_nodes[0] } ] - capmc_err_msg = 'Power off operation failed.' - capmc_err = CAPMCError(capmc_err_msg, xname_errs=failed_xname_errs) - self.mock_capmc_client.set_xnames_power_state.side_effect = capmc_err + pcs_err_msg = 'Power off operation failed.' + pcs_err = PCSError(pcs_err_msg, xname_errs=failed_xname_errs) + self.mock_pcs_client.set_xnames_power_state.side_effect = pcs_err with self.assertLogs(level=logging.INFO) as cm: timed_out, failed = do_nodes_power_off(self.timeout) - self.assert_in_element(f'{capmc_err_msg}\n' + self.assert_in_element(f'{pcs_err_msg}\n' f'xname(s) ({self.compute_nodes[0]}) failed with ' f'e={failed_xname_errs[0]["e"]} and ' f'err_msg="{failed_xname_errs[0]["err_msg"]}"', cm.output) - self.assert_capmc_client_call() + self.assert_pcs_client_call() self.assertEqual(set(), timed_out) self.assertEqual(expected_failed, failed) - self.mock_capmc_waiter_cls.assert_called_once_with( + self.mock_pcs_waiter_cls.assert_called_once_with( self.all_nodes - expected_failed, 'off', self.timeout ) - self.mock_capmc_waiter.wait_for_completion.assert_called_once_with() + self.mock_pcs_waiter.wait_for_completion.assert_called_once_with() self.assert_log_calls(cm, num_wait=len(self.all_nodes - expected_failed)) - def test_do_nodes_power_off_capmc_failed(self): - """Test do_nodes_power_off when the CAPMC power off request fails.""" - capmc_err_msg = 'CAPMC did not respond' - capmc_err = CAPMCError(capmc_err_msg) + def test_do_nodes_power_off_pcs_failed(self): + """Test do_nodes_power_off when the PCS power off request fails.""" + pcs_err_msg = 'PCS did not respond' + pcs_err = PCSError(pcs_err_msg) expected_failed = self.all_nodes - self.mock_capmc_client.set_xnames_power_state.side_effect = capmc_err + self.mock_pcs_client.set_xnames_power_state.side_effect = pcs_err with self.assertLogs(level=logging.INFO) as cm: timed_out, failed = do_nodes_power_off(self.timeout) - self.assert_in_element(capmc_err_msg, cm.output) - self.assert_capmc_client_call() + self.assert_in_element(pcs_err_msg, cm.output) + self.assert_pcs_client_call() self.assertEqual(set(), timed_out) self.assertEqual(expected_failed, failed) - self.mock_capmc_waiter_cls.assert_not_called() + self.mock_pcs_waiter_cls.assert_not_called() self.assert_log_calls(cm, include_wait_call=False) def test_do_nodes_power_off_one_timed_out(self): """Test do_node_power_off when one node times out.""" expected_timed_out = {self.compute_nodes[0]} - self.mock_capmc_waiter.wait_for_completion.return_value = expected_timed_out + self.mock_pcs_waiter.wait_for_completion.return_value = expected_timed_out with self.assertLogs(level=logging.INFO) as cm: timed_out, failed = do_nodes_power_off(self.timeout) - self.assert_capmc_client_call() + self.assert_pcs_client_call() self.assertEqual(expected_timed_out, timed_out) self.assertEqual(set(), failed) - self.mock_capmc_waiter_cls.assert_called_once_with(self.all_nodes, 'off', self.timeout) + self.mock_pcs_waiter_cls.assert_called_once_with(self.all_nodes, 'off', self.timeout) self.assert_log_calls(cm) @@ -278,8 +276,8 @@ def mock_get_xnames_power_state(xnames): self.mock_hsm_client = patch('sat.cli.bootsys.power.HSMClient').start().return_value self.mock_hsm_client.get_component_xnames = mock_get_xnames - self.mock_capmc_client = patch('sat.cli.bootsys.power.CAPMCClient').start().return_value - self.mock_capmc_client.get_xnames_power_state = mock_get_xnames_power_state + self.mock_pcs_client = patch('sat.cli.bootsys.power.PCSClient').start().return_value + self.mock_pcs_client.get_xnames_power_state = mock_get_xnames_power_state self.mock_sat_session = patch('sat.cli.bootsys.power.SATSession').start() def tearDown(self): diff --git a/tests/cli/bootsys/test_service_activity.py b/tests/cli/bootsys/test_service_activity.py index c224bc95..105c7c1a 100644 --- a/tests/cli/bootsys/test_service_activity.py +++ b/tests/cli/bootsys/test_service_activity.py @@ -202,7 +202,7 @@ def setUp(self): self.bos_session_details = { '1': { 'bos_launch': 'bos_launch_1', - 'computes': 'boot_capmc_finished', + 'computes': 'boot_pcs_finished', 'session_template_id': self.session_template_id, 'operation': 'shutdown', 'boa_launch': 'boa_launch_1', @@ -210,7 +210,7 @@ def setUp(self): }, '2': { 'bos_launch': 'bos_launch_2', - 'computes': 'boot_capmc_finished', + 'computes': 'boot_pcs_finished', 'session_template_id': self.session_template_id, 'operation': 'configure', 'boa_launch': 'boa_launch_2', @@ -218,7 +218,7 @@ def setUp(self): }, '3': { 'bos_launch': 'bos_launch_3', - 'computes': 'boot_capmc_finished', + 'computes': 'boot_pcs_finished', 'session_template_id': self.session_template_id, 'operation': 'reboot', 'boa_launch': 'boa_launch_3', diff --git a/tests/cli/swap/test_blade.py b/tests/cli/swap/test_blade.py index 099b47bd..3040f30a 100644 --- a/tests/cli/swap/test_blade.py +++ b/tests/cli/swap/test_blade.py @@ -36,7 +36,7 @@ from csm_api_client.service.hsm import HSMClient from kubernetes.client.exceptions import ApiException -from sat.apiclient.capmc import CAPMCClient +from sat.apiclient.pcs import PCSClient from sat.cli.swap.blade import ( blade_swap_stage, BladeSwapError, @@ -127,8 +127,8 @@ def setUp(self): self.mock_hsm_client = MagicMock(autospec=HSMClient) patch('sat.cli.swap.blade.HSMClient', return_value=self.mock_hsm_client).start() - self.mock_capmc_client = MagicMock(autospec=CAPMCClient) - patch('sat.cli.swap.blade.CAPMCClient', return_value=self.mock_capmc_client).start() + self.mock_pcs_client = MagicMock(autospec=PCSClient) + patch('sat.cli.swap.blade.PCSClient', return_value=self.mock_pcs_client).start() self.mock_sat_session = patch('sat.cli.swap.blade.SATSession').start() @@ -369,7 +369,7 @@ class TestPowerOffSlot(BaseBladeSwapProcedureTest): def test_slot_power_off_command_sent_mountain_blades(self): """Test that the slot power off command is sent properly for Mountain blades""" self.swap_out.power_off_slot() - self.mock_capmc_client.set_xnames_power_state.assert_called_once_with( + self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( [self.blade_xname], 'off', recursive=True, @@ -382,10 +382,10 @@ def test_slot_power_off_command_sent_river_blades(self): patch('sat.cli.swap.blade.BladeSwapProcedure.blade_class', 'river').start() node_xnames = [n['ID'] for n in self.nodes] - self.mock_capmc_client.get_xnames_power_state.return_value = {'on': node_xnames} + self.mock_pcs_client.get_xnames_power_state.return_value = {'on': node_xnames} self.swap_out.power_off_slot() - self.mock_capmc_client.set_xnames_power_state.assert_called_once_with( + self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( node_xnames, 'off', recursive=True, @@ -552,7 +552,7 @@ class TestPoweringOnSlot(BaseBladeSwapProcedureTest): def test_power_on_slot(self): """Test powering on the slot""" self.swap_in.power_on_slot() - self.mock_capmc_client.set_xnames_power_state.assert_called_once_with( + self.mock_pcs_client.set_xnames_power_state.assert_called_once_with( [self.blade_xname], 'on', recursive=True,