From 56cd3614a572cc463281c096e4710f35c4fb145e Mon Sep 17 00:00:00 2001 From: Max Marrone Date: Mon, 7 Oct 2024 11:11:46 -0400 Subject: [PATCH] feat(robot-server): Wire up global error recovery setting (#16416) ## Overview Closes EXEC-719. ## Changelog * Connect the new `PATCH /errorRecovery/settings` HTTP API so it actually affects run behavior. This closes EXEC-719. * One quirk to my implementation: This will not affect preexisting runs, *unless* you *also* do `PUT /runs/{id}/errorRecoveryPolicy`. In effect, `PUT /runs/{id}/errorRecoveryPolicy` refreshes the run's snapshot of `/errorRecovery/settings`. This is probably confusing; it would be better if `PATCH /errorRecovery/settings` immediately affected the preexisting run, or if `PUT /runs/{id}/errorRecoveryPolicy` did not have that refresh effect. I don't think this matters in practice because this button will be disabled or inaccessible in our UIs while there's an active run. Also fix a couple of nearby, unticketed bugs: * If a client gave us an empty error recovery policy, we were no-opping. Instead, we want to overwrite the current policy with the new empty one. * The logic in `error_recovery_mapping.py` was unintentionally enabling error recovery on OT-2 robots if the policy was specific enough. We want it to always be disabled on OT-2s. --- .../protocol_engine/error_recovery_policy.py | 1 + .../error_recovery/settings/router.py | 4 - .../error_recovery/settings/store.py | 13 +- .../robot_server/runs/dependencies.py | 10 +- .../runs/error_recovery_mapping.py | 121 +++++++---- .../runs/error_recovery_models.py | 49 +++-- .../robot_server/runs/router/base_router.py | 17 +- .../robot_server/runs/run_data_manager.py | 34 ++- .../runs/run_orchestrator_store.py | 8 +- .../error_recovery/settings/test_store.py | 4 +- .../tests/runs/router/test_base_router.py | 8 +- .../tests/runs/test_error_recovery_mapping.py | 6 +- .../tests/runs/test_run_data_manager.py | 199 +++++++++--------- ...tore.py => test_run_orchestrator_store.py} | 12 ++ 14 files changed, 283 insertions(+), 203 deletions(-) rename robot-server/tests/runs/{test_engine_store.py => test_run_orchestrator_store.py} (94%) diff --git a/api/src/opentrons/protocol_engine/error_recovery_policy.py b/api/src/opentrons/protocol_engine/error_recovery_policy.py index f9f39d99f4d..d959651393e 100644 --- a/api/src/opentrons/protocol_engine/error_recovery_policy.py +++ b/api/src/opentrons/protocol_engine/error_recovery_policy.py @@ -40,6 +40,7 @@ class ErrorRecoveryPolicy(Protocol): and return an appropriate `ErrorRecoveryType`. Args: + config: The config of the calling `ProtocolEngine`. failed_command: The command that failed, in its final `status=="failed"` state. defined_error_data: If the command failed with a defined error, details about that error. If the command failed with an undefined error, `None`. diff --git a/robot-server/robot_server/error_recovery/settings/router.py b/robot-server/robot_server/error_recovery/settings/router.py index 1a302b05582..4fdfeee5498 100644 --- a/robot-server/robot_server/error_recovery/settings/router.py +++ b/robot-server/robot_server/error_recovery/settings/router.py @@ -61,10 +61,6 @@ async def _get_current_response( store: ErrorRecoverySettingStore, ) -> PydanticResponse[SimpleBody[ResponseData]]: is_enabled = store.get_is_enabled() - if is_enabled is None: - # todo(mm, 2024-09-30): This defaulting will probably need to move down a layer - # when we connect this setting to `POST /runs`. - is_enabled = True return await PydanticResponse.create( SimpleBody.construct(data=ResponseData.construct(enabled=is_enabled)) ) diff --git a/robot-server/robot_server/error_recovery/settings/store.py b/robot-server/robot_server/error_recovery/settings/store.py index 6cef66aae2e..7bad6b5c77c 100644 --- a/robot-server/robot_server/error_recovery/settings/store.py +++ b/robot-server/robot_server/error_recovery/settings/store.py @@ -10,24 +10,25 @@ from robot_server.persistence.tables import boolean_setting_table, BooleanSettingKey +_ERROR_RECOVERY_ENABLED_DEFAULT = True + + class ErrorRecoverySettingStore: """Persistently stores settings related to error recovery.""" def __init__(self, sql_engine: sqlalchemy.engine.Engine) -> None: self._sql_engine = sql_engine - def get_is_enabled(self) -> bool | None: - """Get the value of the "error recovery enabled" setting. - - `None` is the default, i.e. it's never been explicitly set one way or the other. - """ + def get_is_enabled(self) -> bool: + """Get the value of the "error recovery enabled" setting.""" with self._sql_engine.begin() as transaction: - return transaction.execute( + result: bool | None = transaction.execute( sqlalchemy.select(boolean_setting_table.c.value).where( boolean_setting_table.c.key == BooleanSettingKey.ENABLE_ERROR_RECOVERY ) ).scalar_one_or_none() + return result if result is not None else _ERROR_RECOVERY_ENABLED_DEFAULT def set_is_enabled(self, is_enabled: bool | None) -> None: """Set the value of the "error recovery enabled" setting. diff --git a/robot-server/robot_server/runs/dependencies.py b/robot-server/robot_server/runs/dependencies.py index 146297715a9..60c96d1e5e6 100644 --- a/robot-server/robot_server/runs/dependencies.py +++ b/robot-server/robot_server/runs/dependencies.py @@ -2,6 +2,10 @@ from typing import Annotated from fastapi import Depends, status +from robot_server.error_recovery.settings.store import ( + ErrorRecoverySettingStore, + get_error_recovery_setting_store, +) from robot_server.protocols.dependencies import get_protocol_store from robot_server.protocols.protocol_models import ProtocolKind from robot_server.protocols.protocol_store import ProtocolStore @@ -156,12 +160,16 @@ async def get_run_data_manager( ], run_store: Annotated[RunStore, Depends(get_run_store)], runs_publisher: Annotated[RunsPublisher, Depends(get_runs_publisher)], + error_recovery_setting_store: Annotated[ + ErrorRecoverySettingStore, Depends(get_error_recovery_setting_store) + ], ) -> RunDataManager: """Get a run data manager to keep track of current/historical run data.""" return RunDataManager( - task_runner=task_runner, run_orchestrator_store=run_orchestrator_store, run_store=run_store, + error_recovery_setting_store=error_recovery_setting_store, + task_runner=task_runner, runs_publisher=runs_publisher, ) diff --git a/robot-server/robot_server/runs/error_recovery_mapping.py b/robot-server/robot_server/runs/error_recovery_mapping.py index fb936eddadd..d29ebf4b054 100644 --- a/robot-server/robot_server/runs/error_recovery_mapping.py +++ b/robot-server/robot_server/runs/error_recovery_mapping.py @@ -1,5 +1,4 @@ """Functions used for managing error recovery policy.""" -from typing import Optional from opentrons.protocol_engine.state.config import Config from robot_server.runs.error_recovery_models import ErrorRecoveryRule, ReactionIfMatch from opentrons.protocol_engine.commands.command_unions import ( @@ -14,52 +13,96 @@ def create_error_recovery_policy_from_rules( rules: list[ErrorRecoveryRule], + enabled: bool, ) -> ErrorRecoveryPolicy: - """Given a list of error recovery rules return an error recovery policy.""" + """Map a robot-server error recovery policy to an opentrons.protocol_engine one. - def _policy( + In its HTTP API, robot-server expresses error recovery policies as Pydantic models. + But opentrons.protocol_engine is more general, expressing them as Python callables. + + Args: + rules: The rules in the robot-server error recovery policy. + enabled: Whether error recovery should be enabled at all. + If `False`, `rules` is ignored. + + Returns: + An error recovery policy in `opentrons.protocol_engine` terms. + """ + + def mapped_policy( config: Config, failed_command: Command, - defined_error_data: Optional[CommandDefinedErrorData], + defined_error_data: CommandDefinedErrorData | None, ) -> ErrorRecoveryType: - for rule in rules: - command_type_matches = ( - failed_command.commandType == rule.matchCriteria.command.commandType - ) - error_type_matches = ( - defined_error_data is not None - and defined_error_data.public.errorType - == rule.matchCriteria.command.error.errorType + first_matching_rule = next( + ( + rule + for rule in rules + if _rule_matches_error(rule, failed_command, defined_error_data) + ), + None, + ) + robot_is_flex = config.robot_type == "OT-3 Standard" + error_is_defined = defined_error_data is not None + + if not enabled: + return ErrorRecoveryType.FAIL_RUN + elif not robot_is_flex: + # Although error recovery can theoretically work on OT-2s, we haven't tested + # it, and it's generally scarier because the OT-2 has much less hardware + # feedback. + return ErrorRecoveryType.FAIL_RUN + elif first_matching_rule is not None: + # The input policy explicitly deals this error, so do what it says. + return _map_error_recovery_type(first_matching_rule.ifMatch) + else: + # The input policy doesn't explicitly deal with this error, so the decision + # is our own. + # + # We try to WAIT_FOR_RECOVERY whenever we can, for two reasons: + # + # 1. It matches the frontend's current expectations. + # For example, the frontend expects us to WAIT_FOR_RECOVERY on + # overpressure errors, but it does not send us an error recovery policy + # that explicitly says that; it relies on this default. + # 2. Philosophically, we always want to give the operator a shot at + # recovery, even if we don't know the details of the problem and can't + # guarantee good robot behavior if they keep using it. + # + # We currently FAIL_RUN for undefined errors, with the thinking that they + # are especially likely to have messed something up in Protocol Engine's + # internal state, and that they are especially likely to cause confusing + # behavior. But we might want to change that--see point (2) above. + return ( + ErrorRecoveryType.WAIT_FOR_RECOVERY + if error_is_defined + else ErrorRecoveryType.FAIL_RUN ) - if command_type_matches and error_type_matches: - if rule.ifMatch == ReactionIfMatch.IGNORE_AND_CONTINUE: - return ErrorRecoveryType.IGNORE_AND_CONTINUE - elif rule.ifMatch == ReactionIfMatch.FAIL_RUN: - return ErrorRecoveryType.FAIL_RUN - elif rule.ifMatch == ReactionIfMatch.WAIT_FOR_RECOVERY: - return ErrorRecoveryType.WAIT_FOR_RECOVERY + return mapped_policy - return default_error_recovery_policy(config, failed_command, defined_error_data) - return _policy +def _rule_matches_error( + rule: ErrorRecoveryRule, + failed_command: Command, + defined_error_data: CommandDefinedErrorData | None, +) -> bool: + command_type_matches = ( + failed_command.commandType == rule.matchCriteria.command.commandType + ) + error_type_matches = ( + defined_error_data is not None + and defined_error_data.public.errorType + == rule.matchCriteria.command.error.errorType + ) + return command_type_matches and error_type_matches -def default_error_recovery_policy( - config: Config, - failed_command: Command, - defined_error_data: Optional[CommandDefinedErrorData], -) -> ErrorRecoveryType: - """The `ErrorRecoveryPolicy` to use when none has been set on a run.""" - # Although error recovery can theoretically work on OT-2s, we haven't tested it, - # and it's generally scarier because the OT-2 has much less hardware feedback. - robot_is_flex = config.robot_type == "OT-3 Standard" - # If the error is defined, we're taking that to mean that we should - # WAIT_FOR_RECOVERY. This is not necessarily the right long-term logic--we might - # want to FAIL_RUN on certain defined errors and WAIT_FOR_RECOVERY on certain - # undefined errors--but this is convenient for now. - error_is_defined = defined_error_data is not None - if robot_is_flex and error_is_defined: - return ErrorRecoveryType.WAIT_FOR_RECOVERY - else: - return ErrorRecoveryType.FAIL_RUN +def _map_error_recovery_type(reaction_if_match: ReactionIfMatch) -> ErrorRecoveryType: + match reaction_if_match: + case ReactionIfMatch.IGNORE_AND_CONTINUE: + return ErrorRecoveryType.IGNORE_AND_CONTINUE + case ReactionIfMatch.FAIL_RUN: + return ErrorRecoveryType.FAIL_RUN + case ReactionIfMatch.WAIT_FOR_RECOVERY: + return ErrorRecoveryType.WAIT_FOR_RECOVERY diff --git a/robot-server/robot_server/runs/error_recovery_models.py b/robot-server/robot_server/runs/error_recovery_models.py index 5558c65a8ac..a2990a007cb 100644 --- a/robot-server/robot_server/runs/error_recovery_models.py +++ b/robot-server/robot_server/runs/error_recovery_models.py @@ -4,13 +4,31 @@ from pydantic import BaseModel, Field +# There's a lot of nested classes here. +# Here's an example of a JSON document that this code models: +# { +# "policyRules": [ +# { +# "matchCriteria": { +# "command": { +# "commandType": "foo", +# "error": { +# "errorType": "bar" +# } +# } +# }, +# "ifMatch": "ignoreAndContinue" +# } +# ] +# } + class ReactionIfMatch(Enum): - """The type of the error recovery setting. + """How to handle a given error. - * `"ignoreAndContinue"`: Ignore this error and future errors of the same type. - * `"failRun"`: Errors of this type should fail the run. - * `"waitForRecovery"`: Instances of this error should initiate a recover operation. + * `"ignoreAndContinue"`: Ignore this error and continue with the next command. + * `"failRun"`: Fail the run. + * `"waitForRecovery"`: Enter interactive error recovery mode. """ @@ -19,20 +37,6 @@ class ReactionIfMatch(Enum): WAIT_FOR_RECOVERY = "waitForRecovery" -# There's a lot of nested classes here. This is the JSON schema this code models. -# "ErrorRecoveryRule": { -# "matchCriteria": { -# "command": { -# "commandType": "foo", -# "error": { -# "errorType": "bar" -# } -# } -# }, -# "ifMatch": "baz" -# } - - class ErrorMatcher(BaseModel): """The error type that this rule applies to.""" @@ -67,7 +71,7 @@ class ErrorRecoveryRule(BaseModel): ) ifMatch: ReactionIfMatch = Field( ..., - description="The specific recovery setting that will be in use if the type parameters match.", + description="How to handle errors matched by this rule.", ) @@ -76,6 +80,9 @@ class ErrorRecoveryPolicy(BaseModel): policyRules: List[ErrorRecoveryRule] = Field( ..., - description="A list or error recovery rules to apply for a run's recovery management." - "The rules are evaluated first-to-last. The first exact match will dectate recovery management.", + description=( + "A list of error recovery rules to apply for a run's recovery management." + " The rules are evaluated first-to-last." + " The first exact match will dictate recovery management." + ), ) diff --git a/robot-server/robot_server/runs/router/base_router.py b/robot-server/robot_server/runs/router/base_router.py index 3b0e7040e02..b9bd8cd24b2 100644 --- a/robot-server/robot_server/runs/router/base_router.py +++ b/robot-server/robot_server/runs/router/base_router.py @@ -426,11 +426,13 @@ async def update_run( @PydanticResponse.wrap_route( base_router.put, path="/runs/{runId}/errorRecoveryPolicy", - summary="Set run policies", + summary="Set a run's error recovery policy", description=dedent( """ Update how to handle different kinds of command failures. - The following rules will persist during the run. + + For this to have any effect, error recovery must also be enabled globally. + See `PATCH /errorRecovery/settings`. """ ), status_code=status.HTTP_201_CREATED, @@ -451,12 +453,11 @@ async def put_error_recovery_policy( request_body: Request body with run policies data. run_data_manager: Current and historical run data management. """ - policies = request_body.data.policyRules - if policies: - try: - run_data_manager.set_policies(run_id=runId, policies=policies) - except RunNotCurrentError as e: - raise RunStopped(detail=str(e)).as_error(status.HTTP_409_CONFLICT) from e + rules = request_body.data.policyRules + try: + run_data_manager.set_error_recovery_rules(run_id=runId, rules=rules) + except RunNotCurrentError as e: + raise RunStopped(detail=str(e)).as_error(status.HTTP_409_CONFLICT) from e return await PydanticResponse.create( content=SimpleEmptyBody.construct(), diff --git a/robot-server/robot_server/runs/run_data_manager.py b/robot-server/robot_server/runs/run_data_manager.py index b756b185bf1..4168b1d4d5d 100644 --- a/robot-server/robot_server/runs/run_data_manager.py +++ b/robot-server/robot_server/runs/run_data_manager.py @@ -22,6 +22,7 @@ CSVRuntimeParamPaths, ) +from robot_server.error_recovery.settings.store import ErrorRecoverySettingStore from robot_server.protocols.protocol_store import ProtocolResource from robot_server.service.task_runner import TaskRunner from robot_server.service.notifications import RunsPublisher @@ -35,6 +36,9 @@ from opentrons.protocol_engine.types import DeckConfigurationType, RunTimeParameter +_INITIAL_ERROR_RECOVERY_RULES: list[ErrorRecoveryRule] = [] + + def _build_run( run_resource: Union[RunResource, BadRunResource], state_summary: Union[StateSummary, BadStateSummary], @@ -145,11 +149,13 @@ def __init__( self, run_orchestrator_store: RunOrchestratorStore, run_store: RunStore, + error_recovery_setting_store: ErrorRecoverySettingStore, task_runner: TaskRunner, runs_publisher: RunsPublisher, ) -> None: self._run_orchestrator_store = run_orchestrator_store self._run_store = run_store + self._error_recovery_setting_store = error_recovery_setting_store self._task_runner = task_runner self._runs_publisher = runs_publisher @@ -190,6 +196,7 @@ async def create( """ prev_run_id = self._run_orchestrator_store.current_run_id if prev_run_id is not None: + # Allow clear() to propagate RunConflictError. prev_run_result = await self._run_orchestrator_store.clear() self._run_store.update_run_state( run_id=prev_run_id, @@ -197,9 +204,18 @@ async def create( commands=prev_run_result.commands, run_time_parameters=prev_run_result.parameters, ) + + error_recovery_is_enabled = self._error_recovery_setting_store.get_is_enabled() + initial_error_recovery_policy = ( + error_recovery_mapping.create_error_recovery_policy_from_rules( + _INITIAL_ERROR_RECOVERY_RULES, error_recovery_is_enabled + ) + ) + state_summary = await self._run_orchestrator_store.create( run_id=run_id, labware_offsets=labware_offsets, + initial_error_recovery_policy=initial_error_recovery_policy, deck_configuration=deck_configuration, protocol=protocol, run_time_param_values=run_time_param_values, @@ -215,6 +231,7 @@ async def create( self._run_store.insert_csv_rtp( run_id=run_id, run_time_parameters=run_time_parameters ) + self._runs_publisher.start_publishing_for_run( get_current_command=self.get_current_command, get_recovery_target_command=self.get_recovery_target_command, @@ -498,16 +515,23 @@ def get_all_commands_as_preserialized_list( run_id, include_fixit_commands ) - def set_policies(self, run_id: str, policies: List[ErrorRecoveryRule]) -> None: - """Create run policy rules for error recovery.""" + def set_error_recovery_rules( + self, run_id: str, rules: List[ErrorRecoveryRule] + ) -> None: + """Set the run's error recovery policy. + + The input rules get combined with the global error recovery enabled/disabled + setting, which this method retrieves automatically. + """ if run_id != self._run_orchestrator_store.current_run_id: raise RunNotCurrentError( f"Cannot update {run_id} because it is not the current run." ) - policy = error_recovery_mapping.create_error_recovery_policy_from_rules( - policies + is_enabled = self._error_recovery_setting_store.get_is_enabled() + mapped_policy = error_recovery_mapping.create_error_recovery_policy_from_rules( + rules, is_enabled ) - self._run_orchestrator_store.set_error_recovery_policy(policy=policy) + self._run_orchestrator_store.set_error_recovery_policy(policy=mapped_policy) def _get_state_summary(self, run_id: str) -> Union[StateSummary, BadStateSummary]: if run_id == self._run_orchestrator_store.current_run_id: diff --git a/robot-server/robot_server/runs/run_orchestrator_store.py b/robot-server/robot_server/runs/run_orchestrator_store.py index 46a384b96ea..03af7315ef9 100644 --- a/robot-server/robot_server/runs/run_orchestrator_store.py +++ b/robot-server/robot_server/runs/run_orchestrator_store.py @@ -53,8 +53,6 @@ ) from opentrons_shared_data.labware.types import LabwareUri -from .error_recovery_mapping import default_error_recovery_policy - _log = logging.getLogger(__name__) @@ -119,8 +117,6 @@ def run_handler_in_engine_thread_from_hardware_thread( class RunOrchestratorStore: """Factory and in-memory storage for ProtocolEngine.""" - _run_orchestrator: Optional[RunOrchestrator] = None - def __init__( self, hardware_api: HardwareControlAPI, @@ -138,6 +134,7 @@ def __init__( self._hardware_api = hardware_api self._robot_type = robot_type self._deck_type = deck_type + self._run_orchestrator: Optional[RunOrchestrator] = None self._default_run_orchestrator: Optional[RunOrchestrator] = None hardware_api.register_callback(_get_estop_listener(self)) @@ -194,6 +191,7 @@ async def create( self, run_id: str, labware_offsets: List[LabwareOffsetCreate], + initial_error_recovery_policy: error_recovery_policy.ErrorRecoveryPolicy, deck_configuration: DeckConfigurationType, notify_publishers: Callable[[], None], protocol: Optional[ProtocolResource], @@ -235,7 +233,7 @@ async def create( RobotTypeEnum.robot_literal_to_enum(self._robot_type) ), ), - error_recovery_policy=default_error_recovery_policy, + error_recovery_policy=initial_error_recovery_policy, load_fixed_trash=load_fixed_trash, deck_configuration=deck_configuration, notify_publishers=notify_publishers, diff --git a/robot-server/tests/error_recovery/settings/test_store.py b/robot-server/tests/error_recovery/settings/test_store.py index cc69f5d307f..e67f5e72ee9 100644 --- a/robot-server/tests/error_recovery/settings/test_store.py +++ b/robot-server/tests/error_recovery/settings/test_store.py @@ -17,7 +17,7 @@ def subject( def test_error_recovery_setting_store(subject: ErrorRecoverySettingStore) -> None: """Test `ErrorRecoverySettingStore`.""" - assert subject.get_is_enabled() is None + assert subject.get_is_enabled() is True subject.set_is_enabled(is_enabled=False) assert subject.get_is_enabled() is False @@ -26,4 +26,4 @@ def test_error_recovery_setting_store(subject: ErrorRecoverySettingStore) -> Non assert subject.get_is_enabled() is True subject.set_is_enabled(is_enabled=None) - assert subject.get_is_enabled() is None + assert subject.get_is_enabled() is True diff --git a/robot-server/tests/runs/router/test_base_router.py b/robot-server/tests/runs/router/test_base_router.py index 37e5cd6dd3d..8a10af1940d 100644 --- a/robot-server/tests/runs/router/test_base_router.py +++ b/robot-server/tests/runs/router/test_base_router.py @@ -698,8 +698,8 @@ async def test_create_policies( run_data_manager=mock_run_data_manager, ) decoy.verify( - mock_run_data_manager.set_policies( - run_id="rud-id", policies=policies.policyRules + mock_run_data_manager.set_error_recovery_rules( + run_id="rud-id", rules=policies.policyRules ) ) @@ -710,8 +710,8 @@ async def test_create_policies_raises_not_active_run( """It should raise that the run is not current.""" policies = decoy.mock(cls=ErrorRecoveryPolicy) decoy.when( - mock_run_data_manager.set_policies( - run_id="rud-id", policies=policies.policyRules + mock_run_data_manager.set_error_recovery_rules( + run_id="rud-id", rules=policies.policyRules ) ).then_raise(RunNotCurrentError()) with pytest.raises(ApiError) as exc_info: diff --git a/robot-server/tests/runs/test_error_recovery_mapping.py b/robot-server/tests/runs/test_error_recovery_mapping.py index a142fbc5e30..fba8e4315d9 100644 --- a/robot-server/tests/runs/test_error_recovery_mapping.py +++ b/robot-server/tests/runs/test_error_recovery_mapping.py @@ -71,7 +71,7 @@ def test_create_error_recovery_policy_with_rules( mock_rule: ErrorRecoveryRule, ) -> None: """Should return IGNORE_AND_CONTINUE if that's what we specify as the rule.""" - policy = create_error_recovery_policy_from_rules([mock_rule]) + policy = create_error_recovery_policy_from_rules([mock_rule], enabled=True) exampleConfig = Config( robot_type="OT-3 Standard", deck_type=DeckType.OT3_STANDARD, @@ -86,7 +86,7 @@ def test_create_error_recovery_policy_undefined_error( decoy: Decoy, mock_command: LiquidProbe ) -> None: """Should return a FAIL_RUN policy when error is not defined.""" - policy = create_error_recovery_policy_from_rules(rules=[]) + policy = create_error_recovery_policy_from_rules(rules=[], enabled=True) exampleConfig = Config( robot_type="OT-3 Standard", deck_type=DeckType.OT3_STANDARD, @@ -99,7 +99,7 @@ def test_create_error_recovery_policy_defined_error( decoy: Decoy, mock_command: LiquidProbe, mock_error_data: CommandDefinedErrorData ) -> None: """Should return a WAIT_FOR_RECOVERY policy when error is defined.""" - policy = create_error_recovery_policy_from_rules(rules=[]) + policy = create_error_recovery_policy_from_rules(rules=[], enabled=True) exampleConfig = Config( robot_type="OT-3 Standard", deck_type=DeckType.OT3_STANDARD, diff --git a/robot-server/tests/runs/test_run_data_manager.py b/robot-server/tests/runs/test_run_data_manager.py index 49399447597..981a0e7177c 100644 --- a/robot-server/tests/runs/test_run_data_manager.py +++ b/robot-server/tests/runs/test_run_data_manager.py @@ -1,10 +1,10 @@ """Tests for RunDataManager.""" from datetime import datetime from typing import Optional, List, Dict +from unittest.mock import sentinel import pytest from decoy import Decoy, matchers -from pathlib import Path from opentrons.protocol_engine import ( EngineStatus, @@ -21,16 +21,15 @@ LabwareOffset, Liquid, ) -from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryPolicy from opentrons.protocol_engine.types import BooleanParameter, CSVParameter from opentrons.protocol_runner import RunResult -from opentrons.types import DeckSlotName from opentrons.hardware_control.nozzle_manager import NozzleMap from opentrons_shared_data.errors.exceptions import InvalidStoredData from opentrons_shared_data.labware.labware_definition import LabwareDefinition +from robot_server.error_recovery.settings.store import ErrorRecoverySettingStore from robot_server.protocols.protocol_models import ProtocolKind from robot_server.protocols.protocol_store import ProtocolResource from robot_server.runs import error_recovery_mapping @@ -74,6 +73,12 @@ def mock_run_store(decoy: Decoy) -> RunStore: return decoy.mock(cls=RunStore) +@pytest.fixture +def mock_error_recovery_setting_store(decoy: Decoy) -> ErrorRecoverySettingStore: + """Get a mock ErrorRecoverySettingStore.""" + return decoy.mock(cls=ErrorRecoverySettingStore) + + @pytest.fixture() def mock_task_runner(decoy: Decoy) -> TaskRunner: """Get a mock background TaskRunner.""" @@ -102,6 +107,20 @@ def engine_state_summary() -> StateSummary: ) +@pytest.fixture(autouse=True) +def patch_error_recovery_mapping(decoy: Decoy, monkeypatch: pytest.MonkeyPatch) -> None: + """Replace the members of the error_recovery_mapping module with mocks.""" + monkeypatch.setattr( + error_recovery_mapping, + "create_error_recovery_policy_from_rules", + decoy.mock( + func=decoy.mock( + func=error_recovery_mapping.create_error_recovery_policy_from_rules + ) + ), + ) + + @pytest.fixture() def run_time_parameters() -> List[pe_types.RunTimeParameter]: """Get a RunTimeParameter list.""" @@ -150,6 +169,7 @@ def run_command() -> commands.Command: def subject( mock_run_orchestrator_store: RunOrchestratorStore, mock_run_store: RunStore, + mock_error_recovery_setting_store: ErrorRecoverySettingStore, mock_task_runner: TaskRunner, mock_runs_publisher: RunsPublisher, ) -> RunDataManager: @@ -157,6 +177,7 @@ def subject( return RunDataManager( run_orchestrator_store=mock_run_orchestrator_store, run_store=mock_run_store, + error_recovery_setting_store=mock_error_recovery_setting_store, task_runner=mock_task_runner, runs_publisher=mock_runs_publisher, ) @@ -166,6 +187,7 @@ async def test_create( decoy: Decoy, mock_run_orchestrator_store: RunOrchestratorStore, mock_run_store: RunStore, + mock_error_recovery_setting_store: ErrorRecoverySettingStore, subject: RunDataManager, engine_state_summary: StateSummary, run_resource: RunResource, @@ -173,102 +195,33 @@ async def test_create( """It should create an engine and a persisted run resource.""" run_id = "hello world" created_at = datetime(year=2021, month=1, day=1) - - decoy.when( - await mock_run_orchestrator_store.create( - run_id=run_id, - labware_offsets=[], - protocol=None, - deck_configuration=[], - run_time_param_values=None, - run_time_param_paths=None, - notify_publishers=mock_notify_publishers, - ) - ).then_return(engine_state_summary) - - decoy.when(mock_run_orchestrator_store.get_run_time_parameters()).then_return([]) - - decoy.when( - mock_run_store.insert( - run_id=run_id, - protocol_id=None, - created_at=created_at, - ) - ).then_return(run_resource) - - decoy.when(mock_run_orchestrator_store.get_run_time_parameters()).then_return([]) - - result = await subject.create( - run_id=run_id, - created_at=created_at, - labware_offsets=[], - protocol=None, - deck_configuration=[], - run_time_param_values=None, - run_time_param_paths=None, - notify_publishers=mock_notify_publishers, - ) - - assert result == Run( - id=run_resource.run_id, - protocolId=run_resource.protocol_id, - createdAt=run_resource.created_at, - current=True, - actions=run_resource.actions, - status=engine_state_summary.status, - errors=engine_state_summary.errors, - hasEverEnteredErrorRecovery=engine_state_summary.hasEverEnteredErrorRecovery, - labware=engine_state_summary.labware, - labwareOffsets=engine_state_summary.labwareOffsets, - pipettes=engine_state_summary.pipettes, - modules=engine_state_summary.modules, - liquids=engine_state_summary.liquids, - ) - decoy.verify(mock_run_store.insert_csv_rtp(run_id=run_id, run_time_parameters=[])) - - -async def test_create_with_options( - decoy: Decoy, - mock_run_orchestrator_store: RunOrchestratorStore, - mock_run_store: RunStore, - subject: RunDataManager, - engine_state_summary: StateSummary, - run_resource: RunResource, -) -> None: - """It should handle creation with a protocol, labware offsets and parameters.""" - run_id = "hello world" - created_at = datetime(year=2021, month=1, day=1) - protocol = ProtocolResource( - protocol_id="protocol-id", + protocol_id=sentinel.protocol_id, created_at=datetime(year=2022, month=2, day=2), source=None, # type: ignore[arg-type] protocol_key=None, protocol_kind=ProtocolKind.STANDARD, ) - labware_offset = pe_types.LabwareOffsetCreate( - definitionUri="namespace/load_name/version", - location=pe_types.LabwareOffsetLocation(slotName=DeckSlotName.SLOT_5), - vector=pe_types.LabwareOffsetVector(x=1, y=2, z=3), - ) - decoy.when( await mock_run_orchestrator_store.create( run_id=run_id, - labware_offsets=[labware_offset], + labware_offsets=sentinel.labware_offsets, + initial_error_recovery_policy=sentinel.initial_error_recovery_policy, protocol=protocol, - deck_configuration=[], - run_time_param_values={"foo": "bar"}, - run_time_param_paths={"xyzzy": Path("zork")}, + deck_configuration=sentinel.deck_configuration, + run_time_param_values=sentinel.run_time_param_values, + run_time_param_paths=sentinel.run_time_param_paths, notify_publishers=mock_notify_publishers, ) ).then_return(engine_state_summary) + decoy.when(mock_run_orchestrator_store.get_run_time_parameters()).then_return([]) + decoy.when( mock_run_store.insert( run_id=run_id, - protocol_id="protocol-id", + protocol_id=protocol.protocol_id, created_at=created_at, ) ).then_return(run_resource) @@ -276,21 +229,30 @@ async def test_create_with_options( bool_parameter = BooleanParameter( displayName="foo", variableName="bar", default=True, value=False ) - file_parameter = CSVParameter(displayName="my_file", variableName="file-id") - decoy.when(mock_run_orchestrator_store.get_run_time_parameters()).then_return( [bool_parameter, file_parameter] ) + expected_initial_error_recovery_rules: list[ErrorRecoveryRule] = [] + decoy.when(mock_error_recovery_setting_store.get_is_enabled()).then_return( + sentinel.error_recovery_enabled + ) + decoy.when( + error_recovery_mapping.create_error_recovery_policy_from_rules( + rules=expected_initial_error_recovery_rules, + enabled=sentinel.error_recovery_enabled, + ) + ).then_return(sentinel.initial_error_recovery_policy) + result = await subject.create( run_id=run_id, created_at=created_at, - labware_offsets=[labware_offset], + labware_offsets=sentinel.labware_offsets, protocol=protocol, - deck_configuration=[], - run_time_param_values={"foo": "bar"}, - run_time_param_paths={"xyzzy": Path("zork")}, + deck_configuration=sentinel.deck_configuration, + run_time_param_values=sentinel.run_time_param_values, + run_time_param_paths=sentinel.run_time_param_paths, notify_publishers=mock_notify_publishers, ) @@ -321,12 +283,24 @@ async def test_create_engine_error( decoy: Decoy, mock_run_orchestrator_store: RunOrchestratorStore, mock_run_store: RunStore, + mock_error_recovery_setting_store: ErrorRecoverySettingStore, subject: RunDataManager, ) -> None: """It should not create a resource if engine creation fails.""" run_id = "hello world" created_at = datetime(year=2021, month=1, day=1) + expected_initial_error_recovery_rules: list[ErrorRecoveryRule] = [] + decoy.when(mock_error_recovery_setting_store.get_is_enabled()).then_return( + sentinel.error_recovery_enabled + ) + decoy.when( + error_recovery_mapping.create_error_recovery_policy_from_rules( + rules=expected_initial_error_recovery_rules, + enabled=sentinel.error_recovery_enabled, + ) + ).then_return(sentinel.initial_error_recovery_policy) + decoy.when( await mock_run_orchestrator_store.create( run_id, @@ -336,6 +310,7 @@ async def test_create_engine_error( run_time_param_values=None, run_time_param_paths=None, notify_publishers=mock_notify_publishers, + initial_error_recovery_policy=matchers.Anything(), ) ).then_raise(RunConflictError("oh no")) @@ -783,6 +758,7 @@ async def test_create_archives_existing( run_command: commands.Command, mock_run_orchestrator_store: RunOrchestratorStore, mock_run_store: RunStore, + mock_error_recovery_setting_store: ErrorRecoverySettingStore, subject: RunDataManager, ) -> None: """It should persist the previously current run when a new run is created.""" @@ -798,11 +774,23 @@ async def test_create_archives_existing( ) ) + expected_initial_error_recovery_rules: list[ErrorRecoveryRule] = [] + decoy.when(mock_error_recovery_setting_store.get_is_enabled()).then_return( + sentinel.error_recovery_enabled + ) + decoy.when( + error_recovery_mapping.create_error_recovery_policy_from_rules( + rules=expected_initial_error_recovery_rules, + enabled=sentinel.error_recovery_enabled, + ) + ).then_return(sentinel.initial_error_recovery_policy) + decoy.when( await mock_run_orchestrator_store.create( run_id=run_id_new, labware_offsets=[], protocol=None, + initial_error_recovery_policy=sentinel.initial_error_recovery_policy, deck_configuration=[], run_time_param_values=None, run_time_param_paths=None, @@ -1126,35 +1114,36 @@ async def test_create_policies_raises_run_not_current( "not-current-run-id" ) with pytest.raises(RunNotCurrentError): - subject.set_policies( - run_id="run-id", policies=decoy.mock(cls=List[ErrorRecoveryRule]) + subject.set_error_recovery_rules( + run_id="run-id", rules=decoy.mock(cls=List[ErrorRecoveryRule]) ) async def test_create_policies_translates_and_calls_orchestrator( decoy: Decoy, - monkeypatch: pytest.MonkeyPatch, mock_run_orchestrator_store: RunOrchestratorStore, + mock_error_recovery_setting_store: ErrorRecoverySettingStore, subject: RunDataManager, ) -> None: """Should translate rules into policy and call orchestrator.""" - monkeypatch.setattr( - error_recovery_mapping, - "create_error_recovery_policy_from_rules", - decoy.mock( - func=decoy.mock( - func=error_recovery_mapping.create_error_recovery_policy_from_rules - ) - ), + decoy.when(mock_error_recovery_setting_store.get_is_enabled()).then_return( + sentinel.is_enabled ) - input_rules = decoy.mock(cls=List[ErrorRecoveryRule]) - expected_output = decoy.mock(cls=ErrorRecoveryPolicy) decoy.when( - error_recovery_mapping.create_error_recovery_policy_from_rules(input_rules) - ).then_return(expected_output) - decoy.when(mock_run_orchestrator_store.current_run_id).then_return("run-id") - subject.set_policies(run_id="run-id", policies=input_rules) - decoy.verify(mock_run_orchestrator_store.set_error_recovery_policy(expected_output)) + error_recovery_mapping.create_error_recovery_policy_from_rules( + rules=sentinel.input_rules, + enabled=sentinel.is_enabled, + ) + ).then_return(sentinel.expected_output) + decoy.when(mock_run_orchestrator_store.current_run_id).then_return( + sentinel.current_run_id + ) + subject.set_error_recovery_rules( + run_id=sentinel.current_run_id, rules=sentinel.input_rules + ) + decoy.verify( + mock_run_orchestrator_store.set_error_recovery_policy(sentinel.expected_output) + ) def test_get_nozzle_map_current_run( diff --git a/robot-server/tests/runs/test_engine_store.py b/robot-server/tests/runs/test_run_orchestrator_store.py similarity index 94% rename from robot-server/tests/runs/test_engine_store.py rename to robot-server/tests/runs/test_run_orchestrator_store.py index 46f25f3edb4..e34c1340359 100644 --- a/robot-server/tests/runs/test_engine_store.py +++ b/robot-server/tests/runs/test_run_orchestrator_store.py @@ -6,6 +6,7 @@ from opentrons_shared_data import get_shared_data_root from opentrons_shared_data.robot.types import RobotType +from opentrons.protocol_engine.error_recovery_policy import never_recover from opentrons.protocol_engine.errors.exceptions import EStopActivatedError from opentrons.types import DeckSlotName from opentrons.hardware_control import HardwareControlAPI, API @@ -58,6 +59,7 @@ async def test_create_engine(decoy: Decoy, subject: RunOrchestratorStore) -> Non result = await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, protocol=None, deck_configuration=[], notify_publishers=mock_notify_publishers, @@ -85,6 +87,7 @@ async def test_create_engine_uses_robot_type( await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -106,6 +109,7 @@ async def test_create_engine_with_labware_offsets( result = await subject.create( run_id="run-id", labware_offsets=[labware_offset], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -129,6 +133,7 @@ async def test_archives_state_if_engine_already_exists( await subject.create( run_id="run-id-1", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -138,6 +143,7 @@ async def test_archives_state_if_engine_already_exists( await subject.create( run_id="run-id-2", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -151,6 +157,7 @@ async def test_clear_engine(subject: RunOrchestratorStore) -> None: await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -172,6 +179,7 @@ async def test_clear_engine_not_stopped_or_idle( await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -187,6 +195,7 @@ async def test_clear_idle_engine(subject: RunOrchestratorStore) -> None: await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -238,6 +247,7 @@ async def test_get_default_orchestrator_current_unstarted( await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -252,6 +262,7 @@ async def test_get_default_orchestrator_conflict(subject: RunOrchestratorStore) await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers, @@ -269,6 +280,7 @@ async def test_get_default_orchestrator_run_stopped( await subject.create( run_id="run-id", labware_offsets=[], + initial_error_recovery_policy=never_recover, deck_configuration=[], protocol=None, notify_publishers=mock_notify_publishers,