Merge pull request #6347 from cylc/8.3.x-sync

🤖 Merge 8.3.x-sync into master
cylc · Sep 3, 2024 · commit 907ff0d
2 parents: 9db2ca3 + 8bb1f44

Showing 14 changed files with 180 additions and 47 deletions.
diff --git a/.github/workflows/2_auto_publish_release.yml b/.github/workflows/2_auto_publish_release.yml
@@ -38,7 +38,7 @@ jobs:
       uses: cylc/release-actions/build-python-package@v1
 
     - name: Publish distribution to PyPI
-      uses: pypa/gh-action-pypi-publish@v1.9.0
+      uses: pypa/gh-action-pypi-publish@v1.10.1
       with:
         user: __token__ # uses the API token feature of PyPI - least permissions possible
         password: ${{ secrets.PYPI_TOKEN }}

diff --git a/changes.d/6175.fix.md b/changes.d/6175.fix.md
@@ -0,0 +1 @@
+The workflow-state command and xtrigger will now reject invalid polling arguments.
diff --git a/cylc/flow/dbstatecheck.py b/cylc/flow/dbstatecheck.py
@@ -36,6 +36,10 @@
     TASK_OUTPUT_FAILED,
     TASK_OUTPUT_FINISHED,
 )
+from cylc.flow.task_state import (
+    TASK_STATE_MAP,
+    TASK_STATUSES_FINAL,
+)
 from cylc.flow.util import deserialise_set
 from metomi.isodatetime.parsers import TimePointParser
 from metomi.isodatetime.exceptions import ISO8601SyntaxError
@@ -244,6 +248,8 @@ def workflow_state_query(
         stmt_args = []
         stmt_wheres = []
 
+        check_polling_config(selector, is_trigger, is_message)
+
         if is_trigger or is_message:
             target_table = CylcWorkflowDAO.TABLE_TASK_OUTPUTS
             mask = "name, cycle, outputs"
@@ -363,3 +369,25 @@ def _selector_in_outputs(selector: str, outputs: Iterable[str]) -> bool:
                 or TASK_OUTPUT_FAILED in outputs
             )
         )
+
+
+def check_polling_config(selector, is_trigger, is_message):
+    """Check for invalid or unreliable polling configurations."""
+    if selector and not (is_trigger or is_message):
+        # we are using task status polling
+        try:
+            trigger = TASK_STATE_MAP[selector]
+        except KeyError:
+            raise InputError(f'No such task state "{selector}"')
+        else:
+            if trigger is None:
+                raise InputError(
+                    f'Cannot poll for the "{selector}" task state'
+                )
+
+            if selector not in TASK_STATUSES_FINAL:
+                raise InputError(
+                    f'Polling for the "{selector}" task status is not'
+                    ' reliable as it is a transient state.'
+                    f'\nPoll for the "{trigger}" trigger instead.'
+                )
diff --git a/cylc/flow/scripts/workflow_state.py b/cylc/flow/scripts/workflow_state.py
@@ -33,8 +33,8 @@
 so you can start checking before the target workflow is started.
 
 Legacy (pre-8.3.0) options are supported, but deprecated, for existing scripts:
-    cylc workflow-state --task=NAME --point=CYCLE --status=STATUS
-        --output=MESSAGE --message=MESSAGE --task-point WORKFLOW
+  cylc workflow-state --task=NAME --point=CYCLE --status=STATUS
+      --output=MESSAGE --message=MESSAGE --task-point WORKFLOW
 (Note from 8.0 until 8.3.0 --output and --message both match task messages).
 
 In "cycle/task:selector" the selector will match task statuses, unless:
@@ -55,24 +55,23 @@
 
 Flow numbers are only printed for flow numbers > 1.
 
-USE IN TASK SCRIPTING:
+Use in task scripting:
   - To poll a task at the same cycle point in another workflow, just use
     $CYLC_TASK_CYCLE_POINT in the ID.
   - To poll a task at an offset cycle point, use the --offset option to
     have Cylc do the datetime arithmetic for you.
   - However, see also the workflow_state xtrigger for this use case.
 
-WARNINGS:
- - Typos in the workflow or task ID will result in fruitless polling.
- - To avoid missing transient states ("submitted", "running") poll for the
-   corresponding output trigger instead ("submitted", "started").
- - Cycle points are auto-converted to the DB point format (and UTC mode).
- - Task outputs manually completed by "cylc set" have "(force-completed)"
-   recorded as the task message in the DB, so it is best to query trigger
-   names, not messages, unless specifically interested in forced outputs.
+Warnings:
+  - Typos in the workflow or task ID will result in fruitless polling.
+  - To avoid missing transient states ("submitted", "running") poll for the
+    corresponding output trigger instead ("submitted", "started").
+  - Cycle points are auto-converted to the DB point format (and UTC mode).
+  - Task outputs manually completed by "cylc set" have "(force-completed)"
+    recorded as the task message in the DB, so it is best to query trigger
+    names, not messages, unless specifically interested in forced outputs.
 
 Examples:
-
   # Print the status of all tasks in WORKFLOW:
   $ cylc workflow-state WORKFLOW
 
@@ -115,7 +114,10 @@
 from cylc.flow.dbstatecheck import CylcWorkflowDBChecker
 from cylc.flow.terminal import cli_function
 from cylc.flow.workflow_files import infer_latest_run_from_id
-from cylc.flow.task_state import TASK_STATUSES_ORDERED
+from cylc.flow.task_state import (
+    TASK_STATUSES_FINAL,
+    TASK_STATUSES_ALL,
+)
 
 if TYPE_CHECKING:
     from optparse import Values
@@ -175,6 +177,8 @@ def __init__(
         self.alt_cylc_run_dir = alt_cylc_run_dir
         self.old_format = old_format
         self.pretty_print = pretty_print
+        self.is_message = is_message
+        self.is_trigger = is_trigger
 
         try:
             tokens = Tokens(self.id_)
@@ -197,17 +201,6 @@ def __init__(
         self.result: Optional[List[List[str]]] = None
         self._db_checker: Optional[CylcWorkflowDBChecker] = None
 
-        self.is_message = is_message
-        if is_message:
-            self.is_trigger = False
-        else:
-            self.is_trigger = (
-                is_trigger or
-                (
-                    self.selector is not None and
-                    self.selector not in TASK_STATUSES_ORDERED
-                )
-            )
         super().__init__(**kwargs)
 
     def _find_workflow(self) -> bool:
@@ -363,7 +356,6 @@ def get_option_parser() -> COP:
 
 @cli_function(get_option_parser, remove_opts=["--db"])
 def main(parser: COP, options: 'Values', *ids: str) -> None:
-
     # Note it would be cleaner to use 'id_cli.parse_ids()' here to get the
     # workflow ID and tokens, but that function infers run number and fails
     # if the workflow is not installed yet. We want to be able to start polling
@@ -428,6 +420,15 @@ def main(parser: COP, options: 'Values', *ids: str) -> None:
             msg += id_
         else:
             msg += id_.replace(options.depr_point, "$CYLC_TASK_CYCLE_POINT")
+
+        if (
+            options.depr_status
+            and options.depr_status in TASK_STATUSES_ALL
+            and options.depr_status not in TASK_STATUSES_FINAL
+        ):
+            # polling for non-final task statuses is flaky
+            msg += ' and the --triggers option'
+
         LOG.warning(msg)
 
     poller = WorkflowPoller(

diff --git a/cylc/flow/task_state.py b/cylc/flow/task_state.py
@@ -26,7 +26,15 @@
 )
 
 from cylc.flow.prerequisite import Prerequisite
-from cylc.flow.task_outputs import TaskOutputs
+from cylc.flow.task_outputs import (
+    TASK_OUTPUT_EXPIRED,
+    TASK_OUTPUT_FAILED,
+    TASK_OUTPUT_STARTED,
+    TASK_OUTPUT_SUBMITTED,
+    TASK_OUTPUT_SUBMIT_FAILED,
+    TASK_OUTPUT_SUCCEEDED,
+    TaskOutputs,
+)
 from cylc.flow.wallclock import get_current_time_string
 
 
@@ -155,13 +163,17 @@
     TASK_STATUS_RUNNING,
 }
 
-# Task statuses that can be manually triggered.
-TASK_STATUSES_TRIGGERABLE = {
-    TASK_STATUS_WAITING,
-    TASK_STATUS_EXPIRED,
-    TASK_STATUS_SUBMIT_FAILED,
-    TASK_STATUS_SUCCEEDED,
-    TASK_STATUS_FAILED,
+# Mapping from each task status to its corresponding output (None if none)
+TASK_STATE_MAP = {
+    # status: trigger
+    TASK_STATUS_WAITING: None,
+    TASK_STATUS_EXPIRED: TASK_OUTPUT_EXPIRED,
+    TASK_STATUS_PREPARING: None,
+    TASK_STATUS_SUBMIT_FAILED: TASK_OUTPUT_SUBMIT_FAILED,
+    TASK_STATUS_SUBMITTED: TASK_OUTPUT_SUBMITTED,
+    TASK_STATUS_RUNNING: TASK_OUTPUT_STARTED,
+    TASK_STATUS_FAILED: TASK_OUTPUT_FAILED,
+    TASK_STATUS_SUCCEEDED: TASK_OUTPUT_SUCCEEDED,
 }
 
 

diff --git a/cylc/flow/xtriggers/workflow_state.py b/cylc/flow/xtriggers/workflow_state.py
@@ -20,8 +20,12 @@
 
 from cylc.flow.scripts.workflow_state import WorkflowPoller
 from cylc.flow.id import tokenise
-from cylc.flow.exceptions import WorkflowConfigError
+from cylc.flow.exceptions import WorkflowConfigError, InputError
 from cylc.flow.task_state import TASK_STATUS_SUCCEEDED
+from cylc.flow.dbstatecheck import check_polling_config
+
+
+DEFAULT_STATUS = TASK_STATUS_SUCCEEDED
 
 
 def workflow_state(
@@ -84,7 +88,7 @@ def workflow_state(
         offset,
         flow_num,
         alt_cylc_run_dir,
-        TASK_STATUS_SUCCEEDED,
+        DEFAULT_STATUS,
         is_trigger, is_message,
         old_format=False,
         condition=workflow_task_id,
@@ -151,6 +155,15 @@ def validate(args: Dict[str, Any]):
     ):
         raise WorkflowConfigError("flow_num must be an integer if given.")
 
+    try:
+        check_polling_config(
+            tokens['cycle_sel'] or tokens['task_sel'] or DEFAULT_STATUS,
+            args['is_trigger'],
+            args['is_message'],
+        )
+    except InputError as exc:
+        raise WorkflowConfigError(str(exc)) from None
+
 
 # BACK COMPAT: workflow_state_backcompat
 # from: 8.0.0

diff --git a/tests/flakyfunctional/xtriggers/01-workflow_state/flow.cylc b/tests/flakyfunctional/xtriggers/01-workflow_state/flow.cylc
@@ -8,7 +8,7 @@
     initial cycle point = 2011
     final cycle point = 2016
     [[xtriggers]]
-        upstream = workflow_state("{{UPSTREAM}}//%(point)s/foo:data_ready"):PT1S
+        upstream = workflow_state("{{UPSTREAM}}//%(point)s/foo:data_ready", is_trigger=True):PT1S
    [[graph]]
         P1Y = """
             foo

diff --git a/tests/functional/shutdown/08-now1/flow.cylc b/tests/functional/shutdown/08-now1/flow.cylc
@@ -24,7 +24,7 @@
         [[[events]]]
             # wait for the stopping message, sleep a bit, then echo some stuff
             started handlers = """
-                cylc workflow-state %(workflow)s//%(point)s/%(name)s:stopping >/dev/null && sleep 1 && echo 'Hello %(id)s %(event)s'
+                cylc workflow-state %(workflow)s//%(point)s/%(name)s:stopping --triggers >/dev/null && sleep 1 && echo 'Hello %(id)s %(event)s'
             """
         [[[outputs]]]
             stopping = stopping

diff --git a/tests/functional/workflow-state/05-output.t b/tests/functional/workflow-state/05-output.t
@@ -27,6 +27,6 @@ workflow_run_ok "${TEST_NAME}" \
     cylc play --reference-test --debug --no-detach "${WORKFLOW_NAME}"
 
 TEST_NAME=${TEST_NAME_BASE}-cli-check
-run_ok "${TEST_NAME}" cylc workflow-state "${WORKFLOW_NAME}//20100101T0000Z/t1:out1" --max-polls=1
+run_ok "${TEST_NAME}" cylc workflow-state "${WORKFLOW_NAME}//20100101T0000Z/t1:out1" --triggers --max-polls=1
 
 purge
diff --git a/tests/functional/workflow-state/07-message2.t b/tests/functional/workflow-state/07-message2.t
@@ -29,7 +29,7 @@ workflow_run_ok "${TEST_NAME_BASE}-run" \
     cylc play --debug --no-detach "${WORKFLOW_NAME}"
 
 TEST_NAME=${TEST_NAME_BASE}-query
-run_fail "${TEST_NAME}" cylc workflow-state "${WORKFLOW_NAME}//2013/foo:x" --max-polls=1
+run_fail "${TEST_NAME}" cylc workflow-state "${WORKFLOW_NAME}//2013/foo:x" --triggers --max-polls=1
 
 grep_ok "failed after 1 polls" "${TEST_NAME}.stderr"
 

diff --git a/tests/functional/workflow-state/11-multi.t b/tests/functional/workflow-state/11-multi.t
@@ -54,15 +54,15 @@ CMD="cylc workflow-state --run-dir=$DBDIR --max-polls=1"
 #   foo|1|[1]|2024-06-05T16:34:02+12:00|2024-06-05T16:34:04+12:00|1|succeeded|0|0
 
 #---------------
-# Test the new-format command line (pre-8.3.0).
+# Test the new-format command line (8.3.0+).
 T=${TEST_NAME_BASE}-cli-c8b
 run_ok "${T}-1" $CMD c8b
 run_ok "${T}-2" $CMD c8b//1
 run_ok "${T}-3" $CMD c8b//1/foo
+run_fail "${T}-4" $CMD c8b//1/foo:waiting
 run_ok "${T}-4" $CMD c8b//1/foo:succeeded
 run_ok "${T}-5" $CMD "c8b//1/foo:the quick brown" --messages
 run_ok "${T}-6" $CMD "c8b//1/foo:x" --triggers
-run_ok "${T}-7" $CMD "c8b//1/foo:x"  # default to trigger if not a status
 run_ok "${T}-8" $CMD c8b//1
 run_ok "${T}-9" $CMD c8b//1:succeeded
 
@@ -86,7 +86,7 @@ run_fail "${T}-2" $CMD "c7//1/foo:the quick brown" --triggers
 run_ok   "${T}-3" $CMD "c7//1/foo:x" --triggers
 
 #---------------
-# Test the old-format command line (8.3.0+).
+# Test the old-format command line (pre-8.3.0).
 T=${TEST_NAME_BASE}-cli-8b-compat
 run_ok "${T}-1" $CMD c8b
 run_ok "${T}-2" $CMD c8b --point=1

diff --git a/tests/functional/workflow-state/11-multi/flow.cylc b/tests/functional/workflow-state/11-multi/flow.cylc
@@ -23,7 +23,7 @@
         # Cylc 8 new (from 8.3.0)
         c1 = workflow_state(c8b//1/foo, offset=P0, alt_cylc_run_dir={{ALT}}):PT1S
         c2 = workflow_state(c8b//1/foo:succeeded, offset=P0, alt_cylc_run_dir={{ALT}}):PT1S
-        c3 = workflow_state(c8b//1/foo:x, offset=P0, alt_cylc_run_dir={{ALT}}):PT1S
+        c3 = workflow_state(c8b//1/foo:x, offset=P0, alt_cylc_run_dir={{ALT}}, is_trigger=True):PT1S
         c4 = workflow_state(c8b//1/foo:"the quick brown", offset=P0, is_message=True, alt_cylc_run_dir={{ALT}}):PT1S
 
     [[graph]]

diff --git a/tests/unit/test_dbstatecheck.py b/tests/unit/test_dbstatecheck.py
@@ -0,0 +1,44 @@
+# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE.
+# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+from cylc.flow.dbstatecheck import check_polling_config
+from cylc.flow.exceptions import InputError
+
+import pytest
+
+
+def test_check_polling_config():
+    """It should reject invalid or unreliable polling configurations.
+
+    See https://github.com/cylc/cylc-flow/issues/6157
+    """
+    # invalid polling use cases
+    with pytest.raises(InputError, match='No such task state'):
+        check_polling_config('elephant', False, False)
+
+    with pytest.raises(InputError, match='Cannot poll for'):
+        check_polling_config('waiting', False, False)
+
+    with pytest.raises(InputError, match='is not reliable'):
+        check_polling_config('running', False, False)
+
+    # valid polling use cases
+    check_polling_config('started', True, False)
+    check_polling_config('started', False, True)
+
+    # valid query use cases
+    check_polling_config(None, False, True)
+    check_polling_config(None, False, False)