Skip to content

Commit

Permalink
Ensure that platform from group selection checks broadcast manager (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
wxtim authored Sep 10, 2024
1 parent f30f0c2 commit ddddfcd
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 13 deletions.
1 change: 1 addition & 0 deletions changes.d/6330.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix bug where broadcasting failed to change platform selected after host selection failure.
25 changes: 13 additions & 12 deletions cylc/flow/task_job_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,18 +306,19 @@ def submit_task_jobs(self, workflow, itasks, curve_auth,

# Get another platform, if task config platform is a group
use_next_platform_in_group = False
if itask.tdef.rtconfig['platform']:
try:
platform = get_platform(
itask.tdef.rtconfig['platform'],
bad_hosts=self.bad_hosts
)
except PlatformLookupError:
pass
else:
# If were able to select a new platform;
if platform and platform != itask.platform:
use_next_platform_in_group = True
bc_mgr = self.task_events_mgr.broadcast_mgr
rtconf = bc_mgr.get_updated_rtconfig(itask)
try:
platform = get_platform(
rtconf,
bad_hosts=self.bad_hosts
)
except PlatformLookupError:
pass

Check warning on line 317 in cylc/flow/task_job_mgr.py

View check run for this annotation

Codecov / codecov/patch

cylc/flow/task_job_mgr.py#L316-L317

Added lines #L316 - L317 were not covered by tests
else:
# If were able to select a new platform;
if platform and platform != itask.platform:
use_next_platform_in_group = True

Check warning on line 321 in cylc/flow/task_job_mgr.py

View check run for this annotation

Codecov / codecov/patch

cylc/flow/task_job_mgr.py#L321

Added line #L321 was not covered by tests

if use_next_platform_in_group:
# store the previous platform's hosts so that when
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ workflow_run_fail "${TEST_NAME_BASE}-run" \
cylc play --no-detach "${WORKFLOW_NAME}"

grep_workflow_log_ok "${TEST_NAME_BASE}-grep-1" \
"platform: badhostplatform - initialisation did not complete (no hosts were reachable)"
"platform: badhostplatform - initialisation did not complete"

grep_workflow_log_ok "${TEST_NAME_BASE}-grep-2" "CRITICAL - Workflow stalled"

Expand Down
49 changes: 49 additions & 0 deletions tests/integration/test_task_job_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,52 @@ async def test__prep_submit_task_job_impl_handles_execution_time_limit(
schd.task_job_mgr._prep_submit_task_job(
schd.workflow, task_a)
assert not task_a.summary.get('execution_time_limit', '')


async def test_broadcast_platform_change(
mock_glbl_cfg,
flow,
scheduler,
start,
log_filter,
):
"""Broadcast can change task platform.
Even after host selection failure.
see https://github.com/cylc/cylc-flow/issues/6320
"""
mock_glbl_cfg(
'cylc.flow.platforms.glbl_cfg',
'''
[platforms]
[[foo]]
hosts = food
''')

id_ = flow({
"scheduling": {"graph": {"R1": "mytask"}},
# Platform = None doesn't cause this issue!
"runtime": {"mytask": {"platform": "localhost"}}})

schd = scheduler(id_, run_mode='live')

async with start(schd) as log:
# Change the task platform with broadcast:
schd.broadcast_mgr.put_broadcast(
['1'], ['mytask'], [{'platform': 'foo'}])

# Simulate prior failure to contact hosts:
schd.task_job_mgr.task_remote_mgr.bad_hosts = {'food'}

# Attempt job submission:
schd.task_job_mgr.submit_task_jobs(
schd.workflow,
schd.pool.get_tasks(),
schd.server.curve_auth,
schd.server.client_pub_key_dir)

# Check that task platform hasn't become "localhost":
assert schd.pool.get_tasks()[0].platform['name'] == 'foo'
# ... and that remote init failed because all hosts bad:
assert log_filter(log, contains="(no hosts were reachable)")

0 comments on commit ddddfcd

Please sign in to comment.