From 291e845209f62d4aeea0d3f37f18741a3a457f8e Mon Sep 17 00:00:00 2001 From: Alejandro Dubrovsky Date: Wed, 31 Jan 2024 09:26:35 -0800 Subject: [PATCH] Add execution_role_arn to aws_batch_scheduler (#811) Co-authored-by: Alex Dubrovsky --- torchx/schedulers/aws_batch_scheduler.py | 10 ++++++++++ torchx/schedulers/test/aws_batch_scheduler_test.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/torchx/schedulers/aws_batch_scheduler.py b/torchx/schedulers/aws_batch_scheduler.py index 897116ff0..5118a04f0 100644 --- a/torchx/schedulers/aws_batch_scheduler.py +++ b/torchx/schedulers/aws_batch_scheduler.py @@ -174,6 +174,7 @@ def _role_to_node_properties( start_idx: int, privileged: bool = False, job_role_arn: Optional[str] = None, + execution_role_arn: Optional[str] = None, ) -> Dict[str, object]: role.mounts += get_device_mounts(role.resource.devices) @@ -250,6 +251,8 @@ def _role_to_node_properties( } if job_role_arn: container["jobRoleArn"] = job_role_arn + if execution_role_arn: + container["executionRoleArn"] = execution_role_arn if role.num_replicas > 1: instance_type = instance_type_from_resource(role.resource) if instance_type is not None: @@ -355,6 +358,7 @@ class AWSBatchOpts(TypedDict, total=False): share_id: Optional[str] priority: int job_role_arn: Optional[str] + execution_role_arn: Optional[str] class AWSBatchScheduler(DockerWorkspaceMixin, Scheduler[AWSBatchOpts]): @@ -505,6 +509,7 @@ def _submit_dryrun(self, app: AppDef, cfg: AWSBatchOpts) -> AppDryRunInfo[BatchJ start_idx=node_idx, privileged=cfg["privileged"], job_role_arn=cfg.get("job_role_arn"), + execution_role_arn=cfg.get("execution_role_arn"), ) ) node_idx += role.num_replicas @@ -585,6 +590,11 @@ def _run_opts(self) -> runopts: type_=str, help="The Amazon Resource Name (ARN) of the IAM role that the container can assume for AWS permissions.", ) + opts.add( + "execution_role_arn", + type_=str, + help="The Amazon Resource Name (ARN) of the IAM role that the ECS agent can assume for AWS permissions.", + ) return opts def _get_job_id(self, app_id: str) -> Optional[str]: diff --git a/torchx/schedulers/test/aws_batch_scheduler_test.py b/torchx/schedulers/test/aws_batch_scheduler_test.py index a74f8faf9..5f759e609 100644 --- a/torchx/schedulers/test/aws_batch_scheduler_test.py +++ b/torchx/schedulers/test/aws_batch_scheduler_test.py @@ -157,8 +157,17 @@ def test_submit_dryrun_job_role_arn(self) -> None: info = create_scheduler("test").submit_dryrun(_test_app(), cfg) node_groups = info.request.job_def["nodeProperties"]["nodeRangeProperties"] self.assertEqual(1, len(node_groups)) + self.assertEqual(cfg["job_role_arn"], node_groups[0]["container"]["jobRoleArn"]) + + def test_submit_dryrun_execution_role_arn(self) -> None: + cfg = AWSBatchOpts( + {"queue": "ignored_in_test", "execution_role_arn": "veryexecutive"} + ) + info = create_scheduler("test").submit_dryrun(_test_app(), cfg) + node_groups = info.request.job_def["nodeProperties"]["nodeRangeProperties"] + self.assertEqual(1, len(node_groups)) self.assertEqual( - cfg["job_role_arn"], node_groups[0]["container"]["jobRoleArn"] + cfg["execution_role_arn"], node_groups[0]["container"]["executionRoleArn"] ) def test_submit_dryrun_privileged(self) -> None: