Skip to content

Commit

Permalink
Add execution_role_arn to aws_batch_scheduler (pytorch#811)
Browse files (browse the repository at this point in the history)
Co-authored-by: Alex Dubrovsky <[email protected]>
  • Loading branch information
alito and Alex Dubrovsky authored Jan 31, 2024
1 parent 1a8e1eb commit 291e845
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
10 changes: 10 additions & 0 deletions torchx/schedulers/aws_batch_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def _role_to_node_properties(
start_idx: int,
privileged: bool = False,
job_role_arn: Optional[str] = None,
execution_role_arn: Optional[str] = None,
) -> Dict[str, object]:
role.mounts += get_device_mounts(role.resource.devices)

Expand Down Expand Up @@ -250,6 +251,8 @@ def _role_to_node_properties(
}
if job_role_arn:
container["jobRoleArn"] = job_role_arn
if execution_role_arn:
container["executionRoleArn"] = execution_role_arn
if role.num_replicas > 1:
instance_type = instance_type_from_resource(role.resource)
if instance_type is not None:
Expand Down Expand Up @@ -355,6 +358,7 @@ class AWSBatchOpts(TypedDict, total=False):
share_id: Optional[str]
priority: int
job_role_arn: Optional[str]
execution_role_arn: Optional[str]


class AWSBatchScheduler(DockerWorkspaceMixin, Scheduler[AWSBatchOpts]):
Expand Down Expand Up @@ -505,6 +509,7 @@ def _submit_dryrun(self, app: AppDef, cfg: AWSBatchOpts) -> AppDryRunInfo[BatchJ
start_idx=node_idx,
privileged=cfg["privileged"],
job_role_arn=cfg.get("job_role_arn"),
execution_role_arn=cfg.get("execution_role_arn"),
)
)
node_idx += role.num_replicas
Expand Down Expand Up @@ -585,6 +590,11 @@ def _run_opts(self) -> runopts:
type_=str,
help="The Amazon Resource Name (ARN) of the IAM role that the container can assume for AWS permissions.",
)
opts.add(
"execution_role_arn",
type_=str,
help="The Amazon Resource Name (ARN) of the IAM role that the ECS agent can assume for AWS permissions.",
)
return opts

def _get_job_id(self, app_id: str) -> Optional[str]:
Expand Down
11 changes: 10 additions & 1 deletion torchx/schedulers/test/aws_batch_scheduler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,17 @@ def test_submit_dryrun_job_role_arn(self) -> None:
info = create_scheduler("test").submit_dryrun(_test_app(), cfg)
node_groups = info.request.job_def["nodeProperties"]["nodeRangeProperties"]
self.assertEqual(1, len(node_groups))
self.assertEqual(cfg["job_role_arn"], node_groups[0]["container"]["jobRoleArn"])

def test_submit_dryrun_execution_role_arn(self) -> None:
cfg = AWSBatchOpts(
{"queue": "ignored_in_test", "execution_role_arn": "veryexecutive"}
)
info = create_scheduler("test").submit_dryrun(_test_app(), cfg)
node_groups = info.request.job_def["nodeProperties"]["nodeRangeProperties"]
self.assertEqual(1, len(node_groups))
self.assertEqual(
- cfg["job_role_arn"], node_groups[0]["container"]["jobRoleArn"]
+ cfg["execution_role_arn"], node_groups[0]["container"]["executionRoleArn"]
)

def test_submit_dryrun_privileged(self) -> None:
Expand Down

0 comments on commit 291e845

Please sign in to comment.