# kubernetes-dist-train-integration-tests.yaml
# Workflow file from a fork of pytorch/torchx (62 lines, 1.85 KB upstream).
---
# Workflow: runs scripts/kube_dist_trainer.py on pushes to `main` and on pull
# requests. Every AWS-dependent step is gated on the AWS_ROLE_ARN secret being
# non-empty; when it is empty the trainer script is invoked with `--dryrun`.
name: Kubernetes Dist Train Integration Test

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  kubernetes-launch:
    # NOTE(review): confirm this runner image is still offered by GitHub.
    runs-on: ubuntu-20.04
    permissions:
      # `id-token: write` lets the job request an OIDC token, which the
      # credentials step below uses for `role-to-assume`.
      id-token: write
      contents: read
    steps:
      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is always a string, never a YAML float.
          python-version: '3.8'
          architecture: x64

      - name: Checkout TorchX
        uses: actions/checkout@v2

      - name: Configure AWS Credentials
        # NOTE(review): the exact version pin was destroyed by e-mail
        # obfuscation in the scraped source; `v1` is the action's
        # major-version tag. TODO: restore the precise pin used upstream.
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-region: us-west-2
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          role-session-name: github-torchx
        # Keep going when credentials cannot be obtained; later steps check
        # AWS_ROLE_ARN themselves and skip or dry-run accordingly.
        continue-on-error: true

      - name: Configure Kube Config
        env:
          AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }}
          # Passed through the environment instead of being interpolated into
          # the script text, so the value cannot be parsed as shell syntax.
          EKS_CLUSTER_NAME: ${{ secrets.EKS_CLUSTER_NAME }}
        run: |
          set -eux
          if [ -n "$AWS_ROLE_ARN" ]; then
            aws eks update-kubeconfig --region=us-west-2 --name="$EKS_CLUSTER_NAME"
          fi

      - name: Configure Docker
        env:
          AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }}
        run: |
          set -eux
          if [ -n "$AWS_ROLE_ARN" ]; then
            aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 495572122715.dkr.ecr.us-west-2.amazonaws.com
          fi

      - name: Install dependencies
        run: |
          set -eux
          # Quoted so the shell never treats `[kubernetes]` as a glob pattern.
          pip install -e '.[kubernetes]'

      - name: Run Kubernetes Integration Tests
        env:
          INTEGRATION_TEST_STORAGE: ${{ secrets.INTEGRATION_TEST_STORAGE }}
          CONTAINER_REPO: ${{ secrets.CONTAINER_REPO }}
          AWS_ROLE_ARN: ${{ secrets.AWS_ROLE_ARN }}
        run: |
          set -eux
          if [ -z "$AWS_ROLE_ARN" ]; then
            # only dryrun if no secrets
            ARGS="--dryrun"
          else
            ARGS=
          fi
          # $ARGS is intentionally unquoted: when empty it must expand to no
          # argument at all rather than to an empty-string argument.
          scripts/kube_dist_trainer.py $ARGS