# rocm_ci.yml
# Workflow from a repository forked from facebookincubator/AITemplate.
# ROCm CI: on the `rocm` runner, builds AITemplate, runs the ResNet-50, BERT,
# ViT and Stable Diffusion example benchmarks, uploads their logs as an
# artifact and finally runs process_results.py.
name: ROCM_CI
on:
  pull_request:
    types: [labeled, synchronize, reopened]
jobs:
  build:
    # `github.event.label` is only present on `labeled` events, so testing it
    # skipped this job on every `synchronize`/`reopened` run. Look at the pull
    # request's current labels instead; join() keeps the substring match.
    if: >-
      contains(join(github.event.pull_request.labels.*.name, ','), 'rocm')
    runs-on: rocm
    steps:
      - uses: actions/checkout@v4

      - name: Get CPU info on Ubuntu
        if: contains(runner.os, 'linux')
        run: |
          cat /proc/cpuinfo

      # Dump the runner environment and compiler version for debugging.
      - name: Get env vars
        run: |
          echo GITHUB_WORKFLOW = $GITHUB_WORKFLOW
          echo HOME = $HOME
          echo GITHUB_ACTION = $GITHUB_ACTION
          echo GITHUB_ACTIONS = $GITHUB_ACTIONS
          echo GITHUB_REPOSITORY = $GITHUB_REPOSITORY
          echo GITHUB_EVENT_NAME = $GITHUB_EVENT_NAME
          echo GITHUB_EVENT_PATH = $GITHUB_EVENT_PATH
          echo GITHUB_WORKSPACE = $GITHUB_WORKSPACE
          echo GITHUB_SHA = $GITHUB_SHA
          echo GITHUB_REF = $GITHUB_REF
          export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
          echo GIT_BRANCH = $GIT_BRANCH
          c++ --verbose

      - name: Build AITemplate
        run: |
          rocm-smi
          rocminfo | grep "gfx"
          export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
          # NOTE(review): on pull_request events GITHUB_BASE_REF is the PR's
          # base branch, so this clones upstream's copy of that branch rather
          # than the PR's own code - confirm this is intended.
          git clone --recursive -b "$GIT_BRANCH" https://github.com/facebookincubator/AITemplate.git
          cd AITemplate
          DOCKER_BUILDKIT=1 ./docker/build.sh rocm
          # NOTE(review): no command is passed, so the image's default command
          # runs; the lines below execute on the runner itself - confirm.
          docker run --network=host --device=/dev/kfd --device=/dev/dri \
            --ipc=host --group-add video --cap-add=SYS_PTRACE \
            --security-opt seccomp=unconfined -v $HOME:/dockerx/ ait:latest
          export ROCM_PATH=/opt/rocm
          export ROC_USE_FGS_KERNARG=0
          # clean up and reinstall ait
          pip3 uninstall -y aitemplate
          cd python
          rm -rf dist build
          python3 setup.py bdist_wheel
          pip3 install dist/*.whl
          # install necessary python modules
          pip3 install timm
          pip3 uninstall -y torch
          pip3 install torch --extra-index-url https://download.pytorch.org/whl/rocm5.1.1
          python3 -m pip install transformers click
          python3 -c "import torch; print(torch.__version__)"

      # Run examples.
      # NOTE(review): `cmd | tee` reports tee's exit status, so a failing
      # benchmark does not fail its step unless pipefail is enabled (for
      # example by setting `shell: bash`). Left unchanged here.

      # ResNet-50
      - name: Run Resnet-50 Tests
        run: |
          echo "Running RESNET50 tests"
          cd "$GITHUB_WORKSPACE/AITemplate/examples/01_resnet-50"
          # populate log headers
          export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
          echo -n "hostname: ">resnet50.log; hostname >> resnet50.log
          echo -n "GPU_arch: " >> resnet50.log; rocminfo | grep "Name:" | grep "gfx" >> resnet50.log
          rocminfo | grep "Compute Unit:" >> resnet50.log
          echo "git_branch: $GIT_BRANCH" >> resnet50.log
          git show --summary | grep commit >> resnet50.log
          /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> resnet50.log
          HIP_VISIBLE_DEVICES=0,1 python3 benchmark_ait.py 2>&1 | tee -a resnet50.log
          # test 2 gcd: one benchmark per device, run concurrently
          for BATCH_SIZE in 1 2 4 8 16 32 64 128 256
          do
            HIP_VISIBLE_DEVICES=0 python3 benchmark_ait.py --batch-size $BATCH_SIZE 2>&1 | tee -a resnet50.log &
            HIP_VISIBLE_DEVICES=1 python3 benchmark_ait.py --batch-size $BATCH_SIZE 2>&1 | tee -a resnet50.log
            # Let the device-0 run finish before the next batch size starts,
            # so runs never overlap on one device or outlive the step.
            wait
          done

      # BERT
      - name: Run BERT Tests
        run: |
          echo "Running BERT tests"
          cd "$GITHUB_WORKSPACE/AITemplate/examples/03_bert"
          # populate log headers
          export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
          echo -n "hostname: ">bert.log; hostname >> bert.log
          echo -n "GPU_arch: " >> bert.log; rocminfo | grep "Name:" | grep "gfx" >> bert.log
          rocminfo | grep "Compute Unit:" >> bert.log
          echo "git_branch: $GIT_BRANCH" >> bert.log
          git show --summary | grep commit >> bert.log
          /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> bert.log
          # profiling
          for sq in 64 128 384 512 1024
          do
            HIP_VISIBLE_DEVICES=0,1 python3 benchmark_ait.py --seq-length $sq 2>&1 | tee -a bert.log
          done

      # VIT
      - name: Run VIT Tests
        run: |
          echo "Running VIT tests"
          cd "$GITHUB_WORKSPACE/AITemplate/examples/04_vit"
          # populate log headers
          export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
          echo -n "hostname: ">vit.log; hostname >> vit.log
          echo -n "GPU_arch: " >> vit.log; rocminfo | grep "Name:" | grep "gfx" >> vit.log
          rocminfo | grep "Compute Unit:" >> vit.log
          echo "git_branch: $GIT_BRANCH" >> vit.log
          git show --summary | grep commit >> vit.log
          /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> vit.log
          # profiling
          HIP_VISIBLE_DEVICES=0,1 python3 benchmark_ait.py 2>&1 | tee -a vit.log
          # test 2 gcd: one benchmark per device, run concurrently
          for BATCH_SIZE in 1 2 4 8 16 32 64 128 256
          do
            HIP_VISIBLE_DEVICES=0 python3 benchmark_ait.py --batch-size $BATCH_SIZE 2>&1 | tee -a vit.log &
            HIP_VISIBLE_DEVICES=1 python3 benchmark_ait.py --batch-size $BATCH_SIZE 2>&1 | tee -a vit.log
            # Let the device-0 run finish before the next batch size starts,
            # so runs never overlap on one device or outlive the step.
            wait
          done

      # Stable Diffusion
      - name: Run Stable Diffusion tests
        # Hand the secret over as an environment variable instead of splicing
        # it into the script text, so special characters cannot alter the
        # command line.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          echo "Running Stable Diffusion tests"
          cd "$GITHUB_WORKSPACE/AITemplate/examples/05_stable_diffusion"
          python3 -m pip install transformers click torch diffusers==0.11.1 accelerate
          # populate log headers
          export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
          echo -n "hostname: ">sdiff.log; hostname >> sdiff.log
          echo -n "GPU_arch: " >> sdiff.log; rocminfo | grep "Name:" | grep "gfx" >> sdiff.log
          rocminfo | grep "Compute Unit:" >> sdiff.log
          echo "git_branch: $GIT_BRANCH" >> sdiff.log
          git show --summary | grep commit >> sdiff.log
          /opt/rocm/bin/amdclang++ --version | grep -e 'InstalledDir' >> sdiff.log
          # profiling
          python3 scripts/download_pipeline.py --token "$HF_TOKEN" 2>&1 | tee -a sdiff.log
          HIP_VISIBLE_DEVICES=0,1 python3 scripts/compile.py 2>&1 | tee -a sdiff.log
          HIP_VISIBLE_DEVICES=0 python3 scripts/demo.py --benchmark 1 2>&1 | tee -a sdiff.log

      # Collect every example's *.log file into a single artifact.
      - name: Archive logs
        uses: actions/upload-artifact@v4
        with:
          name: test results
          path: AITemplate/examples/*/*.log

      - name: Process the test results
        # Same variable names the script previously received via `export`;
        # presumably read by process_results.py - verify against the script.
        env:
          dbuser: ${{ secrets.DBUSER }}
          dbpassword: ${{ secrets.DBPASSWORD }}
        run: |
          echo "processing the test results"
          cd "$GITHUB_WORKSPACE/AITemplate/examples/"
          pip3 install --upgrade pip
          pip3 install sqlalchemy pymysql pandas sshtunnel
          python3 process_results.py