# ci.yml
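# CI for llm.c (this repo is a fork of karpathy/llm.c).
# Builds and runs the CPU train/test binaries on Linux, macOS, and Windows,
# and compile-checks the CUDA targets in NVIDIA CUDA containers and on a
# Windows runner. The CUDA jobs only build; the hosted runners have no GPU,
# so none of the CUDA binaries are executed here.
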
name: Build and test
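# Run on pushes and pull requests targeting master, on branch/tag creation,
# and on manual dispatch from the Actions tab.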
on:
  create:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
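  # CPU-only pipeline: build the C binaries and run the end-to-end test on
  # all three desktop operating systems.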
  build-and-test-cpu:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
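      # OpenMP comes from apt on Linux and from Homebrew on macOS; the
      # Windows build path below uses MSVC and skips this step.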
      - name: Install OpenMP
        if: matrix.os != 'windows-latest'
        run: |
          if [ "${{ runner.os }}" == "Linux" ]; then
            sudo apt-get update && sudo apt-get install -y libomp-dev
          elif [ "${{ runner.os }}" == "macOS" ]; then
            brew install libomp
          fi
      - name: Install dependencies
        run: pip install -r requirements.txt
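      # Tokenize the Tiny Shakespeare dataset, then run the PyTorch reference
      # training script on CPU; it writes the GPT-2 checkpoint and debug-state
      # files that the C test binary compares against.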
      - name: Run preprocessing
        run: python dev/data/tinyshakespeare.py
      - name: Train model
        run: python train_gpt2.py --device=cpu
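      # Windows runners do not ship GNU make, so fetch a prebuilt make 4.4.1
      # binary and drive the Makefile with it.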
      - name: Download Win32 Make.exe
        if: matrix.os == 'windows-latest'
        run: |
          $wc = New-Object System.Net.WebClient
          $url = 'https://github.com/maweil/MakeForWindows/releases/download/v4.4.1/make-bin-win64.zip'
          $output = './make-bin-win64.zip'
          $wc.DownloadFile($url, $output)
      - name: Unzip Win32 Makefile
        if: matrix.os == 'windows-latest'
        run: |
          unzip make-bin-win64.zip
      - name: Compile training and testing program
        if: matrix.os != 'windows-latest'
        run: make test_gpt2 train_gpt2
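      # vcvars64.bat sets up the MSVC build environment; the downloaded GNU
      # make then builds the same targets with WIN_CI_BUILD=1.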
      - name: Compile training and testing program for Windows
        if: matrix.os == 'windows-latest'
        shell: cmd
        run: |
          call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvars64.bat"
          make-4.4.1\dist\make WIN_CI_BUILD=1 test_gpt2 train_gpt2
      - name: Execute testing program (With OpenMP)
        if: matrix.os != 'windows-latest'
        run: OMP_NUM_THREADS=8 ./test_gpt2
      - name: Execute Windows testing program (With OpenMP)
        if: matrix.os == 'windows-latest'
        shell: cmd
        run: |
          copy test_gpt2 test_gpt2.exe
          test_gpt2.exe
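      # Rebuild without OpenMP and run the test again to make sure the
      # single-threaded code path still passes.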
      - name: Compile training and testing program without OpenMP
        if: matrix.os != 'windows-latest'
        run: NO_OMP=1 make test_gpt2 train_gpt2
      - name: Execute testing program (No OpenMP)
        if: matrix.os != 'windows-latest'
        run: ./test_gpt2
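
  # Compile-only check of the CUDA targets on Windows with MSVC + nvcc.
  # The runner has no GPU, so the resulting binaries are not executed.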
  build-cuda-windows:
    runs-on: windows-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Download Win32 Make.exe
        run: |
          $wc = New-Object System.Net.WebClient
          $url = 'https://github.com/maweil/MakeForWindows/releases/download/v4.4.1/make-bin-win64.zip'
          $output = './make-bin-win64.zip'
          $wc.DownloadFile($url, $output)
      - name: Unzip Win32 Makefile
        run: |
          unzip make-bin-win64.zip
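      # Instead of the full CUDA installer, download only the redistributable
      # components the build needs (cudart, nvcc, nvrtc, cuBLAS, NVTX, the
      # profiler APIs, CCCL, and the Visual Studio integration) and unpack
      # them into the default CUDA v12.4 toolkit path.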
      - name: Install Cuda Toolkit 12.4 on Windows
        run: |
          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
          choco install unzip -y
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
      # Default installation path for CUDA Toolkit is C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4
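      # Make the toolkit visible to later steps: append its bin directory to
      # $GITHUB_PATH and export CUDA_PATH / CUDA_PATH_V12_4 via $GITHUB_ENV.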
      - name: Add Path
        run: |
          echo "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.4\\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
      - name: Build Cuda targets
        shell: cmd
        working-directory: ${{ github.workspace }}
        run: |
          call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvars64.bat"
          make-4.4.1\dist\make -j WIN_CI_BUILD=1 train_gpt2fp32cu test_gpt2fp32cu test_gpt2cu train_gpt2cu profile_gpt2cu
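
  # Older-toolchain coverage: build inside a CUDA 11.8 + cuDNN 8 container on
  # Ubuntu 20.04, including the cuDNN (USE_CUDNN=1) path, which needs the
  # cudnn-frontend headers cloned into the workspace.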
  build-ubuntu20-04:
    runs-on: ubuntu-20.04
    container:
      image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: System Info
        run: |
          nvcc --version
          g++ --version
      - name: Install cudnn frontend
        run: |
          apt-get update && apt-get install -y git
          git clone https://github.com/NVIDIA/cudnn-frontend.git
      - name: Build FP32 checkpoint
        run: make train_gpt2fp32cu test_gpt2fp32cu
      - name: Build FP32 precision
        run: PRECISION=FP32 make train_gpt2cu test_gpt2cu profile_gpt2cu
      - name: Build with CUDNN
        run: PRECISION=BF16 USE_CUDNN=1 make train_gpt2cu test_gpt2cu profile_gpt2cu
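
  # The next three jobs compile the mixed-precision CUDA targets in a
  # CUDA 12.4 container, one job per PRECISION setting (FP32, BF16, FP16).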
  build-cuda-fp32:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Build FP32 checkpoint
        run: make train_gpt2fp32cu test_gpt2fp32cu
      - name: Build FP32 precision
        run: PRECISION=FP32 make train_gpt2cu test_gpt2cu profile_gpt2cu

  build-cuda-bf16:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Build project
        run: PRECISION=BF16 make test_gpt2cu train_gpt2cu profile_gpt2cu

  build-cuda-fp16:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Build project
        run: PRECISION=FP16 make test_gpt2cu train_gpt2cu profile_gpt2cu
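
  # Build all standalone kernels under dev/cuda. OpenMP and OpenMPI dev
  # packages are installed up front since parts of that build depend on them.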
  build-cuda-kernels:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install OpenMP and OpenMPI
        run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev
      - name: Build project
        run: make -j4 -C dev/cuda