# ci_tests.yml (100 lines, 87 loc, 3.28 KB) - from a fork of karpathy/llm.c
# (Line-number gutter 1-100 from the web view omitted.)
# Display name of this workflow.
name: Unit, Static and other Tests

# Events that start the workflow. The `branches` filters must be nested under
# their own event so that each one scopes only that event.
on:
  # Creation of a branch or tag.
  create:
  # Manual run.
  workflow_dispatch:
  # Pushes to master only.
  push:
    branches:
      - master
  # Pull requests targeting master only.
  pull_request:
    branches:
      - master

jobs:
  # Builds and runs the dataloader test twice: first a plain build, then a
  # build with AddressSanitizer flags passed through TEST_CFLAGS.
  dataloader_test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: test the dataloader without / with sanitize address
        run: |
          cd dev/test
          make PRECISION=BF16 test_dataloader
          ./test_dataloader
          make clean
          make PRECISION=BF16 TEST_CFLAGS="-fsanitize=address -fno-omit-frame-pointer" test_dataloader
          ./test_dataloader

  # Compiles the CUDA code inside an NVIDIA CUDA devel container, dumps PTX and
  # SASS for the default build and for GPU_COMPUTE_CAPABILITY=80 / 90, and
  # uploads each set as a workflow artifact.
  ptx_and_sass_files:
    runs-on: ubuntu-latest
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install OpenMP and OpenMPI
        run: apt-get update && apt-get install -y libomp-dev libopenmpi-dev

      - name: Generate ptx/sass files and upload them to persistent storage
        run: |
          mkdir -p dev/cuda/ptx_sass_logs
          make train_gpt2cu
          cuobjdump --dump-ptx train_gpt2cu > dev/cuda/train_gpt2cu.ptx
          cuobjdump --dump-sass train_gpt2cu > dev/cuda/train_gpt2cu.sass
          cd dev/cuda
          make -j all_ptx
          make -j all_sass
          cp *.ptx ptx_sass_logs/
          cp *.sass ptx_sass_logs/
          ls ptx_sass_logs/

      # NOTE(review): the steps below rebuild targets that the previous step
      # already produced in the same workspace. `make -B` (--always-make)
      # forces the rebuild so the dumps reflect the requested compute
      # capability instead of reusing outputs make considers up to date.
      # Assumes train_gpt2cu / all_ptx / all_sass resolve to file targets;
      # confirm against the Makefiles.
      - name: Generate ptx/sass files for A100 and upload them to persistent storage
        run: |
          mkdir -p dev/cuda/ptx_sass_logs_A100
          make -B train_gpt2cu GPU_COMPUTE_CAPABILITY=80
          cuobjdump --dump-ptx train_gpt2cu > dev/cuda/train_gpt2cu.ptx
          cuobjdump --dump-sass train_gpt2cu > dev/cuda/train_gpt2cu.sass
          cd dev/cuda
          make -B -j GPU_COMPUTE_CAPABILITY=80 all_ptx
          make -B -j GPU_COMPUTE_CAPABILITY=80 all_sass
          cp *.ptx ptx_sass_logs_A100/
          cp *.sass ptx_sass_logs_A100/
          ls ptx_sass_logs_A100/

      - name: Generate ptx/sass files for H100 and upload them to persistent storage
        run: |
          mkdir -p dev/cuda/ptx_sass_logs_H100
          make -B train_gpt2cu GPU_COMPUTE_CAPABILITY=90
          cuobjdump --dump-ptx train_gpt2cu > dev/cuda/train_gpt2cu.ptx
          cuobjdump --dump-sass train_gpt2cu > dev/cuda/train_gpt2cu.sass
          cd dev/cuda
          make -B -j GPU_COMPUTE_CAPABILITY=90 all_ptx
          make -B -j GPU_COMPUTE_CAPABILITY=90 all_sass
          cp *.ptx ptx_sass_logs_H100/
          cp *.sass ptx_sass_logs_H100/
          ls ptx_sass_logs_H100/

      # One artifact per build variant, each kept for 30 days.
      - name: Upload ptx/sass files
        uses: actions/upload-artifact@v4
        with:
          name: ptx_sass_files
          path: dev/cuda/ptx_sass_logs/
          retention-days: 30  # days to retain

      - name: Upload ptx/sass files for A100
        uses: actions/upload-artifact@v4
        with:
          name: ptx_sass_files_A100
          path: dev/cuda/ptx_sass_logs_A100/
          retention-days: 30  # days to retain

      - name: Upload ptx/sass files for H100
        uses: actions/upload-artifact@v4
        with:
          name: ptx_sass_files_H100
          path: dev/cuda/ptx_sass_logs_H100/
          retention-days: 30  # days to retain