# nvidia_workflow.yml
name: NVIDIA PyTorch/CUDA Job

# Manually dispatched workflow. The caller supplies source code as plain-text
# inputs; the job writes each provided script into the workspace under the
# given filename and then runs it.
on:
  workflow_dispatch:
    inputs:
      # Main script: always required. Its filename decides how it is run
      # (names ending in .cu are compiled with nvcc, anything else is passed
      # to python3) and is also uploaded with the job artifacts.
      script_content:
        description: 'Content of Python/CUDA script (.py or .cu file)'
        required: true
        type: string
      filename:
        description: 'Name of script (supports .py or .cu)'
        required: true
        type: string
      # Optional reference script: only written to disk when
      # reference_filename is non-empty.
      reference_content:
        description: 'Content of the reference code script (optional)'
        required: false
        type: string
      reference_filename:
        description: 'Name of reference script (supports .py or .cu)'
        required: false
        type: string
      # Optional outer eval script: only written to disk when eval_filename is
      # non-empty. When that file exists it is run instead of the main script.
      eval_content:
        description: 'Content of the outer eval code script (optional)'
        required: false
        type: string
      eval_filename:
        description: 'Name of outer eval script (supports .py or .cu)'
        required: false
        type: string
jobs:
  # Writes the dispatched script (plus the optional reference/eval scripts)
  # into the workspace, installs torch/triton when the sources import them,
  # runs the entry point with output captured in training.log, and uploads the
  # log together with the main script.
  #
  # Every dispatch input is handed to the shell through a step-level `env:`
  # entry instead of being spliced into the script text with `${{ ... }}`.
  # That keeps input text from being parsed as shell syntax and keeps file
  # contents byte-exact (no heredoc terminator collisions, no `$`/backtick/
  # backslash expansion).
  train:
    runs-on: [gpumode-nvidia-arc]
    timeout-minutes: 10
    container:
      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
    steps:
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      - name: Setup Python environment
        run: |
          uv venv .venv
          # Export the venv to all later steps.
          echo "VIRTUAL_ENV=$PWD/.venv" >> "$GITHUB_ENV"
          echo "$PWD/.venv/bin" >> "$GITHUB_PATH"

      - name: Create script file
        shell: bash
        env:
          SCRIPT_FILENAME: ${{ github.event.inputs.filename }}
          SCRIPT_CONTENT: ${{ github.event.inputs.script_content }}
        run: |
          # printf writes the content verbatim followed by one newline, which
          # is what the former quoted heredoc produced.
          printf '%s\n' "$SCRIPT_CONTENT" > "$SCRIPT_FILENAME"
          cat -- "$SCRIPT_FILENAME"  # Debug: show file contents

      - name: Create reference scripts if provided
        shell: bash
        env:
          REFERENCE_FILENAME: ${{ github.event.inputs.reference_filename }}
          REFERENCE_CONTENT: ${{ github.event.inputs.reference_content }}
        run: |
          if [[ -n "$REFERENCE_FILENAME" ]]; then
            echo "Creating reference script..."
            printf '%s\n' "$REFERENCE_CONTENT" > "$REFERENCE_FILENAME"
            cat -- "$REFERENCE_FILENAME"  # Debug: Show file contents
          else
            echo "No reference content provided."
          fi

      - name: Create eval scripts if provided
        shell: bash
        env:
          EVAL_FILENAME: ${{ github.event.inputs.eval_filename }}
          EVAL_CONTENT: ${{ github.event.inputs.eval_content }}
        run: |
          if [[ -n "$EVAL_FILENAME" ]]; then
            echo "Creating eval script..."
            printf '%s\n' "$EVAL_CONTENT" > "$EVAL_FILENAME"
            cat -- "$EVAL_FILENAME"  # Debug: Show file contents
          else
            echo "No eval content provided."
          fi

      - name: Install dependencies
        shell: bash
        env:
          SCRIPT_FILENAME: ${{ github.event.inputs.filename }}
          REFERENCE_FILENAME: ${{ github.event.inputs.reference_filename }}
          EVAL_FILENAME: ${{ github.event.inputs.eval_filename }}
        run: |
          # Scan every script that was written to disk, not only the main one:
          # when an eval script exists it is the file that actually runs.
          sources=("$SCRIPT_FILENAME")
          for extra in "$REFERENCE_FILENAME" "$EVAL_FILENAME"; do
            if [[ -n "$extra" && -f "$extra" ]]; then
              sources+=("$extra")
            fi
          done

          if grep -E "(import torch|from torch)" -- "${sources[@]}"; then
            echo "PyTorch detected, installing torch"
            uv pip install numpy torch
          fi
          if grep -E "(import triton|from triton)" -- "${sources[@]}"; then
            echo "Triton detected, installing triton"
            uv pip install triton
          fi

      - name: Run script
        shell: bash
        env:
          SCRIPT_FILENAME: ${{ github.event.inputs.filename }}
          EVAL_FILENAME: ${{ github.event.inputs.eval_filename }}
        run: |
          # `shell: bash` runs with -e, so the program's exit status is
          # captured with `|| status=$?`; this lets training.log be printed
          # even when the program fails. The status is re-raised at the end.
          status=0

          # An empty EVAL_FILENAME fails the -f test, so the main script is
          # the entry point whenever no eval file was written.
          if [[ -f "$EVAL_FILENAME" ]]; then
            if [[ "$EVAL_FILENAME" == *.cu ]]; then
              echo "Compiling and running CUDA files..."
              echo "Compiling: $EVAL_FILENAME"
              nvcc "$EVAL_FILENAME" -o cuda_program
              ./cuda_program > training.log 2>&1 || status=$?
            else
              echo "Running Python file..."
              python3 "$EVAL_FILENAME" > training.log 2>&1 || status=$?
            fi
          else
            if [[ "$SCRIPT_FILENAME" == *.cu ]]; then
              echo "Compiling and running CUDA file..."
              nvcc "$SCRIPT_FILENAME" -o cuda_program
              ./cuda_program > training.log 2>&1 || status=$?
            else
              echo "Running Python file..."
              python3 "$SCRIPT_FILENAME" > training.log 2>&1 || status=$?
            fi
          fi

          cat training.log  # Debug: show output
          exit "$status"

      - name: Upload training artifacts
        uses: actions/upload-artifact@v4
        # Upload even when an earlier step failed.
        if: always()
        with:
          name: training-artifacts
          path: |
            training.log
            ${{ github.event.inputs.filename }}

    # NOTE(review): the original indentation of this mapping was lost; it is
    # placed at job level on the assumption that it is meant to apply to the
    # steps that run the script - confirm against the upstream file.
    # Quoted so the value is a string rather than a YAML integer.
    env:
      CUDA_VISIBLE_DEVICES: "0"