#!/bin/zsh
#SBATCH --exclusive
#SBATCH --mem=0
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=16
#SBATCH --partition=gpuA40x4 # <- or one of: gpuA100x4 gpuA40x4 gpuA100x8 gpuMI100x8
#SBATCH --account=bccn-delta-gpu
#SBATCH --job-name=a40_7b_gpu
#SBATCH --time=47:00:00 # hh:mm:ss for the job
#SBATCH --constraint="scratch"
### GPU options ###
#SBATCH --gpus-per-node=1
#SBATCH --gpu-bind=closest # select a cpu close to gpu on pci bus topology
#SBATCH --mail-user=[email protected]
#SBATCH --mail-type="BEGIN,END"
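### Usage (not parsed by Slurm; kept here as a reminder) ###
# A typical submit-and-monitor flow, using standard Slurm commands
# (output lands in Slurm's default slurm-<jobid>.out unless overridden):
#   sbatch eval_sys_a40-7b-gpu.slurm
#   squeue -u $USER
#   tail -f slurm-<jobid>.out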
module reset # drop modules and explicitly load the ones needed
# (good job metadata and reproducibility)
# $WORK and $SCRATCH are now set
module load python # ... or any appropriate modules
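# If eval_sys.py needs a project-specific environment, activate it here;
# the path in this sketch is a hypothetical example, not defined by the repo:
#   source $WORK/envs/prompt-cache/bin/activate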
module list # job documentation and metadata
echo "job is starting on `hostname`"
srun python3 eval_sys.py \
    --memo=a40 \
    --llm_config_path=./config/llm_config_llama2_7b.json \
    --use_cpu_for_inference=False
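
# A CPU-only run would presumably just flip the flag above; this sketch
# assumes eval_sys.py parses --use_cpu_for_inference as a boolean and that
# --memo is a free-form run label (check the script's argument parser first):
#   srun python3 eval_sys.py \
#       --memo=a40_cpu \
#       --llm_config_path=./config/llm_config_llama2_7b.json \
#       --use_cpu_for_inference=True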