-
Notifications
You must be signed in to change notification settings - Fork 3
/
llama3-8b-triton-g6e.yml
49 lines (41 loc) · 1.24 KB
/
llama3-8b-triton-g6e.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# General configuration applicable to the entire app.
general:
  name: llama3-8b-g6e-triton
# Shared per-instance settings. Anchored as `ec2_settings` so that each entry
# under `instances` can pull them in with `<<: *ec2_settings`.
defaults: &ec2_settings
  # `{{region}}` and `{{gpu}}` are template placeholders; presumably they are
  # substituted before this file is parsed as YAML -- TODO confirm against the
  # loader. They are quoted so that an un-rendered file does not parse the
  # braces as a YAML flow mapping and so the rendered value stays a string.
  region: "{{region}}"
  ami_id: "{{gpu}}"
  device_name: /dev/sda1
  ebs_del_on_termination: true
  ebs_Iops: 16000
  ebs_VolumeSize: 250
  ebs_VolumeType: gp3
  startup_script: startup_scripts/ubuntu_startup.txt
  post_startup_script: post_startup_scripts/fmbench.txt
  # Timeout period in seconds before a run is stopped.
  fmbench_complete_timeout: 2400
# One entry per instance type. Every entry merges the shared settings anchored
# as `ec2_settings` (merge is shallow; keys written explicitly in an entry take
# precedence over merged-in keys) and lists the fmbench config file(s) for
# that instance type.
instances:
  - instance_type: g6e.2xlarge
    deploy: true
    <<: *ec2_settings
    fmbench_config:
      - fmbench:llama3/8b/config-llama3-8b-g6e.2xl-tp-1-mc-max-triton-ec2.yml

  - instance_type: g6e.4xlarge
    deploy: true
    <<: *ec2_settings
    fmbench_config:
      - fmbench:llama3/8b/config-llama3-8b-g6e.4xl-tp-1-mc-max-triton-ec2.yml

  - instance_type: g6e.12xlarge
    deploy: true
    <<: *ec2_settings
    fmbench_config:
      - fmbench:llama3/8b/config-llama3-8b-g6e.12xl-tp-2-mc-max-triton-ec2.yml

  - instance_type: g6e.24xlarge
    deploy: true
    <<: *ec2_settings
    fmbench_config:
      - fmbench:llama3/8b/config-llama3-8b-g6e.24xl-tp-2-mc-max-triton-ec2.yml

  - instance_type: g6e.48xlarge
    deploy: true
    <<: *ec2_settings
    fmbench_config:
      - fmbench:llama3/8b/config-llama3-8b-g6e.48xl-tp-2-mc-max-triton-ec2.yml