-
Notifications
You must be signed in to change notification settings - Fork 20
/
project.yaml
131 lines (131 loc) · 13.7 KB
/
project.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Project descriptor: object kind plus the project's identifying name.
kind: project
metadata:
  name: mlopspedia-bot-yonis
spec:
  # Project-level parameters.
  params:
    source: 'git://github.com/mlrun/demo-llm-tuning.git#main'
    default_image: yonishelach/mlrun-llm

  functions:
    # Job functions, each referenced by the path of its source file.
    - url: src/data_collection.py
      name: data-collecting
      kind: job
      image: mlrun/mlrun
    - url: src/data_preprocess.py
      name: data-preparing
      kind: job
    - url: src/trainer.py
      name: training
      kind: job

    # Serving function, embedded as a complete inline function object
    # instead of being referenced by url.
    - name: serving
      spec:
        kind: serving
        metadata:
          name: serving
          project: mlopspedia-bot-yonis
        spec:
          command: ''
          args: []
          image: yonishelach/mlrun-llm
          build:
            # NOTE(review): no value is present for the embedded source in
            # this copy of the file - confirm against the repository.
            functionSourceCode: null
            source: ./
            commands: []
            code_origin: 'http://github.com/mlrun/demo-llm-tuning#refs/heads/main#91145f96f3cd627431de34d0bae3547efbdd7097'
            origin_filename: src/serving.py
            requirements: []
          description: ''
          default_handler: ''
          disable_auto_mount: false
          clone_target_dir: ''

          # V3IO variables are declared with empty values here.
          env:
            - name: V3IO_API
              value: ''
            - name: V3IO_USERNAME
              value: ''
            - name: V3IO_ACCESS_KEY
              value: ''
            - name: V3IO_FRAMESD
              value: ''

          resources:
            requests:
              memory: 1Mi
              cpu: 25m
            limits:
              nvidia.com/gpu: 1
          priority_class_name: igz-workload-medium
          preemption_mode: prevent
          min_replicas: 1
          max_replicas: 4
          source: ''
          function_kind: serving_v2
          readiness_timeout: 3000
          function_handler: 'serving:handler'
          base_image_pull: false

          # Linear chain of steps, linked through each step's `after` list:
          # preprocess -> mlopspedia -> postprocess -> toxicity-classifier.
          graph:
            steps:
              preprocess:
                kind: task
                handler: preprocess
                after: []
              mlopspedia:
                kind: task
                class_name: LLMModelServer
                class_args:
                  model_args:
                    load_in_8bit: true
                    device_map: 'cuda:0'
                    trust_remote_code: true
                  tokenizer_name: tiiuae/falcon-7b
                  model_name: tiiuae/falcon-7b
                  peft_model: 'store://artifacts/mlopspedia-bot-yonis/falcon-7b-mlrun'
                after:
                  - preprocess
              postprocess:
                kind: task
                handler: postprocess
                after:
                  - mlopspedia
              toxicity-classifier:
                kind: task
                class_name: ToxicityClassifierModelServer
                class_args:
                  threshold: 0.7
                after:
                  - postprocess
                # The only step flagged as responder.
                responder: true
            engine: async

          secret_sources: []

          # Required node affinity: every expression uses NotIn, excluding
          # nodes labelled preemptible / SPOT / spot.
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                  - matchExpressions:
                      - key: app.iguazio.com/lifecycle
                        operator: NotIn
                        values:
                          - preemptible
                      - key: eks.amazonaws.com/capacityType
                        operator: NotIn
                        values:
                          - SPOT
                      - key: node-lifecycle
                        operator: NotIn
                        values:
                          - spot
          tolerations: null
          security_context: {}
        verbose: false

  workflows:
    - path: src/training_workflow.py
      name: training_workflow

  artifacts: []
  conda: ''
  source: 'git://github.com/mlrun/demo-llm-tuning.git#main'
  origin_url: 'http://github.com/mlrun/demo-llm-tuning#refs/heads/main'
  load_source_on_run: true
  desired_state: online
  default_image: yonishelach/mlrun-llm
  build:
    commands: []
    requirements: []
  custom_packagers: []