-
Notifications
You must be signed in to change notification settings - Fork 9
/
metrics.py
123 lines (92 loc) · 2.61 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import logging
from dataclasses import dataclass, field
@dataclass(kw_only=True)
class NodeMetrics():
arrival_timestamp: float = 0.
start_timestamp: float = 0.
completion_timestamp: float = 0.
run_timestamp: float = 0.
preempt_timestamp: float = 0.
queue_time: float = 0.
blocked_time: float = 0.
service_time: float = 0.
response_time: float = 0.
@dataclass(kw_only=True)
class FlowMetrics(NodeMetrics):
    """Metrics record for a flow.

    Inherits every field from NodeMetrics and declares none of its own.
    """
@dataclass(kw_only=True)
class TaskMetrics(NodeMetrics):
    """Metrics record for a task.

    Inherits every field from NodeMetrics and declares none of its own.
    """
@dataclass(kw_only=True)
class RequestMetrics():
request_id: str = ''
router_arrival_timestamp: float = 0.
scheduler_arrival_timestamp: float = 0.
executor_start_timestamp: float = 0.
scheduler_completion_timestamp: float = 0.
router_completion_timestamp: float = 0.
router_queue_time: float = 0.
scheduler_queue_time: float = 0.
queue_time: float = 0.
service_time: float = 0.
scheduler_response_time: float = 0.
router_response_time: float = 0.
@dataclass(kw_only=True)
class GenerativeLLMRequestMetrics(RequestMetrics):
    """RequestMetrics extended with prompt / token timestamp fields.

    All fields added here are floats defaulting to 0.0.
    """

    # Start/end timestamp pairs for the prompt and token stages.
    prompt_start_timestamp: float = 0.0
    prompt_end_timestamp: float = 0.0
    token_start_timestamp: float = 0.0
    token_end_timestamp: float = 0.0

    # NOTE(review): presumably "time to first token" -- confirm.
    TTFT: float = 0.0
@dataclass(kw_only=True)
class InstanceMetrics():
spin_up_timestamp: float = 0.
run_timestamp: float = 0.
spin_down_timestamp: float = 0.
busy_time: float = 0.
interval_time: float = 0.
@dataclass(kw_only=True)
class ApplicationMetrics():
num_requests: int = 0
num_tasks: int = 0
service_times: list[float] = field(default_factory=list)
response_times: list[float] = field(default_factory=list)
@dataclass(kw_only=True)
class RouterMetrics():
pass
@dataclass(kw_only=True)
class ArbiterMetrics():
pass
@dataclass(kw_only=True)
class ServerMetrics():
pass
@dataclass(kw_only=True)
class NodeSLO():
latency: float = 0.
@dataclass(kw_only=True)
class TaskSLO(NodeSLO):
    """
    SLOs that are specific to a task.

    Declares no fields of its own; everything is inherited from NodeSLO.
    """
@dataclass(kw_only=True)
class FlowSLO(NodeSLO):
    """
    FlowSLOs capture any SLOs that are specific to a flow.

    Declares no fields of its own; everything is inherited from NodeSLO.
    """
@dataclass(kw_only=True)
class RequestSLO():
"""
RequestSLO captures any SLOs that are specific to a single request.
"""
TTFT: float = float('inf')
e2e_latency: float = float('inf')
@dataclass(kw_only=True)
class ApplicationSLO():
"""
ApplicationSLO captures any SLOs that apply across all application requests.
"""
TTFT: float = float('inf')
per_token_latency: float = float('inf')
throughput: float = 0.