
Merge pull request #121 from bytegorilla/main
Increase TPS to 4 digits
juberti authored Sep 5, 2024
2 parents 495accf + 92a5dec commit 30d8fe0
Showing 4 changed files with 9 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -10,6 +10,7 @@ Large Language Models (LLMs):
 - Llama2 and 3 from several different providers, including
   - Anyscale
   - Azure
+  - Cerebras
   - Cloudflare
   - Groq
   - OctoAI
2 changes: 1 addition & 1 deletion llm_benchmark.py
@@ -273,7 +273,7 @@ def on_token(ctx: llm_request.ApiContext, token: str):
         assert r.output
         minimal_output = r.error or r.output.replace("\n", "\\n").strip()[:64]
         print(
-            f"| {r.model:42} | {r.ttr:4.2f} | {r.ttft:4.2f} | {r.tps:3.0f} "
+            f"| {r.model:42} | {r.ttr:4.2f} | {r.ttft:4.2f} | {r.tps:4.0f} "
             f"| {r.output_tokens:3} | {r.total_time:5.2f} | {minimal_output} |"
         )
     elif args.format == "json":
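Why the extra digit matters: with `{r.tps:3.0f}`, any throughput of 1000 tokens/s or more renders as four characters and pushes every subsequent pipe in that row out of alignment, while `{r.tps:4.0f}` keeps the column fixed-width up to 9999. A minimal sketch with hypothetical TPS values:

```python
# Hypothetical TPS values spanning two to four digits.
for tps in (42.0, 567.0, 2345.0):
    print(f"| {tps:3.0f} |", f"| {tps:4.0f} |")
# |  42 | |   42 |
# | 567 | |  567 |
# | 2345 | | 2345 |   <- width 3 overflows; width 4 stays aligned
```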
6 changes: 3 additions & 3 deletions llm_benchmark_suite.py
@@ -579,9 +579,9 @@ def _format_response(
         return response.to_json(indent=2), "application/json"
     else:
         s = (
-            "| Provider/Model                             | TTR  | TTFT | TPS | ITk  | OTk | ITim | OTim | Total |"
+            "| Provider/Model                             | TTR  | TTFT | TPS  | ITk  | OTk | ITim | OTim | Total |"
             f" {'Response':{dlen}.{dlen}} |\n"
-            "| :----------------------------------------- | ---: | ---: | --: | ---: | --: | ---: | ---: | ----: |"
+            "| :----------------------------------------- | ---: | ---: | ---: | ---: | --: | ---: | ---: | ----: |"
             f" {':--':-<{dlen}.{dlen}} |\n"
         )

@@ -600,7 +600,7 @@ def _format_response(
             total_time = r.total_time or 0.0
             output = (r.error or r.output).strip().replace("\n", "\\n")
             s += (
-                f"| {r.model[:42]:42} | {ttr:4.2f} | {ttft:4.2f} | {tps:3.0f} "
+                f"| {r.model[:42]:42} | {ttr:4.2f} | {ttft:4.2f} | {tps:4.0f} "
                 f"| {in_tokens:4} | {out_tokens:3} | {in_time:4.2f} | {out_time:4.2f} "
                 f"| {total_time:5.2f} | {output:{dlen}.{dlen}} |\n"
             )
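In the suite's markdown output, the TPS header cell, alignment cell, and data cell all need the same width, or the rendered table drifts by one character per row. A minimal sketch of the three-row pattern after the change, using hypothetical model names and TPS values and a trimmed column set:

```python
# Hypothetical two-column version of the suite's table.
rows = [("groq/llama3-8b", 1234.0), ("azure/gpt-4o", 87.0)]
s = f"| {'Model':14} | {'TPS':4} |\n"      # header cells match data widths
s += f"| {':':-<14} | {'---:':4} |\n"      # markdown alignment row
for model, tps in rows:
    s += f"| {model:14} | {tps:4.0f} |\n"  # TPS now formatted with :4.0f
print(s, end="")
# | Model          | TPS  |
# | :------------- | ---: |
# | groq/llama3-8b | 1234 |
# | azure/gpt-4o   |   87 |
```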
5 changes: 4 additions & 1 deletion llm_request.py
@@ -17,6 +17,7 @@
 ApiResult = Tuple[aiohttp.ClientResponse, TokenGenerator]

 AZURE_OPENAI_API_VERSION = "2024-02-15-preview"
+MAX_TPS = 9999
 MAX_TTFT = 9.99
 MAX_TOTAL_TIME = 99.99

@@ -145,7 +146,9 @@ async def run(self, on_token: Optional[Callable[["ApiContext", str], None]] = None):
         if not self.metrics.error:
             token_time = end_time - first_token_time
             self.metrics.total_time = end_time - start_time
-            self.metrics.tps = min((self.metrics.output_tokens - 1) / token_time, 999)
+            self.metrics.tps = min((self.metrics.output_tokens - 1) / token_time, MAX_TPS)
+            if self.metrics.tps == MAX_TPS:
+                self.metrics.tps = 0.0
         else:
             self.metrics.ttft = MAX_TTFT
             self.metrics.tps = 0.0
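The clamp above echoes the MAX_TTFT and MAX_TOTAL_TIME sentinels nearby: TPS is computed over the inter-token window (the first token is excluded because timing starts at its arrival), capped at MAX_TPS, and a reading that lands exactly on the cap is reported as 0.0, presumably because it indicates a degenerate timing measurement rather than real throughput. A standalone sketch of that logic with hypothetical inputs (`tokens_per_second` is an illustrative helper, not part of the source):

```python
MAX_TPS = 9999

def tokens_per_second(output_tokens: int, token_time: float) -> float:
    # Inter-token rate: (n - 1) intervals arrive over token_time seconds.
    tps = min((output_tokens - 1) / token_time, MAX_TPS)
    # Hitting the cap implies token_time was effectively zero, so the
    # measurement is treated as invalid and zeroed rather than reported.
    return 0.0 if tps == MAX_TPS else tps

print(tokens_per_second(100, 0.5))   # 198.0
print(tokens_per_second(100, 1e-9))  # 0.0 (capped at 9999, then zeroed)
```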
