diff --git a/llm_benchmark_suite.py b/llm_benchmark_suite.py
index de783cf..7d29d62 100644
--- a/llm_benchmark_suite.py
+++ b/llm_benchmark_suite.py
@@ -124,6 +124,18 @@ async def run(self, pass_argv: List[str], spread: float) -> asyncio.Task:
         return await llm_benchmark.run(full_argv)
 
 
+class _CerebrasLlm(_Llm):
+    """See https://docs.cerebras.ai/en/latest/wsc/Model-zoo/MZ-overview.html#list-of-models"""
+
+    def __init__(self, model: str, display_model: Optional[str] = None):
+        super().__init__(
+            model,
+            "cerebras.ai/" + (display_model or model),
+            api_key=os.getenv("CEREBRAS_API_KEY"),
+            base_url="https://api.cerebras.ai/v1",
+        )
+
+
 class _CloudflareLlm(_Llm):
     """See https://developers.cloudflare.com/workers-ai/models/"""
 
@@ -224,6 +236,17 @@ def __init__(
         )
 
 
+class _OvhLlm(_Llm):
+    """See https://llama-3-70b-instruct.endpoints.kepler.ai.cloud.ovh.net/doc"""
+
+    def __init__(self, model: str, display_model: Optional[str] = None):
+        super().__init__(
+            "",
+            "cloud.ovh.net/" + (display_model or model),
+            base_url=f"https://{model}.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
+        )
+
+
 class _PerplexityLlm(_Llm):
     """See https://docs.perplexity.ai/docs/model-cards"""
 
@@ -248,26 +271,15 @@ def __init__(self, model: str, display_model: Optional[str] = None):
         )
 
 
-class _OvhLlm(_Llm):
-    """See https://llama-3-70b-instruct.endpoints.kepler.ai.cloud.ovh.net/doc"""
-
-    def __init__(self, model: str, display_model: Optional[str] = None):
-        super().__init__(
-            "",
-            "cloud.ovh.net/" + display_model,
-            base_url=f"https://{model}.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
-        )
-
-
-class _CerebrasLlm(_Llm):
-    """See https://docs.cerebras.ai/en/latest/wsc/Model-zoo/MZ-overview.html#list-of-models"""
+class _UltravoxLlm(_Llm):
+    """See https://docs.ultravox.ai/docs/models"""
 
     def __init__(self, model: str, display_model: Optional[str] = None):
         super().__init__(
             model,
-            "cerebras.ai/" + (display_model or model),
-            api_key=os.getenv("CEREBRAS_API_KEY"),
-            base_url="https://api.cerebras.ai/v1",
+            "ultravox.ai/" + (display_model or model),
+            api_key=os.getenv("ULTRAVOX_API_KEY"),
+            base_url="https://ultravox.api.fixie.ai/v1",
         )
 
 
@@ -509,11 +521,8 @@ def _audio_models():
         # _Llm(GPT_4O), doesn't support audio yet
         _Llm(GEMINI_1_5_PRO),
         _Llm(GEMINI_1_5_FLASH),
-        _Llm(
-            "fixie-ai/ultravox-v0.4",
-            base_url="https://ultravox.api.fixie.ai/v1",
-            api_key=os.getenv("ULTRAVOX_API_KEY"),
-        ),
+        _UltravoxLlm("fixie-ai/ultravox-v0.4", "ultravox-v0.4-8b"),
+        _UltravoxLlm("fixie-ai/ultravox-70B", "ultravox-v0.4-70b"),
         _Llm(
             "fixie-ai/ultravox-v0.2",
             "baseten.co/ultravox-v0.2",
diff --git a/llm_request.py b/llm_request.py
index 3216d49..e109b6f 100644
--- a/llm_request.py
+++ b/llm_request.py
@@ -217,13 +217,12 @@ def make_openai_messages(ctx: ApiContext):
     content: List[Dict[str, Any]] = [{"type": "text", "text": ctx.prompt}]
 
     for file in ctx.files:
-        # if not file.mime_type.startswith("image/"):
-        #     raise ValueError(f"Unsupported file type: {file.mime_type}")
         url = f"data:{file.mime_type};base64,{file.base64_data}"
-        image_url = {"url": url}
+        media_url = {"url": url}
+        url_type = "audio_url" if file.is_audio else "image_url"
         if ctx.detail:
-            image_url["detail"] = ctx.detail
-        content.append({"type": "image_url", "image_url": image_url})
+            media_url["detail"] = ctx.detail
+        content.append({"type": url_type, url_type: media_url})
     return [{"role": "user", "content": content}]
 
 