From 95f43f8dfaa63c8c42e589cc72f23c71b35f9c62 Mon Sep 17 00:00:00 2001
From: Aidan Do
Date: Thu, 19 Dec 2024 15:01:17 +1100
Subject: [PATCH 1/3] .

---
 .../providers/remote/inference/fireworks/fireworks.py | 4 ++++
 llama_stack/templates/fireworks/run.yaml              | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index d9ef57b15a..975ec48933 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -65,6 +65,10 @@
         "fireworks/llama-v3p2-90b-vision-instruct",
         CoreModelId.llama3_2_90b_vision_instruct.value,
     ),
+    build_model_alias(
+        "fireworks/llama-v3p3-70b-instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
     build_model_alias(
         "fireworks/llama-guard-3-8b",
         CoreModelId.llama_guard_3_8b.value,
diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml
index cb31b46788..99f155a4a1 100644
--- a/llama_stack/templates/fireworks/run.yaml
+++ b/llama_stack/templates/fireworks/run.yaml
@@ -110,6 +110,11 @@ models:
   provider_id: fireworks
   provider_model_id: fireworks/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: fireworks
+  provider_model_id: fireworks/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks

From 94645dd5f6241ca8dfc761b0c368132d33bc02a5 Mon Sep 17 00:00:00 2001
From: Aidan Do
Date: Thu, 19 Dec 2024 15:08:29 +1100
Subject: [PATCH 2/3] .

---
 llama_stack/providers/remote/inference/fireworks/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/llama_stack/providers/remote/inference/fireworks/config.py
index e699269424..979e8455ac 100644
--- a/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/llama_stack/providers/remote/inference/fireworks/config.py
@@ -22,7 +22,7 @@ class FireworksImplConfig(BaseModel):
     )
 
     @classmethod
-    def sample_run_config(cls) -> Dict[str, Any]:
+    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
         return {
             "url": "https://api.fireworks.ai/inference/v1",
             "api_key": "${env.FIREWORKS_API_KEY}",

From a2be32c27d2120e0916b95ba2b89f38967b96774 Mon Sep 17 00:00:00 2001
From: Aidan Do
Date: Thu, 19 Dec 2024 20:13:29 +1100
Subject: [PATCH 3/3] JKL

---
 .../remote/inference/fireworks/fireworks.py  | 23 ++++++++++++-------
 .../remote/inference/nvidia/openai_utils.py  |  7 +++---
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index 975ec48933..0df05d8c8f 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -32,6 +32,7 @@
     interleaved_content_as_str,
     request_has_media,
 )
+from ..nvidia.openai_utils import _convert_tooldef_to_openai_tool, convert_openai_chat_completion_choice
 from .config import FireworksImplConfig
 
 
@@ -209,10 +210,12 @@ async def _nonstream_chat_completion(
     ) -> ChatCompletionResponse:
         params = await self._get_params(request)
         if "messages" in params:
+            print(params)
             r = await self._get_client().chat.completions.acreate(**params)
+            return convert_openai_chat_completion_choice(r.choices[0])
         else:
             r = await self._get_client().completion.acreate(**params)
-        return process_chat_completion_response(r, self.formatter)
+            return process_chat_completion_response(r, self.formatter)
 
     async def _stream_chat_completion(
         self, request: ChatCompletionRequest
@@ -240,14 +243,18 @@ async def _get_params(
         media_present = request_has_media(request)
 
         if isinstance(request, ChatCompletionRequest):
-            if media_present:
-                input_dict["messages"] = [
-                    await convert_message_to_openai_dict(m) for m in request.messages
+            input_dict["messages"] = [
+                await convert_message_to_openai_dict(m) for m in request.messages
+            ]
+            # print(input_dict["messages"])
+            if request.tool_choice == ToolChoice.required:
+                input_dict["tool_choice"] = "any"
+
+            if request.tools:
+                input_dict["tools"] = [
+                    _convert_tooldef_to_openai_tool(t) for t in request.tools
                 ]
-            else:
-                input_dict["prompt"] = await chat_completion_request_to_prompt(
-                    request, self.get_llama_model(request.model), self.formatter
-                )
+            # print(input_dict)
         else:
             assert (
                 not media_present
diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py
index ba8ff0fa47..1220f7ffa8 100644
--- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py
+++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py
@@ -10,12 +10,13 @@
 
 from llama_models.llama3.api.datatypes import (
     BuiltinTool,
-    CompletionMessage,
+    # CompletionMessage,
     StopReason,
-    TokenLogProbs,
+    # TokenLogProbs,
     ToolCall,
     ToolDefinition,
 )
+from llama_stack.apis.inference import CompletionMessage, TokenLogProbs
 from openai import AsyncStream
 from openai.types.chat import (
     ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage,
@@ -339,7 +340,7 @@ def _convert_openai_tool_calls(
 
 def _convert_openai_logprobs(
     logprobs: OpenAIChoiceLogprobs,
-) -> Optional[List[TokenLogProbs]]:
+) -> Optional[List[Any]]:
     """
     Convert an OpenAI ChoiceLogprobs into a list of TokenLogProbs.