diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py
index 27e2d08..c357591 100644
--- a/app/llm/external/openai_chat.py
+++ b/app/llm/external/openai_chat.py
@@ -1,10 +1,16 @@
 import logging
 import time
-import traceback
 from datetime import datetime
 from typing import Literal, Any

-from openai import OpenAI
+from openai import (
+    OpenAI,
+    APIError,
+    APITimeoutError,
+    RateLimitError,
+    InternalServerError,
+    ContentFilterFinishReasonError,
+)
 from openai.lib.azure import AzureOpenAI
 from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam
 from openai.types.shared_params import ResponseFormatJSONObject
@@ -82,16 +88,19 @@ def chat(
     ) -> PyrisMessage:
         print("Sending messages to OpenAI", messages)
         # noinspection PyTypeChecker
-        retries = 10
+        retries = 5
         backoff_factor = 2
         initial_delay = 1
+        # Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds
+
+        messages = convert_to_open_ai_messages(messages)

         for attempt in range(retries):
             try:
                 if arguments.response_format == "JSON":
                     response = self._client.chat.completions.create(
                         model=self.model,
-                        messages=convert_to_open_ai_messages(messages),
+                        messages=messages,
                         temperature=arguments.temperature,
                         max_tokens=arguments.max_tokens,
                         response_format=ResponseFormatJSONObject(type="json_object"),
@@ -99,18 +108,29 @@
                 else:
                     response = self._client.chat.completions.create(
                         model=self.model,
-                        messages=convert_to_open_ai_messages(messages),
+                        messages=messages,
                         temperature=arguments.temperature,
                         max_tokens=arguments.max_tokens,
                     )
-                return convert_to_iris_message(response.choices[0].message)
-            except Exception as e:
+                choice = response.choices[0]
+                if choice.finish_reason == "content_filter":
+                    # I expected the OpenAI client to raise an error automatically when the
+                    # content filter is triggered, but that is not the case.
+                    # We don't want to retry, because the same message would likely be rejected again.
+                    # Raise an exception to trigger the global error handler and report a fatal error to the client.
+                    raise ContentFilterFinishReasonError()
+                return convert_to_iris_message(choice.message)
+            except (
+                APIError,
+                APITimeoutError,
+                RateLimitError,
+                InternalServerError,
+            ):
                 wait_time = initial_delay * (backoff_factor**attempt)
-                logging.warning(f"Exception on attempt {attempt + 1}: {e}")
-                traceback.print_exc()
+                logging.exception(f"OpenAI error on attempt {attempt + 1}")
                 logging.info(f"Retrying in {wait_time} seconds...")
                 time.sleep(wait_time)
-        logging.error("Failed to interpret image after several attempts.")
+        raise Exception(f"Failed to get response from OpenAI after {retries} retries")


 class DirectOpenAIChatModel(OpenAIChatModel):
diff --git a/app/llm/external/openai_embeddings.py b/app/llm/external/openai_embeddings.py
index 243860d..1f9106e 100644
--- a/app/llm/external/openai_embeddings.py
+++ b/app/llm/external/openai_embeddings.py
@@ -1,6 +1,12 @@
 import logging
 from typing import Literal, Any
-from openai import OpenAI
+from openai import (
+    OpenAI,
+    APIError,
+    APITimeoutError,
+    RateLimitError,
+    InternalServerError,
+)
 from openai.lib.azure import AzureOpenAI

 from ...llm.external.model import EmbeddingModel
@@ -13,9 +19,10 @@ class OpenAIEmbeddingModel(EmbeddingModel):
     _client: OpenAI

     def embed(self, text: str) -> list[float]:
-        retries = 10
+        retries = 5
         backoff_factor = 2
         initial_delay = 1
+        # Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds

         for attempt in range(retries):
             try:
@@ -25,15 +32,17 @@
                     encoding_format="float",
                 )
                 return response.data[0].embedding
-            except Exception as e:
+            except (
+                APIError,
+                APITimeoutError,
+                RateLimitError,
+                InternalServerError,
+            ):
                 wait_time = initial_delay * (backoff_factor**attempt)
-                logging.warning(f"Rate limit exceeded on attempt {attempt + 1}: {e}")
+                logging.exception(f"OpenAI error on attempt {attempt + 1}")
                 logging.info(f"Retrying in {wait_time} seconds...")
                 time.sleep(wait_time)
-        logging.error(
-            "Failed to get embedding after several attempts due to rate limit."
-        )
-        return []
+        raise Exception(f"Failed to get embedding from OpenAI after {retries} retries.")


 class DirectOpenAIEmbeddingModel(OpenAIEmbeddingModel):
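
Note: chat() and embed() now share the same retry shape: catch the transient OpenAI exception types, back off exponentially, and raise once the retry budget is exhausted. The sketch below is a minimal, hypothetical helper that factors out that pattern; the function name retry_with_exponential_backoff and its defaults are illustrative and not part of this patch, while the exception classes are the ones imported in the diff above.

import logging
import time
from typing import Callable, TypeVar

from openai import APIError, APITimeoutError, InternalServerError, RateLimitError

T = TypeVar("T")


def retry_with_exponential_backoff(
    call: Callable[[], T],
    retries: int = 5,
    backoff_factor: int = 2,
    initial_delay: int = 1,
) -> T:
    # Run `call`, retrying on transient OpenAI errors with exponential backoff.
    for attempt in range(retries):
        try:
            return call()
        except (APIError, APITimeoutError, RateLimitError, InternalServerError):
            wait_time = initial_delay * (backoff_factor**attempt)  # 1, 2, 4, 8, 16
            logging.exception(f"OpenAI error on attempt {attempt + 1}")
            logging.info(f"Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
    # With the defaults, the total wait time is 1 + 2 + 4 + 8 + 16 = 31 seconds.
    raise Exception(f"OpenAI call failed after {retries} retries")

With such a helper, embed() could reduce to something like return retry_with_exponential_backoff(lambda: self._client.embeddings.create(...).data[0].embedding). In current versions of the openai Python SDK, APITimeoutError, RateLimitError, and InternalServerError are all subclasses of APIError, so the explicit tuple mainly documents which failures are treated as transient rather than broadening what is caught.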