Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve OpenAI error handling #164

Merged
merged 5 commits on Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions app/llm/external/openai_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
from datetime import datetime
from typing import Literal, Any

from openai import OpenAI
from openai import (
OpenAI,
APIError,
APITimeoutError,
RateLimitError,
InternalServerError,
)
from openai.lib.azure import AzureOpenAI
from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam
from openai.types.shared_params import ResponseFormatJSONObject
Expand Down Expand Up @@ -82,35 +88,50 @@ def chat(
) -> PyrisMessage:
print("Sending messages to OpenAI", messages)
# noinspection PyTypeChecker
retries = 10
retries = 5
backoff_factor = 2
initial_delay = 1
# Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds

messages = convert_to_open_ai_messages(messages)
MichaelOwenDyer marked this conversation as resolved.
Show resolved Hide resolved

for attempt in range(retries):
try:
if arguments.response_format == "JSON":
response = self._client.chat.completions.create(
model=self.model,
messages=convert_to_open_ai_messages(messages),
messages=messages,
temperature=arguments.temperature,
max_tokens=arguments.max_tokens,
response_format=ResponseFormatJSONObject(type="json_object"),
)
else:
response = self._client.chat.completions.create(
model=self.model,
messages=convert_to_open_ai_messages(messages),
messages=messages,
temperature=arguments.temperature,
max_tokens=arguments.max_tokens,
)
return convert_to_iris_message(response.choices[0].message)
except Exception as e:
choice = response.choices[0]
if choice.finish_reason == "content_filter":
# I figured that an openai error would be automatically raised if the content filter activated,
# but it seems that that is not the case.
# We don't want to retry because the same message will likely be rejected again.
# Raise an exception to trigger the global error handler and report a fatal error to the client.
raise Exception("OpenAI content filter activated")
return convert_to_iris_message(choice.message)
MichaelOwenDyer marked this conversation as resolved.
Show resolved Hide resolved
except (
APIError,
APITimeoutError,
RateLimitError,
InternalServerError,
) as e:
wait_time = initial_delay * (backoff_factor**attempt)
logging.warning(f"Exception on attempt {attempt + 1}: {e}")
logging.warning(f"OpenAI error on attempt {attempt + 1}: {e}")
traceback.print_exc()
logging.info(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time)
MichaelOwenDyer marked this conversation as resolved.
Show resolved Hide resolved
logging.error("Failed to interpret image after several attempts.")
raise Exception(f"Failed to get response from OpenAI after {retries} retries")


class DirectOpenAIChatModel(OpenAIChatModel):
Expand Down
25 changes: 17 additions & 8 deletions app/llm/external/openai_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import logging
from typing import Literal, Any
from openai import OpenAI
from openai import (
OpenAI,
APIError,
APITimeoutError,
RateLimitError,
InternalServerError,
)
from openai.lib.azure import AzureOpenAI

from ...llm.external.model import EmbeddingModel
Expand All @@ -13,9 +19,10 @@ class OpenAIEmbeddingModel(EmbeddingModel):
_client: OpenAI

def embed(self, text: str) -> list[float]:
retries = 10
retries = 5
backoff_factor = 2
initial_delay = 1
# Maximum wait time: 1 + 2 + 4 + 8 + 16 = 31 seconds

for attempt in range(retries):
try:
Expand All @@ -25,15 +32,17 @@ def embed(self, text: str) -> list[float]:
encoding_format="float",
)
return response.data[0].embedding
except Exception as e:
except (
APIError,
APITimeoutError,
RateLimitError,
InternalServerError,
) as e:
wait_time = initial_delay * (backoff_factor**attempt)
logging.warning(f"Rate limit exceeded on attempt {attempt + 1}: {e}")
logging.warning(f"OpenAI error on attempt {attempt + 1}: {e}")
logging.info(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time)
logging.error(
"Failed to get embedding after several attempts due to rate limit."
)
return []
raise Exception(f"Failed to get embedding from OpenAI after {retries} retries.")


class DirectOpenAIEmbeddingModel(OpenAIEmbeddingModel):
Expand Down
Loading