API: Add logprobs for chat completions
Adds chat completion logprob support following OAI's spec. Tokens are
not converted via tiktoken here since that would add an extra dependency
for no real reason.

Signed-off-by: kingbri <[email protected]>
bdashore3 committed Feb 9, 2024
1 parent c02fe4d commit c7428f0
Showing 3 changed files with 58 additions and 8 deletions.
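
For reference, the OpenAI-style shape this commit targets nests per-token entries under each choice's logprobs.content, carrying the sampled token, its log probability, and the candidate top_logprobs. A rough sketch of one choice as a Python dict; the tokens and numbers are invented for illustration, not real output:

# Illustrative only: shape of the logprobs block in an OAI-style chat
# completion choice. Tokens and values are made up.
example_choice = {
    "index": 0,
    # This project reports finish_reason="Generated" rather than OAI's "stop"
    "finish_reason": "Generated",
    "message": {"role": "assistant", "content": "Hello"},
    "logprobs": {
        "content": [
            {
                "token": "Hello",
                "logprob": -0.12,
                "top_logprobs": [
                    {"token": "Hello", "logprob": -0.12},
                    {"token": "Hi", "logprob": -2.35},
                ],
            }
        ]
    },
}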
12 changes: 6 additions & 6 deletions OAI/types/chat_completion.py
@@ -6,14 +6,14 @@
 from OAI.types.common import UsageStats, CommonCompletionRequest
 
 
-class ChatCompletionLogprobs(BaseModel):
+class ChatCompletionLogprob(BaseModel):
     token: str
     logprob: float
-    top_logprobs: List["ChatCompletionLogprobs"]
+    top_logprobs: Optional[List["ChatCompletionLogprob"]] = None
 
 
-class WrappedChatCompletionLogprobs(BaseModel):
-    content: List[ChatCompletionLogprobs]
+class ChatCompletionLogprobs(BaseModel):
+    content: List[ChatCompletionLogprob] = Field(default_factory=list)
 
 
 class ChatCompletionMessage(BaseModel):
@@ -26,15 +26,15 @@ class ChatCompletionRespChoice(BaseModel):
     index: int = 0
     finish_reason: str
     message: ChatCompletionMessage
-    logprobs: Optional[WrappedChatCompletionLogprobs] = None
+    logprobs: Optional[ChatCompletionLogprobs] = None
 
 
 class ChatCompletionStreamChoice(BaseModel):
     # Index is 0 since we aren't using multiple choices
     index: int = 0
     finish_reason: Optional[str]
     delta: Union[ChatCompletionMessage, dict] = {}
-    logprobs: Optional[WrappedChatCompletionLogprobs] = None
+    logprobs: Optional[ChatCompletionLogprobs] = None
 
 
 # Inherited from common request
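
The rename splits a single-token entry (ChatCompletionLogprob) from the wrapper holding the full list (ChatCompletionLogprobs), mirroring OpenAI's field names, and drops the old WrappedChatCompletionLogprobs wrapper. A minimal sketch of how the new models compose, assuming pydantic v2 (on v1, .dict() would replace .model_dump()):

from OAI.types.chat_completion import ChatCompletionLogprob, ChatCompletionLogprobs

# Hypothetical values: one generated token with two candidate alternatives.
entry = ChatCompletionLogprob(
    token="Hello",
    logprob=-0.12,
    top_logprobs=[
        ChatCompletionLogprob(token="Hello", logprob=-0.12),
        ChatCompletionLogprob(token="Hi", logprob=-2.35),
    ],
)
wrapped = ChatCompletionLogprobs(content=[entry])
print(wrapped.model_dump())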
52 changes: 50 additions & 2 deletions OAI/utils/completion.py
@@ -3,6 +3,8 @@
 
 from common.utils import unwrap
 from OAI.types.chat_completion import (
+    ChatCompletionLogprobs,
+    ChatCompletionLogprob,
     ChatCompletionMessage,
     ChatCompletionRespChoice,
     ChatCompletionStreamChunk,
@@ -63,7 +65,32 @@ def create_chat_completion_response(generation: dict, model_name: Optional[str])
         role="assistant", content=unwrap(generation.get("text"), "")
     )
 
-    choice = ChatCompletionRespChoice(finish_reason="Generated", message=message)
+    logprob_response = None
+
+    token_probs = unwrap(generation.get("token_probs"), {})
+    if token_probs:
+        logprobs = unwrap(generation.get("logprobs"), [])
+
+        collected_token_probs = []
+        for index, token in enumerate(token_probs.keys()):
+            top_logprobs = [
+                ChatCompletionLogprob(token=token, logprob=logprob)
+                for token, logprob in logprobs[index].items()
+            ]
+
+            collected_token_probs.append(
+                ChatCompletionLogprob(
+                    token=token,
+                    logprob=token_probs[token],
+                    top_logprobs=top_logprobs,
+                )
+            )
+
+        logprob_response = ChatCompletionLogprobs(content=collected_token_probs)
+
+    choice = ChatCompletionRespChoice(
+        finish_reason="Generated", message=message, logprobs=logprob_response
+    )
 
     prompt_tokens = unwrap(generation.get("prompt_tokens"), 0)
     completion_tokens = unwrap(generation.get("completion_tokens"), 0)
@@ -89,15 +116,36 @@ def create_chat_completion_stream_chunk(
 ):
     """Create a chat completion stream chunk from the provided text."""
 
+    logprob_response = None
+
     if finish_reason:
         message = {}
     else:
         message = ChatCompletionMessage(
             role="assistant", content=unwrap(generation.get("text"), "")
         )
 
+    token_probs = unwrap(generation.get("token_probs"), {})
+    if token_probs:
+        logprobs = unwrap(generation.get("logprobs"), {})
+        top_logprobs = [
+            ChatCompletionLogprob(token=token, logprob=logprob)
+            for token, logprob in logprobs.items()
+        ]
+
+        generated_token = next(iter(token_probs))
+        token_prob_response = ChatCompletionLogprob(
+            token=generated_token,
+            logprob=token_probs[generated_token],
+            top_logprobs=top_logprobs,
+        )
+
+        logprob_response = ChatCompletionLogprobs(content=[token_prob_response])
+
     # The finish reason can be None
-    choice = ChatCompletionStreamChoice(finish_reason=finish_reason, delta=message)
+    choice = ChatCompletionStreamChoice(
+        finish_reason=finish_reason, delta=message, logprobs=logprob_response
+    )
 
     chunk = ChatCompletionStreamChunk(
         id=const_id, choices=[choice], model=unwrap(model_name, "")
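
Both helpers consume the same two backend fields: token_probs, a dict mapping each generated token to its logprob, and logprobs, the per-position candidate dicts (a list for full responses, a single dict for a streamed chunk). A sketch of a non-streaming call with an invented generation payload; the field names come from the diff above, the values are made up, and the choices attribute on the returned model is assumed to follow the usual OAI layout:

from OAI.utils.completion import create_chat_completion_response

# Hypothetical backend output for a two-token generation.
generation = {
    "text": "Hi there",
    "prompt_tokens": 5,
    "completion_tokens": 2,
    "token_probs": {"Hi": -0.3, " there": -0.9},
    "logprobs": [
        {"Hi": -0.3, "Hello": -1.7},        # top candidates for position 0
        {" there": -0.9, " friend": -2.2},  # top candidates for position 1
    ],
}

response = create_chat_completion_response(generation, "my-model")
# Assuming an OAI-style choices list on the response model
print(response.choices[0].logprobs.content[0].token)  # "Hi"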
2 changes: 2 additions & 0 deletions backends/exllamav2/model.py
@@ -520,6 +520,8 @@ def generate(self, prompt: str, **kwargs):
                 joined_generation["token_probs"].update(
                     unwrap(generation.get("token_probs"), {})
                 )
+
+                # Include empty logprob dicts for index preservation
                 joined_generation["logprobs"].append(
                     unwrap(generation.get("logprobs"), {})
                 )
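
The backend change matters because create_chat_completion_response indexes logprobs by position: appending an empty dict for chunks that carry no candidate data keeps entry i of joined_generation["logprobs"] aligned with the i-th token in token_probs. A standalone sketch of that accumulation pattern with invented chunk data:

# Invented stream chunks; the second carries no top-candidate data.
chunks = [
    {"token_probs": {"Hi": -0.3}, "logprobs": {"Hi": -0.3, "Hello": -1.7}},
    {"token_probs": {"!": -0.1}},
]

joined = {"token_probs": {}, "logprobs": []}
for chunk in chunks:
    joined["token_probs"].update(chunk.get("token_probs", {}))
    # Append an empty dict when absent so index i still matches token i
    joined["logprobs"].append(chunk.get("logprobs", {}))

for index, token in enumerate(joined["token_probs"]):
    print(token, joined["logprobs"][index])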
