# Fix token budgeting (#824)
### Motivation and Context
OpenAI inserts a message under the hood that we don't account for, which throws off our token budgeting. As a result, we sometimes send too many tokens when we approach a model's token limit, causing request errors.

### Description
Account for the message OpenAI inserts into our requests when computing the remaining token budget.
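
For illustration, here is a rough sketch of the adjusted budget math. All numbers below are hypothetical except the ~20-token constant for the hidden system message:

```csharp
// Hypothetical numbers for illustration only.
const int completionTokenLimit = 4096;   // model context window (assumed)
const int responseTokenLimit = 1024;     // reserved for the model's reply (assumed)
const int extraOpenAiMessageTokens = 20; // hidden "Assistant is a large language model." message

int contextMessagesTokens = 2900;        // measured prompt + chat history cost (assumed)
int userMessageTokens = 150;             // measured current user message cost (assumed)

// Before this fix, the hidden message was not subtracted, so the budget came
// out ~20 tokens too high and could overflow the model's limit.
int remainingBudget = completionTokenLimit
    - extraOpenAiMessageTokens
    - contextMessagesTokens
    - userMessageTokens
    - responseTokenLimit; // 4096 - 20 - 2900 - 150 - 1024 = 2 (22 before the fix)

Console.WriteLine(remainingBudget);
```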

### Contribution Checklist
- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [Contribution
Guidelines](https://github.com/microsoft/chat-copilot/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/chat-copilot/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
glahaye authored Mar 1, 2024
1 parent 4559fce commit e018f90
Showing 2 changed files with 19 additions and 13 deletions.
27 changes: 17 additions & 10 deletions webapi/Plugins/Chat/ChatPlugin.cs
@@ -241,16 +241,15 @@ private async Task<string> GetAllowedChatHistoryAsync(
}

var promptRole = chatMessage.AuthorRole == CopilotChatMessage.AuthorRoles.Bot ? AuthorRole.System : AuthorRole.User;
var tokenCount = chatHistory is not null ? TokenUtils.GetContextMessageTokenCount(promptRole, formattedMessage) : TokenUtils.TokenCount(formattedMessage);
int tokenCount = chatHistory is not null ? TokenUtils.GetContextMessageTokenCount(promptRole, formattedMessage) : TokenUtils.TokenCount(formattedMessage);

if (remainingToken - tokenCount >= 0)
{
historyText = $"{formattedMessage}\n{historyText}";
if (chatMessage.AuthorRole == CopilotChatMessage.AuthorRoles.Bot)
{
// Message doesn't have to be formatted for bot. This helps with asserting a natural language response from the LLM (no date or author preamble).
var botMessage = chatMessage.Content;
allottedChatHistory.AddAssistantMessage(botMessage.Trim());
allottedChatHistory.AddAssistantMessage(chatMessage.Content.Trim());
}
else
{
@@ -330,7 +329,6 @@ private async Task<CopilotChatMessage> GetChatResponseAsync(string chatId, strin
// Render system instruction components and create the meta-prompt template
var systemInstructions = await AsyncUtils.SafeInvokeAsync(
() => this.RenderSystemInstructions(chatId, chatContext, cancellationToken), nameof(RenderSystemInstructions));
var chatCompletion = this._kernel.GetRequiredService<IChatCompletionService>();
ChatHistory chatHistory = new(systemInstructions);

// Bypass audience extraction if Auth is disabled
@@ -351,7 +349,6 @@ private async Task<CopilotChatMessage> GetChatResponseAsync(string chatId, strin
chatHistory.AddSystemMessage(userIntent);

// Calculate the remaining token budget.
await this.UpdateBotResponseStatusOnClientAsync(chatId, "Calculating remaining token budget", cancellationToken);
var remainingTokenBudget = this.GetChatContextTokenLimit(chatHistory, userMessage.ToFormattedString());

// Query relevant semantic and document memories
@@ -397,7 +394,8 @@ private async Task<string> RenderSystemInstructions(string chatId, KernelArgumen
}

/// <summary>
/// Helper function to handle final steps of bot response generation, including streaming to client, generating semantic text memory, calculating final token usages, and saving to chat history.
/// Helper function to handle final steps of bot response generation, including streaming to client,
/// generating semantic text memory, calculating final token usages, and saving to chat history.
/// </summary>
/// <param name="chatId">The chat ID</param>
/// <param name="userId">The user ID</param>
@@ -461,12 +459,14 @@ await AsyncUtils.SafeInvokeAsync(
}

/// <summary>
/// Helper function that creates the correct context variables to
/// extract the audience from a conversation history.
/// Extract the list of participants from the conversation history.
/// Note that only those who have spoken will be included.
/// </summary>
/// <param name="context">Kernel context variables.</param>
/// <param name="cancellationToken">The cancellation token.</param>
private async Task<string> GetAudienceAsync(KernelArguments context, CancellationToken cancellationToken)
{
// Clone the context to avoid modifying the original context variables
KernelArguments audienceContext = new(context);
var audience = await this.ExtractAudienceAsync(audienceContext, cancellationToken);

@@ -481,12 +481,13 @@ private async Task<string> GetAudienceAsync(KernelArguments context, Cancellatio
}

/// <summary>
/// Helper function that creates the correct context variables to
/// extract the user intent from the conversation history.
/// Extract user intent from the conversation history.
/// </summary>
/// <param name="context">Kernel context.</param>
/// <param name="cancellationToken">The cancellation token.</param>
private async Task<string> GetUserIntentAsync(KernelArguments context, CancellationToken cancellationToken)
{
// Clone the context to avoid modifying the original context variables
KernelArguments intentContext = new(context);
string userIntent = await this.ExtractUserIntentAsync(intentContext, cancellationToken);

@@ -636,7 +637,13 @@ private OpenAIPromptExecutionSettings CreateIntentCompletionSettings()
/// <returns>The remaining token limit.</returns>
private int GetChatContextTokenLimit(ChatHistory promptTemplate, string userInput = "")
{
// OpenAI inserts a message under the hood:
// "content": "Assistant is a large language model.","role": "system"
// This burns just under 20 tokens which need to be accounted for.
const int ExtraOpenAiMessageTokens = 20;

return this._promptOptions.CompletionTokenLimit
- ExtraOpenAiMessageTokens
- TokenUtils.GetContextMessagesTokenCount(promptTemplate)
- TokenUtils.GetContextMessageTokenCount(AuthorRole.User, userInput) // User message has to be included in chat history allowance
- this._promptOptions.ResponseTokenLimit;
5 changes: 2 additions & 3 deletions webapi/Plugins/Utils/TokenUtils.cs
@@ -121,14 +121,13 @@ internal static int TokenCount(string text)
/// <summary>
/// Rough token costing of ChatHistory's message object.
/// Follows the syntax defined by Azure OpenAI's ChatMessage object: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chatmessage
/// e.g., "message": {"role":"assistant","content":"Yes }
/// e.g., "message": {"role":"assistant","content":"Yes" }
/// </summary>
/// <param name="authorRole">Author role of the message.</param>
/// <param name="content">Content of the message.</param>
internal static int GetContextMessageTokenCount(AuthorRole authorRole, string? content)
{
var tokenCount = authorRole == AuthorRole.System ? TokenCount("\n") : 0;
return tokenCount + TokenCount($"role:{authorRole.Label}") + TokenCount($"content:{content}");
return TokenCount($"role:{authorRole.Label}") + TokenCount($"content:{content}\n");
}

/// <summary>
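
As a usage sketch (the call site and message string are hypothetical, and `TokenUtils` is the webapi project's own internal helper, so a call like this would have to live in the same assembly), the per-message estimate is the token count of the serialized role plus the content with its trailing newline:

```csharp
using CopilotChat.WebApi.Plugins.Utils;        // namespace assumed from the file path
using Microsoft.SemanticKernel.ChatCompletion; // AuthorRole

// Rough cost of one chat message, mirroring the updated formula:
// TokenCount($"role:{role.Label}") + TokenCount($"content:{content}\n")
int cost = TokenUtils.GetContextMessageTokenCount(
    AuthorRole.User,
    "What's the weather like today?"); // hypothetical content

Console.WriteLine($"Estimated message cost: {cost} tokens");
```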
