From 42d841bfe8e21000f16cb9c07946e6e1cc3e485a Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 18:30:49 +0000 Subject: [PATCH 1/9] Optimize chat caching by repositioning definitions in prompt structure. --- packages/core/src/promptdom.ts | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index a6c0182142..3a51874db6 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -510,6 +510,29 @@ export interface PromptNodeRender { fileOutputs: FileOutput[] // File outputs } +/** + * To optimize chat caching with openai, move defs to the back of the prompt + * @see https://platform.openai.com/docs/guides/prompt-caching + * @param mode + * @param root + */ +async function layoutPromptNode(mode: string, root: PromptNode) { + let changed = false + await visitNode(root, { + node: (n) => { + // sort children + const before = n.children?.map((c) => c.preview)?.join("\n") + n.children?.sort( + (a, b) => + (a.type === "def" ? 1 : -1) - (b.type === "def" ? 1 : -1) + ) + const after = n.children?.map((c) => c.preview)?.join("\n") + changed = changed || before !== after + }, + }) + return changed +} + // Function to resolve a prompt node. 
async function resolvePromptNode( model: string, @@ -874,6 +897,9 @@ export async function renderPromptNode( await resolvePromptNode(model, node) await tracePromptNode(trace, node) + if (await layoutPromptNode(model, node)) + await tracePromptNode(trace, node, { label: "layout" }) + if (flexTokens) await flexPromptNode(node, { ...options, From 68f3caeeab63cbf711cd8bfdbfb9205dfd037a81 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 18:34:49 +0000 Subject: [PATCH 2/9] Update node sorting logic to include "image" type in layoutPromptNode function --- packages/core/src/promptdom.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 3a51874db6..89a3a00137 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -518,13 +518,16 @@ export interface PromptNodeRender { */ async function layoutPromptNode(mode: string, root: PromptNode) { let changed = false + const variables: PromptNode["type"][] = ["def", "image"] + await visitNode(root, { node: (n) => { // sort children const before = n.children?.map((c) => c.preview)?.join("\n") n.children?.sort( (a, b) => - (a.type === "def" ? 1 : -1) - (b.type === "def" ? 1 : -1) + (variables.includes(a.type) ? 1 : -1) - + (variables.includes(b.type) ? 
1 : -1) ) const after = n.children?.map((c) => c.preview)?.join("\n") changed = changed || before !== after From 8b1a13db358938bedad3fad433eb91edc2f22a5c Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 18:45:21 +0000 Subject: [PATCH 3/9] Add ephemeral property to PromptNode and update sorting logic --- docs/genaisrc/genaiscript.d.ts | 4 ++++ genaisrc/genaiscript.d.ts | 4 ++++ packages/auto/genaiscript.d.ts | 4 ++++ packages/core/src/genaisrc/genaiscript.d.ts | 4 ++++ packages/core/src/promptdom.ts | 10 +++++----- packages/core/src/types/prompt_template.d.ts | 4 ++++ packages/sample/genaisrc/blog/genaiscript.d.ts | 4 ++++ packages/sample/genaisrc/genaiscript.d.ts | 4 ++++ packages/sample/genaisrc/node/genaiscript.d.ts | 4 ++++ packages/sample/genaisrc/python/genaiscript.d.ts | 4 ++++ packages/sample/genaisrc/style/genaiscript.d.ts | 4 ++++ packages/sample/src/aici/genaiscript.d.ts | 4 ++++ packages/sample/src/errors/genaiscript.d.ts | 4 ++++ packages/sample/src/genaiscript.d.ts | 4 ++++ packages/sample/src/makecode/genaiscript.d.ts | 4 ++++ packages/sample/src/tla/genaiscript.d.ts | 4 ++++ packages/sample/src/vision/genaiscript.d.ts | 4 ++++ packages/vscode/genaisrc/genaiscript.d.ts | 4 ++++ slides/genaisrc/genaiscript.d.ts | 4 ++++ 19 files changed, 77 insertions(+), 5 deletions(-) diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/auto/genaiscript.d.ts b/packages/auto/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/auto/genaiscript.d.ts +++ b/packages/auto/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 89a3a00137..e212f85158 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -52,6 +52,10 @@ export interface PromptNode extends ContextExpansionOptions { children?: PromptNode[] // Child nodes for hierarchical structure error?: unknown // Error information if present tokens?: number // Token count for the node + /** + * This text is likely to change within 5 to 10 minutes. + */ + ephemeral?: boolean /** * Rendered markdown preview of the node @@ -518,16 +522,12 @@ export interface PromptNodeRender { */ async function layoutPromptNode(mode: string, root: PromptNode) { let changed = false - const variables: PromptNode["type"][] = ["def", "image"] - await visitNode(root, { node: (n) => { // sort children const before = n.children?.map((c) => c.preview)?.join("\n") n.children?.sort( - (a, b) => - (variables.includes(a.type) ? 1 : -1) - - (variables.includes(b.type) ? 1 : -1) + (a, b) => (a.ephemeral ? 1 : -1) - (b.ephemeral ? 1 : -1) ) const after = n.children?.map((c) => c.preview)?.join("\n") changed = changed || before !== after diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index a69a760d05..8d76f7091a 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -741,6 +741,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/genaisrc/blog/genaiscript.d.ts b/packages/sample/genaisrc/blog/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/genaisrc/blog/genaiscript.d.ts +++ b/packages/sample/genaisrc/blog/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ b/packages/sample/src/aici/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/src/genaiscript.d.ts +++ b/packages/sample/src/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/packages/vscode/genaisrc/genaiscript.d.ts b/packages/vscode/genaisrc/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/packages/vscode/genaisrc/genaiscript.d.ts +++ b/packages/vscode/genaisrc/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. 
+ */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index ff14b2bbff..8ee4580062 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -803,6 +803,10 @@ interface ContextExpansionOptions { * It defaults to 1 on all elements. */ flex?: number + /** + * This text is likely to change and will probably break the prefix cache. + */ + ephemeral?: boolean } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { From 6547e302ddb9663cf42cf19d0540a368c04ac697 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 18:47:42 +0000 Subject: [PATCH 4/9] Fix typo and add prompt caching section to context.md --- docs/src/content/docs/reference/scripts/context.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/src/content/docs/reference/scripts/context.md b/docs/src/content/docs/reference/scripts/context.md index 0ee8ed6d16..2d3c2d3081 100644 --- a/docs/src/content/docs/reference/scripts/context.md +++ b/docs/src/content/docs/reference/scripts/context.md @@ -106,7 +106,7 @@ def("DIFF", gitdiff, { language: "diff" }) ### Referencing The `def` function returns a variable name that can be used in the prompt. -The name might be formatted diferently to accommodate the model's preference. +The name might be formatted differently to accommodate the model's preference. ```js "const f = " const f = def("FILE", file) @@ -182,6 +182,15 @@ def("FILE", env.files, { sliceTail: 100 }) def("FILE", env.files, { sliceSample: 100 }) ``` +### Prompt Caching + +You can specify `ephemeral: true` to turn on some prompt caching optimization. In particular, a `def` with `ephemeral` will be rendered at the back of the prompt +to preserve the [cache prefix](https://openai.com/index/api-prompt-caching/). 
+ +```js +def("FILE", env.files, { ephemeral: true }) +``` + ## Data definition (`defData`) The `defData` function offers additional formatting options for converting a data object into a textual representation. It supports rendering objects as YAML, JSON, or CSV (formatted as a markdown table). From fbf2979dd4574cec82b41ea56d3a229d72272ee4 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 19:18:59 +0000 Subject: [PATCH 5/9] Add chat usage tracking and refactor session handling --- docs/genaisrc/genaiscript.d.ts | 1 + genaisrc/genaiscript.d.ts | 1 + packages/auto/genaiscript.d.ts | 1 + packages/core/src/chat.ts | 87 +++++++++++++------ packages/core/src/chattypes.ts | 10 +++ packages/core/src/genaisrc/genaiscript.d.ts | 1 + packages/core/src/openai.ts | 25 ++++-- packages/core/src/promptcontext.ts | 2 +- packages/core/src/promptrunner.ts | 3 +- packages/core/src/runpromptcontext.ts | 4 +- packages/core/src/types/prompt_template.d.ts | 1 + .../sample/genaisrc/blog/genaiscript.d.ts | 1 + packages/sample/genaisrc/genaiscript.d.ts | 1 + .../sample/genaisrc/node/genaiscript.d.ts | 1 + .../sample/genaisrc/python/genaiscript.d.ts | 1 + .../sample/genaisrc/style/genaiscript.d.ts | 1 + packages/sample/src/aici/genaiscript.d.ts | 1 + packages/sample/src/errors/genaiscript.d.ts | 1 + packages/sample/src/genaiscript.d.ts | 1 + packages/sample/src/makecode/genaiscript.d.ts | 1 + packages/sample/src/tla/genaiscript.d.ts | 1 + packages/sample/src/vision/genaiscript.d.ts | 1 + packages/vscode/genaisrc/genaiscript.d.ts | 1 + slides/genaisrc/genaiscript.d.ts | 1 + 24 files changed, 110 insertions(+), 39 deletions(-) diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/genaisrc/genaiscript.d.ts 
b/genaisrc/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/auto/genaiscript.d.ts b/packages/auto/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/auto/genaiscript.d.ts +++ b/packages/auto/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index 7506dc09fa..4c882cde04 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -28,6 +28,8 @@ import { ChatCompletionResponse, ChatCompletionsOptions, ChatCompletionTool, + ChatCompletionUsage, + ChatCompletionUsages, ChatCompletionUserMessageParam, CreateChatCompletionRequest, } from "./chattypes" @@ -364,6 +366,7 @@ function structurifyChatSession( schemas: Record, genVars: Record, options: GenerationOptions, + usages: ChatCompletionUsages, others?: { resp?: ChatCompletionResponse err?: any @@ -426,17 +429,20 @@ function structurifyChatSession( error, genVars, schemas, + usages, } } async function processChatMessage( + req: CreateChatCompletionRequest, resp: ChatCompletionResponse, messages: ChatCompletionMessageParam[], tools: ToolCallback[], chatParticipants: ChatParticipant[], schemas: Record, genVars: Record, - options: GenerationOptions + options: GenerationOptions, + usages: ChatCompletionUsages ): Promise { const { stats, @@ -445,6 +451,8 @@ async function processChatMessage( cancellationToken, } = options + accumulateChatUsage(usages, req.model, resp.usage) + if (resp.text) messages.push({ role: "assistant", @@ -534,11 +542,29 @@ export function mergeGenerationOptions( } } +function accumulateChatUsage( + usages: ChatCompletionUsages, + model: string, + usage: ChatCompletionUsage +) { + if (!usage) return + + 
const u = + usages[model] ?? + (usages[model] = { + completion_tokens: 0, + prompt_tokens: 0, + total_tokens: 0, + }) + u.completion_tokens += u.completion_tokens + u.prompt_tokens += u.prompt_tokens + u.total_tokens += u.total_tokens +} + export async function executeChatSession( connectionToken: LanguageModelConfiguration, cancellationToken: CancellationToken, messages: ChatCompletionMessageParam[], - vars: Partial, toolDefinitions: ToolCallback[], schemas: Record, completer: ChatCompletionHandler, @@ -567,6 +593,7 @@ export async function executeChatSession( : undefined trace.startDetails(`🧠 llm chat`) if (tools?.length) trace.detailsFenced(`🛠️ tools`, tools, "yaml") + const usages: ChatCompletionUsages = {} try { let genVars: Record while (true) { @@ -585,34 +612,35 @@ export async function executeChatSession( let resp: ChatCompletionResponse try { checkCancelled(cancellationToken) + const req: CreateChatCompletionRequest = { + model, + temperature: temperature, + top_p: topP, + max_tokens: maxTokens, + seed, + stream: true, + messages, + tools, + response_format: + responseType === "json_object" + ? { type: responseType } + : responseType === "json_schema" + ? { + type: "json_schema", + json_schema: { + name: "result", + schema: toStrictJSONSchema( + responseSchema + ), + strict: true, + }, + } + : undefined, + } try { trace.startDetails(`📤 llm request`) resp = await completer( - { - model, - temperature: temperature, - top_p: topP, - max_tokens: maxTokens, - seed, - stream: true, - messages, - tools, - response_format: - responseType === "json_object" - ? { type: responseType } - : responseType === "json_schema" - ? 
{ - type: "json_schema", - json_schema: { - name: "result", - schema: toStrictJSONSchema( - responseSchema - ), - strict: true, - }, - } - : undefined, - }, + req, connectionToken, genOptions, trace @@ -625,13 +653,15 @@ export async function executeChatSession( } const output = await processChatMessage( + req, resp, messages, toolDefinitions, chatParticipants, schemas, genVars, - genOptions + genOptions, + usages ) if (output) return output } catch (err) { @@ -640,6 +670,7 @@ export async function executeChatSession( schemas, genVars, genOptions, + usages, { resp, err } ) } diff --git a/packages/core/src/chattypes.ts b/packages/core/src/chattypes.ts index e8465ad906..5ab887bcee 100644 --- a/packages/core/src/chattypes.ts +++ b/packages/core/src/chattypes.ts @@ -18,6 +18,15 @@ export interface AICIRequest { } // Aliases for OpenAI chat completion types +export type ChatCompletionUsage = Omit< + OpenAI.Completions.CompletionUsage, + "completion_tokens_details" +> + +/** + * Per model storage of chat completion usages. 
+ */ +export type ChatCompletionUsages = Record // Text content part of a chat completion export type ChatCompletionContentPartText = @@ -99,6 +108,7 @@ export interface ChatCompletionResponse { toolCalls?: ChatCompletionToolCall[] // List of tool calls made during the response finishReason?: // Reason why the chat completion finished "stop" | "length" | "tool_calls" | "content_filter" | "cancel" | "fail" + usage?: ChatCompletionUsage // Usage information for the completion } // Alias for OpenAI's API error type diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index 00c210ae14..114037188d 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -1,4 +1,4 @@ -import { normalizeInt, trimTrailingSlash } from "./util" +import { logVerbose, normalizeInt, trimTrailingSlash } from "./util" import { LanguageModelConfiguration, host } from "./host" import { AZURE_OPENAI_API_VERSION, @@ -19,6 +19,7 @@ import { ChatCompletionToolCall, ChatCompletionResponse, ChatCompletionChunk, + ChatCompletionUsage, } from "./chattypes" import { resolveTokenEncoder } from "./encoders" import { toSignal } from "./cancellation" @@ -93,17 +94,20 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( return { text: cached, finishReason: cachedFinishReason, cached: true } } - const r2 = { ...req, model } + const r2 = { + ...req, + stream: true, + stream_options: { include_usage: true }, + model, + } let postReq: any = r2 let url = "" const toolCalls: ChatCompletionToolCall[] = [] if (cfg.type === "openai" || cfg.type === "localai") { - r2.stream = true url = 
trimTrailingSlash(cfg.base) + "/chat/completions" } else if (cfg.type === "azure") { - r2.stream = true delete r2.model url = trimTrailingSlash(cfg.base) + @@ -175,6 +179,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( let finishReason: ChatCompletionResponse["finishReason"] = undefined let chatResp = "" let pref = "" + let usage: ChatCompletionUsage const decoder = host.createUTF8Decoder() if (r.body.getReader) { @@ -193,7 +198,14 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( if (cancellationToken?.isCancellationRequested) finishReason = "cancel" trace.appendContent("\n\n") - trace.itemValue(`finish reason`, finishReason) + trace.itemValue(`🏁 finish reason`, finishReason) + if (usage) { + trace.itemValue( + `🪙 tokens`, + `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion` + ) + } + if (done && finishReason === "stop") await cacheStore.set( cachedKey, @@ -201,7 +213,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( { trace } ) - return { text: chatResp, toolCalls, finishReason } + return { text: chatResp, toolCalls, finishReason, usage } function doChunk(value: Uint8Array) { // Massage and parse the chunk of data @@ -216,6 +228,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( } try { const obj: ChatCompletionChunk = JSON.parse(json) + if (obj.usage) usage = obj.usage if (!obj.choices?.length) return "" else if (obj.choices?.length != 1) throw new Error("too many choices in response") diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index b8b847fbf8..b889f6aff7 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -234,7 +234,7 @@ export async function createPromptContext( }) // Freeze project options to prevent modification - const projectOptions = Object.freeze({ prj, vars, env }) + const projectOptions = Object.freeze({ prj, env }) const ctx: 
PromptContext & RunPromptContextNode = { ...createChatGenerationContext(options, trace, projectOptions), script: () => {}, diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts index f9108a8ea3..09411386d8 100644 --- a/packages/core/src/promptrunner.ts +++ b/packages/core/src/promptrunner.ts @@ -236,7 +236,6 @@ export async function runTemplate( connection.configuration, cancellationToken, messages, - vars, functions, schemas, completer, @@ -252,6 +251,7 @@ export async function runTemplate( genVars = {}, error, finishReason, + usages, } = output let { text, annotations } = output @@ -458,6 +458,7 @@ export async function runTemplate( genVars, schemas, json, + usages, } // If there's an error, provide status text diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index f9454251d6..35dbd68c85 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -227,12 +227,11 @@ export function createChatGenerationContext( trace: MarkdownTrace, projectOptions: { prj: Project - vars: ExpansionVariables env: ExpansionVariables } ): RunPromptContextNode { const { cancellationToken, infoCb } = options || {} - const { prj, vars, env } = projectOptions + const { prj, env } = projectOptions const turnCtx = createChatTurnGenerationContext(options, trace) const node = turnCtx.node @@ -535,7 +534,6 @@ export function createChatGenerationContext( connection.configuration, cancellationToken, messages, - vars, tools, schemas, completer, diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 8d76f7091a..7cf39334ff 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -881,6 +881,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/genaisrc/blog/genaiscript.d.ts 
b/packages/sample/genaisrc/blog/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/genaisrc/blog/genaiscript.d.ts +++ b/packages/sample/genaisrc/blog/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ 
b/packages/sample/src/aici/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/src/genaiscript.d.ts +++ b/packages/sample/src/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git 
a/packages/vscode/genaisrc/genaiscript.d.ts b/packages/vscode/genaisrc/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/packages/vscode/genaisrc/genaiscript.d.ts +++ b/packages/vscode/genaisrc/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index 8ee4580062..b60b966660 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -943,6 +943,7 @@ interface RunPromptResult { | "content_filter" | "cancel" | "fail" + usages?: ChatCompletionUsages } /** From 11bccd229d6e2965c9fd03e32d0ce2ad6a2a8445 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 19:29:57 +0000 Subject: [PATCH 6/9] Update token usage logging and fix token accumulation logic in chat processing --- packages/cli/src/run.ts | 14 ++++++++++---- packages/core/src/chat.ts | 8 ++++---- packages/core/src/generation.ts | 11 ++++++++++- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index ec76ee31b6..e0d8c7dbc8 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -244,7 +244,6 @@ export async function runScript( (acc, v) => ({ ...acc, ...parseKeyValuePair(v) }), {} ) - let tokens = 0 try { if (options.label) trace.heading(2, options.label) const { info } = await resolveModelConnectionInfo(script, { @@ -272,7 +271,6 @@ export async function runScript( }, partialCb: (args) => { const { responseChunk, tokensSoFar, inner } = args - tokens = tokensSoFar if (responseChunk !== undefined) { if (stream) { if (!inner) process.stdout.write(responseChunk) @@ -523,7 +521,15 @@ export async function runScript( if (failOnErrors && result.annotations?.some((a) => a.severity === "error")) return fail("error annotations found", ANNOTATION_ERROR_CODE) - logVerbose("genaiscript: done\n") - if (outTraceFilename) 
logVerbose(`trace: ${outTraceFilename}`) + logVerbose("genaiscript: done") + if (result.usages) { + for (const [key, value] of Object.entries(result.usages)) { + if (value.total_tokens > 0) + logVerbose( + ` ${key}: ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})` + ) + } + } + if (outTraceFilename) logVerbose(` trace: ${outTraceFilename}`) return { exitCode: 0, result } } diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index 4c882cde04..b3bc35d572 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -517,7 +517,7 @@ async function processChatMessage( if (needsNewTurn) return undefined } - return structurifyChatSession(messages, schemas, genVars, options, { + return structurifyChatSession(messages, schemas, genVars, options, usages, { resp, }) } @@ -556,9 +556,9 @@ function accumulateChatUsage( prompt_tokens: 0, total_tokens: 0, }) - u.completion_tokens += u.completion_tokens - u.prompt_tokens += u.prompt_tokens - u.total_tokens += u.total_tokens + u.completion_tokens += usage.completion_tokens ?? 0 + u.prompt_tokens += usage.prompt_tokens ?? 0 + u.total_tokens += usage.total_tokens ?? 
0 } export async function executeChatSession( diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index 217c672b12..8b6e2c4853 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -1,7 +1,11 @@ // Import necessary modules and interfaces import { CancellationToken } from "./cancellation" import { LanguageModel } from "./chat" -import { ChatCompletionMessageParam, ChatCompletionsOptions } from "./chattypes" +import { + ChatCompletionMessageParam, + ChatCompletionsOptions, + ChatCompletionUsages, +} from "./chattypes" import { MarkdownTrace } from "./trace" // Represents a code fragment with associated files @@ -56,6 +60,11 @@ export interface GenerationResult extends GenerationOutput { */ finishReason?: string + /** + * Token usage statistics if reported by LLM + */ + usages?: ChatCompletionUsages + /** * Optional label for the run */ From b94ffd851c7ede6ca9196b6acef125619f0d76b9 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 19:36:05 +0000 Subject: [PATCH 7/9] Add ChatCompletionUsages to GenerationOptions and refactor usage handling --- packages/cli/src/run.ts | 19 +++++++++++-------- packages/core/src/chat.ts | 14 +++++--------- packages/core/src/generation.ts | 1 + packages/core/src/promptcontext.ts | 1 - packages/core/src/runpromptcontext.ts | 3 ++- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index e0d8c7dbc8..b185283be7 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -6,7 +6,10 @@ import { convertDiagnosticsToSARIF } from "./sarif" import { buildProject } from "./build" import { diagnosticsToCSV } from "../../core/src/ast" import { CancellationOptions } from "../../core/src/cancellation" -import { ChatCompletionsProgressReport } from "../../core/src/chattypes" +import { + ChatCompletionsProgressReport, + ChatCompletionUsages, +} from "../../core/src/chattypes" import { runTemplate } from 
"../../core/src/promptrunner" import { githubCreateIssueComment, @@ -244,6 +247,7 @@ export async function runScript( (acc, v) => ({ ...acc, ...parseKeyValuePair(v) }), {} ) + const usages: ChatCompletionUsages = {} try { if (options.label) trace.heading(2, options.label) const { info } = await resolveModelConnectionInfo(script, { @@ -261,6 +265,7 @@ export async function runScript( trace.options.encoder = await resolveTokenEncoder(info.model) await runtimeHost.models.pullModel(info.model) result = await runTemplate(prj, script, fragment, { + usages, inner: false, infoCb: (args) => { const { text } = args @@ -522,13 +527,11 @@ export async function runScript( return fail("error annotations found", ANNOTATION_ERROR_CODE) logVerbose("genaiscript: done") - if (result.usages) { - for (const [key, value] of Object.entries(result.usages)) { - if (value.total_tokens > 0) - logVerbose( - ` ${key}: ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})` - ) - } + for (const [key, value] of Object.entries(result.usages)) { + if (value.total_tokens > 0) + logVerbose( + ` ${key}: ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})` + ) } if (outTraceFilename) logVerbose(` trace: ${outTraceFilename}`) return { exitCode: 0, result } diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index b3bc35d572..eb75f0d00e 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -366,13 +366,12 @@ function structurifyChatSession( schemas: Record, genVars: Record, options: GenerationOptions, - usages: ChatCompletionUsages, others?: { resp?: ChatCompletionResponse err?: any } ): RunPromptResult { - const { trace, responseType, responseSchema } = options + const { trace, responseType, responseSchema, usages } = options const { resp, err } = others || {} const text = assistantText(messages, responseType) const annotations = parseAnnotations(text) @@ -441,14 +440,14 @@ async function processChatMessage( 
chatParticipants: ChatParticipant[], schemas: Record, genVars: Record, - options: GenerationOptions, - usages: ChatCompletionUsages + options: GenerationOptions ): Promise { const { stats, maxToolCalls = MAX_TOOL_CALLS, trace, cancellationToken, + usages, } = options accumulateChatUsage(usages, req.model, resp.usage) @@ -517,7 +516,7 @@ async function processChatMessage( if (needsNewTurn) return undefined } - return structurifyChatSession(messages, schemas, genVars, options, usages, { + return structurifyChatSession(messages, schemas, genVars, options, { resp, }) } @@ -593,7 +592,6 @@ export async function executeChatSession( : undefined trace.startDetails(`🧠 llm chat`) if (tools?.length) trace.detailsFenced(`🛠️ tools`, tools, "yaml") - const usages: ChatCompletionUsages = {} try { let genVars: Record while (true) { @@ -660,8 +658,7 @@ export async function executeChatSession( chatParticipants, schemas, genVars, - genOptions, - usages + genOptions ) if (output) return output } catch (err) { @@ -670,7 +667,6 @@ export async function executeChatSession( schemas, genVars, genOptions, - usages, { resp, err } ) } diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index 8b6e2c4853..1fcfc48d63 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -105,4 +105,5 @@ export interface GenerationOptions } vars?: PromptParameters // Variables for prompt customization stats: GenerationStats // Statistics of the generation + usages: ChatCompletionUsages } diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index b889f6aff7..2c26953b2a 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -43,7 +43,6 @@ export async function createPromptContext( options: GenerationOptions, model: string ) { - const { infoCb } = options || {} const { generator, ...varsNoGenerator } = vars // Clone variables to prevent modification of the original object const env = { 
generator, ...structuredClone(varsNoGenerator) } diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index 35dbd68c85..bdfa34a064 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -35,6 +35,7 @@ import { checkCancelled } from "./cancellation" import { ChatCompletionMessageParam, ChatCompletionSystemMessageParam, + ChatCompletionUsages, } from "./chattypes" import { parseModelIdentifier, resolveModelConnectionInfo } from "./models" import { @@ -230,7 +231,7 @@ export function createChatGenerationContext( env: ExpansionVariables } ): RunPromptContextNode { - const { cancellationToken, infoCb } = options || {} + const { cancellationToken, infoCb, usages } = options || {} const { prj, env } = projectOptions const turnCtx = createChatTurnGenerationContext(options, trace) const node = turnCtx.node From a21540300350ba265aeff09ae02a8705b295061f Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 19:36:46 +0000 Subject: [PATCH 8/9] Update loop to iterate over 'usages' instead of 'result.usages' --- packages/cli/src/run.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index b185283be7..2229a5b03f 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -527,7 +527,7 @@ export async function runScript( return fail("error annotations found", ANNOTATION_ERROR_CODE) logVerbose("genaiscript: done") - for (const [key, value] of Object.entries(result.usages)) { + for (const [key, value] of Object.entries(usages)) { if (value.total_tokens > 0) logVerbose( ` ${key}: ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})` From bd4cb7ed99d72ffe0fe98fde358922d153ec8717 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 1 Oct 2024 19:39:06 +0000 Subject: [PATCH 9/9] Update log format for token usage details in CLI output --- packages/cli/src/run.ts | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index 2229a5b03f..c4c8096726 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -530,7 +530,7 @@ export async function runScript( for (const [key, value] of Object.entries(usages)) { if (value.total_tokens > 0) logVerbose( - ` ${key}: ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})` + `tokens: ${key}, ${value.total_tokens} (${value.prompt_tokens} => ${value.completion_tokens})` ) } if (outTraceFilename) logVerbose(` trace: ${outTraceFilename}`)