From cda44754388f77c726efcb3877af6197aa01ce0b Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Thu, 29 Aug 2024 18:52:24 +0000 Subject: [PATCH 1/3] Enable caching for LLM requests with configurable cache names --- docs/genaisrc/genaiscript.d.ts | 6 +-- .../content/docs/reference/scripts/cache.mdx | 37 +++++++------------ genaisrc/genaiscript.d.ts | 6 +-- packages/core/src/constants.ts | 4 +- packages/core/src/genaisrc/genaiscript.d.ts | 6 +-- packages/core/src/openai.ts | 27 ++++---------- packages/core/src/types/prompt_template.d.ts | 6 +-- packages/sample/genaisrc/cache.genai.mts | 3 +- packages/sample/genaisrc/genaiscript.d.ts | 6 +-- .../sample/genaisrc/node/genaiscript.d.ts | 6 +-- .../sample/genaisrc/python/genaiscript.d.ts | 6 +-- .../sample/genaisrc/style/genaiscript.d.ts | 6 +-- .../summary-of-summary-gpt35.genai.js | 2 +- .../genaisrc/summary-of-summary-phi3.genai.js | 4 +- packages/sample/src/aici/genaiscript.d.ts | 6 +-- packages/sample/src/errors/genaiscript.d.ts | 6 +-- packages/sample/src/makecode/genaiscript.d.ts | 6 +-- packages/sample/src/tla/genaiscript.d.ts | 6 +-- packages/sample/src/vision/genaiscript.d.ts | 6 +-- slides/genaisrc/genaiscript.d.ts | 6 +-- 20 files changed, 69 insertions(+), 92 deletions(-) diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/docs/src/content/docs/reference/scripts/cache.mdx b/docs/src/content/docs/reference/scripts/cache.mdx index 46fca3a6ff..195fce9043 100644 --- a/docs/src/content/docs/reference/scripts/cache.mdx +++ b/docs/src/content/docs/reference/scripts/cache.mdx @@ -8,12 +8,20 @@ keywords: cache management, LLM request caching, script performance, cache file import { FileTree } from "@astrojs/starlight/components" -LLM requests are cached by default. This means that if a script generates the same prompt for the same model, the cache may be used. +LLM requests are **NOT** cached by default. However, you can turn on LLM request caching from `script` metadata or the CLI arguments. -- the `temperature` is less than 0.5 -- the `top_p` is less than 0.5 -- no [functions](./functions.md) are used as they introduce randomness -- `seed` is not used +```js "cache: true" +script({ + ..., + cache: true +}) +``` + +or + +```sh "--cache" +npx genaiscript run ... --cache +``` The cache is stored in the `.genaiscript/cache/chat.jsonl` file. You can delete this file to clear the cache. This file is excluded from git by default. @@ -26,23 +34,6 @@ This file is excluded from git by default. -## Disabling - -You can always disable the cache using the `cache` option in `script`. - -```js -script({ - ..., - cache: false // always off -}) -``` - -Or using the `--no-cache` flag in the CLI. - -```sh -npx genaiscript run .... --no-cache -``` - ## Custom cache file Use the `cacheName` option to specify a custom cache file name. 
@@ -51,7 +42,7 @@ The name will be used to create a file in the `.genaiscript/cache` directory. ```js script({ ..., - cacheName: "summary" + cache: "summary" }) ``` diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 28066cfd64..0de7375531 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -2,8 +2,6 @@ export const CHANGE = "change" export const TRACE_CHUNK = "traceChunk" export const RECONNECT = "reconnect" export const OPEN = "open" -export const MAX_CACHED_TEMPERATURE = 0.5 -export const MAX_CACHED_TOP_P = 0.5 export const MAX_TOOL_CALLS = 10000 // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference @@ -211,7 +209,7 @@ export const GITHUB_API_VERSION = "2022-11-28" export const GITHUB_TOKEN = "GITHUB_TOKEN" export const AI_REQUESTS_CACHE = "airequests" -export const CHAT_CACHE = "chatv2" +export const CHAT_CACHE = "chat" export const GITHUB_PULL_REQUEST_REVIEWS_CACHE = "prr" export const GITHUB_PULLREQUEST_REVIEW_COMMENT_LINE_DISTANCE = 5 diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. 
+ * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index bbfa0c959b..5329bebfea 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -2,8 +2,6 @@ import { normalizeInt, trimTrailingSlash } from "./util" import { LanguageModelConfiguration, host } from "./host" import { AZURE_OPENAI_API_VERSION, - MAX_CACHED_TEMPERATURE, - MAX_CACHED_TOP_P, MODEL_PROVIDER_OPENAI, TOOL_ID, } from "./constants" @@ -50,13 +48,10 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( options, trace ) => { - const { temperature, top_p, seed, tools } = req const { requestOptions, partialCb, - maxCachedTemperature = MAX_CACHED_TEMPERATURE, - maxCachedTopP = MAX_CACHED_TOP_P, - cache: useCache, + cache: cacheOrName, cacheName, retry, retryDelay, @@ -69,18 +64,12 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( const { model } = parseModelIdentifier(req.model) const encoder = await resolveTokenEncoder(model) - const cache = getChatCompletionCache(cacheName) - const caching = - useCache === true || // always use cache - (useCache !== false && // never use cache - seed === undefined && // seed is not cacheable (let the LLM make the run deterministic) - !tools?.length && // assume tools are non-deterministic by default - (isNaN(temperature) || - isNaN(maxCachedTemperature) || - temperature < maxCachedTemperature) && // high temperature is not cacheable (it's too random) - (isNaN(top_p) || isNaN(maxCachedTopP) || top_p < maxCachedTopP)) - trace.itemValue(`caching`, caching) - const cachedKey = caching + const cache = getChatCompletionCache( + typeof cacheOrName === "string" ? cacheOrName : cacheName + ) + trace.itemValue(`caching`, !!cache) + trace.itemValue(`cache`, cache?.name) + const cachedKey = !!cacheOrName ? { ...req, ...cfgNoToken, @@ -263,7 +252,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( responseSoFar: chatResp, tokensSoFar: numTokens, responseChunk: progress, - inner + inner, }) } pref = chunk diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index e5565e8b1f..ad664ed5c7 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. 
+ * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/genaisrc/cache.genai.mts b/packages/sample/genaisrc/cache.genai.mts index 0ce4cf75e5..ae233b4f59 100644 --- a/packages/sample/genaisrc/cache.genai.mts +++ b/packages/sample/genaisrc/cache.genai.mts @@ -1,7 +1,6 @@ script({ model: "openai:gpt-3.5-turbo", - cache: true, - cacheName: "gpt-cache", + cache: "gpt-cache", tests: [{}, {}], // run twice to trigger caching }) diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. 
+ * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/genaisrc/summary-of-summary-gpt35.genai.js b/packages/sample/genaisrc/summary-of-summary-gpt35.genai.js index 51e6eecbc3..7597c81cdd 100644 --- a/packages/sample/genaisrc/summary-of-summary-gpt35.genai.js +++ b/packages/sample/genaisrc/summary-of-summary-gpt35.genai.js @@ -15,7 +15,7 @@ for (const file of env.files) { _.def("FILE", file) _.$`Summarize FILE. Be concise.` }, - { model: "gpt-3.5-turbo", cacheName: "summary_gpt35" } + { model: "gpt-3.5-turbo", cache: "summary_gpt35" } ) // save the summary in the main prompt def("FILE", { filename: file.filename, content: text }) diff --git a/packages/sample/genaisrc/summary-of-summary-phi3.genai.js b/packages/sample/genaisrc/summary-of-summary-phi3.genai.js index 982d08e2ad..bab0e6b18a 100644 --- a/packages/sample/genaisrc/summary-of-summary-phi3.genai.js +++ b/packages/sample/genaisrc/summary-of-summary-phi3.genai.js @@ -5,7 +5,7 @@ script({ tests: { files: ["src/rag/*.md"], keywords: ["markdown", "lorem", "microsoft"], - } + }, }) // summarize each files individually @@ -15,7 +15,7 @@ for (const file of env.files) { _.def("FILE", file) _.$`Extract keywords for the contents of FILE.` }, - { model: "ollama:phi3", cacheName: "summary_phi3" } + { model: "ollama:phi3", cache: "summary_phi3" } ) def("FILE", { ...file, content: text }) } diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ b/packages/sample/src/aici/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. 
Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index b09af78c4a..c27bcf06db 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions { seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. 
+ * @deprecated Use `cache` instead with a string */ cacheName?: string } From abc05bbd91d46346b6083b523e4adc75be875d2e Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Thu, 29 Aug 2024 19:09:06 +0000 Subject: [PATCH 2/3] Refine error logging and retry logic in CLI run script, and improve error handling in OpenAI chat completion --- packages/cli/src/run.ts | 21 +++++++++++---------- packages/core/src/openai.ts | 8 ++++++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index c9a6f1c83c..67b2cbab63 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -104,10 +104,12 @@ export async function runScriptWithExitCode( break const delayMs = 2000 * Math.pow(2, r) - console.error( - `error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms` - ) - await delay(delayMs) + if (runRetry > 1) { + console.error( + `error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms` + ) + await delay(delayMs) + } } process.exit(exitCode) } @@ -156,7 +158,7 @@ export async function runScript( const jsSource = options.jsSource const fail = (msg: string, exitCode: number) => { - logVerbose(msg) + logError(msg) return { exitCode, result } } @@ -303,9 +305,6 @@ export async function runScript( return fail("runtime error", RUNTIME_ERROR_CODE) } if (!isQuiet) logVerbose("") // force new line - if (result.status !== "success" && result.status !== "cancelled") - logVerbose(result.statusText ?? result.status) - if (outAnnotations && result.annotations?.length) { if (isJSONLFilename(outAnnotations)) await appendJSONL(outAnnotations, result.annotations) @@ -485,8 +484,10 @@ export async function runScript( } } // final fail - if (result.error && !isCancelError(result.error)) - return fail(errorMessage(result.error), RUNTIME_ERROR_CODE) + if (result.status !== "success" && result.status !== "cancelled") { + const msg = errorMessage(result.error) ?? result.statusText + return fail(msg, RUNTIME_ERROR_CODE) + } if (failOnErrors && result.annotations?.some((a) => a.severity === "error")) return fail("error annotations found", ANNOTATION_ERROR_CODE) diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index 5329bebfea..eb67aebdf6 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -149,7 +149,11 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( try { body = await r.text() } catch (e) {} - const { error } = JSON5TryParse(body, {}) as { error: any } + const { error, message } = JSON5TryParse(body, {}) as { + error: any + message: string + } + if (message) trace.error(message) if (error) trace.error(undefined, { name: error.code, @@ -158,7 +162,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( }) throw new RequestError( r.status, - r.statusText, + message ?? error?.message ?? 
r.statusText, error, body, normalizeInt(r.headers.get("retry-after")) From fac7ebf4575c986ab1ab7185319dc083db2263b2 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Thu, 29 Aug 2024 19:12:31 +0000 Subject: [PATCH 3/3] Allow cache configuration to accept boolean or string values in core and vscode packages --- packages/core/src/chattypes.ts | 2 +- packages/core/src/server/messages.ts | 2 +- packages/vscode/src/state.ts | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/core/src/chattypes.ts b/packages/core/src/chattypes.ts index 5ef2511761..1f68dc8aa3 100644 --- a/packages/core/src/chattypes.ts +++ b/packages/core/src/chattypes.ts @@ -86,7 +86,7 @@ export interface ChatCompletionsOptions { requestOptions?: Partial> maxCachedTemperature?: number maxCachedTopP?: number - cache?: boolean + cache?: boolean | string cacheName?: string retry?: number retryDelay?: number diff --git a/packages/core/src/server/messages.ts b/packages/core/src/server/messages.ts index 8c2c39868f..3bd55bea11 100644 --- a/packages/core/src/server/messages.ts +++ b/packages/core/src/server/messages.ts @@ -70,7 +70,7 @@ export interface PromptScriptRunOptions { model: string embeddingsModel: string csvSeparator: string - cache: boolean + cache: boolean | string cacheName: string applyEdits: boolean failOnErrors: boolean diff --git a/packages/vscode/src/state.ts b/packages/vscode/src/state.ts index f2efc5e5d3..03115a778e 100644 --- a/packages/vscode/src/state.ts +++ b/packages/vscode/src/state.ts @@ -269,7 +269,7 @@ tests/ ): Promise { const controller = new AbortController() const config = vscode.workspace.getConfiguration(TOOL_ID) - const cache = config.get("cache") + const cache = config.get("cache") as boolean const signal = controller.signal const trace = new MarkdownTrace() @@ -332,7 +332,7 @@ tests/ infoCb, partialCb, label, - cache: cache && template.cache, + cache: cache ? template.cache : undefined, vars: parametersToVars(options.parameters), } )
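
For reference, a minimal sketch of a script using the reworked option (not part of the patch itself): the string form of `cache` names the cache file under `.genaiscript/cache`, replacing the deprecated `cacheName`. The title is illustrative and the model id is borrowed from the sample scripts above; the same switch is available from the CLI via `npx genaiscript run ... --cache`, as shown in the updated cache.mdx.

```js
// Sketch only — assumes the `cache?: boolean | string` option introduced by this patch.
script({
    title: "summarize-with-cache", // illustrative title, not taken from the patch
    model: "openai:gpt-3.5-turbo", // model id borrowed from the sample scripts above
    cache: "summary", // string form names the cache file (previously `cacheName: "summary"`)
})

def("FILE", env.files)
$`Summarize FILE. Be concise.`
```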