Enable caching for LLM requests with configurable cache names
pelikhan committed Aug 29, 2024
1 parent d118049 commit cda4475
Showing 20 changed files with 69 additions and 92 deletions.
6 changes: 3 additions & 3 deletions docs/genaisrc/genaiscript.d.ts


37 changes: 14 additions & 23 deletions docs/src/content/docs/reference/scripts/cache.mdx
@@ -8,12 +8,20 @@ keywords: cache management, LLM request caching, script performance, cache file

import { FileTree } from "@astrojs/starlight/components"

Check warning on line 10 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build): The statement about LLM requests caching has been changed. It was previously stated that LLM requests are cached by default, but the updated content states that they are not cached by default. This is a significant change and should be verified for accuracy.
LLM requests are cached by default. This means that if a script generates the same prompt for the same model, the cache may be used.
LLM requests are **NOT** cached by default. However, you can turn on LLM request caching from `script` metadata or the CLI arguments.

- the `temperature` is less than 0.5
- the `top_p` is less than 0.5
- no [functions](./functions.md) are used as they introduce randomness
- `seed` is not used
```js "cache: true"
script({
...,
cache: true
})
```

or

```sh "--cache"
npx genaiscript run ... --cache
```

Check warning on line 24 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build): New content has been added to explain how to enable LLM request caching. This includes a JavaScript code snippet and a shell command. Ensure that these instructions are correct and clear for users.

The cache is stored in the `.genaiscript/cache/chat.jsonl` file. You can delete this file to clear the cache.
This file is excluded from git by default.
@@ -26,23 +34,6 @@ This file is excluded from git by default.

</FileTree>

Check failure on line 36 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build): The section on disabling the cache has been removed. If this information is still relevant and useful, consider adding it back to the documentation.
## Disabling

You can always disable the cache using the `cache` option in `script`.

```js
script({
...,
cache: false // always off
})
```

Or using the `--no-cache` flag in the CLI.

```sh
npx genaiscript run .... --no-cache
```

## Custom cache file

Use the `cacheName` option to specify a custom cache file name.
@@ -51,7 +42,7 @@ The name will be used to create a file in the `.genaiscript/cache` directory.
```js
script({
...,

Check warning on line 44 in docs/src/content/docs/reference/scripts/cache.mdx (GitHub Actions / build): The property name in the JavaScript code snippet has been changed from 'cacheName' to 'cache'. This could potentially confuse users if not properly explained in the surrounding text.
cacheName: "summary"
cache: "summary"
})
```
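
For instance, migrating the sample script touched by this commit would look roughly as follows (the model and `gpt-cache` name are taken from `packages/sample/genaisrc/cache.genai.mts` below; the resulting `.genaiscript/cache/gpt-cache.jsonl` path is an assumption based on the default `chat.jsonl` location, not something this commit documents):

```ts
// before this commit: a boolean toggle plus a separate cache name
script({
    model: "openai:gpt-3.5-turbo",
    cache: true,
    cacheName: "gpt-cache",
})

// after this commit: one `cache` option carries both the toggle and the name,
// presumably persisted as .genaiscript/cache/gpt-cache.jsonl
script({
    model: "openai:gpt-3.5-turbo",
    cache: "gpt-cache",
})
```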

6 changes: 3 additions & 3 deletions genaisrc/genaiscript.d.ts


4 changes: 1 addition & 3 deletions packages/core/src/constants.ts
@@ -2,8 +2,6 @@ export const CHANGE = "change"
export const TRACE_CHUNK = "traceChunk"
export const RECONNECT = "reconnect"
export const OPEN = "open"
export const MAX_CACHED_TEMPERATURE = 0.5
export const MAX_CACHED_TOP_P = 0.5
export const MAX_TOOL_CALLS = 10000

// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
@@ -211,7 +209,7 @@ export const GITHUB_API_VERSION = "2022-11-28"
export const GITHUB_TOKEN = "GITHUB_TOKEN"

export const AI_REQUESTS_CACHE = "airequests"
export const CHAT_CACHE = "chatv2"
export const CHAT_CACHE = "chat"
export const GITHUB_PULL_REQUEST_REVIEWS_CACHE = "prr"
export const GITHUB_PULLREQUEST_REVIEW_COMMENT_LINE_DISTANCE = 5

6 changes: 3 additions & 3 deletions packages/core/src/genaisrc/genaiscript.d.ts


27 changes: 8 additions & 19 deletions packages/core/src/openai.ts
@@ -2,8 +2,6 @@ import { normalizeInt, trimTrailingSlash } from "./util"
import { LanguageModelConfiguration, host } from "./host"
import {
AZURE_OPENAI_API_VERSION,
MAX_CACHED_TEMPERATURE,
MAX_CACHED_TOP_P,
MODEL_PROVIDER_OPENAI,
TOOL_ID,
} from "./constants"
@@ -50,13 +48,10 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
options,
trace
) => {
const { temperature, top_p, seed, tools } = req
const {
requestOptions,
partialCb,
maxCachedTemperature = MAX_CACHED_TEMPERATURE,
maxCachedTopP = MAX_CACHED_TOP_P,
cache: useCache,
cache: cacheOrName,
cacheName,
retry,
retryDelay,
@@ -69,18 +64,12 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
const { model } = parseModelIdentifier(req.model)
const encoder = await resolveTokenEncoder(model)

const cache = getChatCompletionCache(cacheName)
const caching =
useCache === true || // always use cache
(useCache !== false && // never use cache
seed === undefined && // seed is not cacheable (let the LLM make the run deterministic)
!tools?.length && // assume tools are non-deterministic by default
(isNaN(temperature) ||
isNaN(maxCachedTemperature) ||
temperature < maxCachedTemperature) && // high temperature is not cacheable (it's too random)
(isNaN(top_p) || isNaN(maxCachedTopP) || top_p < maxCachedTopP))
trace.itemValue(`caching`, caching)
const cachedKey = caching
const cache = getChatCompletionCache(
typeof cacheOrName === "string" ? cacheOrName : cacheName
)
trace.itemValue(`caching`, !!cache)
trace.itemValue(`cache`, cache?.name)
const cachedKey = !!cacheOrName
? <ChatCompletionRequestCacheKey>{
...req,
...cfgNoToken,
@@ -263,7 +252,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
responseSoFar: chatResp,
tokensSoFar: numTokens,
responseChunk: progress,
inner
inner,
})
}
pref = chunk
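
Read together with the constants change above, the new logic appears to key caching off the `cache` value itself: a string selects the cache name, `true` falls back to the deprecated `cacheName`, and a falsy value leaves the request uncached. A standalone sketch of that resolution follows; `resolveCacheName` and the fallback to the `chat` default are illustrative assumptions, not the module's actual helper:

```ts
// Sketch only: mirrors the resolution visible in the openai.ts hunk above.
const CHAT_CACHE = "chat" // default cache name, per constants.ts in this commit

function resolveCacheName(
    cache?: boolean | string, // the new `cache` script option
    cacheName?: string // the deprecated `cacheName` option
): string | undefined {
    if (!cache) return undefined // falsy: the request is not cached
    if (typeof cache === "string") return cache // string: explicit cache name
    return cacheName ?? CHAT_CACHE // true: legacy name, presumably defaulting to "chat"
}

// resolveCacheName("summary")            -> "summary"
// resolveCacheName(true)                 -> "chat"
// resolveCacheName(undefined, "summary") -> undefined (per the hunk, only `cache` turns caching on)
```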
6 changes: 3 additions & 3 deletions packages/core/src/types/prompt_template.d.ts
@@ -176,13 +176,13 @@ interface ModelOptions extends ModelConnectionOptions {
seed?: number

/**
* If true, the prompt will be cached. If false, the LLM chat is never cached.
* Leave empty to use the default behavior.
* By default, LLM queries are not cached. If true, the LLM request will be cached. Use a string to override the default cache name
*/
cache?: boolean
cache?: boolean | string

/**
* Custom cache name. If not set, the default cache is used.
* @deprecated Use `cache` instead with a string
*/
cacheName?: string
}
3 changes: 1 addition & 2 deletions packages/sample/genaisrc/cache.genai.mts
@@ -1,7 +1,6 @@
script({
model: "openai:gpt-3.5-turbo",
cache: true,
cacheName: "gpt-cache",
cache: "gpt-cache",
tests: [{}, {}], // run twice to trigger caching
})

6 changes: 3 additions & 3 deletions packages/sample/genaisrc/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/genaisrc/node/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/genaisrc/python/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/genaisrc/style/genaiscript.d.ts


2 changes: 1 addition & 1 deletion packages/sample/genaisrc/summary-of-summary-gpt35.genai.js
@@ -15,7 +15,7 @@ for (const file of env.files) {
_.def("FILE", file)
_.$`Summarize FILE. Be concise.`
},
{ model: "gpt-3.5-turbo", cacheName: "summary_gpt35" }
{ model: "gpt-3.5-turbo", cache: "summary_gpt35" }
)
// save the summary in the main prompt
def("FILE", { filename: file.filename, content: text })
4 changes: 2 additions & 2 deletions packages/sample/genaisrc/summary-of-summary-phi3.genai.js
@@ -5,7 +5,7 @@ script({
tests: {
files: ["src/rag/*.md"],
keywords: ["markdown", "lorem", "microsoft"],
}
},
})

// summarize each files individually
@@ -15,7 +15,7 @@ for (const file of env.files) {
_.def("FILE", file)
_.$`Extract keywords for the contents of FILE.`
},
{ model: "ollama:phi3", cacheName: "summary_phi3" }
{ model: "ollama:phi3", cache: "summary_phi3" }
)
def("FILE", { ...file, content: text })
}
6 changes: 3 additions & 3 deletions packages/sample/src/aici/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/src/errors/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/src/makecode/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/src/tla/genaiscript.d.ts


6 changes: 3 additions & 3 deletions packages/sample/src/vision/genaiscript.d.ts


