cleanup list and pull (#1000)
* cleanup list and pull

* added script

* refactor: 🔄 update LLM aliases in llms.json

* refactor: ♻️ streamline model parsing and cleanup llms.json

* fix pull

* fix: πŸ› update alias for 'large' to use latest tag

* style: ✏️ refine system prompt for conciseness

* refactor: ✏️ clarify system prompt behavior

* fix: πŸ› correct string template usage in fetch command

* fix: πŸ› handle error objects with "error" property

* smaller ollama model

* feat: ✨ add JSONL parsing and error handling in pullModel function
pelikhan authored Jan 13, 2025
1 parent db0219c commit db4febd
Showing 17 changed files with 79 additions and 80 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ollama.yml
@@ -34,10 +34,10 @@ jobs:
       - name: start ollama
         run: yarn ollama:start
       - name: run summarize-ollama-phi3
-        run: yarn test:summarize --model ollama:phi3.5:latest --out ./temp/summarize-ollama-phi3
+        run: yarn test:summarize --model ollama:llama3.2:1b --out ./temp/summarize-ollama-phi3
         env:
           OLLAMA_HOST: "http://localhost:11434"
       - name: run convert-ollama-phi3
-        run: yarn cli convert summarize --model ollama:phi3.5:latest "packages/sample/src/rag/*.md" --cache-name sum
+        run: yarn cli convert summarize --model ollama:llama3.2:1b "packages/sample/src/rag/*.md" --cache-name sum
         env:
           OLLAMA_HOST: "http://localhost:11434"
2 changes: 1 addition & 1 deletion docs/src/content/docs/reference/scripts/system.mdx
@@ -104,7 +104,7 @@ Base system prompt

 `````js wrap title="system"
 system({ title: "Base system prompt" })
-$`- You are concise.`
+$`- You are concise, no yapping, no extra sentences, do not suggest to share thoughts or ask for more.`

 `````
1 change: 1 addition & 0 deletions package.json
@@ -75,6 +75,7 @@
     "genai:docify": "node packages/cli/built/genaiscript.cjs run docify",
     "gcm": "node packages/cli/built/genaiscript.cjs run gcm --model github:gpt-4o",
     "prd": "node packages/cli/built/genaiscript.cjs run prd -prd --model github:gpt-4o",
+    "prr": "node packages/cli/built/genaiscript.cjs run prr -prc --model github:gpt-4o",
     "genai": "node packages/cli/built/genaiscript.cjs run",
     "genai:convert": "node packages/cli/built/genaiscript.cjs convert",
     "genai:debug": "yarn compile-debug && node packages/cli/built/genaiscript.cjs run",
24 changes: 17 additions & 7 deletions packages/cli/src/nodehost.ts
@@ -138,19 +138,29 @@ export class NodeHost implements RuntimeHost {
     }

     async pullModel(
-        modelid: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationOptions
     ): Promise<ResponseStatus> {
-        if (this.pulledModels.includes(modelid)) return { ok: true }
+        const { provider, model } = cfg
+        const modelId = `${provider}:${model}`
+        if (this.pulledModels.includes(modelId)) return { ok: true }

-        const { provider } = parseModelIdentifier(modelid)
-        const { pullModel } = await resolveLanguageModel(provider)
+        const { pullModel, listModels } = await resolveLanguageModel(provider)
         if (!pullModel) {
-            this.pulledModels.includes(modelid)
+            this.pulledModels.includes(modelId)
             return { ok: true }
         }
-        const res = await pullModel(modelid, options)
-        if (res.ok) this.pulledModels.push(modelid)
+
+        if (listModels) {
+            const models = await listModels(cfg, options)
+            if (models.find(({ id }) => id === model)) {
+                this.pulledModels.push(modelId)
+                return { ok: true }
+            }
+        }
+
+        const res = await pullModel(cfg, options)
+        if (res.ok) this.pulledModels.push(modelId)
         else if (res.error) logError(res.error)
         return res
     }
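The reworked `pullModel` takes the resolved configuration, caches under a normalized `provider:model` key, and, when the provider exposes `listModels`, probes the local inventory before downloading anything. A minimal standalone sketch of the probe-before-pull pattern, with hypothetical `listLocal` and `pull` callbacks standing in for a provider's `listModels`/`pullModel` hooks:

```ts
// Sketch, not the shipped implementation: check a session cache, then a
// cheap inventory probe, and only download when the model is truly absent.
async function ensureModel(
    model: string,
    listLocal: () => Promise<string[]>, // hypothetical listModels stand-in
    pull: (id: string) => Promise<boolean>, // hypothetical pullModel stand-in
    cache: Set<string> = new Set()
): Promise<boolean> {
    if (cache.has(model)) return true // already verified this session
    const installed = await listLocal() // cheap: no download involved
    if (installed.includes(model)) {
        cache.add(model)
        return true
    }
    const ok = await pull(model) // slow path: fetch the weights
    if (ok) cache.add(model)
    return ok
}
```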
2 changes: 1 addition & 1 deletion packages/core/src/chat.ts
@@ -137,7 +137,7 @@ export type ListModelsFunction = (
 ) => Promise<LanguageModelInfo[]>

 export type PullModelFunction = (
-    modelId: string,
+    cfg: LanguageModelConfiguration,
     options: TraceOptions & CancellationOptions
 ) => Promise<{ ok: boolean; error?: SerializedError }>

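A `PullModelFunction` now receives the full `LanguageModelConfiguration` (provider, model, and base URL among its fields) rather than a raw model id, so implementations no longer have to re-resolve the connection themselves. A minimal conforming implementation, purely illustrative:

```ts
// Illustrative only: a no-op provider hook written against the new signature.
const noopPullModel: PullModelFunction = async (cfg) => {
    const { provider, model } = cfg // connection details arrive pre-resolved
    console.debug(`pull requested for ${provider}:${model}`)
    return { ok: true }
}
```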
1 change: 0 additions & 1 deletion packages/core/src/constants.ts
@@ -195,7 +195,6 @@ export const MODEL_PROVIDERS = Object.freeze<
         prediction?: boolean
         bearerToken?: boolean
         listModels?: boolean
-        pullModel?: boolean
         transcribe?: boolean
         aliases?: Record<string, string>
     }[]
1 change: 1 addition & 0 deletions packages/core/src/error.ts
@@ -22,6 +22,7 @@ export function serializeError(
 export function errorMessage(e: any, defaultValue: string = "error"): string {
     if (e === undefined || e === null) return undefined
     if (typeof e.messsage === "string") return e.message
+    if (typeof e.error === "string") return e.error
     const ser = serializeError(e)
     return ser?.message ?? ser?.name ?? defaultValue
 }
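The added branch lets plain `{ error: "..." }` payloads (the shape Ollama's REST API uses for failures) surface their message directly instead of falling through to `serializeError`. A quick illustration of the three paths:

```ts
errorMessage({ error: "pull model manifest: file does not exist" })
// → "pull model manifest: file does not exist" (new branch)
errorMessage(new Error("boom")) // → "boom" (via serializeError)
errorMessage(null) // → undefined (guard clause)
```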
2 changes: 1 addition & 1 deletion packages/core/src/fetch.ts
@@ -216,7 +216,7 @@ ${Object.entries(headers)
             cmd += `-F ${key}=${value instanceof File ? `... (${prettyBytes(value.size)})` : "" + value}\n`
         })
     } else
-        cmd += `-d 'JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}'
+        cmd += `-d '${JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}'
 `
     if (trace) trace.detailsFenced(`✉️ fetch`, cmd, "bash")
     else logVerbose(cmd)
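The broken line never interpolated the body: the `JSON.stringify(...)` call sat inside the string as literal text. The fix wraps it in `${...}` and rewrites each single quote as `'\''` (close the quoted string, emit an escaped quote, reopen it), the standard way to embed `'` in a single-quoted POSIX shell argument. A standalone check of that escaping:

```ts
// Sketch: reproduce the -d body escaping used when rendering a fetch as curl.
const body = { note: "it's fine" }
const escaped = JSON.stringify(body).replace(/'/g, "'\\''")
console.log(`curl -d '${escaped}'`)
// → curl -d '{"note":"it'\''s fine"}' (parses cleanly in a POSIX shell)
```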
2 changes: 1 addition & 1 deletion packages/core/src/genaisrc/system.mjs
@@ -1,2 +1,2 @@
 system({ title: "Base system prompt" })
-$`- You are concise.`
+$`- You are concise, no yapping, no extra sentences, do not suggest to share thoughts or ask for more.`
2 changes: 1 addition & 1 deletion packages/core/src/host.ts
@@ -141,7 +141,7 @@ export interface RuntimeHost extends Host {
     modelAliases: Readonly<ModelConfigurations>

     pullModel(
-        model: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationOptions
     ): Promise<ResponseStatus>

15 changes: 5 additions & 10 deletions packages/core/src/llms.json
@@ -95,12 +95,10 @@
         "prediction": false,
         "listModels": false,
         "aliases": {
-            "large": "Qwen/Qwen2.5-72B-Instruct",
-            "small": "Qwen/Qwen2.5-Coder-32B-Instruct",
+            "large": "meta-llama/Llama-3.3-70B-Instruct",
+            "small": "microsoft/phi-4",
             "vision": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-            "embeddings": "nomic-ai/nomic-embed-text-v1.5",
-            "reasoning": "Qwen/QwQ-32B-Preview",
-            "reasoning_small": "Qwen/QwQ-32B-Preview"
+            "embeddings": "nomic-ai/nomic-embed-text-v1.5"
         }
     },
     {
@@ -167,21 +165,18 @@
         "detail": "Ollama local model",
         "logitBias": false,
         "openaiCompatibility": "https://github.com/ollama/ollama/blob/main/docs/openai.md",
-        "pullModel": true,
         "prediction": false,
         "aliases": {
-            "large": "phi4",
+            "large": "phi4:latest",
             "small": "llama3.2:3b",
             "embeddings": "nomic-embed-text",
-            "vision": "llama3.2-vision:11b",
-            "reasoning": "qwq:32b"
+            "vision": "llama3.2-vision:11b"
         }
     },
     {
         "id": "lmstudio",
         "detail": "LM Studio local server",
         "prediction": false,
-        "pullModel": true,
         "aliases": {
             "large": "phi-4",
             "small": "llama-3.2-3b-instruct",
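Aliases map tier names to concrete models per provider, so after this change `ollama:large` resolves to `phi4:latest`, and the removed `reasoning` aliases must now be spelled out as explicit model ids. A hypothetical script selecting by tier:

```js
// Hypothetical genaiscript: "ollama:small" resolves to llama3.2:3b via the
// aliases above, so the script never hard-codes a model name.
script({ model: "ollama:small" })
$`Summarize ${env.files} in one sentence.`
```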
12 changes: 3 additions & 9 deletions packages/core/src/lmstudio.ts
@@ -6,15 +6,9 @@ import { OpenAIChatCompletion, OpenAIListModels } from "./openai"
 import { execa } from "execa"
 import { logVerbose, utf8Decode } from "./util"

-const pullModel: PullModelFunction = async (modelId, options) => {
-    const { trace, cancellationToken } = options || {}
-    const { provider, model } = parseModelIdentifier(modelId)
-    const conn = await host.getLanguageModelConfiguration(modelId, {
-        token: true,
-        cancellationToken,
-        trace,
-    })
-    const models = await OpenAIListModels(conn, options)
+const pullModel: PullModelFunction = async (cfg, options) => {
+    const model = cfg.model
+    const models = await OpenAIListModels(cfg, options)
     if (models.find((m) => m.id === model)) return { ok: true }
     logVerbose(`lms get ${model} --yes`)
     const res = await execa({ stdout: ["inherit"] })`lms get ${model} --yes`
49 changes: 22 additions & 27 deletions packages/core/src/ollama.ts
@@ -13,6 +13,7 @@ import {
     LanguageModelConfiguration,
     LanguageModelInfo,
 } from "./server/messages"
+import { JSONLTryParse } from "./jsonl"

 /**
  * Lists available models for the Ollama language model configuration.
@@ -53,39 +54,19 @@ async function listModels(
     )
 }

-const pullModel: PullModelFunction = async (modelId, options) => {
+const pullModel: PullModelFunction = async (cfg, options) => {
     const { trace, cancellationToken } = options || {}
-    const { provider, model } = parseModelIdentifier(modelId)
+    const { provider, model } = cfg
     const fetch = await createFetch({ retries: 0, ...options })
-    const conn = await host.getLanguageModelConfiguration(modelId, {
-        token: true,
-        cancellationToken,
-        trace,
-    })
-    conn.base = conn.base.replace(/\/v1$/i, "")
+    const base = cfg.base.replace(/\/v1$/i, "")
     try {
-        // test if model is present
-        const resTags = await fetch(`${conn.base}/api/tags`, {
-            retries: 0,
-            method: "GET",
-            headers: {
-                "User-Agent": TOOL_ID,
-                "Content-Type": "application/json",
-            },
-        })
-        if (resTags.ok) {
-            const { models }: { models: { model: string }[] } =
-                await resTags.json()
-            if (models.find((m) => m.model === model)) return { ok: true }
-        }
-
         // pull
         logVerbose(`${provider}: pull ${model}`)
-        const resPull = await fetch(`${conn.base}/api/pull`, {
+        const resPull = await fetch(`${base}/api/pull`, {
             method: "POST",
             headers: {
-                "User-Agent": TOOL_ID,
                 "Content-Type": "application/json",
+                "User-Agent": TOOL_ID,
             },
             body: JSON.stringify({ model }),
         })
@@ -94,10 +75,24 @@ const pullModel: PullModelFunction = async (modelId, options) => {
             logVerbose(resPull.statusText)
             return { ok: false, status: resPull.status }
         }
-        0
-        for await (const chunk of iterateBody(resPull, { cancellationToken }))
+        let lastStatus = ""
+        for await (const chunk of iterateBody(resPull, { cancellationToken })) {
+            const cs = JSONLTryParse(chunk) as {
+                status?: string
+                error?: string
+            }[]
+            for (const c of cs) {
+                if (c?.error) {
+                    return {
+                        ok: false,
+                        error: serializeError(c.error),
+                    }
+                }
+            }
             process.stderr.write(".")
+        }
         process.stderr.write("\n")
         logVerbose(`${provider}: pulled ${model}`)
         return { ok: true }
     } catch (e) {
         logError(e)
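Ollama's `/api/pull` streams newline-delimited JSON: progress records such as `{"status":"pulling manifest"}`, or `{"error":"..."}` when the pull fails. Each chunk now goes through a tolerant JSONL parse and the first error record aborts the pull. A standalone sketch of that parsing step, assuming `JSONLTryParse` behaves like a per-line `JSON.parse` that drops malformed lines:

```ts
// Sketch of tolerant JSONL parsing for an Ollama pull stream.
type PullRecord = { status?: string; error?: string }

function parsePullChunk(chunk: string): PullRecord[] {
    return chunk
        .split("\n")
        .filter((line) => line.trim() !== "")
        .flatMap((line) => {
            try {
                return [JSON.parse(line) as PullRecord]
            } catch {
                return [] // tolerate partial lines split across chunks
            }
        })
}

// Usage: the first error record wins; everything else is progress.
const records = parsePullChunk(
    '{"status":"pulling manifest"}\n{"error":"pull model manifest: not found"}'
)
const failure = records.find((r) => r.error)
if (failure) throw new Error(failure.error)
```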
33 changes: 19 additions & 14 deletions packages/core/src/promptrunner.ts
@@ -17,6 +17,7 @@ import { parsePromptParameters } from "./vars"
 import { resolveFileContent } from "./file"
 import { expandTemplate } from "./expander"
 import { resolveLanguageModel } from "./lm"
+import { checkCancelled } from "./cancellation"

 // Asynchronously resolve expansion variables needed for a template
 /**
@@ -186,7 +187,24 @@ export async function runTemplate(
         } satisfies GenerationResult
     }

-    const { ok } = await runtimeHost.pullModel(model, options)
+    // Resolve model connection information
+    const connection = await resolveModelConnectionInfo(
+        { model },
+        { trace, token: true }
+    )
+    if (connection.info.error)
+        throw new Error(errorMessage(connection.info.error))
+    if (!connection.configuration)
+        throw new RequestError(
+            403,
+            `LLM configuration missing for model ${model}`,
+            connection.info
+        )
+    checkCancelled(cancellationToken)
+    const { ok } = await runtimeHost.pullModel(
+        connection.configuration,
+        options
+    )
     if (!ok) {
         trace.renderErrors()
         return {
@@ -208,19 +226,6 @@
         } satisfies GenerationResult
     }

-    // Resolve model connection information
-    const connection = await resolveModelConnectionInfo(
-        { model },
-        { trace, token: true }
-    )
-    if (connection.info.error)
-        throw new Error(errorMessage(connection.info.error))
-    if (!connection.configuration)
-        throw new RequestError(
-            403,
-            `LLM configuration missing for model ${model}`,
-            connection.info
-        )
     const { completer } = await resolveLanguageModel(
         connection.configuration.provider
     )
5 changes: 2 additions & 3 deletions packages/core/src/runpromptcontext.ts
@@ -655,7 +655,7 @@ export function createChatGenerationContext(
         if (info.error) throw new Error(info.error)
         if (!configuration) throw new Error("model configuration not found")
         checkCancelled(cancellationToken)
-        const { ok } = await runtimeHost.pullModel(conn.model, {
+        const { ok } = await runtimeHost.pullModel(configuration, {
             trace: transcriptionTrace,
             cancellationToken,
         })
@@ -755,8 +755,7 @@
             genOptions.model,
             label
         )
-
-        const { ok } = await runtimeHost.pullModel(genOptions.model, {
+        const { ok } = await runtimeHost.pullModel(configuration, {
             trace: runTrace,
             cancellationToken,
         })
2 changes: 1 addition & 1 deletion packages/core/src/testhost.ts
@@ -73,7 +73,7 @@ export class TestHost implements RuntimeHost {
         setRuntimeHost(new TestHost())
     }
     async pullModel(
-        model: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationToken
     ): Promise<ResponseStatus> {
         return { ok: true }
2 changes: 1 addition & 1 deletion packages/core/src/vectorsearch.ts
@@ -206,7 +206,7 @@ export async function vectorSearch(
         throw new Error("No configuration found for vector search")

     // Pull the model
-    await runtimeHost.pullModel(info.model, { trace, cancellationToken })
+    await runtimeHost.pullModel(configuration, { trace, cancellationToken })
     const embeddings = new OpenAIEmbeddings(info, configuration, { trace })

     // Create a local document index
