diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml
index bb75876343..3a3f99c1da 100644
--- a/.github/workflows/ollama.yml
+++ b/.github/workflows/ollama.yml
@@ -34,10 +34,10 @@ jobs:
       - name: start ollama
         run: yarn ollama:start
       - name: run summarize-ollama-phi3
-        run: yarn test:summarize --model ollama:phi3.5:latest --out ./temp/summarize-ollama-phi3
+        run: yarn test:summarize --model ollama:llama3.2:1b --out ./temp/summarize-ollama-phi3
        env:
          OLLAMA_HOST: "http://localhost:11434"
      - name: run convert-ollama-phi3
-        run: yarn cli convert summarize --model ollama:phi3.5:latest "packages/sample/src/rag/*.md" --cache-name sum
+        run: yarn cli convert summarize --model ollama:llama3.2:1b "packages/sample/src/rag/*.md" --cache-name sum
        env:
          OLLAMA_HOST: "http://localhost:11434"
diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx
index d83a2f1db9..430d4f4134 100644
--- a/docs/src/content/docs/reference/scripts/system.mdx
+++ b/docs/src/content/docs/reference/scripts/system.mdx
@@ -104,7 +104,7 @@ Base system prompt
 `````js wrap title="system"
 system({ title: "Base system prompt" })
-$`- You are concise.`
+$`- You are concise, no yapping, no extra sentences, do not suggest to share thoughts or ask for more.`
 `````
diff --git a/package.json b/package.json
index 03f4fd57ac..a5daa55f6a 100644
--- a/package.json
+++ b/package.json
@@ -75,6 +75,7 @@
         "genai:docify": "node packages/cli/built/genaiscript.cjs run docify",
         "gcm": "node packages/cli/built/genaiscript.cjs run gcm --model github:gpt-4o",
         "prd": "node packages/cli/built/genaiscript.cjs run prd -prd --model github:gpt-4o",
+        "prr": "node packages/cli/built/genaiscript.cjs run prr -prc --model github:gpt-4o",
         "genai": "node packages/cli/built/genaiscript.cjs run",
         "genai:convert": "node packages/cli/built/genaiscript.cjs convert",
         "genai:debug": "yarn compile-debug && node packages/cli/built/genaiscript.cjs run",
diff --git a/packages/cli/src/nodehost.ts b/packages/cli/src/nodehost.ts
index 160bcabd44..69bf41c987 100644
--- a/packages/cli/src/nodehost.ts
+++ b/packages/cli/src/nodehost.ts
@@ -138,19 +138,29 @@ export class NodeHost implements RuntimeHost {
     }

     async pullModel(
-        modelid: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationOptions
     ): Promise<{ ok: boolean; error?: SerializedError }> {
-        if (this.pulledModels.includes(modelid)) return { ok: true }
+        const { provider, model } = cfg
+        const modelId = `${provider}:${model}`
+        if (this.pulledModels.includes(modelId)) return { ok: true }

-        const { provider } = parseModelIdentifier(modelid)
-        const { pullModel } = await resolveLanguageModel(provider)
+        const { pullModel, listModels } = await resolveLanguageModel(provider)
         if (!pullModel) {
-            this.pulledModels.includes(modelid)
+            this.pulledModels.includes(modelId)
             return { ok: true }
         }
-        const res = await pullModel(modelid, options)
-        if (res.ok) this.pulledModels.push(modelid)
+
+        if (listModels) {
+            const models = await listModels(cfg, options)
+            if (models.find(({ id }) => id === model)) {
+                this.pulledModels.push(modelId)
+                return { ok: true }
+            }
+        }
+
+        const res = await pullModel(cfg, options)
+        if (res.ok) this.pulledModels.push(modelId)
         else if (res.error) logError(res.error)
         return res
     }
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
index ff2429441b..38013d85f7 100644
--- a/packages/core/src/chat.ts
+++ b/packages/core/src/chat.ts
@@ -137,7 +137,7 @@ export type ListModelsFunction = (
 ) => Promise<LanguageModelInfo[]>

 export type PullModelFunction = (
-    modelId: string,
+    cfg: LanguageModelConfiguration,
     options: TraceOptions & CancellationOptions
 ) => Promise<{ ok: boolean; error?: SerializedError }>
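
The signature change above is the heart of this diff: `pullModel` now receives the resolved `LanguageModelConfiguration` instead of a raw `provider:model` string, so providers no longer call back into `host.getLanguageModelConfiguration`, and `NodeHost` can probe `listModels` before pulling. A minimal sketch of a conforming implementation (type shapes abridged from the hunks above; the stub values are illustrative, not from the diff):

```ts
// Abridged shapes mirroring this diff; the full types live in
// packages/core/src/chat.ts and server/messages.
interface LanguageModelConfiguration {
    provider: string
    model: string
    base: string
    token?: string
}

type PullModelFunction = (
    cfg: LanguageModelConfiguration,
    options?: object
) => Promise<{ ok: boolean; error?: { message?: string } }>

// Toy provider: succeed immediately if the model was already pulled,
// caching under the same `${provider}:${model}` dedup key that
// NodeHost.pullModel builds above.
const pulled = new Set<string>()
const pullModel: PullModelFunction = async (cfg) => {
    const key = `${cfg.provider}:${cfg.model}`
    if (pulled.has(key)) return { ok: true }
    // ...the provider-specific pull request would go here...
    pulled.add(key)
    return { ok: true }
}

pullModel({ provider: "ollama", model: "llama3.2:1b", base: "http://localhost:11434" })
    .then((r) => console.log(r.ok)) // true
```
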
diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
index 1f9e3c0852..3441902c86 100644
--- a/packages/core/src/constants.ts
+++ b/packages/core/src/constants.ts
@@ -195,7 +195,6 @@ export const MODEL_PROVIDERS = Object.freeze<
         prediction?: boolean
         bearerToken?: boolean
         listModels?: boolean
-        pullModel?: boolean
         transcribe?: boolean
         aliases?: Record<string, string>
     }[]
diff --git a/packages/core/src/error.ts b/packages/core/src/error.ts
index dff6837948..0e7ac5a4e6 100644
--- a/packages/core/src/error.ts
+++ b/packages/core/src/error.ts
@@ -22,6 +22,7 @@ export function serializeError(
 export function errorMessage(e: any, defaultValue: string = "error"): string {
     if (e === undefined || e === null) return undefined
     if (typeof e.messsage === "string") return e.message
+    if (typeof e.error === "string") return e.error
     const ser = serializeError(e)
     return ser?.message ?? ser?.name ?? defaultValue
 }
diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts
index 76c7da6134..e90f6d1439 100644
--- a/packages/core/src/fetch.ts
+++ b/packages/core/src/fetch.ts
@@ -216,7 +216,7 @@ ${Object.entries(headers)
             cmd += `-F ${key}=${value instanceof File ? `... (${prettyBytes(value.size)})` : "" + value}\n`
         })
     } else
-        cmd += `-d 'JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}'
+        cmd += `-d '${JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}'
 `
     if (trace) trace.detailsFenced(`✉️ fetch`, cmd, "bash")
     else logVerbose(cmd)
diff --git a/packages/core/src/genaisrc/system.mjs b/packages/core/src/genaisrc/system.mjs
index 0d2b6532fd..aefb93335d 100644
--- a/packages/core/src/genaisrc/system.mjs
+++ b/packages/core/src/genaisrc/system.mjs
@@ -1,2 +1,2 @@
 system({ title: "Base system prompt" })
-$`- You are concise.`
+$`- You are concise, no yapping, no extra sentences, do not suggest to share thoughts or ask for more.`
diff --git a/packages/core/src/host.ts b/packages/core/src/host.ts
index 34aefbd3b7..3e4b2fc45e 100644
--- a/packages/core/src/host.ts
+++ b/packages/core/src/host.ts
@@ -141,7 +141,7 @@ export interface RuntimeHost extends Host {
     modelAliases: Readonly<ModelConfigurations>

     pullModel(
-        model: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationOptions
     ): Promise<{ ok: boolean; error?: SerializedError }>
diff --git a/packages/core/src/llms.json b/packages/core/src/llms.json
index 09b718e4e7..65d178c9f8 100644
--- a/packages/core/src/llms.json
+++ b/packages/core/src/llms.json
@@ -95,12 +95,10 @@
             "prediction": false,
             "listModels": false,
             "aliases": {
-                "large": "Qwen/Qwen2.5-72B-Instruct",
-                "small": "Qwen/Qwen2.5-Coder-32B-Instruct",
+                "large": "meta-llama/Llama-3.3-70B-Instruct",
+                "small": "microsoft/phi-4",
                 "vision": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-                "embeddings": "nomic-ai/nomic-embed-text-v1.5",
-                "reasoning": "Qwen/QwQ-32B-Preview",
-                "reasoning_small": "Qwen/QwQ-32B-Preview"
+                "embeddings": "nomic-ai/nomic-embed-text-v1.5"
             }
         },
         {
@@ -167,21 +165,18 @@
             "detail": "Ollama local model",
             "logitBias": false,
             "openaiCompatibility": "https://github.com/ollama/ollama/blob/main/docs/openai.md",
-            "pullModel": true,
             "prediction": false,
             "aliases": {
-                "large": "phi4",
+                "large": "phi4:latest",
                 "small": "llama3.2:3b",
                 "embeddings": "nomic-embed-text",
-                "vision": "llama3.2-vision:11b",
-                "reasoning": "qwq:32b"
+                "vision": "llama3.2-vision:11b"
             }
         },
         {
             "id": "lmstudio",
             "detail": "LM Studio local server",
             "prediction": false,
-            "pullModel": true,
             "aliases": {
                 "large": "phi-4",
                 "small": "llama-3.2-3b-instruct",
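
The one-line addition to `errorMessage` matters for the Ollama changes further down: pull failures arrive as a bare `{ error: "..." }` JSON object rather than an `Error`, and previously fell through to the generic default. A standalone sketch of the lookup order after this change (re-implemented here for illustration, not the module itself):

```ts
// Sketch of the errorMessage fallback order after this diff.
function errorMessage(e: any, defaultValue = "error"): string | undefined {
    if (e === undefined || e === null) return undefined
    if (typeof e.message === "string") return e.message // Error-like objects
    if (typeof e.error === "string") return e.error // bare { error: "..." } payloads (new branch)
    return defaultValue
}

console.log(errorMessage(new Error("boom"))) // "boom"
console.log(errorMessage({ error: "pull model manifest: file does not exist" }))
// "pull model manifest: file does not exist" (handled by the new branch)
console.log(errorMessage({ code: 42 })) // "error"
```
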
{ "large": "phi-4", "small": "llama-3.2-3b-instruct", diff --git a/packages/core/src/lmstudio.ts b/packages/core/src/lmstudio.ts index 8e6ec9d4de..ccb9ef5a2d 100644 --- a/packages/core/src/lmstudio.ts +++ b/packages/core/src/lmstudio.ts @@ -6,15 +6,9 @@ import { OpenAIChatCompletion, OpenAIListModels } from "./openai" import { execa } from "execa" import { logVerbose, utf8Decode } from "./util" -const pullModel: PullModelFunction = async (modelId, options) => { - const { trace, cancellationToken } = options || {} - const { provider, model } = parseModelIdentifier(modelId) - const conn = await host.getLanguageModelConfiguration(modelId, { - token: true, - cancellationToken, - trace, - }) - const models = await OpenAIListModels(conn, options) +const pullModel: PullModelFunction = async (cfg, options) => { + const model = cfg.model + const models = await OpenAIListModels(cfg, options) if (models.find((m) => m.id === model)) return { ok: true } logVerbose(`lms get ${model} --yes`) const res = await execa({ stdout: ["inherit"] })`lms get ${model} --yes` diff --git a/packages/core/src/ollama.ts b/packages/core/src/ollama.ts index 701aff19e5..33edfcbcdb 100644 --- a/packages/core/src/ollama.ts +++ b/packages/core/src/ollama.ts @@ -13,6 +13,7 @@ import { LanguageModelConfiguration, LanguageModelInfo, } from "./server/messages" +import { JSONLTryParse } from "./jsonl" /** * Lists available models for the Ollama language model configuration. @@ -53,39 +54,19 @@ async function listModels( ) } -const pullModel: PullModelFunction = async (modelId, options) => { +const pullModel: PullModelFunction = async (cfg, options) => { const { trace, cancellationToken } = options || {} - const { provider, model } = parseModelIdentifier(modelId) + const { provider, model } = cfg const fetch = await createFetch({ retries: 0, ...options }) - const conn = await host.getLanguageModelConfiguration(modelId, { - token: true, - cancellationToken, - trace, - }) - conn.base = conn.base.replace(/\/v1$/i, "") + const base = cfg.base.replace(/\/v1$/i, "") try { - // test if model is present - const resTags = await fetch(`${conn.base}/api/tags`, { - retries: 0, - method: "GET", - headers: { - "User-Agent": TOOL_ID, - "Content-Type": "application/json", - }, - }) - if (resTags.ok) { - const { models }: { models: { model: string }[] } = - await resTags.json() - if (models.find((m) => m.model === model)) return { ok: true } - } - // pull logVerbose(`${provider}: pull ${model}`) - const resPull = await fetch(`${conn.base}/api/pull`, { + const resPull = await fetch(`${base}/api/pull`, { method: "POST", headers: { - "User-Agent": TOOL_ID, "Content-Type": "application/json", + "User-Agent": TOOL_ID, }, body: JSON.stringify({ model }), }) @@ -94,10 +75,24 @@ const pullModel: PullModelFunction = async (modelId, options) => { logVerbose(resPull.statusText) return { ok: false, status: resPull.status } } - 0 - for await (const chunk of iterateBody(resPull, { cancellationToken })) + let lastStatus = "" + for await (const chunk of iterateBody(resPull, { cancellationToken })) { + const cs = JSONLTryParse(chunk) as { + status?: string + error?: string + }[] + for (const c of cs) { + if (c?.error) { + return { + ok: false, + error: serializeError(c.error), + } + } + } process.stderr.write(".") + } process.stderr.write("\n") + logVerbose(`${provider}: pulled ${model}`) return { ok: true } } catch (e) { logError(e) diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts index 70aeb29445..95ea98bdac 100644 --- 
diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts
index 70aeb29445..95ea98bdac 100644
--- a/packages/core/src/promptrunner.ts
+++ b/packages/core/src/promptrunner.ts
@@ -17,6 +17,7 @@ import { parsePromptParameters } from "./vars"
 import { resolveFileContent } from "./file"
 import { expandTemplate } from "./expander"
 import { resolveLanguageModel } from "./lm"
+import { checkCancelled } from "./cancellation"

 // Asynchronously resolve expansion variables needed for a template
 /**
@@ -186,7 +187,24 @@ export async function runTemplate(
         } satisfies GenerationResult
     }

-    const { ok } = await runtimeHost.pullModel(model, options)
+    // Resolve model connection information
+    const connection = await resolveModelConnectionInfo(
+        { model },
+        { trace, token: true }
+    )
+    if (connection.info.error)
+        throw new Error(errorMessage(connection.info.error))
+    if (!connection.configuration)
+        throw new RequestError(
+            403,
+            `LLM configuration missing for model ${model}`,
+            connection.info
+        )
+    checkCancelled(cancellationToken)
+    const { ok } = await runtimeHost.pullModel(
+        connection.configuration,
+        options
+    )
     if (!ok) {
         trace.renderErrors()
         return {
@@ -208,19 +226,6 @@ export async function runTemplate(
         } satisfies GenerationResult
     }

-    // Resolve model connection information
-    const connection = await resolveModelConnectionInfo(
-        { model },
-        { trace, token: true }
-    )
-    if (connection.info.error)
-        throw new Error(errorMessage(connection.info.error))
-    if (!connection.configuration)
-        throw new RequestError(
-            403,
-            `LLM configuration missing for model ${model}`,
-            connection.info
-        )
     const { completer } = await resolveLanguageModel(
         connection.configuration.provider
     )
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index b5f91b39fc..f0ac9dc213 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -655,7 +655,7 @@ export function createChatGenerationContext(
             if (info.error) throw new Error(info.error)
             if (!configuration) throw new Error("model configuration not found")
             checkCancelled(cancellationToken)
-            const { ok } = await runtimeHost.pullModel(conn.model, {
+            const { ok } = await runtimeHost.pullModel(configuration, {
                 trace: transcriptionTrace,
                 cancellationToken,
             })
@@ -755,8 +755,7 @@ export function createChatGenerationContext(
                 genOptions.model,
                 label
             )
-
-            const { ok } = await runtimeHost.pullModel(genOptions.model, {
+            const { ok } = await runtimeHost.pullModel(configuration, {
                 trace: runTrace,
                 cancellationToken,
             })
diff --git a/packages/core/src/testhost.ts b/packages/core/src/testhost.ts
index 53a4588ed9..8dbb64f29b 100644
--- a/packages/core/src/testhost.ts
+++ b/packages/core/src/testhost.ts
@@ -73,7 +73,7 @@ export class TestHost implements RuntimeHost {
         setRuntimeHost(new TestHost())
     }
     async pullModel(
-        model: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationToken
     ): Promise<{ ok: boolean; error?: SerializedError }> {
         return { ok: true }
diff --git a/packages/core/src/vectorsearch.ts b/packages/core/src/vectorsearch.ts
index 05a361e648..7bfceeccc3 100644
--- a/packages/core/src/vectorsearch.ts
+++ b/packages/core/src/vectorsearch.ts
@@ -206,7 +206,7 @@ export async function vectorSearch(
         throw new Error("No configuration found for vector search")

     // Pull the model
-    await runtimeHost.pullModel(info.model, { trace, cancellationToken })
+    await runtimeHost.pullModel(configuration, { trace, cancellationToken })
     const embeddings = new OpenAIEmbeddings(info, configuration, { trace })

     // Create a local document index
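
Net effect in `runTemplate`: connection resolution moves ahead of the pull so that `pullModel` receives a real configuration, with a cancellation check between the two awaits, and the later duplicate resolution block is deleted. A self-contained sketch of the reordered prologue (all stubs below are illustrative stand-ins, not the real modules):

```ts
type Cfg = { provider: string; model: string; base: string }

// Stubs standing in for resolveModelConnectionInfo / runtimeHost.pullModel.
async function resolveConnection(
    model: string
): Promise<{ configuration?: Cfg; error?: string }> {
    return { configuration: { provider: "ollama", model, base: "http://localhost:11434" } }
}
async function pullModel(cfg: Cfg): Promise<{ ok: boolean }> {
    return { ok: true } // TestHost-style no-op pull
}

async function prepare(model: string, cancelled: () => boolean): Promise<Cfg> {
    // 1. resolve the connection first (moved up from below the pull)
    const { configuration, error } = await resolveConnection(model)
    if (error) throw new Error(error)
    // the hunk throws RequestError(403, ...) here
    if (!configuration) throw new Error(`LLM configuration missing for model ${model}`)
    // 2. checkCancelled() between the two awaits
    if (cancelled()) throw new Error("cancelled")
    // 3. pull with the resolved configuration, not the raw model id
    const { ok } = await pullModel(configuration)
    if (!ok) throw new Error(`failed to pull ${model}`)
    // 4. runTemplate then calls resolveLanguageModel(configuration.provider)
    return configuration
}

prepare("llama3.2:1b", () => false).then((cfg) => console.log(cfg.provider)) // "ollama"
```
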