cleanup list and pull (#1000)
* cleanup list and pull

* added script

* refactor: 🔄 update LLM aliases in llms.json

* refactor: ♻️ streamline model parsing and cleanup llms.json

* fix pull

* fix: πŸ› update alias for 'large' to use latest tag

* style: ✏️ refine system prompt for conciseness

* refactor: ✏️ clarify system prompt behavior

* fix: πŸ› correct string template usage in fetch command

* fix: πŸ› handle error objects with "error" property

* smaller ollama model

* feat: ✨ add JSONL parsing and error handling in pullModel function
pelikhan authored Jan 13, 2025
1 parent db0219c commit db4febd
Showing 17 changed files with 79 additions and 80 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ollama.yml
@@ -34,10 +34,10 @@ jobs:
       - name: start ollama
         run: yarn ollama:start
       - name: run summarize-ollama-phi3
-        run: yarn test:summarize --model ollama:phi3.5:latest --out ./temp/summarize-ollama-phi3
+        run: yarn test:summarize --model ollama:llama3.2:1b --out ./temp/summarize-ollama-phi3
         env:
           OLLAMA_HOST: "http://localhost:11434"
       - name: run convert-ollama-phi3
-        run: yarn cli convert summarize --model ollama:phi3.5:latest "packages/sample/src/rag/*.md" --cache-name sum
+        run: yarn cli convert summarize --model ollama:llama3.2:1b "packages/sample/src/rag/*.md" --cache-name sum
         env:
           OLLAMA_HOST: "http://localhost:11434"
2 changes: 1 addition & 1 deletion docs/src/content/docs/reference/scripts/system.mdx
@@ -104,7 +104,7 @@ Base system prompt

 `````js wrap title="system"
 system({ title: "Base system prompt" })
-$`- You are concise.`
+$`- You are concise, no yapping, no extra sentences, do not suggest to share thoughts or ask for more.`

 `````
1 change: 1 addition & 0 deletions package.json
@@ -75,6 +75,7 @@
     "genai:docify": "node packages/cli/built/genaiscript.cjs run docify",
     "gcm": "node packages/cli/built/genaiscript.cjs run gcm --model github:gpt-4o",
     "prd": "node packages/cli/built/genaiscript.cjs run prd -prd --model github:gpt-4o",
+    "prr": "node packages/cli/built/genaiscript.cjs run prr -prc --model github:gpt-4o",
     "genai": "node packages/cli/built/genaiscript.cjs run",
     "genai:convert": "node packages/cli/built/genaiscript.cjs convert",
     "genai:debug": "yarn compile-debug && node packages/cli/built/genaiscript.cjs run",
24 changes: 17 additions & 7 deletions packages/cli/src/nodehost.ts
@@ -138,19 +138,29 @@ export class NodeHost implements RuntimeHost {
     }

     async pullModel(
-        modelid: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationOptions
     ): Promise<ResponseStatus> {
-        if (this.pulledModels.includes(modelid)) return { ok: true }
+        const { provider, model } = cfg
+        const modelId = `${provider}:${model}`
+        if (this.pulledModels.includes(modelId)) return { ok: true }

-        const { provider } = parseModelIdentifier(modelid)
-        const { pullModel } = await resolveLanguageModel(provider)
+        const { pullModel, listModels } = await resolveLanguageModel(provider)
         if (!pullModel) {
-            this.pulledModels.includes(modelid)
+            this.pulledModels.includes(modelId)
             return { ok: true }
         }
-        const res = await pullModel(modelid, options)
-        if (res.ok) this.pulledModels.push(modelid)
+
+        if (listModels) {
+            const models = await listModels(cfg, options)
+            if (models.find(({ id }) => id === model)) {
+                this.pulledModels.push(modelId)
+                return { ok: true }
+            }
+        }
+
+        const res = await pullModel(cfg, options)
+        if (res.ok) this.pulledModels.push(modelId)
         else if (res.error) logError(res.error)
         return res
     }
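The reworked `pullModel` takes the resolved configuration, caches under a normalized `provider:model` key, and, when the provider exposes `listModels`, probes the local inventory before downloading anything. A minimal standalone sketch of the probe-before-pull pattern, with hypothetical `listLocal` and `pull` callbacks standing in for a provider's `listModels`/`pullModel` hooks:

```ts
// Sketch, not the shipped implementation: check a session cache, then a
// cheap inventory probe, and only download when the model is truly absent.
async function ensureModel(
    model: string,
    listLocal: () => Promise<string[]>, // hypothetical listModels stand-in
    pull: (id: string) => Promise<boolean>, // hypothetical pullModel stand-in
    cache: Set<string> = new Set()
): Promise<boolean> {
    if (cache.has(model)) return true // already verified this session
    const installed = await listLocal() // cheap: no download involved
    if (installed.includes(model)) {
        cache.add(model)
        return true
    }
    const ok = await pull(model) // slow path: fetch the weights
    if (ok) cache.add(model)
    return ok
}
```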
2 changes: 1 addition & 1 deletion packages/core/src/chat.ts
@@ -137,7 +137,7 @@ export type ListModelsFunction = (
 ) => Promise<LanguageModelInfo[]>

 export type PullModelFunction = (
-    modelId: string,
+    cfg: LanguageModelConfiguration,
     options: TraceOptions & CancellationOptions
 ) => Promise<{ ok: boolean; error?: SerializedError }>

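A `PullModelFunction` now receives the full `LanguageModelConfiguration` (provider, model, and base URL among its fields) rather than a raw model id, so implementations no longer have to re-resolve the connection themselves. A minimal conforming implementation, purely illustrative:

```ts
// Illustrative only: a no-op provider hook written against the new signature.
const noopPullModel: PullModelFunction = async (cfg) => {
    const { provider, model } = cfg // connection details arrive pre-resolved
    console.debug(`pull requested for ${provider}:${model}`)
    return { ok: true }
}
```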
1 change: 0 additions & 1 deletion packages/core/src/constants.ts
@@ -195,7 +195,6 @@ export const MODEL_PROVIDERS = Object.freeze<
         prediction?: boolean
         bearerToken?: boolean
         listModels?: boolean
-        pullModel?: boolean
         transcribe?: boolean
         aliases?: Record<string, string>
     }[]
1 change: 1 addition & 0 deletions packages/core/src/error.ts
@@ -22,6 +22,7 @@ export function serializeError(
 export function errorMessage(e: any, defaultValue: string = "error"): string {
     if (e === undefined || e === null) return undefined
     if (typeof e.messsage === "string") return e.message
+    if (typeof e.error === "string") return e.error
     const ser = serializeError(e)
     return ser?.message ?? ser?.name ?? defaultValue
 }
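The added branch lets plain `{ error: "..." }` payloads (the shape Ollama's REST API uses for failures) surface their message directly instead of falling through to `serializeError`. A quick illustration of the three paths:

```ts
errorMessage({ error: "pull model manifest: file does not exist" })
// → "pull model manifest: file does not exist" (new branch)
errorMessage(new Error("boom")) // → "boom" (via serializeError)
errorMessage(null) // → undefined (guard clause)
```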
2 changes: 1 addition & 1 deletion packages/core/src/fetch.ts
@@ -216,7 +216,7 @@ ${Object.entries(headers)
             cmd += `-F ${key}=${value instanceof File ? `... (${prettyBytes(value.size)})` : "" + value}\n`
         })
     } else
-        cmd += `-d 'JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}'
+        cmd += `-d '${JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}'
 `
     if (trace) trace.detailsFenced(`✉️ fetch`, cmd, "bash")
     else logVerbose(cmd)
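The broken line never interpolated the body: the `JSON.stringify(...)` call sat inside the string as literal text. The fix wraps it in `${...}` and rewrites each single quote as `'\''` (close the quoted string, emit an escaped quote, reopen it), the standard way to embed `'` in a single-quoted POSIX shell argument. A standalone check of that escaping:

```ts
// Sketch: reproduce the -d body escaping used when rendering a fetch as curl.
const body = { note: "it's fine" }
const escaped = JSON.stringify(body).replace(/'/g, "'\\''")
console.log(`curl -d '${escaped}'`)
// → curl -d '{"note":"it'\''s fine"}' (parses cleanly in a POSIX shell)
```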
2 changes: 1 addition & 1 deletion packages/core/src/genaisrc/system.mjs
@@ -1,2 +1,2 @@
 system({ title: "Base system prompt" })
-$`- You are concise.`
+$`- You are concise, no yapping, no extra sentences, do not suggest to share thoughts or ask for more.`
2 changes: 1 addition & 1 deletion packages/core/src/host.ts
@@ -141,7 +141,7 @@ export interface RuntimeHost extends Host {
     modelAliases: Readonly<ModelConfigurations>

     pullModel(
-        model: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationOptions
     ): Promise<ResponseStatus>

15 changes: 5 additions & 10 deletions packages/core/src/llms.json
@@ -95,12 +95,10 @@
         "prediction": false,
         "listModels": false,
         "aliases": {
-            "large": "Qwen/Qwen2.5-72B-Instruct",
-            "small": "Qwen/Qwen2.5-Coder-32B-Instruct",
+            "large": "meta-llama/Llama-3.3-70B-Instruct",
+            "small": "microsoft/phi-4",
             "vision": "meta-llama/Llama-3.2-11B-Vision-Instruct",
-            "embeddings": "nomic-ai/nomic-embed-text-v1.5",
-            "reasoning": "Qwen/QwQ-32B-Preview",
-            "reasoning_small": "Qwen/QwQ-32B-Preview"
+            "embeddings": "nomic-ai/nomic-embed-text-v1.5"
         }
     },
     {
@@ -167,21 +165,18 @@
         "detail": "Ollama local model",
         "logitBias": false,
         "openaiCompatibility": "https://github.com/ollama/ollama/blob/main/docs/openai.md",
-        "pullModel": true,
         "prediction": false,
         "aliases": {
-            "large": "phi4",
+            "large": "phi4:latest",
             "small": "llama3.2:3b",
             "embeddings": "nomic-embed-text",
-            "vision": "llama3.2-vision:11b",
-            "reasoning": "qwq:32b"
+            "vision": "llama3.2-vision:11b"
         }
     },
     {
         "id": "lmstudio",
         "detail": "LM Studio local server",
         "prediction": false,
-        "pullModel": true,
         "aliases": {
             "large": "phi-4",
             "small": "llama-3.2-3b-instruct",
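Aliases map tier names to concrete models per provider, so after this change `ollama:large` resolves to `phi4:latest`, and the removed `reasoning` aliases must now be spelled out as explicit model ids. A hypothetical script selecting by tier:

```js
// Hypothetical genaiscript: "ollama:small" resolves to llama3.2:3b via the
// aliases above, so the script never hard-codes a model name.
script({ model: "ollama:small" })
$`Summarize ${env.files} in one sentence.`
```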
12 changes: 3 additions & 9 deletions packages/core/src/lmstudio.ts
@@ -6,15 +6,9 @@ import { OpenAIChatCompletion, OpenAIListModels } from "./openai"
 import { execa } from "execa"
 import { logVerbose, utf8Decode } from "./util"

-const pullModel: PullModelFunction = async (modelId, options) => {
-    const { trace, cancellationToken } = options || {}
-    const { provider, model } = parseModelIdentifier(modelId)
-    const conn = await host.getLanguageModelConfiguration(modelId, {
-        token: true,
-        cancellationToken,
-        trace,
-    })
-    const models = await OpenAIListModels(conn, options)
+const pullModel: PullModelFunction = async (cfg, options) => {
+    const model = cfg.model
+    const models = await OpenAIListModels(cfg, options)
     if (models.find((m) => m.id === model)) return { ok: true }
     logVerbose(`lms get ${model} --yes`)
     const res = await execa({ stdout: ["inherit"] })`lms get ${model} --yes`
49 changes: 22 additions & 27 deletions packages/core/src/ollama.ts
@@ -13,6 +13,7 @@ import {
     LanguageModelConfiguration,
     LanguageModelInfo,
 } from "./server/messages"
+import { JSONLTryParse } from "./jsonl"

 /**
  * Lists available models for the Ollama language model configuration.
@@ -53,39 +54,19 @@ async function listModels(
     )
 }

-const pullModel: PullModelFunction = async (modelId, options) => {
+const pullModel: PullModelFunction = async (cfg, options) => {
     const { trace, cancellationToken } = options || {}
-    const { provider, model } = parseModelIdentifier(modelId)
+    const { provider, model } = cfg
     const fetch = await createFetch({ retries: 0, ...options })
-    const conn = await host.getLanguageModelConfiguration(modelId, {
-        token: true,
-        cancellationToken,
-        trace,
-    })
-    conn.base = conn.base.replace(/\/v1$/i, "")
+    const base = cfg.base.replace(/\/v1$/i, "")
     try {
-        // test if model is present
-        const resTags = await fetch(`${conn.base}/api/tags`, {
-            retries: 0,
-            method: "GET",
-            headers: {
-                "User-Agent": TOOL_ID,
-                "Content-Type": "application/json",
-            },
-        })
-        if (resTags.ok) {
-            const { models }: { models: { model: string }[] } =
-                await resTags.json()
-            if (models.find((m) => m.model === model)) return { ok: true }
-        }
-
         // pull
         logVerbose(`${provider}: pull ${model}`)
-        const resPull = await fetch(`${conn.base}/api/pull`, {
+        const resPull = await fetch(`${base}/api/pull`, {
             method: "POST",
             headers: {
-                "User-Agent": TOOL_ID,
                 "Content-Type": "application/json",
+                "User-Agent": TOOL_ID,
             },
             body: JSON.stringify({ model }),
         })
@@ -94,10 +75,24 @@ const pullModel: PullModelFunction = async (modelId, options) => {
             logVerbose(resPull.statusText)
             return { ok: false, status: resPull.status }
         }
-        0
-        for await (const chunk of iterateBody(resPull, { cancellationToken }))
+        let lastStatus = ""
+        for await (const chunk of iterateBody(resPull, { cancellationToken })) {
+            const cs = JSONLTryParse(chunk) as {
+                status?: string
+                error?: string
+            }[]
+            for (const c of cs) {
+                if (c?.error) {
+                    return {
+                        ok: false,
+                        error: serializeError(c.error),
+                    }
+                }
+            }
             process.stderr.write(".")
+        }
         process.stderr.write("\n")
         logVerbose(`${provider}: pulled ${model}`)
         return { ok: true }
     } catch (e) {
         logError(e)
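Ollama's `/api/pull` streams newline-delimited JSON: progress records such as `{"status":"pulling manifest"}`, or `{"error":"..."}` when the pull fails. Each chunk now goes through a tolerant JSONL parse and the first error record aborts the pull. A standalone sketch of that parsing step, assuming `JSONLTryParse` behaves like a per-line `JSON.parse` that drops malformed lines:

```ts
// Sketch of tolerant JSONL parsing for an Ollama pull stream.
type PullRecord = { status?: string; error?: string }

function parsePullChunk(chunk: string): PullRecord[] {
    return chunk
        .split("\n")
        .filter((line) => line.trim() !== "")
        .flatMap((line) => {
            try {
                return [JSON.parse(line) as PullRecord]
            } catch {
                return [] // tolerate partial lines split across chunks
            }
        })
}

// Usage: the first error record wins; everything else is progress.
const records = parsePullChunk(
    '{"status":"pulling manifest"}\n{"error":"pull model manifest: not found"}'
)
const failure = records.find((r) => r.error)
if (failure) throw new Error(failure.error)
```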
33 changes: 19 additions & 14 deletions packages/core/src/promptrunner.ts
@@ -17,6 +17,7 @@ import { parsePromptParameters } from "./vars"
 import { resolveFileContent } from "./file"
 import { expandTemplate } from "./expander"
 import { resolveLanguageModel } from "./lm"
+import { checkCancelled } from "./cancellation"

 // Asynchronously resolve expansion variables needed for a template
 /**
@@ -186,7 +187,24 @@ export async function runTemplate(
         } satisfies GenerationResult
     }

-    const { ok } = await runtimeHost.pullModel(model, options)
+    // Resolve model connection information
+    const connection = await resolveModelConnectionInfo(
+        { model },
+        { trace, token: true }
+    )
+    if (connection.info.error)
+        throw new Error(errorMessage(connection.info.error))
+    if (!connection.configuration)
+        throw new RequestError(
+            403,
+            `LLM configuration missing for model ${model}`,
+            connection.info
+        )
+    checkCancelled(cancellationToken)
+    const { ok } = await runtimeHost.pullModel(
+        connection.configuration,
+        options
+    )
     if (!ok) {
         trace.renderErrors()
         return {
@@ -208,19 +226,6 @@
         } satisfies GenerationResult
     }

-    // Resolve model connection information
-    const connection = await resolveModelConnectionInfo(
-        { model },
-        { trace, token: true }
-    )
-    if (connection.info.error)
-        throw new Error(errorMessage(connection.info.error))
-    if (!connection.configuration)
-        throw new RequestError(
-            403,
-            `LLM configuration missing for model ${model}`,
-            connection.info
-        )
     const { completer } = await resolveLanguageModel(
         connection.configuration.provider
     )
5 changes: 2 additions & 3 deletions packages/core/src/runpromptcontext.ts
@@ -655,7 +655,7 @@ export function createChatGenerationContext(
         if (info.error) throw new Error(info.error)
         if (!configuration) throw new Error("model configuration not found")
         checkCancelled(cancellationToken)
-        const { ok } = await runtimeHost.pullModel(conn.model, {
+        const { ok } = await runtimeHost.pullModel(configuration, {
             trace: transcriptionTrace,
             cancellationToken,
         })
@@ -755,8 +755,7 @@
             genOptions.model,
             label
         )
-
-        const { ok } = await runtimeHost.pullModel(genOptions.model, {
+        const { ok } = await runtimeHost.pullModel(configuration, {
             trace: runTrace,
             cancellationToken,
         })
2 changes: 1 addition & 1 deletion packages/core/src/testhost.ts
@@ -73,7 +73,7 @@ export class TestHost implements RuntimeHost {
         setRuntimeHost(new TestHost())
     }
     async pullModel(
-        model: string,
+        cfg: LanguageModelConfiguration,
         options?: TraceOptions & CancellationToken
     ): Promise<ResponseStatus> {
         return { ok: true }
2 changes: 1 addition & 1 deletion packages/core/src/vectorsearch.ts
@@ -206,7 +206,7 @@ export async function vectorSearch(
         throw new Error("No configuration found for vector search")

     // Pull the model
-    await runtimeHost.pullModel(info.model, { trace, cancellationToken })
+    await runtimeHost.pullModel(configuration, { trace, cancellationToken })
     const embeddings = new OpenAIEmbeddings(info, configuration, { trace })

     // Create a local document index
